Lex和Yacc符号表的生成和操作

3
我正在尝试使用lex进行词法分析、使用yacc进行语法分析,来构建一个简单的C编译器。我在lex中构建符号表,并用词法分析过程中遇到的所有标识符(仅包括标识符的名称、行号和作用域)来填充它。符号表本身采用链表形式,由一个指针指向链表头。现在,我想在yacc中访问这个符号表,以更新每个标识符的值和数据类型。我该如何在yacc中访问符号表?
我已经在lex中将头指针定义为extern,但没有起到作用。以下是完整的参考代码:
Lex(ngrammar.l)-
%{
    #include<stdio.h>
    #include<string.h>
    #include<stdlib.h>
    #define MAX 1000
    extern int yylval;
    int scope = 0;
    int lineno = 1;
    int paran = 0;
    /* One symbol-table entry; entries form a singly linked list through `next`. */
    typedef struct node{
        int index;          /* running number of the entry; InsertSymbol starts at 1 and adds 1 per new entry */
        int scope;          /* scope level under which the identifier was recorded */
        char symbol[MAX];   /* identifier text as copied from the scanner */
        char lineno[MAX];   /* line numbers kept as text; later occurrences are appended as ", N" */
        char type[MAX];     /* given no content by the code shown; presumably the data type, to be filled in by the parser - confirm */
        char value[MAX];    /* given no content by the code shown; presumably the value, to be filled in by the parser - confirm */
        struct node* next; /* following entry, or NULL for the last one */
    }node;
    /* Head pointer of the symbol-table list, declared extern here. */
    extern node* head;
    /* NOTE(review): this line is an assignment statement at file scope, which C
       does not permit; the compiler treats it as a second, implicitly typed
       declaration of head that clashes with the extern declaration above. */
    head = NULL;
    /* Prototypes of the symbol-table helpers defined further down. */
    node* InsertSymbol(node*,int,char*,int,int);
    void display(node* head);
    /*
     * Record one occurrence of an identifier in the symbol table.
     *
     * head   - first entry of the list, or NULL when the table is empty
     * scope  - current scope level; raised by one when paran > 0
     * symbol - NUL-terminated identifier text
     * lineno - line on which the identifier was seen
     * paran  - current parenthesis depth
     *
     * If an entry with the same (adjusted) scope and the same name exists, the
     * line number is appended to its lineno text as ", N". Otherwise a new entry
     * is appended to the end of the list with index = last index + 1 (1 for the
     * first entry), and its type and value fields are empty strings.
     *
     * Returns the (possibly new) head of the list. If memory cannot be
     * allocated, a message is written to stderr and the list is returned
     * unchanged.
     */
    node* InsertSymbol(node* head, int scope, char* symbol, int lineno, int paran){
        node* cur;
        node* tail = NULL;
        node* entry;

        /* Identifiers seen inside parentheses are filed one scope level deeper. */
        if(paran > 0){
            scope++;
        }

        for(cur = head; cur != NULL; cur = cur->next){
            if((cur->scope == scope) && (strcmp(cur->symbol, symbol) == 0)){
                /* Append in place, bounded by the space left in the field, so a
                   long run of occurrences truncates instead of overflowing. */
                size_t used = strlen(cur->lineno);
                snprintf(cur->lineno + used, sizeof cur->lineno - used, ", %d", lineno);
                return head;
            }
            tail = cur;
        }

        /* calloc zero-fills the entry, so type and value start as empty strings. */
        entry = calloc(1, sizeof *entry);
        if(entry == NULL){
            fprintf(stderr, "InsertSymbol: out of memory\n");
            return head;
        }

        entry->index = (tail != NULL) ? tail->index + 1 : 1;
        entry->scope = scope;
        /* Bounded copies: format straight into the fixed-size fields rather
           than through a small temporary buffer. */
        snprintf(entry->symbol, sizeof entry->symbol, "%s", symbol);
        snprintf(entry->lineno, sizeof entry->lineno, "%d", lineno);
        entry->next = NULL;

        if(tail != NULL){
            tail->next = entry;
        }
        else{
            head = entry;
        }
        return head;
    }
    /*
     * Print the symbol table to stdout: a title, a column header, then one row
     * per entry (index, symbol, scope, line numbers). Prints "nothing" after the
     * header when the list is empty.
     */
    void display(node* head){
        node* entry;

        printf("\t\t\t SYMBOL TABLE \t\t\t\n\n");
        printf("\t Index \t\t Symbol \t Scope \t\t Line Number \n");

        if(head == NULL){
            printf("nothing");
            return;
        }

        for(entry = head; entry != NULL; entry = entry->next){
            printf("\t %d \t\t %s \t\t %d \t\t %s \n",
                   entry->index, entry->symbol, entry->scope, entry->lineno);
        }
    }
%}

alpha [A-Za-z]
digit [0-9]
und [_]
space [ ]
tab [   ]
line [\n]
acc [^"*/"]
str [^\"]

%%
\/\/(.*)[\n]* {
    /* This pattern also consumes the newlines after the comment, so they are
       counted here to keep lineno in step with the input. */
    char *p;
    for(p = yytext; *p != '\0'; p++){ if(*p == '\n'){ lineno++; } }
}
\/\*({acc}*\n)*{acc}*\*\/[\n]* {
    /* Same as above: count every newline inside and after the block comment. */
    char *p;
    for(p = yytext; *p != '\0'; p++){ if(*p == '\n'){ lineno++; } }
}
for {return FOR;}
if {return IF;}
char {return CHAR;}
float {return FLOAT;}
int {return INT;}
continue {return CONTINUE;}
return {return RETURN;}
bool {return BOOL;}
main {return MAIN;}
else {return ELSE;}
printf {return PRINT;}
break {return BREAK;}
TRUE {return BOOLTRUE;}
FALSE {return BOOLFALSE;}
\|\| {return OR;}
\%d|\%c|\%f {return FORMATSPEC;}
\( {paran++; /* update the depth first: code placed after return never runs */ return OPENBRACES;}
\) {paran--; return CLOSEBRACES;}
\{ {scope++; /* update the scope first: code placed after return never runs */ return OPENCURLYBRACES;}
\} {scope--; return CLOSECURLYBRACES;}
[.] {return DOT;}
= {return ASSIGNOP;}
\+\+|\-\- {return UNARYOP;}
\+|\-|\*|\/ {return ARITHOP;}
>|<|>=|<=|!=|== {return RELOP;}
&& {return AND;}
[,] {return COMMA;}
[;] {return SEMICOLON;}
['] {return SINGLEQUOTES;}
["] {return DOUBLEQUOTES;}
["]{str}*["] {return STRING;}
{alpha}({alpha}|{digit}|{und})* {
                                /* Record the identifier before returning the token;
                                   placed after the return, this call was never reached. */
                                head = InsertSymbol(head,scope,yytext,lineno,paran);
                                return IDENTIFIER;}
{digit}+ {return INTCONST;}
({digit}+)\.({digit}+) {return FLOATCONST;}
[\n] {lineno++;}
[\t] {;}
[ ] {;}
. {return yytext[0];}
%%

/* End-of-input hook for the scanner. Always returns 1
   (presumably "no further input", per the flex convention - confirm). */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}

Yacc (ngrammar.y) -

%{
    #include<stdio.h>
    #include<string.h>
    int valid=1;
%}

%token CHAR INT FLOAT BOOL MAIN FOR IF ELSE PRINT BREAK CONTINUE BOOLTRUE BOOLFALSE FORMATSPEC OPENBRACES CLOSEBRACES OPENCURLYBRACES CLOSECURLYBRACES UNARYOP ARITHOP RELOP ASSIGNOP COMMA SEMICOLON SINGLEQUOTES DOUBLEQUOTES UNDERSCORE RETURN AND OR DOT STRING IDENTIFIER INTCONST FLOATCONST

%%
/* Whole program: the tokens INT MAIN ( ) followed by a braced body holding
   exactly one statement and then a return statement. */
start: INT MAIN OPENBRACES CLOSEBRACES OPENCURLYBRACES statement return_statement CLOSECURLYBRACES;
/* A statement is any one of the six statement forms defined below. */
statement: compound_statement | expression_statement | jump_statement | print_statement | for_loop | if_else;
/* for ( declaration condition ; step ) statement - the declaration rule supplies the first semicolon. */
for_loop: FOR OPENBRACES declaration condition_statement SEMICOLON for_expression_statement CLOSEBRACES statement;
/* if ( condition ) statement else statement - the ELSE branch is mandatory in this grammar. */
if_else: IF OPENBRACES condition_statement CLOSEBRACES statement ELSE statement;
/* return followed by an identifier or an integer constant. */
return_statement: RETURN identifier SEMICOLON | RETURN intconstant SEMICOLON;

/* Declarations: an optional type specifier, then a comma-separated list of
   declarators, each optionally initialised with a constant or an identifier. */
declaration: declaration_specifiers init_declarator_list SEMICOLON | init_declarator_list SEMICOLON;
declaration_specifiers: CHAR | INT | FLOAT | BOOL;
init_declarator_list: init_declarator | init_declarator COMMA init_declarator_list;
init_declarator: identifier | identifier ASSIGNOP initializer;
initializer: numconstant | identifier;

/* NOTE(review): compound_statement may be a bare block_item_list, block_item may
   be a statement or empty, and statement may be a compound_statement, so these
   rules derive each other; this looks like a source of parser conflicts - confirm
   against the bison report. block_item_list holds one or two items only. */
compound_statement: OPENCURLYBRACES CLOSECURLYBRACES | OPENCURLYBRACES block_item_list CLOSECURLYBRACES | block_item_list;
block_item_list: block_item block_item | block_item;
block_item: declaration | statement |;
/* Assignment or increment/decrement, terminated by a semicolon. */
expression_statement: identifier ASSIGNOP expression SEMICOLON | identifier UNARYOP SEMICOLON | UNARYOP identifier SEMICOLON;
/* A single binary arithmetic operation; there is no constant-op-identifier form. */
expression: identifier ARITHOP identifier | identifier ARITHOP numconstant | numconstant ARITHOP numconstant;
/* Same shapes as expression_statement but without the trailing semicolon (used as the for-loop step). */
for_expression_statement: identifier ASSIGNOP expression  | identifier UNARYOP | UNARYOP identifier;

/* Relational comparisons, combinable with AND / OR, or a boolean literal. */
condition_statement: identifier RELOP identifier | identifier RELOP numconstant | numconstant RELOP identifier | condition_statement AND condition_statement | condition_statement OR condition_statement | BOOLTRUE | BOOLFALSE;
jump_statement: BREAK SEMICOLON | CONTINUE SEMICOLON;

/* printf with a string constant, or with a quoted format specifier followed by one identifier. */
print_statement: PRINT OPENBRACES stringconstant CLOSEBRACES SEMICOLON | PRINT OPENBRACES DOUBLEQUOTES FORMATSPEC DOUBLEQUOTES COMMA identifier CLOSEBRACES SEMICOLON;

/* Thin wrappers that map terminal tokens to nonterminals. */
identifier: IDENTIFIER;
intconstant: INTCONST;
numconstant: INTCONST | FLOATCONST;
stringconstant: STRING;
%%
#include "lex.yy.c"
/* Error hook: marks the input as invalid via the global flag `valid`
   and prints a notice on stdout. Always returns 0. */
int yyerror()
{
    valid = 0;
    fputs("\nDoes not satisfy the grammar\n", stdout);
    return 0;
}
/*
 * Entry point: parse the file "in.c", report whether it satisfies the grammar,
 * then print the symbol table.
 *
 * Returns 1 if "in.c" cannot be opened, 0 otherwise.
 */
int main()
{
    yyin = fopen("in.c","r");
    if(yyin == NULL)
    {
        /* A flex scanner falls back to stdin when yyin is NULL, so without this
           check a missing input file would silently wait on standard input. */
        perror("in.c");
        return 1;
    }
    yyparse();
    fclose(yyin);
    if(valid)
    {
        printf("\nSatisfies the grammar \n");
    }
    display(head);
    return 0;
}


以下截图显示了警告和执行输出:[图片] [图片](我还没有编写更新值和数据类型的代码,目前只是为了在yacc中显示符号表。)

3
请勿使用截图,因为在移动设备上阅读非常困难,并且无法从中复制以用于解释答案。请将错误信息作为(文本)代码块粘贴到您的问题中。谢谢。 - rici
1个回答

3

C语言程序的顶层只能包含声明和定义,不能包含语句。因此,下面的内容在C语言程序的顶层是不合法的:

extern node* head;
head = NULL;

由于 GCC 仍然允许标准化之前的 C(pre-standard C)中那种带隐式类型的声明(类型默认为 int),因此第二行会被当作一个声明来处理。虽然 GCC 会为此发出警告,但这个警告并没有太大的帮助。

总之,上述代码的解释就好像它本应该是这样的:

extern node* head;
int head = NULL;

由于同一个全局名称不能有两个不同的定义,因此这显然是不合法的。随后GCC继续采用第二个定义,其余的错误便由此级联产生。

这与flex或bison无关,与您的符号表实现也几乎没有关系(我并没有查看该实现)。但这很好地说明了为什么在编写解析器时应当考虑生成良好的错误消息。


还有一些其他问题。您需要为token FOR、yyerror()等提供前向声明。更好的做法是:使用Flex和Bison,为二者各生成一个头文件,然后在.l和.y文件的prologue(第一个%%之前、位于%{和%}之间的代码)中包含这些头文件,并在此处添加yyerror()的前向声明。不要在.y文件中包含lex.yy.c,而是单独编译每个.c文件。此外,您的语法还存在shift/shift和shift/reduce错误。 - kaby76
@kaby76:确实,那段代码存在许多问题。然而,本站的组织理念是一个问题及其答案应该解决一个具体的问题,从而使它们对其他遇到相同问题的程序员有用。这个问题确实很具体,这很好,我试图以这些术语回答它,相信如果OP无法自行解决其他问题,他们会开另一个问题来解决。 - rici

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接