使用Lex和Yacc打印标记

4
我有一个lex文件、一个yacc文件和一个main.cpp文件。
我的main.cpp长这样:
// Driver: parses standard input when no arguments are given, otherwise parses
// each file named on the command line in turn, then exits with status 1 if
// the parser counted any errors.
// NOTE: the snippet is truncated in the post; the closing brace of main is not shown.
int main(int argc, char **argv)
{
    if (argc == 1)
    // No file arguments: read the program from stdin.
    {   int token;  // declared but not used in this version
        curr_filename = "<stdin>";
        yyin = stdin;
        yyparse();
    }
    else
    {
        // One parse per command-line argument.
        for (int i = 1; i < argc; ++i)
        {
            curr_filename = argv[i];
            yyin = std::fopen(argv[i], "r");

            if (yyin)
            {    

                yyparse();  

                std::fclose(yyin);
            }
            else
            {
                // fopen failed: report it and continue with the next argument.
                utility::print_error(argv[i], "cannot be opened");
            }
        }
    }

    // yynerrs is checked once, after all inputs have been processed.
    if (yynerrs > 0)
    {
        std::cerr << "Compilation halted due to lexical or syntax errors.\n";
        exit(1);
    }

这段代码可以正常完成解析。但现在我想打印由 lex 文件生成的标记(token)。因此,我按如下方式对它做了一些修改,改为调用 yylex():
    // Modified driver from the question: tries to print each token by calling
    // yylex() directly before handing control to yyparse().
    // NOTE: the snippet is truncated in the post; the braces opened by the
    // if-branch and by main are not closed here.
    int main(int argc, char **argv)
    {
        if (argc == 1)
        {   int token;
            curr_filename = "<stdin>";

       yyin = stdin;
// Pull tokens straight from the scanner; the loop ends when yylex() returns 0.
     while(token= yylex())
     {
        switch(token){
        // 258 is the code given to CLASS by the %token declaration in the bison file.
        case 258 : 
        std::cout << "class" ;
        // There is no break above, so control falls through from case 258 into default.
        default : 
        std::cout << "token " ;

                 }


            // This call sits inside the while body, so it runs after the first
            // token has already been taken from the scanner by the loop condition.
            yyparse();
        }
//rest of the code same

但是没有任何内容被打印出来。请问该如何将标记打印到标准输出或文件中?以下是 Flex 文件:
%option noyywrap
%option yylineno

%{

#include "flexbison.hpp"
#include "tokentable.hpp"
#include "symboltable.hpp"
#include "y.tab.h"
#include <stdio.h>

// Runs before every rule action: stamps the token location with the current
// value of yylineno (both first_line and last_line get the same value).
#define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno; 

// Size of string_buf in bytes; the STRING rules stop adding characters once
// the stored text reaches MAX_STR_CONST - 1 characters.
static const int MAX_STR_CONST = 1025;

char string_buf[MAX_STR_CONST];  // buffer that accumulates the string constant currently being scanned
char *string_buf_ptr;            // next write position inside string_buf



int num_comment = 0;      // nesting depth of block comments that are currently open
std::size_t curr_lineno = 0;      // line counter incremented by the newline rules of this scanner
bool str_too_long = false;   // set when a string constant overflows string_buf; cleared by the NUL rules in STRING


%}

%x COMMENT
%x LINECOMMENT
%x STRING

DARROW =>

%%

"(*" {
    // block comment opener seen in INITIAL: enter COMMENT and record the depth
    BEGIN(COMMENT);
    num_comment++;
}

"*)" {
    // block comment closer seen in INITIAL: reported when no comment is open
    if (num_comment <= 0) {
        yylval.error_msg = "Unmatched *)";
        return ERROR;
    }
}

<COMMENT>"*)" {
    // closer inside a comment: pop one nesting level
    num_comment--;
    if (num_comment < 0) {
        yylval.error_msg = "Unmatched *)";
        return ERROR;
    }

    // the outermost comment has been closed, resume normal scanning
    if (num_comment == 0) {
        BEGIN(INITIAL);
    }
}

<COMMENT>"(*" {
    // nested opener: push one nesting level
    num_comment++;
}

<COMMENT>[^\n] {
    // eat everything within comments
}

<COMMENT>\n {
    ++curr_lineno;
}

"--"[^\n]* {
    // Line comment. The pattern already consumes everything up to the end of
    // the line, so the scanner stays in INITIAL and the terminating newline is
    // counted by the ordinary newline rule. Not switching to LINECOMMENT means
    // a comment on the last line of an input that has no trailing newline can
    // no longer leave the scanner in that start condition for the next input.
}

<COMMENT><<EOF>> {
    // Unterminated block comment. The depth is reset as well, so that a later
    // input scanned by the same scanner does not inherit the open nesting level.
    num_comment = 0;
    BEGIN(INITIAL);
    yylval.error_msg = "EOF in comment";
    return ERROR;
}

"=>" {
    return DARROW; 
}

(?i:class) {
    // keywords are matched case-insensitively through the (?i:) group
    return CLASS;
}

(?i:else) {
    return ELSE;
}

(?i:in) {
    return IN;
}

(?i:then) {
    return THEN;
}

(?i:fi) {
    return FI;
}

(?i:if) {
    return IF;
}

(?i:inherits) {
    return INHERITS;
}

(?i:let) {
    return LET;
}

(?i:loop) {
    return LOOP;
}

(?i:pool) {
    return POOL;
}

(?i:while) {
    return WHILE;
}

(?i:case) {
    return CASE;
}

(?i:esac) {
    return ESAC;
}

(?i:of) {
    return OF;
}

(?i:new) {
    return NEW;
}

(?i:isvoid) {
    return ISVOID;
}

(?i:not) {
    return NOT;
}

t(?i:rue) {
    // boolean literal: the first letter must be lowercase, the rest is case-insensitive
    yylval.boolean = true;
    return BOOL_CONST;
}

f(?i:alse) {
    // same shape as the rule above, for the false literal
    yylval.boolean = false;
    return BOOL_CONST;
}

[0-9]+ {
    // integer literal: the digit text is stored in the integer table
    yylval.symbol = inttable().add(yytext);
    return INT_CONST;
}

"<=" {
    return LE;
}

"<-" {
    return ASSIGN;
}


[A-Z][a-zA-Z0-9_]* {
    // identifier starting with an uppercase letter: type name
    yylval.symbol = idtable().add(yytext);
    return TYPEID;
}


[a-z][a-zA-Z0-9_]* {
    // identifier starting with a lowercase letter: object name
    yylval.symbol = idtable().add(yytext);
    return OBJECTID;
}

";"|","|"{"|"}"|":"|"("|")"|"+"|"-"|"*"|"/"|"="|"~"|"<"|"."|"@" { 
    // single-character tokens are returned as their own character code
    return *yytext;
}

\n {
    ++curr_lineno;
}

[ \f\r\t\v] {
    // eat whitespace
}

 /*
  *  String constants (C syntax)
  *  Escape sequence \c is accepted for all characters c. Except for 
  *  \n \t \b \f, the result is c.
  *
  */

\" {
    // opening quote: switch to STRING and start from an empty, zero-filled buffer
    BEGIN(STRING);
    string_buf_ptr = string_buf;
    memset(string_buf, 0, MAX_STR_CONST);
}

<STRING>\" {
    // closing quote: store the collected text in the string table and emit the constant
    BEGIN(INITIAL);
    yylval.symbol = stringtable().add(string_buf);
    return STR_CONST;
}

<STRING>\0[^\n]*\" {
    // a NUL byte followed by the rest of the line up to a quote
    BEGIN(INITIAL);
    if (str_too_long) {
        // the NUL was pushed back by the too-long handling below and the error
        // has already been returned, so only the flag is cleared here
        str_too_long = false;
    }
    else {
        yylval.error_msg = "String contains null character";
        return ERROR;
    }
}

<STRING>\0[^"]*\n {
    // a NUL byte followed by text without a quote, up to and including a newline
    if (str_too_long) {
        yyinput(); /* eat quote */
        BEGIN(INITIAL);
        str_too_long = false;
    }
    else {
        // NOTE(review): the pattern ends with a newline, so yytext[yyleng - 1]
        // is that newline here; confirm whether this test was meant to look at
        // the character before it
        if (yytext[yyleng - 1] != '\\') {
            BEGIN(INITIAL);
            yylval.error_msg = "String contains null character";
            return ERROR;
        }
    }
}

<STRING><<EOF>> {
    // input ended while still inside a string constant
    BEGIN(INITIAL);
    yylval.error_msg = "EOF in string constant";
    return ERROR;
}

<STRING>\\ {
    // backslash escape; first make sure there is still room in the buffer
    if (strlen(string_buf) >= MAX_STR_CONST - 1) {
        // push a NUL back into the input so that the NUL rules above consume
        // the remainder of the string, and remember why it is there
        str_too_long = true;
        unput('\0');
        yylval.error_msg = "String constant too long";
        return ERROR;
    }

    // read the escaped character directly from the input
    char ahead = yyinput();
    switch (ahead) {
        case 'b':
            *string_buf_ptr++ = '\b';
            break;
        case 't':
            *string_buf_ptr++ = '\t';
            break;
        case 'n':
            *string_buf_ptr++ = '\n';
            break;
        case 'f':
            *string_buf_ptr++ = '\f';
            break;
        case '\n':
            // escaped line break: count the line and store a newline
            ++curr_lineno;
            *string_buf_ptr++ = '\n';
            break;
        case '\0':
            // put the NUL back so that the NUL rules above see it
            unput(ahead);
            break;
        default:
            // any other escaped character stands for itself
            *string_buf_ptr++ = ahead;
    }
}

<STRING>\n {
    // unescaped newline inside a string constant
    ++curr_lineno;
    BEGIN(INITIAL);
    yylval.error_msg = "Unterminated string constant";
    return ERROR;
}

<STRING>. {
    // ordinary character; same capacity check as in the escape rule
    if (strlen(string_buf) >= MAX_STR_CONST - 1) {
        str_too_long = true;
        unput('\0');
        yylval.error_msg = "String constant too long";
        return ERROR;
    }

    *string_buf_ptr++ = *yytext;
}

. /* error for invalid tokens */ {
    // last rule: any single character not matched by an earlier rule is
    // reported as an error token carrying a message that quotes the character
    yylval.error_msg = std::string(yytext) + " is not a valid character in the current context.";
    return ERROR;
}

%%
bison文件
%{

#include "flexbison.hpp"
#include "symboltable.hpp"
#include "tokentable.hpp"
#include "ast.hpp"

#include <iostream>

// Convenience macro for setting the location of an AST node: calls setloc on
// the node with the first line of the given bison location and the current
// file name.
#define SETLOC(lval,node) (lval)->setloc((node).first_line, curr_filename)

// both defined in main.cpp
extern ProgramPtr ast_root;          // assigned by the `program` rule
extern std::string curr_filename;    // used by SETLOC and by yyerror

// both defined in lexer
extern int yylex();
extern int yylineno;

// Error callback invoked by the generated parser; defined in the epilogue.
void yyerror(char *);        
%}

/*
 * Terminal symbols. Each token is given an explicit numeric code (258..283).
 * Tokens tagged with <symbol> or <boolean> carry a semantic value in the
 * correspondingly named member of the semantic value type; that type is not
 * declared in this file (presumably it comes from one of the included
 * headers -- confirm).
 */
%token CLASS 258 ELSE 259 FI 260 IF 261 IN 262 
%token INHERITS 263 LET 264 LOOP 265 POOL 266 THEN 267 WHILE 268
%token CASE 269 ESAC 270 OF 271 DARROW 272 NEW 273 ISVOID 274
%token <symbol>  STR_CONST 275 INT_CONST 276 
%token <boolean> BOOL_CONST 277
%token <symbol>  TYPEID 278 OBJECTID 279 
%token ASSIGN 280 NOT 281 LE 282 ERROR 283

/*
 * Semantic value member used by each nonterminal. The singular names hold one
 * AST node, the *_list names hold the containers that the rules fill with
 * push_back.
 */
%type <program> program
%type <clazz> class
%type <classes> class_list
%type <attribute> attribute
%type <attributes> attribute_list
%type <method> method
%type <methods> method_list
%type <expression> expression
%type <expression> let_expr 
%type <expressions> expression_list
%type <expressions> method_expr_list
%type <formal> formal
%type <formals> formal_list
%type <branch> case
%type <cases> case_list

/*
 * Operator precedence, listed from lowest to highest (bison gives later
 * declarations higher precedence).
 *
 * Assuming this grammar targets the Cool language, its reference manual gives
 * the order, from highest to lowest, as
 *     .   @   ~   isvoid   * /   + -   <= < =   not   <-
 * with the three comparison operators non-associative and assignment
 * right-associative. The comparisons therefore belong together between NOT
 * and the additive operators; with '<' and LE at the top of the table an
 * input such as `1 + 2 < 3` was grouped as `1 + (2 < 3)`.
 *
 * LET stays lowest so that the body of a let extends as far as possible.
 */
%left LET
%right ASSIGN
%left NOT
%nonassoc LE '<' '='
%left '+' '-'
%left '*' '/' 
%left ISVOID
%left '~'
%left '@'
%left '.'

%%
/* Start symbol: a program is a list of classes; the finished AST is stored in ast_root. */
program : class_list    { @$ = @1; ast_root = std::make_shared<Program>($1); }
;

/* One or more classes. */
class_list : class { $$ = Classes(); $$.push_back($1); }
            | class_list class { $$.push_back($2); }
;

/*
 * A class with or without an INHERITS clause; when the clause is absent the
 * parent name "Object" is used. The error alternative skips to the next ';'.
 */
class : CLASS TYPEID '{' attribute_list method_list '}' ';' { $$ = std::make_shared<Class>($2, idtable().add("Object"), $4, $5); SETLOC($$, @1); }
        | CLASS TYPEID INHERITS TYPEID '{' attribute_list method_list '}' ';' { $$ = std::make_shared<Class>($2, $4, $6, $7); SETLOC($$, @1); }
        | error ';' { yyerrok; } 
;

/* One or more attributes, each followed by ';'. */
attribute_list : attribute ';' { $$ = Attributes(); $$.push_back($1); }
               | attribute_list attribute ';' { $$.push_back($2); }
               | error ';' { yyerrok; }
;

/* name : Type, optionally followed by an initialiser; NoExpr stands in when there is none. */
attribute : OBJECTID ':' TYPEID { $$ = std::make_shared<Attribute>($1, $3, std::make_shared<NoExpr>()); SETLOC($$, @1); }
          | OBJECTID ':' TYPEID ASSIGN expression { $$ = std::make_shared<Attribute>($1, $3, $5); SETLOC($$, @5); }
;

/* One or more methods, each followed by ';'. */
method_list : method ';' { $$ = Methods(); $$.push_back($1); }
            | method_list method ';' { $$.push_back($2); }
            | error ';' { yyerrok; }
;

/* name(formals) : ReturnType { body }; the second alternative has an empty formal list. */
method : OBJECTID '(' formal_list ')' ':' TYPEID '{' expression '}' { $$ = std::make_shared<Method>($1, $6, $3, $8); SETLOC($$, @1); }
       | OBJECTID '(' ')' ':' TYPEID '{' expression '}' { $$ = std::make_shared<Method>($1, $5, Formals(), $7); SETLOC($$, @1); }
;

/* Comma-separated, non-empty list of formal parameters. */
formal_list : formal { $$ = Formals(); $$.push_back($1); }
            | formal_list ',' formal { $$.push_back($3); } 
;

/* A single formal parameter: name : Type. */
formal : OBJECTID ':' TYPEID { $$ = std::make_shared<Formal>($1, $3); SETLOC($$, @1); }
;

/* One or more case branches. */
case_list : case { $$ = Cases(); $$.push_back($1); }
            | case_list case { $$.push_back($2); }
;

/* A single case branch: name : Type => expression ; */
case : OBJECTID ':' TYPEID DARROW expression ';' { $$ = std::make_shared<CaseBranch>($1, $3, $5); SETLOC($$, @5); }
;

/* Comma-separated, non-empty argument list of a dispatch. */
method_expr_list : expression { $$ = Expressions(); $$.push_back($1); }
                    | method_expr_list ',' expression { $$.push_back($3); }
;

/* Body of a block: one or more expressions, each followed by ';'. */
expression_list : expression ';' { $$ = Expressions(); $$.push_back($1); }
                | expression_list expression ';' { $$.push_back($2); }
                | error ';' { yyerrok; }
;

/*
 * Bindings of a let. A list of bindings separated by ',' is built as nested
 * Let nodes: the let_expr for the remaining bindings becomes the body of the
 * current one. The alternatives ending in IN use the precedence of LET.
 */
let_expr : OBJECTID ':' TYPEID IN expression %prec LET { $$ = std::make_shared<Let>($1, $3, std::make_shared<NoExpr>(), $5); SETLOC($$, @5); }
            | OBJECTID ':' TYPEID ASSIGN expression IN expression %prec LET { $$ = std::make_shared<Let>($1, $3, $5, $7); SETLOC($$, @5); }
            | OBJECTID ':' TYPEID ',' let_expr { $$ = std::make_shared<Let>($1, $3, std::make_shared<NoExpr>(), $5); SETLOC($$, @5); }
            | OBJECTID ':' TYPEID ASSIGN expression ',' let_expr { $$ = std::make_shared<Let>($1, $3, $5, $7); SETLOC($$, @4); }
            | error ',' let_expr { yyerrok; }
;


/*
 * Expressions. Each alternative builds the matching AST node and records a
 * source line with SETLOC. A dispatch written without a receiver is built as
 * a dynamic dispatch on an Object node named "self".
 */
expression : OBJECTID ASSIGN expression { $$ = std::make_shared<Assign>($1, $3); SETLOC($$, @3); }
            | expression '.' OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<DynamicDispatch>($1, $3, $5); SETLOC($$, @1); }
            | expression '.' OBJECTID '(' ')' { $$ = std::make_shared<DynamicDispatch>($1, $3, Expressions()); SETLOC($$, @1); }
            | expression '@' TYPEID '.' OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<StaticDispatch>($1, $3, $5, $7); SETLOC($$, @1); }
            | expression '@' TYPEID '.' OBJECTID '(' ')' { $$ = std::make_shared<StaticDispatch>($1, $3, $5, Expressions()); SETLOC($$, @1);}
            | OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<DynamicDispatch>(std::make_shared<Object>(idtable().add("self")), $1, $3); 
                                                  SETLOC($$, @1); } 
            | OBJECTID '(' ')' { $$ = std::make_shared<DynamicDispatch>(std::make_shared<Object>(idtable().add("self")), $1, Expressions()); 
                                 SETLOC($$, @1); } 
            | IF expression THEN expression ELSE expression FI { $$ = std::make_shared<If>($2, $4, $6); SETLOC($$, @2); }
            | WHILE expression LOOP expression POOL { $$ = std::make_shared<While>($2, $4); SETLOC($$, @2); }
            | '{' expression_list '}' { $$ = std::make_shared<Block>($2); SETLOC($$, @2); }
            | LET let_expr { $$ = $2; SETLOC($$, @2); }
            | CASE expression OF case_list ESAC { $$ = std::make_shared<Case>($2, $4); SETLOC($$, @2); }
            | NEW TYPEID { $$ = std::make_shared<New>($2); SETLOC($$, @2); }
            | ISVOID expression { $$ = std::make_shared<IsVoid>($2); SETLOC($$, @2); }
            | expression '+' expression { $$ = std::make_shared<Plus>($1, $3); SETLOC($$, @1); }
            | expression '-' expression { $$ = std::make_shared<Sub>($1, $3); SETLOC($$, @1); }
            | expression '*' expression { $$ = std::make_shared<Mul>($1, $3); SETLOC($$, @1); }
            | expression '/' expression { $$ = std::make_shared<Div>($1, $3); SETLOC($$, @1); }
            | '~' expression { $$ = std::make_shared<Complement>($2); SETLOC($$, @2); }
            | expression '<' expression { $$ = std::make_shared<LessThan>($1, $3); SETLOC($$, @1); }
            | expression LE expression { $$ = std::make_shared<LessThanEqualTo>($1, $3); SETLOC($$, @1); }
            | expression '=' expression { $$ = std::make_shared<EqualTo>($1, $3); SETLOC($$, @1); }
            | NOT expression { $$ = std::make_shared<Not>($2); SETLOC($$, @2); }
            | '(' expression ')' { $$ = $2; SETLOC($$, @2); } 
            | OBJECTID { $$ = std::make_shared<Object>($1); SETLOC($$, @1); }
            | INT_CONST { $$ = std::make_shared<IntConst>($1); SETLOC($$, @1); }
            | STR_CONST { $$ = std::make_shared<StringConst>($1); SETLOC($$, @1); }
            | BOOL_CONST { $$ = std::make_shared<BoolConst>($1); SETLOC($$, @1); } 
;

%%

// Converts a bison token code into a human-readable string, used by yyerror
// for better error reporting.
//
// token: a token code as returned by yylex() (typically yychar).
// Returns the keyword or operator spelling for fixed tokens; for tokens that
// carry a value, a "<KIND> = <value>" string built from the current yylval;
// "EOF" for the end-of-input code 0; otherwise the token code interpreted as
// a single character (this covers the single-character tokens, which the
// lexer returns as their own character code).
std::string convert_token(int token)
{
    switch (token)
    {
        case 0: return "EOF";  // end of input; previously rendered as a NUL character
        case CLASS: return "class";
        case ELSE: return "else";
        case FI: return "fi";
        case IF: return "if";
        case IN: return "in";
        case INHERITS: return "inherits";
        case LET: return "let";
        case LOOP: return "loop";
        case POOL: return "pool";
        case THEN: return "then";
        case WHILE: return "while";
        case CASE: return "case";
        case ESAC: return "esac";
        case OF: return "of";
        case DARROW: return "=>";
        case NEW: return "new";
        case ISVOID: return "isvoid";
        case ASSIGN: return "<-";
        case NOT: return "not";
        case LE: return "<=";
        case STR_CONST: return "STR_CONST = " + yylval.symbol.get_val();
        case INT_CONST: return "INT_CONST = " + yylval.symbol.get_val();
        // Adding a bool to a string literal is pointer arithmetic, not
        // concatenation, so the value is spelled out explicitly instead.
        case BOOL_CONST: return std::string("BOOL_CONST = ") + (yylval.boolean ? "true" : "false");
        case TYPEID: return "TYPEID = " + yylval.symbol.get_val();
        case OBJECTID: return "OBJECTID = " + yylval.symbol.get_val();
        default: return std::string(1, static_cast<char>(token));
    }
}

void yyerror(char *)
{
    if (yylval.error_msg.length() <= 0)
        std::cerr << curr_filename << ":" << yylineno << ": " << "error: " <<  "syntax error near or at character or token '" << convert_token(yychar) << "'\n";
    else
        std::cerr << curr_filename << ":" << yylineno << ": " << "error: " << yylval.error_msg << "\n";
}

@HAL9000 现在看一下。 - T.J.
1个回答

3
我不确定为什么你看不到任何输出,但我没有查看所有的代码。如果你从main调用yylex,那么它将读取并有效地丢弃一个标记。然后当你调用yyparse时,yyparse将自己调用yylex,直到yylex返回0。假设(但不一定)下一次你从main中的while循环调用yylex时,它将再次返回0,并且循环将结束。结果应该是从while循环中打印一个单词,然后是由yyparse产生的任何输出(如果有的话),这可能会发出语法错误信号,因为它从未看到输入的第一个标记。
我怀疑这不是你想要做的,但这并不完全清楚。
如果你想在词法分析时看到标记,请在每个词法动作中插入打印标记的语句。或者告诉flex将扫描函数命名为其他名称,比如yylex_internal,并创建自己的函数yylex(),它调用yylex_internal,然后打印结果再返回它。
如果像看起来那样,你只是出于调试目的才对此感兴趣,那么最好使用 flex 的 -d 命令行选项,它会自动生成调试输出。它的输出格式可能不完全是你想要的,但它更容易启用和撤销 :)
要更改由flex生成的yylex函数的名称,请在.l文件顶部的代码块中插入以下内容:
#define YY_DECL int yylex_internal()

生成的 Flex 文件将扫描函数声明如下:
YY_DECL {
  /* body of function */
}

所以您可以通过定义YY_DECL宏来重命名函数或添加参数,甚至更改返回类型。请参见Flex手册的Generated Scanner部分。

顺便说一下,尽管 bison 允许您手动为所有终结符(terminal symbol)编号,但这通常不被认为是良好的风格。您应该让 bison 自行编号,并通过 #include "y.tab.h"(或您命名的任何 bison 头文件;您可以使用 -o 选项轻松更改名称)将定义包含在源文件中。

能否详细说明一下“告诉Flex将扫描函数命名为其他名称,比如yylex_internal,并创建自己的函数称为yylex(),该函数调用yylex_internal并在返回结果之前打印它。”? - T.J.
我已经插入了在每个词法动作中打印令牌的语句,但仍然没有打印出来。 - T.J.
@T.J.:我对YY_DECL进行了一些详细说明。我建议尝试使用“-d”标志来获取调试输出。这可能会让你更好地了解正在发生的事情。 - rici

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接