Flex / Bison 解析器编译时出现分段错误

4

我正在使用 flex/bison 编写一个解析器(虽然我也可以用 Python 写解析器,但我总是更喜欢经典的方法)。

当我使用以下代码编译时:

gcc -lfl -ly chance.tab.c lex.yy.c -o chance

当我使用文件运行程序时,会得到类似于这样的输出:
Segmentation fault (core dumped)

供大家参考,以下是相关文件: chance.y
/* chance.y, declarations section: C prologue, the semantic-value union,
   token declarations, nonterminal types and the start symbol. */
%{
    #include <stdio.h>
%}

/* Every typed symbol below carries a char* in the `str` member. */
%union {
    char* str;
}

/* Keyword tokens (declared without a value type). */
%token ASSERT BREAK CATCH CLASS CONTINUE DEL EACH ELSE ELSEIF FINALLY FROM
%token FUNC IF LOAD PASS PRINT REPEAT RETURN RUN THROW TRY WHILE UNTIL
%token YIELD AND OR NOT KTRUE KFALSE NONE

/* Punctuation and operator tokens (declared without a value type). */
%token MINUS EXCLAM PERCENT LAND LPAREN RPAREN STAR COMMA DOT SLASH COLON
%token SEMICOLON QUESTION AT LBRACKET BACKSLASH RBRACKET CIRCUMFLEX LBRACE
%token BAR RBRACE TILDE PLUS LTHAN EQUAL GTHAN INTDIV

/* Compound-assignment, comparison, increment/decrement and DBLSTAR tokens. */
%token ADDASS SUBASS MULASS DIVASS INTDASS MODASS ANDASS ORASS LTEQ EQUALS
%token GTEQ INCREMENT DECREMENT DBLSTAR

/* Tokens tagged <str> carry a string value; INDENT/DEDENT/NEWLINE are
   declared without one.
   NOTE(review): the `expr` rule in this file uses NUMBER, TRUE and FALSE,
   none of which is declared by any %token line here (the declared names are
   INTEGER/FLOAT and KTRUE/KFALSE) - confirm which spelling is intended. */
%token<str> NAME STRING INTEGER FLOAT
%token INDENT DEDENT NEWLINE

/* Every nonterminal yields a char* value. */
%type<str> exprs names args kwdspec dfltarg arg arglist exprlist name namelist
%type<str> funcargs parenexpr lstexpr eachspec optargs inheritance addop
%type<str> expr ifs elifs elif elses trys catchs catchx finally suite stmts
%type<str> stmt program

/* Parsing begins at `program`. */
%start program

%%

/* Comma-separated lists (exprs, names, args). The single-element alternative
   forwards $1; the recursive alternative formats "<list> <item>" with
   sprintf, using $$ itself as the destination.
   NOTE(review): throughout this section, no action allocates a buffer for
   $$ before sprintf writes through it - presumably related to the reported
   segmentation fault; confirm. */
exprs: expr                         { $$ = $1; }
|   exprs COMMA expr                { sprintf($$, "%s %s", $1, $3); }
;

names: name                         { $$ = $1; }
|   names COMMA name                { sprintf($$, "%s %s", $1, $3); }
;

args: arg                           { $$ = $1; }
|   args COMMA arg                  { sprintf($$, "%s %s", $1, $3); }
;

/* Argument kind prefix: nothing, '*' or '**'. The value is a string literal. */
kwdspec:                            { $$ = "regular"; }
|   STAR                            { $$ = "list"; }
|   DBLSTAR                         { $$ = "keyword"; }
;

/* Optional default value: the literal "null" when absent, else the expr. */
dfltarg:                            { $$ = "null"; }
|   EQUAL expr                      { $$ = $2; }
;

/* One formal argument: kind, name and default, rendered as "(argument ...)". */
arg: kwdspec name dfltarg
        { sprintf($$, "(argument %s %s %s)", $1, $2, $3); } ;

/* Wrappers that render a list in square brackets or a tagged form. */
arglist: args                       { sprintf($$, "[%s]", $1); } ;
exprlist: exprs                     { sprintf($$, "[%s]", $1); } ;
name: NAME                          { sprintf($$, "(name %s)", $1); } ;
namelist: names                     { sprintf($$, "[%s]", $1); } ;
/* NOTE(review): the assignment in the funcargs action has no terminating
   semicolon. */
funcargs: LPAREN arglist RPAREN     { $$ = $2 } ;
parenexpr: LPAREN exprlist RPAREN   { sprintf($$, "(tuple %s)", $2); } ;
lstexpr: LBRACKET exprlist RBRACKET { sprintf($$, "(list %s)", $2); } ;

/* Block parameter list written between two BAR tokens. */
eachspec: BAR namelist BAR          { sprintf($$, "(each-spec %s)", $2); } ;

/* Optional function argument list; the empty string when absent. */
optargs:                            { $$ = ""; }
|   funcargs                        { $$ = $1; }
;

/* Optional parenthesised expression list after a class name; the empty
   string when absent. */
inheritance:                        { $$ = ""; }
|   parenexpr                       { $$ = $1; }
;

/* Maps each compound-assignment token to the operation name used in the
   "(<op>-assign ...)" output of the stmt rule. */
addop:
    ADDASS                          { $$ = "add"; }
|   SUBASS                          { $$ = "sub"; }
|   MULASS                          { $$ = "mul"; }
|   DIVASS                          { $$ = "div"; }
|   INTDASS                         { $$ = "int-div"; }
|   MODASS                          { $$ = "mod"; }
|   ANDASS                          { $$ = "and"; }
|   ORASS                           { $$ = "or"; }
;

/* Expressions: currently only literals and list expressions.
   NOTE(review): NUMBER, TRUE and FALSE are not declared by any %token line in
   this file (the declarations use INTEGER/FLOAT and KTRUE/KFALSE) - confirm.
   NOTE(review): the sprintf actions write through $$ without allocating a
   buffer for it first. */
expr:       /* NotYetImplemented! */
    NUMBER                          { sprintf($$, "(number %s)", $1); }
|   TRUE                            { $$ = "(true)"; }
|   FALSE                           { $$ = "(false)"; }
|   NONE                            { $$ = "(none)"; }
|   STRING                          { sprintf($$, "(string %s)", $1); }
|   lstexpr                         { $$ = $1; }
;

/* Building blocks for if/elseif/else and try/catch/finally statements, plus
   `suite` (a block) and `stmts` (a statement sequence). Each action renders
   its children into an s-expression-like string.
   NOTE(review): every sprintf below uses $$ as its destination without any
   buffer having been allocated for it in the action. */
ifs: IF expr suite                  { sprintf($$, "(if %s %s)", $2, $3); } ;

/* Zero or more elseif clauses; the empty string when there are none. */
elifs:                              { $$ = ""; }
|   elifs elif                      { sprintf($$, "%s %s", $1, $2); }
;

elif: ELSEIF expr suite             { sprintf($$, "(else-if %s %s)", $2, $3); } ;

/* Optional else clause; the empty string when absent. */
elses:                              { $$ = ""; }
|   ELSE suite                      { sprintf($$, "(else %s)", $2); }
;

trys: TRY suite                     { sprintf($$, "(try %s)", $2); } ;

/* Zero or more catch clauses; the empty string when there are none. */
catchs:                             { $$ = ""; }
| catchs catchx                     { sprintf($$, "%s %s", $1, $2); }
;

catchx: CATCH expr suite            { sprintf($$, "(catch %s %s)", $2, $3); } ;

finally: FINALLY suite              { sprintf($$, "(finally %s)", $2); } ;

/* A block: COLON, a statement sequence, then SEMICOLON. */
suite: COLON stmts SEMICOLON        { sprintf($$, "(block [%s])", $2); } ;

/* Statement sequence: empty, or a sequence followed by NEWLINE and a stmt. */
stmts:                              { $$ = ""; }
|   stmts NEWLINE stmt              { sprintf($$, "%s %s", $1, $3); }
;

/* Statements. Each alternative prints its rendering straight to stdout with
   printf.
   NOTE(review): none of these actions assigns $$, yet the `stmts` rule in
   this file passes a stmt value ($3) to sprintf as a %s argument - confirm
   whether these were meant to build a string like the other rules do. */
stmt:
    ASSERT expr                     { printf("(assert %s)", $2); }
|   BREAK                           { printf("(break)"); }
|   CATCH expr suite                { printf("(catch %s %s)", $2, $3); }
|   CLASS name inheritance suite    { printf("(class %s %s %s)", $2, $3, $4); }
|   CONTINUE                        { printf("(continue)"); }
|   DEL expr                        { printf("(del %s)", $2); }
|   expr DOT EACH eachspec suite    { printf("(each %s %s %s)", $1, $4, $5); }
|   FROM name LOAD namelist         { printf("(from %s %s)", $2, $4); }
|   FUNC name optargs suite         { printf("(func %s %s %s)", $2, $3, $4); }
|   ifs elifs elses                 { printf("(if-block %s %s %s)", $1, $2, $3); }
|   LOAD namelist                   { printf("(load %s)", $2); }
|   PASS                            { printf("(pass)"); }
|   PRINT expr                      { printf("(print %s)", $2); }
|   REPEAT expr suite               { printf("(repeat %s %s)", $2, $3); }
|   RUN expr                        { printf("(run %s)", $2); }
|   THROW expr                      { printf("(throw %s)", $2); }
|   trys catchs elses finally       { printf("(try-block %s %s %s %s)", $1, $2, $3, $4); }
|   WHILE expr suite                { printf("(while %s %s)", $2, $3); }
|   UNTIL expr suite                { printf("(until %s %s)", $2, $3); }
|   YIELD expr                      { printf("(yield %s)", $2); }
|   RETURN expr                     { printf("(return %s)", $2); }
|   expr addop expr                 { printf("(%s-assign %s %s)", $2, $1, $3); }
|   expr INCREMENT                  { printf("(increment %s)", $1); }
|   expr DECREMENT                  { printf("(decrement %s)", $1); }
|   expr                            { printf("(expr-stmt %s)", $1); }
;

/* Start symbol: prints the whole statement sequence wrapped in "(program [...])". */
program: stmts                      { printf("(program [%s])", $1); } ;

chance.l

/* chance.l: scanner definitions and rules. Returns the token codes declared
   in the grammar and stores a strdup'ed copy of the lexeme in yylval.str for
   numbers, names and strings.
   NOTE(review): the header included below is "parser.tab.h", while the build
   command shown with this file compiles chance.tab.c - confirm the header
   name matches what bison generates.
   NOTE(review): strdup is used in the rules but <string.h> is not included
   here - confirm it is declared elsewhere. */
%{
    #include <assert.h>
    #include <stdio.h>

    #include "parser.tab.h"
%}

%option yylineno
%option noyywrap

%%

"assert"    { return ASSERT; }
"break"     { return BREAK; }
"catch"     { return CATCH; }
"class"     { return CLASS; }
"continue"  { return CONTINUE; }
"del"       { return DEL; }
"each"      { return EACH; }
"else"      { return ELSE; }
"elseif"    { return ELSEIF; }
"finally"   { return FINALLY; }
"from"      { return FROM; }
"func"      { return FUNC; }
"if"        { return IF; }
"load"      { return LOAD; }
"pass"      { return PASS; }
"print"     { return PRINT; }
"repeat"    { return REPEAT; }
"return"    { return RETURN; }
"run"       { return RUN; }
"throw"     { return THROW; }
"try"       { return TRY; }
"while"     { return WHILE; }
"until"     { return UNTIL; }
"yield"     { return YIELD; }
"and"       { return AND; }
"or"        { return OR; }
"not"       { return NOT; }
"true"      { return KTRUE; }
"false"     { return KFALSE; }
"none"      { return NONE; }

    /* Punctuation and operators, one token per lexeme. */
    /* NOTE(review): the '@' rule line below begins with a space, unlike */
    /* every other rule; flex treats indented rules-section lines as code */
    /* rather than as a pattern - confirm whether the space is a paste */
    /* artifact. */
-           { return MINUS; }
!           { return EXCLAM; }
%           { return PERCENT; }
&           { return LAND; }
\(          { return LPAREN; }
\)          { return RPAREN; }
\*          { return STAR; }
,           { return COMMA; }
\.          { return DOT; }
\/          { return SLASH; }
:           { return COLON; }
;           { return SEMICOLON; }
\?          { return QUESTION; }
 @          { return AT; }
\[          { return LBRACKET; }
\]          { return RBRACKET; }
\^          { return CIRCUMFLEX; }
\{          { return LBRACE; }
\}          { return RBRACE; }
\|          { return BAR; }
~           { return TILDE; }
\+          { return PLUS; }
\<          { return LTHAN; }
=           { return EQUAL; }
\>          { return GTHAN; }
\/\/        { return INTDIV; }
\+=         { return ADDASS; }
-=          { return SUBASS; }
\*=         { return MULASS; }
\/=         { return DIVASS; }
\/\/=       { return INTDASS; }
%=          { return MODASS; }
&=          { return ANDASS; }
\|=         { return ORASS; }
\<=         { return LTEQ; }
==          { return EQUALS; }
\>=         { return GTEQ; }
\+\+        { return INCREMENT; }
--          { return DECREMENT; }
\*\*        { return DBLSTAR; }

    /* Numbers and identifiers: the lexeme is duplicated with strdup into */
    /* yylval.str. Digits with an optional exponent yield INTEGER; digits, */
    /* a dot, digits and an optional exponent yield FLOAT. */
    /* NOTE(review): the grammar's expr rule refers to NUMBER, which these */
    /* rules never return - confirm. */
[[:digit:]]+([eE][+-]?[[:digit:]]+)?                { yylval.str = strdup(yytext); return INTEGER; }
[[:digit:]]+\.[[:digit:]]+([eE][+-]?[[:digit:]]+)?  { yylval.str = strdup(yytext); return FLOAT; }
[a-zA-Z_][a-zA-Z0-9_]*                              { yylval.str = strdup(yytext); return NAME; }

    /* String literals in double quotes, single quotes or backticks. The */
    /* patterns accept any character other than the delimiter, so no */
    /* escape sequences are recognised; the delimiters stay in the value. */
\"([^\"])*\"    { yylval.str = strdup(yytext); return STRING; }
\'([^\'])*\'    { yylval.str = strdup(yytext); return STRING; }
`([^`])*`       { yylval.str = strdup(yytext); return STRING; }

    /* Literal marker texts for layout tokens. */
"<INDENT>"  { return INDENT; }
"<DEDENT>"  { return DEDENT; }
"<NEWLINE>" { return NEWLINE; }

    /* '#' starts a comment that runs to the end of the line; it is dropped. */
#.*         { }

    /* A space, backslash or tab is skipped. On a newline the action adds */
    /* 0.5 to yylineno. Any other character is reported through yyerror(). */
    /* NOTE(review): yylineno is presumably an int already maintained by */
    /* %option yylineno, so adding 0.5 likely has no effect - confirm. */
    /* NOTE(review): yyerror is called here before any declaration of it */
    /* appears in this file. */
[ \\\t]     {}
\n          { (yylineno) += 0.5; }
.           { yyerror(); }

%%

/*
 * Error reporter: prints the current line number (yylineno) and the text of
 * the current lexeme (yytext) to stdout.
 *
 * NOTE(review): declared to return int but has no return statement.
 * NOTE(review): takes no arguments; bison-generated parsers normally call
 * yyerror with a message string - confirm the signature the parser expects.
 */
int yyerror(void)
{
    printf("Invalid syntax on line %d: '%s'\n", yylineno, yytext);
}

/*
 * Program entry point: runs the parser once, prints a trailing newline and
 * exits with status 0.
 *
 * NOTE(review): the return value of yyparse() is discarded, so the exit
 * status is 0 regardless of how parsing ended.
 */
int main()
{
    yyparse();
    printf("\n");
    return 0;
}

如果以上程序对任何人有用,这里是一些在我的小型编程语言中的示例代码: test.ch
from timer load x

func x(f=0, **k):
    5.each|x|:
        continue;;

class OhHey:
    func __init__:
        print 5;;

while true:
    print x;

[1, 2, 3]
(1, 2, 3)

提前感谢。~~Chance

编辑: 已更新为新的改进版代码(不幸的是,仍然会导致段错误)。

1个回答

4
您的词法分析器从未设置 yylval,因此当您的解析器读取令牌的值时,它会得到随机垃圾。例如,在您的规则中:
expr: NUMBER { sprintf($$, "(number %s)", $1); }

$1 指的是 NUMBER 的记号值,因此会是随机垃圾值。另外,$$ 是规则的输出,因此您在这里传递给 sprintf 的值也将是随机垃圾值(因为您没有先将其设置为某个值)。

编辑

一个“简单”的解决方案是慷慨地使用 strdup/asprintf 为字符串分配内存。例如,在您的 .l 文件中,您可以有以下内容:

[+-]?[0-9]+(\.[0-9]+)?([Ee][+-]?[0-9]+)? { yylval = strdup(yytext); return NUMBER; }

那么您的expr规则应该是:
expr: NUMBER { asprintf(&$$, "(number %s)", $1); free($1); }

当然,问题在于:要找出所有应该调用 free 的位置以避免内存泄漏,可能会很困难。

我在网上了解了flex,并看到了这个小片段:yylval = strdup(yytext)。这是我应该使用的吗? - Sammi De Guzman
@ChanceDeGuzman:是的,在词法分析器中设置yylval会起作用。但在解析器中仍需要将$$设置为某个值。 - Chris Dodd
那我需要同时使用 strdup 和 asprintf 吗?我对 asprintf 和 free 不是很熟悉。 - Sammi De Guzman
1
我不知道经典的 yacc 是否有这个功能,但 GNU bison 有一个 %destructor 指令,可以在内存管理方面使事情变得更容易。 - Jack Kelly

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接