我已经将 Terence Parr 的著作《语言实现模式》中“树语法”模式的代码(
源代码位于 tpdsl-code/walking/tree-grammar 下)从 ANTLR 3 转换为 ANTLR 4,并使用了 Visitor 和“同质 AST”模式。
以下是语法:
VecMath.g4
// Grammar for a small vector-math language: a program is a sequence of
// assignments and print statements over integers, identifiers and
// bracketed vector literals.
grammar VecMath;

// VEC is an imaginary token type: the lexer never produces it, but declaring
// it here makes the generated parser expose a VEC constant that tree-building
// code can use to label vector-literal nodes.
tokens { VEC }

prog: stat+ ;

stat: ID assign='=' expr     #StatAssign
    | print='print' expr     #StatPrint
    ;

// Alternatives are listed from highest to lowest precedence, so '*' and '.'
// bind tighter than '+'.
expr: left=expr op=('*'|'.') right=expr   #ExprMult
    | left=expr op='+' right=expr         #ExprAdd
    | '[' expr (',' expr)* ']'            #ExprVec
    | INT                                 #ExprInt
    | ID                                  #ExprId
    ;

ID  : 'a'..'z'+ ;
INT : '0'..'9'+ ;
WS  : (' '|'\r'|'\n')+ -> skip ;
访问者
package walking.v4.vecmath_ast.impl;
import org.antlr.v4.runtime.CommonToken;
import walking.v4.vecmath_ast.antlr.VecMathBaseVisitor;
import walking.v4.vecmath_ast.antlr.VecMathParser.ExprAddContext;
import walking.v4.vecmath_ast.antlr.VecMathParser.ExprContext;
import walking.v4.vecmath_ast.antlr.VecMathParser.ExprIdContext;
import walking.v4.vecmath_ast.antlr.VecMathParser.ExprIntContext;
import walking.v4.vecmath_ast.antlr.VecMathParser.ExprMultContext;
import walking.v4.vecmath_ast.antlr.VecMathParser.ExprVecContext;
import walking.v4.vecmath_ast.antlr.VecMathParser.ProgContext;
import walking.v4.vecmath_ast.antlr.VecMathParser.StatAssignContext;
import walking.v4.vecmath_ast.antlr.VecMathParser.StatContext;
import walking.v4.vecmath_ast.antlr.VecMathParser.StatPrintContext;
import walking.v4.vecmath_ast.antlr.VecMathParser;
/**
 * Parse-tree visitor that converts a VecMath parse tree into a homogeneous
 * {@link AST}: every node is an {@code AST} whose kind is carried by its token.
 */
public class VecMathBuildASTVisitor extends VecMathBaseVisitor<AST> {

    /** Gathers the tree of every statement under a token-less (nil) root. */
    @Override
    public AST visitProg(ProgContext ctx) {
        AST root = new AST();
        for (StatContext statement : ctx.stat()) {
            AST statementTree = visit(statement);
            root.addChild(statementTree);
        }
        return root;
    }

    /** Produces an '=' node with the target ID first and the value expression second. */
    @Override
    public AST visitStatAssign(StatAssignContext ctx) {
        AST assignment = new AST(ctx.assign);
        AST target = new AST(ctx.ID().getSymbol());
        assignment.addChild(target);
        AST value = visit(ctx.expr());
        assignment.addChild(value);
        return assignment;
    }

    /** Produces a 'print' node whose only child is the printed expression. */
    @Override
    public AST visitStatPrint(StatPrintContext ctx) {
        AST printNode = new AST(ctx.print);
        AST argument = visit(ctx.expr());
        printNode.addChild(argument);
        return printNode;
    }

    /** Produces an operator node for the ExprMult alternative ('*' or '.'). */
    @Override
    public AST visitExprMult(ExprMultContext ctx) {
        return binary(new AST(ctx.op), ctx.left, ctx.right);
    }

    /** Produces an operator node for the ExprAdd alternative ('+'). */
    @Override
    public AST visitExprAdd(ExprAddContext ctx) {
        return binary(new AST(ctx.op), ctx.left, ctx.right);
    }

    /** Produces a VEC node (imaginary token) holding one child per element expression. */
    @Override
    public AST visitExprVec(ExprVecContext ctx) {
        CommonToken vecToken = new CommonToken(VecMathParser.VEC, "VEC");
        AST vector = new AST(vecToken);
        for (ExprContext element : ctx.expr()) {
            AST elementTree = visit(element);
            vector.addChild(elementTree);
        }
        return vector;
    }

    /** Produces a leaf node wrapping the ID token. */
    @Override
    public AST visitExprId(ExprIdContext ctx) {
        return new AST(ctx.ID().getSymbol());
    }

    /** Produces a leaf node wrapping the INT token. */
    @Override
    public AST visitExprInt(ExprIntContext ctx) {
        return new AST(ctx.INT().getSymbol());
    }

    /** Visits both operands in left-to-right order and attaches them to the operator node. */
    private AST binary(AST operator, ExprContext lhs, ExprContext rhs) {
        operator.addChild(visit(lhs));
        operator.addChild(visit(rhs));
        return operator;
    }
}
抽象语法树(AST)
与 tpdsl-code/IR/Homo 中的原始版本相比,本质上只是调整了 Token 的类型(改用 ANTLR 4 运行时的 Token)。
package walking.v4.vecmath_ast.impl;
import org.antlr.v4.runtime.CommonToken;
import org.antlr.v4.runtime.Token;
import walking.v4.vecmath_ast.antlr.VecMathParser;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Homogeneous abstract syntax tree node: every node has the same class and is
 * distinguished only by the type of the token it wraps. A node without a token
 * is a "nil" node, used as an anonymous root that merely groups children.
 */
public class AST {
    /** Token this node was built from; {@code null} for a nil node. */
    Token token;
    /** Child nodes in insertion order; stays {@code null} until the first child is added. */
    List<AST> children;

    /** Creates a nil node (no token). */
    public AST() { }

    /** Creates a node wrapping the given token. */
    public AST(Token token) { this.token = token; }

    /** Creates a node wrapping a fresh token that has only a type. */
    public AST(int tokenType) { this.token = new CommonToken(tokenType); }

    /**
     * Returns the token type of this node.
     *
     * @return the wrapped token's type, or {@link Token#INVALID_TYPE} for a nil node
     */
    public int getNodeType() {
        return token == null ? Token.INVALID_TYPE : token.getType();
    }

    /** Appends {@code t} as the last child, creating the child list on first use. */
    public void addChild(AST t) {
        if (children == null) {
            children = new ArrayList<>();
        }
        children.add(t);
    }

    /**
     * Returns the children of this node.
     *
     * @return the live child list, or an immutable empty list when no child
     *         has been added yet (never {@code null})
     */
    public List<AST> getChildren() {
        return children == null ? Collections.<AST>emptyList() : children;
    }

    /** Returns {@code true} when this node carries no token. */
    public boolean isNil() { return token == null; }

    /**
     * Renders this single node as {@code <NAME, 'text'>}, or {@code nil} for a
     * nil node. NAME is the token type's symbolic name, falling back to the
     * token text when the vocabulary has no symbolic name for the type.
     */
    @Override
    public String toString() {
        // Test for nil before touching the token; otherwise a nil node would
        // throw instead of printing "nil".
        if (isNil()) {
            return "nil";
        }
        String typeName = VecMathParser.VOCABULARY.getSymbolicName(token.getType());
        if (typeName == null) {
            typeName = token.getText();
        }
        return "<" + typeName + ", '" + token.getText() + "'>";
    }

    /**
     * Renders the subtree rooted at this node in LISP-like form:
     * {@code (root child1 child2 ...)}. A leaf prints as {@link #toString()};
     * a nil root prints only its children, separated by single spaces.
     */
    public String toStringTree() {
        if (children == null || children.isEmpty()) {
            return toString();
        }
        StringBuilder buf = new StringBuilder();
        if (!isNil()) {
            buf.append('(').append(this).append(' ');
        }
        boolean first = true;
        for (AST child : children) {
            if (!first) {
                buf.append(' ');
            }
            buf.append(child.toStringTree());
            first = false;
        }
        if (!isNil()) {
            buf.append(')');
        }
        return buf.toString();
    }
}
测试类
package walking.v4.vecmath_ast;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import walking.v4.vecmath_ast.antlr.VecMathLexer;
import walking.v4.vecmath_ast.antlr.VecMathParser;
import walking.v4.vecmath_ast.impl.AST;
import walking.v4.vecmath_ast.impl.VecMathBuildASTVisitor;
/**
 * Command-line driver: parses a VecMath source file, builds its AST and prints
 * one tree per top-level statement.
 */
public class Test {
    /**
     * Runs the parse/build/print pipeline.
     *
     * @param args {@code args[0]} is the path of the VecMath source file to read
     * @throws Exception if the input file cannot be read
     */
    public static void main(String[] args) throws Exception {
        // Fail with a usage hint rather than an ArrayIndexOutOfBoundsException.
        if (args.length < 1) {
            System.err.println("usage: Test <input-file>");
            System.exit(1);
        }

        CharStream input = CharStreams.fromFileName(args[0]);
        VecMathLexer lexer = new VecMathLexer(input);
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        VecMathParser parser = new VecMathParser(tokens);
        ParseTree tree = parser.prog();

        AST root = new VecMathBuildASTVisitor().visit(tree);
        // The root has no child list when no statement tree was attached.
        if (root.getChildren() != null) {
            for (AST ast : root.getChildren()) {
                System.out.println(ast.toStringTree());
            }
        }
    }
}
测试输入
x = 3 + 4
y = 3 + 4 + 5
a = 3 * 4
a = 3 * 4 * 5
c = 3 * 4 + 5
print x * [2, 3, 4]
print x * [2+5, 3, 4]
产生:
(<=, '='> <ID, 'x'> (<+, '+'> <INT, '3'> <INT, '4'>))
(<=, '='> <ID, 'y'> (<+, '+'> (<+, '+'> <INT, '3'> <INT, '4'>) <INT, '5'>))
(<=, '='> <ID, 'a'> (<*, '*'> <INT, '3'> <INT, '4'>))
(<=, '='> <ID, 'a'> (<*, '*'> (<*, '*'> <INT, '3'> <INT, '4'>) <INT, '5'>))
(<=, '='> <ID, 'c'> (<+, '+'> (<*, '*'> <INT, '3'> <INT, '4'>) <INT, '5'>))
(<print, 'print'> (<*, '*'> <ID, 'x'> (<VEC, 'VEC'> <INT, '2'> <INT, '3'> <INT, '4'>)))
(<print, 'print'> (<*, '*'> <ID, 'x'> (<VEC, 'VEC'> (<+, '+'> <INT, '2'> <INT, '5'>) <INT, '3'> <INT, '4'>)))