ANTLR: Heterogeneous AST and imaginary tokens -
It's my first question here :)
I'd like to build a heterogeneous AST with ANTLR for a simple grammar. There are different interfaces that represent the AST nodes, e.g. `IInfixExp`, `IVariableDecl`. ANTLR comes with `CommonTree`, which holds all the information about the source code (line number, character position, etc.), and I want to use it as the base class for the implementations of the AST interfaces (`IInfixExp`, ...).
In order to get an AST as output, with `CommonTree` as the node type, I set:
// Grammar-level options (ANTLR 3). Option names and values are case-sensitive.
options {
  language = Java;
  k = 1;
  output = AST;              // have the parser build an AST
  ASTLabelType = CommonTree; // node type used for the generated tree labels
}
The `IInfixExp` interface is:
package toylanguage;

/**
 * AST node for an infix (binary) expression: an operator applied to a
 * left-hand and a right-hand operand expression.
 */
public interface IInfixExp extends IExpression {

  /** The binary operators an infix expression can carry. */
  public enum Operator {
    PLUS, MINUS, TIMES, DIVIDE;
  }

  /** @return the operator of this expression */
  public Operator getOperator();

  /** @return the operand on the left of the operator */
  public IExpression getLeftHandSide();

  /** @return the operand on the right of the operator */
  public IExpression getRightHandSide();
}
and the implementation `InfixExp` is:
package toylanguage; import org.antlr.runtime.token; import org.antlr.runtime.tree.commontree; // iinitializable has void initialize() public class infixexp extends commontree implements iinfixexp, iinitializable { private operator operator; private iexpression lefthandside; private iexpression righthandside; infixexp(token token) { super(token); } @override public operator getoperator() { return operator; } @override public iexpression getlefthandside() { return lefthandside; } @override public iexpression getrighthandside() { return righthandside; } // iinitializable. called toytreeadaptor.rulepostprocessing @override public void initialize() { // term ((plus|minus) term)+ // atom ((times|diide) atom)+ // exact 2 children assert getchildcount() == 2; // left , right child iexpressions assert getchild(0) instanceof iexpression && getchild(1) instanceof iexpression; // operator switch (token.gettype()) { case toylanguageparser.plus: operator = operator.plus; break; case toylanguageparser.minus: operator = operator.minus; break; case toylanguageparser.times: operator = operator.times; break; case toylanguageparser.divide: operator = operator.divide; break; default: assert false; } // left , right operands lefthandside = (iexpression) getchild(0); righthandside = (iexpression) getchild(1); } }
the corresponding rules are:
// Each operator token becomes the subtree root (^) and is created as an
// InfixExp node via the <InfixExp> node-type option.
exp // e.g. a+b
  : term ((PLUS<InfixExp>^ | MINUS<InfixExp>^) term)*
  ;

term // e.g. a*b
  : atom ((TIMES<InfixExp>^ | DIVIDE<InfixExp>^) atom)*
  ;
This works fine, because PLUS, MINUS etc. are "real" tokens.
But now comes the imaginary token:
// Imaginary token: it never appears in the input and exists only to label a tree root.
tokens {
  PROGRAM;
}
the corresponding rule is:
// Rewrites the matched declarations and expression into a tree whose root is
// the imaginary PROGRAM token, instantiated as a Program node.
program // e.g. var a, b; a + b
  : vardecl* exp
    -> ^(PROGRAM<Program> vardecl* exp)
  ;
With this, ANTLR doesn't create a tree with PROGRAM as the root node.
In the parser, the following code creates the `Program` instance:
// Generated parser code: the Program node is built from the token TYPE (an int), not from a Token.
root_1 = (CommonTree)adaptor.becomeRoot(new Program(PROGRAM), root_1);
Unlike with `InfixExp`, it is not the `Program(Token)` constructor that is invoked, but `Program(int)`.
program is:
package toylanguage;

import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTree;

/**
 * {@link IProgram} implementation that is itself an ANTLR tree node.
 *
 * <p>Its children are the variable declarations followed by one expression;
 * the typed fields are filled in by {@link #initialize()}.
 */
class Program extends CommonTree implements IProgram, IInitializable {
  private final LinkedList<IVariableDecl> variableDeclarations =
      new LinkedList<IVariableDecl>();
  private IExpression expression = null;

  /**
   * Creates the node around an existing token.
   *
   * @param token the token this node wraps
   */
  Program(Token token) {
    super(token);
  }

  /**
   * Constructor the generated parser calls for the imaginary PROGRAM token
   * ({@code new Program(PROGRAM)}). As written it ignores the token type and
   * creates a node without a token.
   *
   * @param tokenType the token type passed by the generated parser
   */
  public Program(int tokenType) {
    // What to do?
    super();
  }

  @Override
  public List<IVariableDecl> getVariableDeclarations() {
    // don't allow callers to change the list
    return Collections.unmodifiableList(variableDeclarations);
  }

  @Override
  public IExpression getExpression() {
    return expression;
  }

  /**
   * Collects the leading variable-declaration children and the trailing
   * expression child into the typed fields.
   */
  @Override
  public void initialize() {
    // program: vardecl* exp;

    // at least one child
    assert getChildCount() > 0;
    // the last one is an IExpression
    assert getChild(getChildCount() - 1) instanceof IExpression;

    // iterate over vardecl*
    int i = 0;
    while (getChild(i) instanceof IVariableDecl) {
      variableDeclarations.add((IVariableDecl) getChild(i));
      i++;
    }

    // exp
    expression = (IExpression) getChild(i);
  }
}
You can see the constructor:
/** Excerpt of the constructor in question: the token type is ignored, so the node gets no token. */
public Program(int tokenType) {
  // What to do?
  super();
}
As a result, with `super()` the `CommonTree` is built without a token. `CommonTreeAdaptor.rulePostProcessing` then sees a flat list, not a tree with a token as root.
my treeadaptor looks like:
package toylanguage; import org.antlr.runtime.tree.commontreeadaptor; public class toytreeadaptor extends commontreeadaptor { public object rulepostprocessing(object root) { object result = super.rulepostprocessing(root); // check if needs initialising if (result instanceof iinitializable) { iinitializable initializable = (iinitializable) result; initializable.initialize(); } return result; }; }
And the test I use:
package toylanguage; import org.antlr.runtime.antlrstringstream; import org.antlr.runtime.commontokenstream; import org.antlr.runtime.recognitionexception; import org.antlr.runtime.tokenstream; import org.antlr.runtime.tree.commontree; import toylanguage.toylanguageparser.program_return; public class processor { public static void main(string[] args) { string input = "var a, b; + b + 123"; // sample input antlrstringstream stream = new antlrstringstream(input); toylanguagelexer lexer = new toylanguagelexer(stream); tokenstream tokens = new commontokenstream(lexer); toylanguageparser parser = new toylanguageparser(tokens); toytreeadaptor treeadaptor = new toytreeadaptor(); parser.settreeadaptor(treeadaptor); try { // test with: var a, b; + b program_return program = parser.program(); commontree root = program.tree; // prints 'a b (+ b)' system.out.println(root.tostringtree()); // (+ b), third child of root commontree third = (commontree) root.getchild(2); // prints '(+ b)' system.out.println(third.tostringtree()); // prints 'true' system.out.println(third instanceof iinfixexp); // prints 'false' system.out.println(root instanceof iprogram); } catch (recognitionexception e) { e.printstacktrace(); } } }
For completeness, here is the full grammar:
// Combined lexer/parser grammar (ANTLR 3) for the toy language:
// variable declarations followed by one arithmetic expression.
grammar ToyLanguage;

options {
  language = Java;
  k = 1;
  output = AST;
  ASTLabelType = CommonTree;
}

tokens {
  PROGRAM; // imaginary token used only as a tree root
}

@header {
  package toylanguage;
}

@lexer::header {
  package toylanguage;
}

program // e.g. var a, b; a + b
  : vardecl* exp
    -> ^(PROGRAM<Program> vardecl* exp)
  ;

vardecl // e.g. var a, b;
  : 'var'! ID<VariableDecl> (','! ID<VariableDecl>)* ';'!
  ;

exp // e.g. a+b
  : term ((PLUS<InfixExp>^ | MINUS<InfixExp>^) term)*
  ;

term // e.g. a*b
  : atom ((TIMES<InfixExp>^ | DIVIDE<InfixExp>^) atom)*
  ;

atom
  : INT<IntegerLiteralExp> // e.g. 123
  | ID<VariableExp>        // e.g. a
  | '(' exp ')' -> exp     // e.g. (a+b)
  ;

INT    : ('0'..'9')+ ;
ID     : ('a'..'z')+ ;
PLUS   : '+' ;
MINUS  : '-' ;
TIMES  : '*' ;
DIVIDE : '/' ;

WS : ('\t' | '\n' | '\r' | ' ')+ { $channel = HIDDEN; } ;
OK, the final question is: how do I get from
// Rewrites the matched declarations and expression into a tree whose root is
// the imaginary PROGRAM token, instantiated as a Program node.
program // e.g. var a, b; a + b
  : vardecl* exp
    -> ^(PROGRAM<Program> vardecl* exp)
  ;
a tree with PROGRAM as root
^(PROGRAM vardecl* exp)
and not a flat list with
(vardecl* exp) ?
(Sorry for the numerous code fragments.)
ciao vertex
Try creating the following constructor:
/**
 * Wraps the token type in a real token so the node is created with a token.
 *
 * @param tokenType the token type passed by the generated parser
 */
public Program(int tokenType) {
  super(new CommonToken(tokenType, "PROGRAM"));
}
Comments
Post a Comment