Use of org.antlr.v4.runtime.Token in project antlr4 by antlr.
The class GrammarParserInterpreter, method getLookaheadParseTrees.
/** Return a list of parse trees, one for each alternative in a decision
 *  given the same input.
 *
 *  Very similar to {@link #getAllPossibleParseTrees} except
 *  that it re-parses the input for every alternative in a decision,
 *  not just the ambiguous ones (there is no alts parameter here).
 *  This method also tries to reduce the size of the parse trees
 *  by stripping away children of the tree that are completely out of range
 *  of startIndex..stopIndex. Also, because errors are expected, we
 *  use a specialized error handler that more or less bails out
 *  but that also consumes at least the first erroneous token. This
 *  ensures that an error node will be in the parse tree for display.
 *
 *  NOTES:
 *  We must parse the entire input now with decision overrides.
 *  We cannot parse a subset because it could be that a decision
 *  above our decision of interest needs to read way past
 *  lookaheadInfo.stopIndex. It seems like there is no escaping
 *  the use of a full and complete token stream if we are
 *  resetting to token index 0 and re-parsing from the start symbol.
 *  It's not easy to restart parsing somewhere in the middle like a
 *  continuation because our call stack does not match the
 *  tree stack, due to left-recursive rule rewriting. grrrr!
 *
 *  @since 4.5.1
 */
public static List<ParserRuleContext> getLookaheadParseTrees(Grammar g,
                                                             ParserInterpreter originalParser,
                                                             TokenStream tokens,
                                                             int startRuleIndex,
                                                             int decision,
                                                             int startIndex,
                                                             int stopIndex) {
    List<ParserRuleContext> trees = new ArrayList<ParserRuleContext>();
    // Create a new parser interpreter to parse the ambiguous subphrase
    ParserInterpreter parser = deriveTempParserInterpreter(g, originalParser, tokens);
    DecisionState decisionState = originalParser.getATN().decisionToState.get(decision);

    for (int alt = 1; alt <= decisionState.getTransitions().length; alt++) {
        // re-parse the entire input for every alternative
        // (we don't have to redo the first one, as it's already been parsed,
        //  but we do it again for simplicity, using this temp parser.)
        GrammarParserInterpreter.BailButConsumeErrorStrategy errorHandler =
            new GrammarParserInterpreter.BailButConsumeErrorStrategy();
        parser.setErrorHandler(errorHandler);
        parser.reset();
        parser.addDecisionOverride(decision, startIndex, alt);
        ParserRuleContext tt = parser.parse(startRuleIndex);

        int stopTreeAt = stopIndex;
        if (errorHandler.firstErrorTokenIndex >= 0) {
            stopTreeAt = errorHandler.firstErrorTokenIndex; // cut off the rest at the first error
        }
        Interval overallRange = tt.getSourceInterval();
        if (stopTreeAt > overallRange.b) {
            // If we try to look beyond the range of the tree, stopTreeAt must be EOF,
            // for which there is no EOF ref in the grammar. That means the tree
            // will have no node for stopTreeAt; limit to overallRange.b.
            stopTreeAt = overallRange.b;
        }
        ParserRuleContext subtree = Trees.getRootOfSubtreeEnclosingRegion(tt, startIndex, stopTreeAt);
        // Use the higher of the overridden decision tree or the tree enclosing all tokens
        if (Trees.isAncestorOf(parser.getOverrideDecisionRoot(), subtree)) {
            subtree = parser.getOverrideDecisionRoot();
        }
        Trees.stripChildrenOutOfRange(subtree, parser.getOverrideDecisionRoot(), startIndex, stopTreeAt);
        trees.add(subtree);
    }
    return trees;
}
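Not part of the ANTLR sources, but to make the calling convention concrete, here is a minimal sketch of how a tool might drive this method. The Expr grammar, the decision number 0, and the token range 0..4 (covering "1+2*3") are made-up illustrative values; a real IDE plugin would obtain them from ambiguity or lookahead profiling events.

import java.util.List;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.LexerInterpreter;
import org.antlr.v4.runtime.ParserInterpreter;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.GrammarParserInterpreter;

public class LookaheadTreesSketch {
    public static void main(String[] args) throws Exception {
        Grammar g = new Grammar(
            "grammar Expr;\n" +
            "s : e ';' ;\n" +
            "e : e '*' e | e '+' e | INT ;\n" +
            "INT : [0-9]+ ;\n");
        LexerInterpreter lexEngine = g.createLexerInterpreter(new ANTLRInputStream("1+2*3;"));
        CommonTokenStream tokens = new CommonTokenStream(lexEngine);
        ParserInterpreter parser = g.createParserInterpreter(tokens);

        // Hypothetical decision/token-range values, one re-parse per alternative.
        List<ParserRuleContext> trees = GrammarParserInterpreter.getLookaheadParseTrees(
                g, parser, tokens, g.getRule("s").index, 0, 0, 4);
        for (ParserRuleContext t : trees) {
            System.out.println(t.toStringTree(parser)); // one tree per alternative
        }
    }
}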
Use of org.antlr.v4.runtime.Token in project antlr4 by antlr.
The class GrammarTransformPipeline, method extractImplicitLexer.
/** Build lexer grammar from combined grammar that looks like:
 *
 *  (COMBINED_GRAMMAR A
 *      (tokens { X (= Y 'y'))
 *      (OPTIONS (= x 'y'))
 *      (@ members {foo})
 *      (@ lexer header {package jj;})
 *      (RULES (RULE .+)))
 *
 *  Move rules and actions to the new tree; don't dup. Split the AST apart.
 *  We'll have this Grammar share token symbols later; don't generate
 *  a tokenVocab or tokens{} section. Copy over named actions.
 *
 *  Side-effects: it removes children from the GRAMMAR and RULES nodes
 *  in the combined AST. Anything cut out is dup'd before
 *  adding to the lexer to avoid "who's ur daddy" issues.
 */
public GrammarRootAST extractImplicitLexer(Grammar combinedGrammar) {
    GrammarRootAST combinedAST = combinedGrammar.ast;
    //tool.log("grammar", "before="+combinedAST.toStringTree());
    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.token.getInputStream());
    GrammarAST[] elements = combinedAST.getChildren().toArray(new GrammarAST[0]);

    // MAKE A GRAMMAR ROOT and ID
    String lexerName = combinedAST.getChild(0).getText() + "Lexer";
    GrammarRootAST lexerAST =
        new GrammarRootAST(new CommonToken(ANTLRParser.GRAMMAR, "LEXER_GRAMMAR"),
                           combinedGrammar.ast.tokenStream);
    lexerAST.grammarType = ANTLRParser.LEXER;
    lexerAST.token.setInputStream(combinedAST.token.getInputStream());
    lexerAST.addChild((GrammarAST) adaptor.create(ANTLRParser.ID, lexerName));

    // COPY OPTIONS
    GrammarAST optionsRoot = (GrammarAST) combinedAST.getFirstChildWithType(ANTLRParser.OPTIONS);
    if (optionsRoot != null && optionsRoot.getChildCount() != 0) {
        GrammarAST lexerOptionsRoot = (GrammarAST) adaptor.dupNode(optionsRoot);
        lexerAST.addChild(lexerOptionsRoot);
        GrammarAST[] options = optionsRoot.getChildren().toArray(new GrammarAST[0]);
        for (GrammarAST o : options) {
            String optionName = o.getChild(0).getText();
            if (Grammar.lexerOptions.contains(optionName) &&
                !Grammar.doNotCopyOptionsToLexer.contains(optionName)) {
                GrammarAST optionTree = (GrammarAST) adaptor.dupTree(o);
                lexerOptionsRoot.addChild(optionTree);
                lexerAST.setOption(optionName, (GrammarAST) optionTree.getChild(1));
            }
        }
    }

    // COPY all named actions, but only move those with lexer:: scope
    List<GrammarAST> actionsWeMoved = new ArrayList<GrammarAST>();
    for (GrammarAST e : elements) {
        if (e.getType() == ANTLRParser.AT) {
            lexerAST.addChild((Tree) adaptor.dupTree(e));
            if (e.getChild(0).getText().equals("lexer")) {
                actionsWeMoved.add(e);
            }
        }
    }
    for (GrammarAST r : actionsWeMoved) {
        combinedAST.deleteChild(r);
    }

    GrammarAST combinedRulesRoot = (GrammarAST) combinedAST.getFirstChildWithType(ANTLRParser.RULES);
    if (combinedRulesRoot == null) return lexerAST;

    // MOVE lexer rules
    GrammarAST lexerRulesRoot = (GrammarAST) adaptor.create(ANTLRParser.RULES, "RULES");
    lexerAST.addChild(lexerRulesRoot);
    List<GrammarAST> rulesWeMoved = new ArrayList<GrammarAST>();
    GrammarASTWithOptions[] rules;
    if (combinedRulesRoot.getChildCount() > 0) {
        rules = combinedRulesRoot.getChildren().toArray(new GrammarASTWithOptions[0]);
    }
    else {
        rules = new GrammarASTWithOptions[0];
    }
    for (GrammarASTWithOptions r : rules) {
        String ruleName = r.getChild(0).getText();
        if (Grammar.isTokenName(ruleName)) {
            lexerRulesRoot.addChild((Tree) adaptor.dupTree(r));
            rulesWeMoved.add(r);
        }
    }
    for (GrammarAST r : rulesWeMoved) {
        combinedRulesRoot.deleteChild(r);
    }

    // Track 'if' from IF : 'if' ; rules to avoid defining a new token for 'if'
    List<Pair<GrammarAST, GrammarAST>> litAliases =
        Grammar.getStringLiteralAliasesFromLexerRules(lexerAST);
    Set<String> stringLiterals = combinedGrammar.getStringLiterals();
    // add strings from combined grammar (and imported grammars) into lexer;
    // put them first as they are keywords; must resolve ambiguities to these rules
    // tool.log("grammar", "strings from parser: "+stringLiterals);
    int insertIndex = 0;
    nextLit:
    for (String lit : stringLiterals) {
        // if lexer already has a rule for literal, continue
        if (litAliases != null) {
            for (Pair<GrammarAST, GrammarAST> pair : litAliases) {
                GrammarAST litAST = pair.b;
                if (lit.equals(litAST.getText())) continue nextLit;
            }
        }
        // create for each literal: (RULE <uniquename> (BLOCK (ALT <lit>)))
        String rname = combinedGrammar.getStringLiteralLexerRuleName(lit);
        // can't use wizard; need special node types
        GrammarAST litRule = new RuleAST(ANTLRParser.RULE);
        BlockAST blk = new BlockAST(ANTLRParser.BLOCK);
        AltAST alt = new AltAST(ANTLRParser.ALT);
        TerminalAST slit = new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, lit));
        alt.addChild(slit);
        blk.addChild(alt);
        CommonToken idToken = new CommonToken(ANTLRParser.TOKEN_REF, rname);
        litRule.addChild(new TerminalAST(idToken));
        litRule.addChild(blk);
        lexerRulesRoot.insertChild(insertIndex, litRule);
        // lexerRulesRoot.getChildren().add(0, litRule);
        // reset indexes and set litRule parent
        lexerRulesRoot.freshenParentAndChildIndexes();
        // next literal will be added after the one just added
        insertIndex++;
    }

    // TODO: take out after stable if slow
    lexerAST.sanityCheckParentAndChildIndexes();
    combinedAST.sanityCheckParentAndChildIndexes();
    // tool.log("grammar", combinedAST.toTokenString());
    combinedGrammar.tool.log("grammar", "after extract implicit lexer =" + combinedAST.toStringTree());
    combinedGrammar.tool.log("grammar", "lexer =" + lexerAST.toStringTree());

    if (lexerRulesRoot.getChildCount() == 0) return null;
    return lexerAST;
}
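To make the effect of this pass concrete, here is a hedged before/after sketch in grammar-text form rather than AST form. The synthesized rule name T__0 follows ANTLR's auto-generated token naming; treat the exact output as illustrative, not a dump from the tool. Given a combined grammar

    grammar A;
    x : 'if' ID ;              // 'if' is an implicit literal in a parser rule
    ID : [a-z]+ ;
    WS : [ \t\n]+ -> skip ;

the extracted implicit lexer corresponds to

    lexer grammar ALexer;
    T__0 : 'if' ;              // synthesized from the parser literal; inserted
                               // first so keywords win over ID
    ID : [a-z]+ ;
    WS : [ \t\n]+ -> skip ;

while the combined AST keeps only the parser rule x. Had the lexer already defined IF : 'if' ;, the literal-alias check would skip synthesizing T__0.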
Use of org.antlr.v4.runtime.Token in project antlr4 by antlr.
The class GrammarTransformPipeline, method expandParameterizedLoops.
/** Find and replace
 *      ID*[','] with ID (',' ID)*
 *      ID+[','] with ID (',' ID)+
 *      (x {action} y)+[','] with x {action} y (',' x {action} y)+
 *
 *  Parameter must be a token.
 *  TODO: do we want this?
 */
public void expandParameterizedLoops(GrammarAST root) {
    TreeVisitor v = new TreeVisitor(new GrammarASTAdaptor());
    v.visit(root, new TreeVisitorAction() {
        @Override
        public Object pre(Object t) {
            if (((GrammarAST) t).getType() == 3) {
                return expandParameterizedLoop((GrammarAST) t);
            }
            return t;
        }

        @Override
        public Object post(Object t) {
            return t;
        }
    });
}
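As a hedged illustration of the rewrite this visitor performs (the parameterized-loop syntax is the experimental form the javadoc describes, so the concrete rule below is made up, not taken from a shipping grammar), a separated list written with the loop parameter

    args : expr*[','] ;

would be expanded, per the first pattern above, into the ordinary separated-list idiom

    args : expr (',' expr)* ;

with the +[','] form handled analogously by the second pattern.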
Use of org.antlr.v4.runtime.Token in project antlr4 by antlr.
The class TestUnbufferedTokenStream, method testMarkThenRelease.
@Test
public void testMarkThenRelease() throws Exception {
    LexerGrammar g = new LexerGrammar(
        "lexer grammar t;\n" +
        "ID : 'a'..'z'+;\n" +
        "INT : '0'..'9'+;\n" +
        "SEMI : ';';\n" +
        "ASSIGN : '=';\n" +
        "PLUS : '+';\n" +
        "MULT : '*';\n" +
        "WS : ' '+;\n");
    // Chars:  012345678901
    // Input:  x = 302 + 1;
    CharStream input = new ANTLRInputStream(new StringReader("x = 302 + 1;"));
    LexerInterpreter lexEngine = g.createLexerInterpreter(input);
    TestingUnbufferedTokenStream<Token> tokens = new TestingUnbufferedTokenStream<Token>(lexEngine);

    int m = tokens.mark();
    assertEquals("[[@0,0:0='x',<1>,1:0]]", tokens.getBuffer().toString());
    assertEquals("x", tokens.LT(1).getText());
    tokens.consume(); // consume x
    assertEquals("[[@0,0:0='x',<1>,1:0], [@1,1:1=' ',<7>,1:1]]", tokens.getBuffer().toString());
    tokens.consume(); // ' '
    tokens.consume(); // =
    tokens.consume(); // ' '
    assertEquals("302", tokens.LT(1).getText());
    // "x = 302" is in the buffer; releasing the mark lets it be discarded
    tokens.release(m);
    tokens.consume(); // 302
    tokens.consume(); // ' '

    m = tokens.mark(); // mark at the +
    assertEquals("+", tokens.LT(1).getText());
    tokens.consume(); // '+'
    tokens.consume(); // ' '
    tokens.consume(); // 1
    tokens.consume(); // ;
    assertEquals("<EOF>", tokens.LT(1).getText());
    // we marked at the +, so that should be the start of the buffer
    assertEquals("[[@6,8:8='+',<5>,1:8], [@7,9:9=' ',<7>,1:9]," +
                 " [@8,10:10='1',<2>,1:10], [@9,11:11=';',<3>,1:11]," +
                 " [@10,12:11='<EOF>',<-1>,1:12]]",
                 tokens.getBuffer().toString());
    tokens.release(m);
}
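The protocol the test exercises is the standard IntStream mark/release pairing a parser uses around speculation: mark() pins the current index so the stream must buffer from there, and release() drops the pin so already-consumed tokens can be discarded; release() does not rewind (seek() is a separate operation). A minimal sketch of that pairing, not from the test suite:

int m = tokens.mark();              // pin: the stream must buffer from the current index
while (tokens.LA(1) != Token.EOF) { // speculate arbitrarily far ahead
    tokens.consume();
}
tokens.release(m);                  // unpin: tokens behind the read head may now be freed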
Use of org.antlr.v4.runtime.Token in project antlr4 by antlr.
The class TestUnbufferedTokenStream, method testNoBuffering.
@Test
public void testNoBuffering() throws Exception {
    LexerGrammar g = new LexerGrammar(
        "lexer grammar t;\n" +
        "ID : 'a'..'z'+;\n" +
        "INT : '0'..'9'+;\n" +
        "SEMI : ';';\n" +
        "ASSIGN : '=';\n" +
        "PLUS : '+';\n" +
        "MULT : '*';\n" +
        "WS : ' '+;\n");
    // Chars:  01234567
    // Input:  x = 302;
    CharStream input = new ANTLRInputStream(new StringReader("x = 302;"));
    LexerInterpreter lexEngine = g.createLexerInterpreter(input);
    TestingUnbufferedTokenStream<Token> tokens = new TestingUnbufferedTokenStream<Token>(lexEngine);

    assertEquals("[[@0,0:0='x',<1>,1:0]]", tokens.getBuffer().toString());
    assertEquals("x", tokens.LT(1).getText());
    tokens.consume(); // move to WS
    assertEquals(" ", tokens.LT(1).getText());
    assertEquals("[[@1,1:1=' ',<7>,1:1]]", tokens.getRemainingBuffer().toString());
    tokens.consume();
    assertEquals("=", tokens.LT(1).getText());
    assertEquals("[[@2,2:2='=',<4>,1:2]]", tokens.getRemainingBuffer().toString());
    tokens.consume();
    assertEquals(" ", tokens.LT(1).getText());
    assertEquals("[[@3,3:3=' ',<7>,1:3]]", tokens.getRemainingBuffer().toString());
    tokens.consume();
    assertEquals("302", tokens.LT(1).getText());
    assertEquals("[[@4,4:6='302',<2>,1:4]]", tokens.getRemainingBuffer().toString());
    tokens.consume();
    assertEquals(";", tokens.LT(1).getText());
    assertEquals("[[@5,7:7=';',<3>,1:7]]", tokens.getRemainingBuffer().toString());
}
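Both tests rely on TestingUnbufferedTokenStream, a test-only subclass that exposes the stream's moving window so assertions can inspect it. A sketch of what such a subclass looks like, assuming UnbufferedTokenStream's protected fields tokens (the window array), n (count of valid entries), and p (read head); the actual helper in the ANTLR test suite may differ in detail:

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenSource;
import org.antlr.v4.runtime.UnbufferedTokenStream;

class TestingUnbufferedTokenStream<T extends Token> extends UnbufferedTokenStream<T> {
    public TestingUnbufferedTokenStream(TokenSource tokenSource) {
        super(tokenSource);
    }

    /** Everything buffered: 0..p-1 have been consumed, p..n-1 are still pending. */
    protected List<? extends Token> getBuffer() {
        if (n == 0) return Collections.emptyList();
        return Arrays.asList(tokens).subList(0, n);
    }

    /** Only the pending part of the window: from LT(1), i.e. tokens[p], to the end. */
    protected List<? extends Token> getRemainingBuffer() {
        if (n == 0) return Collections.emptyList();
        return Arrays.asList(tokens).subList(p, n);
    }
}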