use of org.antlr.v4.tool.ast.BlockAST in project antlr4 by antlr.
the class GrammarTransformPipeline method extractImplicitLexer.
/** Build lexer grammar from combined grammar that looks like:
*
* (COMBINED_GRAMMAR A
* (tokens { X (= Y 'y'))
* (OPTIONS (= x 'y'))
* (@ members {foo})
* (@ lexer header {package jj;})
* (RULES (RULE .+)))
*
* Move rules and actions to new tree, don't dup. Split AST apart.
* We'll have this Grammar share token symbols later; don't generate
* tokenVocab or tokens{} section. Copy over named actions.
*
* Side-effects: it removes children from GRAMMAR & RULES nodes
* in combined AST. Anything cut out is dup'd before
* adding to lexer to avoid "who's ur daddy" issues
*/
public GrammarRootAST extractImplicitLexer(Grammar combinedGrammar) {
GrammarRootAST combinedAST = combinedGrammar.ast;
//tool.log("grammar", "before="+combinedAST.toStringTree());
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.token.getInputStream());
GrammarAST[] elements = combinedAST.getChildren().toArray(new GrammarAST[0]);
// MAKE A GRAMMAR ROOT and ID
String lexerName = combinedAST.getChild(0).getText() + "Lexer";
GrammarRootAST lexerAST = new GrammarRootAST(new CommonToken(ANTLRParser.GRAMMAR, "LEXER_GRAMMAR"), combinedGrammar.ast.tokenStream);
lexerAST.grammarType = ANTLRParser.LEXER;
lexerAST.token.setInputStream(combinedAST.token.getInputStream());
lexerAST.addChild((GrammarAST) adaptor.create(ANTLRParser.ID, lexerName));
// COPY OPTIONS
GrammarAST optionsRoot = (GrammarAST) combinedAST.getFirstChildWithType(ANTLRParser.OPTIONS);
if (optionsRoot != null && optionsRoot.getChildCount() != 0) {
GrammarAST lexerOptionsRoot = (GrammarAST) adaptor.dupNode(optionsRoot);
lexerAST.addChild(lexerOptionsRoot);
GrammarAST[] options = optionsRoot.getChildren().toArray(new GrammarAST[0]);
for (GrammarAST o : options) {
String optionName = o.getChild(0).getText();
if (Grammar.lexerOptions.contains(optionName) && !Grammar.doNotCopyOptionsToLexer.contains(optionName)) {
GrammarAST optionTree = (GrammarAST) adaptor.dupTree(o);
lexerOptionsRoot.addChild(optionTree);
lexerAST.setOption(optionName, (GrammarAST) optionTree.getChild(1));
}
}
}
// COPY all named actions, but only move those with lexer:: scope
List<GrammarAST> actionsWeMoved = new ArrayList<GrammarAST>();
for (GrammarAST e : elements) {
if (e.getType() == ANTLRParser.AT) {
lexerAST.addChild((Tree) adaptor.dupTree(e));
if (e.getChild(0).getText().equals("lexer")) {
actionsWeMoved.add(e);
}
}
}
for (GrammarAST r : actionsWeMoved) {
combinedAST.deleteChild(r);
}
GrammarAST combinedRulesRoot = (GrammarAST) combinedAST.getFirstChildWithType(ANTLRParser.RULES);
if (combinedRulesRoot == null)
return lexerAST;
// MOVE lexer rules
GrammarAST lexerRulesRoot = (GrammarAST) adaptor.create(ANTLRParser.RULES, "RULES");
lexerAST.addChild(lexerRulesRoot);
List<GrammarAST> rulesWeMoved = new ArrayList<GrammarAST>();
GrammarASTWithOptions[] rules;
if (combinedRulesRoot.getChildCount() > 0) {
rules = combinedRulesRoot.getChildren().toArray(new GrammarASTWithOptions[0]);
} else {
rules = new GrammarASTWithOptions[0];
}
for (GrammarASTWithOptions r : rules) {
String ruleName = r.getChild(0).getText();
if (Grammar.isTokenName(ruleName)) {
lexerRulesRoot.addChild((Tree) adaptor.dupTree(r));
rulesWeMoved.add(r);
}
}
for (GrammarAST r : rulesWeMoved) {
combinedRulesRoot.deleteChild(r);
}
// Will track 'if' from IF : 'if' ; rules to avoid defining new token for 'if'
List<Pair<GrammarAST, GrammarAST>> litAliases = Grammar.getStringLiteralAliasesFromLexerRules(lexerAST);
Set<String> stringLiterals = combinedGrammar.getStringLiterals();
// add strings from combined grammar (and imported grammars) into lexer
// put them first as they are keywords; must resolve ambigs to these rules
// tool.log("grammar", "strings from parser: "+stringLiterals);
int insertIndex = 0;
nextLit: for (String lit : stringLiterals) {
// if lexer already has a rule for literal, continue
if (litAliases != null) {
for (Pair<GrammarAST, GrammarAST> pair : litAliases) {
GrammarAST litAST = pair.b;
if (lit.equals(litAST.getText()))
continue nextLit;
}
}
// create for each literal: (RULE <uniquename> (BLOCK (ALT <lit>))
String rname = combinedGrammar.getStringLiteralLexerRuleName(lit);
// can't use wizard; need special node types
GrammarAST litRule = new RuleAST(ANTLRParser.RULE);
BlockAST blk = new BlockAST(ANTLRParser.BLOCK);
AltAST alt = new AltAST(ANTLRParser.ALT);
TerminalAST slit = new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, lit));
alt.addChild(slit);
blk.addChild(alt);
CommonToken idToken = new CommonToken(ANTLRParser.TOKEN_REF, rname);
litRule.addChild(new TerminalAST(idToken));
litRule.addChild(blk);
lexerRulesRoot.insertChild(insertIndex, litRule);
// lexerRulesRoot.getChildren().add(0, litRule);
// reset indexes and set litRule parent
lexerRulesRoot.freshenParentAndChildIndexes();
// next literal will be added after the one just added
insertIndex++;
}
// TODO: take out after stable if slow
lexerAST.sanityCheckParentAndChildIndexes();
combinedAST.sanityCheckParentAndChildIndexes();
// tool.log("grammar", combinedAST.toTokenString());
combinedGrammar.tool.log("grammar", "after extract implicit lexer =" + combinedAST.toStringTree());
combinedGrammar.tool.log("grammar", "lexer =" + lexerAST.toStringTree());
if (lexerRulesRoot.getChildCount() == 0)
return null;
return lexerAST;
}
use of org.antlr.v4.tool.ast.BlockAST in project antlr4 by antlr.
the class BasicSemanticChecks method finishRule.
@Override
public void finishRule(RuleAST rule, GrammarAST ID, GrammarAST block) {
if (rule.isLexerRule())
return;
BlockAST blk = (BlockAST) rule.getFirstChildWithType(BLOCK);
int nalts = blk.getChildCount();
GrammarAST idAST = (GrammarAST) rule.getChild(0);
for (int i = 0; i < nalts; i++) {
AltAST altAST = (AltAST) blk.getChild(i);
if (altAST.altLabel != null) {
String altLabel = altAST.altLabel.getText();
// first check that label doesn't conflict with a rule
// label X or x can't be rule x.
Rule r = ruleCollector.rules.get(Utils.decapitalize(altLabel));
if (r != null) {
g.tool.errMgr.grammarError(ErrorType.ALT_LABEL_CONFLICTS_WITH_RULE, g.fileName, altAST.altLabel.token, altLabel, r.name);
}
// Now verify that label X or x doesn't conflict with label
// in another rule. altLabelToRuleName has both X and x mapped.
String prevRuleForLabel = ruleCollector.altLabelToRuleName.get(altLabel);
if (prevRuleForLabel != null && !prevRuleForLabel.equals(rule.getRuleName())) {
g.tool.errMgr.grammarError(ErrorType.ALT_LABEL_REDEF, g.fileName, altAST.altLabel.token, altLabel, rule.getRuleName(), prevRuleForLabel);
}
}
}
List<GrammarAST> altLabels = ruleCollector.ruleToAltLabels.get(rule.getRuleName());
int numAltLabels = 0;
if (altLabels != null)
numAltLabels = altLabels.size();
if (numAltLabels > 0 && nalts != numAltLabels) {
g.tool.errMgr.grammarError(ErrorType.RULE_WITH_TOO_FEW_ALT_LABELS, g.fileName, idAST.token, rule.getRuleName());
}
}
use of org.antlr.v4.tool.ast.BlockAST in project antlr4 by antlr.
the class ParserATNFactory method star.
/**
* From {@code (blk)*} build {@code ( blk+ )?} with *two* decisions, one for
* entry and one for choosing alts of {@code blk}.
*
* <pre>
* |-------------|
* v |
* o--[o-blk-o]->o o
* | ^
* -----------------|
* </pre>
*
* Note that the optional bypass must jump outside the loop as
* {@code (A|B)*} is not the same thing as {@code (A|B|)+}.
*/
@Override
public Handle star(GrammarAST starAST, Handle elem) {
StarBlockStartState blkStart = (StarBlockStartState) elem.left;
BlockEndState blkEnd = (BlockEndState) elem.right;
preventEpsilonClosureBlocks.add(new Triple<Rule, ATNState, ATNState>(currentRule, blkStart, blkEnd));
StarLoopEntryState entry = newState(StarLoopEntryState.class, starAST);
entry.nonGreedy = !((QuantifierAST) starAST).isGreedy();
atn.defineDecisionState(entry);
LoopEndState end = newState(LoopEndState.class, starAST);
StarLoopbackState loop = newState(StarLoopbackState.class, starAST);
entry.loopBackState = loop;
end.loopBackState = loop;
BlockAST blkAST = (BlockAST) starAST.getChild(0);
if (((QuantifierAST) starAST).isGreedy()) {
if (expectNonGreedy(blkAST)) {
g.tool.errMgr.grammarError(ErrorType.EXPECTED_NON_GREEDY_WILDCARD_BLOCK, g.fileName, starAST.getToken(), starAST.getToken().getText());
}
// loop enter edge (alt 1)
epsilon(entry, blkStart);
// bypass loop edge (alt 2)
epsilon(entry, end);
} else {
// if not greedy, priority to exit branch; make it first
// bypass loop edge (alt 1)
epsilon(entry, end);
// loop enter edge (alt 2)
epsilon(entry, blkStart);
}
// block end hits loop back
epsilon(blkEnd, loop);
// loop back to entry/exit decision
epsilon(loop, entry);
// decision is to enter/exit; blk is its own decision
starAST.atnState = entry;
return new Handle(entry, end);
}
use of org.antlr.v4.tool.ast.BlockAST in project antlr4 by antlr.
the class LeftRecursiveRuleTransformer method setAltASTPointers.
/**
* <pre>
* (RULE e int _p (returns int v)
* (BLOCK
* (ALT
* (BLOCK
* (ALT INT {$v = $INT.int;})
* (ALT '(' (= x e) ')' {$v = $x.v;})
* (ALT ID))
* (* (BLOCK
* (OPTIONS ...)
* (ALT {7 >= $_p}? '*' (= b e) {$v = $a.v * $b.v;})
* (ALT {6 >= $_p}? '+' (= b e) {$v = $a.v + $b.v;})
* (ALT {3 >= $_p}? '++') (ALT {2 >= $_p}? '--'))))))
* </pre>
*/
public void setAltASTPointers(LeftRecursiveRule r, RuleAST t) {
// System.out.println("RULE: "+t.toStringTree());
BlockAST ruleBlk = (BlockAST) t.getFirstChildWithType(ANTLRParser.BLOCK);
AltAST mainAlt = (AltAST) ruleBlk.getChild(0);
BlockAST primaryBlk = (BlockAST) mainAlt.getChild(0);
// (* BLOCK ...)
BlockAST opsBlk = (BlockAST) mainAlt.getChild(1).getChild(0);
for (int i = 0; i < r.recPrimaryAlts.size(); i++) {
LeftRecursiveRuleAltInfo altInfo = r.recPrimaryAlts.get(i);
altInfo.altAST = (AltAST) primaryBlk.getChild(i);
altInfo.altAST.leftRecursiveAltInfo = altInfo;
altInfo.originalAltAST.leftRecursiveAltInfo = altInfo;
// altInfo.originalAltAST.parent = altInfo.altAST.parent;
// System.out.println(altInfo.altAST.toStringTree());
}
for (int i = 0; i < r.recOpAlts.size(); i++) {
LeftRecursiveRuleAltInfo altInfo = r.recOpAlts.getElement(i);
altInfo.altAST = (AltAST) opsBlk.getChild(i);
altInfo.altAST.leftRecursiveAltInfo = altInfo;
altInfo.originalAltAST.leftRecursiveAltInfo = altInfo;
// altInfo.originalAltAST.parent = altInfo.altAST.parent;
// System.out.println(altInfo.altAST.toStringTree());
}
}
use of org.antlr.v4.tool.ast.BlockAST in project antlr4 by antlr.
the class ParserATNFactory method block.
/**
* From {@code A|B|..|Z} alternative block build
*
* <pre>
* o->o-A->o->o (last ATNState is BlockEndState pointed to by all alts)
* | ^
* |->o-B->o--|
* | |
* ... |
* | |
* |->o-Z->o--|
* </pre>
*
* So start node points at every alternative with epsilon transition and
* every alt right side points at a block end ATNState.
* <p>
* Special case: only one alternative: don't make a block with alt
* begin/end.
* <p>
* Special case: if just a list of tokens/chars/sets, then collapse to a
* single edged o-set->o graph.
* <p>
* TODO: Set alt number (1..n) in the states?
*/
@Override
public Handle block(BlockAST blkAST, GrammarAST ebnfRoot, List<Handle> alts) {
if (ebnfRoot == null) {
if (alts.size() == 1) {
Handle h = alts.get(0);
blkAST.atnState = h.left;
return h;
}
BlockStartState start = newState(BasicBlockStartState.class, blkAST);
if (alts.size() > 1)
atn.defineDecisionState(start);
return makeBlock(start, blkAST, alts);
}
switch(ebnfRoot.getType()) {
case ANTLRParser.OPTIONAL:
BlockStartState start = newState(BasicBlockStartState.class, blkAST);
atn.defineDecisionState(start);
Handle h = makeBlock(start, blkAST, alts);
return optional(ebnfRoot, h);
case ANTLRParser.CLOSURE:
BlockStartState star = newState(StarBlockStartState.class, ebnfRoot);
if (alts.size() > 1)
atn.defineDecisionState(star);
h = makeBlock(star, blkAST, alts);
return star(ebnfRoot, h);
case ANTLRParser.POSITIVE_CLOSURE:
PlusBlockStartState plus = newState(PlusBlockStartState.class, ebnfRoot);
if (alts.size() > 1)
atn.defineDecisionState(plus);
h = makeBlock(plus, blkAST, alts);
return plus(ebnfRoot, h);
}
return null;
}
Aggregations