use of org.antlr.v4.tool.ast.TerminalAST in project antlr4 by antlr.
the class GrammarTransformPipeline method extractImplicitLexer.
/**
 * Build the implicit lexer grammar AST for a combined grammar whose AST looks like:
 *
 * (COMBINED_GRAMMAR A
 * (tokens { X (= Y 'y'))
 * (OPTIONS (= x 'y'))
 * (@ members {foo})
 * (@ lexer header {package jj;})
 * (RULES (RULE .+)))
 *
 * The combined AST is split apart. Every subtree that ends up in the lexer is
 * dup'd before being added (to avoid "who's ur daddy" issues); the originals of
 * the lexer-scoped named actions and of the token-named rules are then deleted
 * from the combined AST. No tokenVocab or tokens{} section is generated here.
 *
 * Side-effects: removes children from the GRAMMAR and RULES nodes of the
 * combined AST.
 *
 * @param combinedGrammar the combined grammar to split; its AST is modified in place
 * @return the new lexer grammar AST. If the combined AST has no RULES node the
 *         lexer AST built so far (ID, options, named actions) is returned as is;
 *         if a RULES node exists but the lexer ends up with no rules at all,
 *         {@code null} is returned.
 */
public GrammarRootAST extractImplicitLexer(Grammar combinedGrammar) {
    GrammarRootAST combinedAST = combinedGrammar.ast;
    // tool.log("grammar", "before="+combinedAST.toStringTree());
    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.token.getInputStream());
    // Snapshot the top-level children now; some of them are deleted further down.
    GrammarAST[] elements = combinedAST.getChildren().toArray(new GrammarAST[0]);

    // MAKE A GRAMMAR ROOT and ID
    String lexerName = combinedAST.getChild(0).getText() + "Lexer";
    GrammarRootAST lexerAST = new GrammarRootAST(new CommonToken(ANTLRParser.GRAMMAR, "LEXER_GRAMMAR"), combinedGrammar.ast.tokenStream);
    lexerAST.grammarType = ANTLRParser.LEXER;
    lexerAST.token.setInputStream(combinedAST.token.getInputStream());
    lexerAST.addChild((GrammarAST) adaptor.create(ANTLRParser.ID, lexerName));

    copyLexerOptions(combinedAST, lexerAST, adaptor);
    copyNamedActions(combinedAST, elements, lexerAST, adaptor);

    GrammarAST combinedRulesRoot = (GrammarAST) combinedAST.getFirstChildWithType(ANTLRParser.RULES);
    if (combinedRulesRoot == null) {
        return lexerAST;
    }

    // MOVE lexer rules
    GrammarAST lexerRulesRoot = (GrammarAST) adaptor.create(ANTLRParser.RULES, "RULES");
    lexerAST.addChild(lexerRulesRoot);
    moveLexerRules(combinedRulesRoot, lexerRulesRoot, adaptor);

    // Must run after the lexer rules were moved: alias detection reads lexerAST.
    addStringLiteralRules(combinedGrammar, lexerAST, lexerRulesRoot);

    // TODO: take out after stable if slow
    lexerAST.sanityCheckParentAndChildIndexes();
    combinedAST.sanityCheckParentAndChildIndexes();
    // tool.log("grammar", combinedAST.toTokenString());
    combinedGrammar.tool.log("grammar", "after extract implicit lexer =" + combinedAST.toStringTree());
    combinedGrammar.tool.log("grammar", "lexer =" + lexerAST.toStringTree());
    if (lexerRulesRoot.getChildCount() == 0) {
        return null;
    }
    return lexerAST;
}

/**
 * Copy the options that apply to lexers from the combined grammar's OPTIONS
 * node into a dup'd OPTIONS node under lexerAST, and register each one via setOption.
 */
private static void copyLexerOptions(GrammarRootAST combinedAST, GrammarRootAST lexerAST, GrammarASTAdaptor adaptor) {
    GrammarAST optionsRoot = (GrammarAST) combinedAST.getFirstChildWithType(ANTLRParser.OPTIONS);
    if (optionsRoot == null || optionsRoot.getChildCount() == 0) {
        return;
    }
    GrammarAST lexerOptionsRoot = (GrammarAST) adaptor.dupNode(optionsRoot);
    lexerAST.addChild(lexerOptionsRoot);
    GrammarAST[] options = optionsRoot.getChildren().toArray(new GrammarAST[0]);
    for (GrammarAST o : options) {
        String optionName = o.getChild(0).getText();
        if (Grammar.lexerOptions.contains(optionName) && !Grammar.doNotCopyOptionsToLexer.contains(optionName)) {
            GrammarAST optionTree = (GrammarAST) adaptor.dupTree(o);
            lexerOptionsRoot.addChild(optionTree);
            lexerAST.setOption(optionName, (GrammarAST) optionTree.getChild(1));
        }
    }
}

/**
 * Dup every named action (AT node) into lexerAST, then delete from the combined
 * AST only those whose first child reads "lexer".
 */
private static void copyNamedActions(GrammarRootAST combinedAST, GrammarAST[] elements, GrammarRootAST lexerAST, GrammarASTAdaptor adaptor) {
    List<GrammarAST> actionsWeMoved = new ArrayList<GrammarAST>();
    for (GrammarAST e : elements) {
        if (e.getType() == ANTLRParser.AT) {
            lexerAST.addChild((Tree) adaptor.dupTree(e));
            // constant-first comparison: a null scope text simply means "not lexer-scoped"
            if ("lexer".equals(e.getChild(0).getText())) {
                actionsWeMoved.add(e);
            }
        }
    }
    // delete after the scan so the scan never sees a half-modified tree
    for (GrammarAST r : actionsWeMoved) {
        combinedAST.deleteChild(r);
    }
}

/**
 * Dup every rule whose name is a token name into lexerRulesRoot and delete the
 * original from combinedRulesRoot.
 */
private static void moveLexerRules(GrammarAST combinedRulesRoot, GrammarAST lexerRulesRoot, GrammarASTAdaptor adaptor) {
    List<GrammarAST> rulesWeMoved = new ArrayList<GrammarAST>();
    GrammarASTWithOptions[] rules;
    // the child list is only read when the node reports at least one child
    if (combinedRulesRoot.getChildCount() > 0) {
        rules = combinedRulesRoot.getChildren().toArray(new GrammarASTWithOptions[0]);
    } else {
        rules = new GrammarASTWithOptions[0];
    }
    for (GrammarASTWithOptions r : rules) {
        String ruleName = r.getChild(0).getText();
        if (Grammar.isTokenName(ruleName)) {
            lexerRulesRoot.addChild((Tree) adaptor.dupTree(r));
            rulesWeMoved.add(r);
        }
    }
    for (GrammarAST r : rulesWeMoved) {
        combinedRulesRoot.deleteChild(r);
    }
}

/**
 * Insert, at the front of lexerRulesRoot, one generated rule per string literal
 * of the combined grammar that is not already aliased by a lexer rule.
 */
private static void addStringLiteralRules(Grammar combinedGrammar, GrammarRootAST lexerAST, GrammarAST lexerRulesRoot) {
    // Will track 'if' from IF : 'if' ; rules to avoid defining new token for 'if'
    List<Pair<GrammarAST, GrammarAST>> litAliases = Grammar.getStringLiteralAliasesFromLexerRules(lexerAST);
    Set<String> stringLiterals = combinedGrammar.getStringLiterals();
    // add strings from combined grammar (and imported grammars) into lexer
    // put them first as they are keywords; must resolve ambigs to these rules
    // tool.log("grammar", "strings from parser: "+stringLiterals);
    int insertIndex = 0;
    for (String lit : stringLiterals) {
        // if lexer already has a rule for literal, skip it
        if (isAliasedByLexerRule(lit, litAliases)) {
            continue;
        }
        String rname = combinedGrammar.getStringLiteralLexerRuleName(lit);
        lexerRulesRoot.insertChild(insertIndex, newStringLiteralRule(rname, lit));
        // reset indexes and set litRule parent
        lexerRulesRoot.freshenParentAndChildIndexes();
        // next literal will be added after the one just added
        insertIndex++;
    }
}

/**
 * Tell whether some alias pair's second element has exactly the text of lit.
 * A null alias list means no aliases.
 */
private static boolean isAliasedByLexerRule(String lit, List<Pair<GrammarAST, GrammarAST>> litAliases) {
    if (litAliases == null) {
        return false;
    }
    for (Pair<GrammarAST, GrammarAST> pair : litAliases) {
        GrammarAST litAST = pair.b;
        if (lit.equals(litAST.getText())) {
            return true;
        }
    }
    return false;
}

/**
 * Create (RULE ruleName (BLOCK (ALT lit))) out of the specific node classes;
 * can't use wizard here because special node types are needed.
 */
private static GrammarAST newStringLiteralRule(String ruleName, String lit) {
    GrammarAST litRule = new RuleAST(ANTLRParser.RULE);
    BlockAST blk = new BlockAST(ANTLRParser.BLOCK);
    AltAST alt = new AltAST(ANTLRParser.ALT);
    TerminalAST slit = new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, lit));
    alt.addChild(slit);
    blk.addChild(alt);
    CommonToken idToken = new CommonToken(ANTLRParser.TOKEN_REF, ruleName);
    litRule.addChild(new TerminalAST(idToken));
    litRule.addChild(blk);
    return litRule;
}
use of org.antlr.v4.tool.ast.TerminalAST in project antlr4 by antlr.
the class GrammarASTAdaptor method create.
/**
 * Create a node for the given token type and text, and make sure that even
 * imaginary nodes know the input stream.
 *
 * RULE gets a RuleAST (needed by TreeWizard to make a RULE tree) and
 * STRING_LITERAL gets a TerminalAST (implicit lexer construction done with
 * the wizard needs this node type, whereas grammar ANTLRParser.g can use a
 * token option to spec the node type). Every other type is delegated to the
 * superclass.
 */
@Override
public Object create(int tokenType, String text) {
    final boolean wantsRuleNode = tokenType == ANTLRParser.RULE;
    final boolean wantsTerminalNode = tokenType == ANTLRParser.STRING_LITERAL;

    final GrammarAST node;
    if (wantsRuleNode) {
        node = new RuleAST(new CommonToken(tokenType, text));
    } else if (wantsTerminalNode) {
        node = new TerminalAST(new CommonToken(tokenType, text));
    } else {
        node = (GrammarAST) super.create(tokenType, text);
    }

    // whichever path built the node, stamp the adaptor's input stream on its token
    node.token.setInputStream(input);
    return node;
}
use of org.antlr.v4.tool.ast.TerminalAST in project antlr4 by antlr.
the class BasicSemanticChecks method checkElementOptions.
/**
 * Check that an option is appropriate for the element it is attached to;
 * the parent of ID is ELEMENT_OPTIONS.
 *
 * Rule references and terminals are delegated to checkRuleRefOptions and
 * checkTokenOptions and their result is returned. Every other element kind
 * yields false; for a SEMPRED an ILLEGAL_OPTION error is reported first when
 * a value is present and the option name is not in Grammar.semPredOptions.
 * When assoc checking is enabled, an "assoc" option on anything other than
 * an ALT is reported as UNRECOGNIZED_ASSOC_OPTION before the dispatch.
 */
boolean checkElementOptions(GrammarASTWithOptions elem, GrammarAST ID, GrammarAST valueAST) {
    boolean isAssocOption = checkAssocElementOption && ID != null && "assoc".equals(ID.getText());
    if (isAssocOption && elem.getType() != ANTLRParser.ALT) {
        Token assocToken = ID.token;
        String assocFile = assocToken.getInputStream().getSourceName();
        g.tool.errMgr.grammarError(ErrorType.UNRECOGNIZED_ASSOC_OPTION, assocFile, assocToken, currentRuleName);
    }

    if (elem instanceof RuleRefAST) {
        RuleRefAST ruleRef = (RuleRefAST) elem;
        return checkRuleRefOptions(ruleRef, ID, valueAST);
    }
    if (elem instanceof TerminalAST) {
        TerminalAST terminal = (TerminalAST) elem;
        return checkTokenOptions(terminal, ID, valueAST);
    }

    // ACTION elements and anything unrecognized fall straight through to "false";
    // only SEMPRED has extra reporting to do before that.
    if (elem.getType() == ANTLRParser.SEMPRED) {
        Token optionToken = ID.token;
        String sourceName = optionToken.getInputStream().getSourceName();
        boolean knownSemPredOption = Grammar.semPredOptions.contains(optionToken.getText());
        if (valueAST != null && !knownSemPredOption) {
            g.tool.errMgr.grammarError(ErrorType.ILLEGAL_OPTION, sourceName, optionToken, optionToken.getText());
        }
    }
    return false;
}
use of org.antlr.v4.tool.ast.TerminalAST in project antlr4 by tunnelvisionlabs.
the class Tool method checkForRuleIssues.
/**
 * Early rule sanity pass, run right after AST construction because multiple
 * definitions are important enough to catch up front. Also looks for
 * references to undefined rules in parser/lexer so that later phases do not
 * hit exceptions.
 *
 * @param g the grammar whose AST is inspected
 * @return true if a rule is defined more than once, or a reference names an
 *         undefined rule, or a lexer rule references a parser rule
 */
public boolean checkForRuleIssues(final Grammar g) {
    // Gather rule definitions: those under RULES plus those under each MODE.
    GrammarAST rulesRoot = (GrammarAST) g.ast.getFirstChildWithType(ANTLRParser.RULES);
    List<GrammarAST> allRules = new ArrayList<GrammarAST>(rulesRoot.getAllChildrenWithType(ANTLRParser.RULE));
    for (GrammarAST modeNode : g.ast.getAllChildrenWithType(ANTLRParser.MODE)) {
        allRules.addAll(modeNode.getAllChildrenWithType(ANTLRParser.RULE));
    }

    // Pass 1: redefinitions. The first definition of a name wins the map slot.
    final Map<String, RuleAST> definedRules = new HashMap<String, RuleAST>();
    boolean sawRedefinition = false;
    for (GrammarAST node : allRules) {
        RuleAST current = (RuleAST) node;
        GrammarAST nameNode = (GrammarAST) current.getChild(0);
        String name = nameNode.getText();
        RuleAST earlier = definedRules.get(name);
        if (earlier == null) {
            definedRules.put(name, current);
        } else {
            GrammarAST earlierNameNode = (GrammarAST) earlier.getChild(0);
            g.tool.errMgr.grammarError(ErrorType.RULE_REDEFINITION, g.fileName, nameNode.getToken(), name, earlierNameNode.getToken().getLine());
            sawRedefinition = true;
        }
    }

    // Pass 2: walk the grammar and flag bad references.
    class UndefChecker extends GrammarTreeVisitor {
        public boolean badref = false;

        @Override
        public void tokenRef(TerminalAST ref) {
            // EOF is a special predefined reference; other token refs are
            // checked like rule refs, but only when the grammar is a lexer.
            if (!"EOF".equals(ref.getText()) && g.isLexer()) {
                ruleRef(ref, null);
            }
        }

        @Override
        public void ruleRef(GrammarAST ref, ActionAST arg) {
            RuleAST target = definedRules.get(ref.getText());
            String sourceName = ref.getToken().getInputStream().getSourceName();
            boolean parserRuleUsedInLexerRule = Character.isUpperCase(currentRuleName.charAt(0)) && Character.isLowerCase(ref.getText().charAt(0));
            if (parserRuleUsedInLexerRule) {
                badref = true;
                errMgr.grammarError(ErrorType.PARSER_RULE_REF_IN_LEXER_RULE, sourceName, ref.getToken(), ref.getText(), currentRuleName);
                return;
            }
            if (target == null) {
                badref = true;
                errMgr.grammarError(ErrorType.UNDEFINED_RULE_REF, sourceName, ref.token, ref.getText());
            }
        }

        @Override
        public ErrorManager getErrorManager() {
            return errMgr;
        }
    }

    UndefChecker checker = new UndefChecker();
    checker.visitGrammar(g.ast);
    return sawRedefinition || checker.badref;
}
use of org.antlr.v4.tool.ast.TerminalAST in project antlr4 by tunnelvisionlabs.
the class GrammarTransformPipeline method extractImplicitLexer.
/**
 * Derive the implicit lexer grammar AST from a combined grammar AST shaped like:
 *
 * (COMBINED_GRAMMAR A
 * (tokens { X (= Y 'y'))
 * (OPTIONS (= x 'y'))
 * (@ members {foo})
 * (@ lexer header {package jj;})
 * (RULES (RULE .+)))
 *
 * Rules and actions are transferred to a new tree and the combined AST is
 * split apart. This grammar shares token symbols later, so no tokenVocab or
 * tokens{} section is generated. Named actions are copied over.
 *
 * Side-effects: children are removed from the GRAMMAR and RULES nodes of the
 * combined AST. Whatever is cut out is dup'd before it is added to the lexer
 * to avoid "who's ur daddy" issues.
 */
public GrammarRootAST extractImplicitLexer(Grammar combinedGrammar) {
    GrammarRootAST combinedAST = combinedGrammar.ast;
    // tool.log("grammar", "before="+combinedAST.toStringTree());
    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.token.getInputStream());
    GrammarAST[] topLevelNodes = combinedAST.getChildren().toArray(new GrammarAST[0]);

    // ---- grammar root and ID ----
    String lexerName = combinedAST.getChild(0).getText() + "Lexer";
    CommonToken rootToken = new CommonToken(ANTLRParser.GRAMMAR, "LEXER_GRAMMAR");
    GrammarRootAST lexerAST = new GrammarRootAST(rootToken, combinedGrammar.ast.tokenStream);
    lexerAST.grammarType = ANTLRParser.LEXER;
    lexerAST.token.setInputStream(combinedAST.token.getInputStream());
    lexerAST.addChild(adaptor.create(ANTLRParser.ID, lexerName));

    // ---- options: copy the ones meant for lexers ----
    GrammarAST combinedOptions = (GrammarAST) combinedAST.getFirstChildWithType(ANTLRParser.OPTIONS);
    boolean hasOptions = combinedOptions != null && combinedOptions.getChildCount() != 0;
    if (hasOptions) {
        GrammarAST lexerOptions = adaptor.dupNode(combinedOptions);
        lexerAST.addChild(lexerOptions);
        for (GrammarAST option : combinedOptions.getChildren().toArray(new GrammarAST[0])) {
            String name = option.getChild(0).getText();
            boolean copyToLexer = Grammar.lexerOptions.contains(name) && !Grammar.doNotCopyOptionsToLexer.contains(name);
            if (!copyToLexer) {
                continue;
            }
            GrammarAST optionCopy = (GrammarAST) adaptor.dupTree(option);
            lexerOptions.addChild(optionCopy);
            lexerAST.setOption(name, (GrammarAST) optionCopy.getChild(1));
        }
    }

    // ---- named actions: copy all of them, remove only lexer:: scoped originals ----
    List<GrammarAST> lexerScopedActions = new ArrayList<GrammarAST>();
    for (GrammarAST node : topLevelNodes) {
        if (node.getType() != ANTLRParser.AT) {
            continue;
        }
        lexerAST.addChild((Tree) adaptor.dupTree(node));
        if (node.getChild(0).getText().equals("lexer")) {
            lexerScopedActions.add(node);
        }
    }
    for (GrammarAST action : lexerScopedActions) {
        combinedAST.deleteChild(action);
    }

    GrammarAST combinedRulesRoot = (GrammarAST) combinedAST.getFirstChildWithType(ANTLRParser.RULES);
    if (combinedRulesRoot == null) {
        return lexerAST;
    }

    // ---- lexer rules: dup into the lexer, then delete from the combined tree ----
    GrammarAST lexerRulesRoot = adaptor.create(ANTLRParser.RULES, "RULES");
    lexerAST.addChild(lexerRulesRoot);
    // the child list is only consulted when the node reports children
    GrammarASTWithOptions[] combinedRules = combinedRulesRoot.getChildCount() > 0
            ? combinedRulesRoot.getChildren().toArray(new GrammarASTWithOptions[0])
            : new GrammarASTWithOptions[0];
    List<GrammarAST> movedRules = new ArrayList<GrammarAST>();
    for (GrammarASTWithOptions rule : combinedRules) {
        if (!Grammar.isTokenName(rule.getChild(0).getText())) {
            continue;
        }
        lexerRulesRoot.addChild((Tree) adaptor.dupTree(rule));
        movedRules.add(rule);
    }
    for (GrammarAST rule : movedRules) {
        combinedRulesRoot.deleteChild(rule);
    }

    // ---- string literals ----
    // Aliases such as IF : 'if' ; are tracked so no new token is defined for 'if'.
    List<Tuple2<GrammarAST, GrammarAST>> litAliases = Grammar.getStringLiteralAliasesFromLexerRules(lexerAST);
    Set<String> stringLiterals = combinedGrammar.getStringLiterals();
    // Strings from the combined grammar (and imported grammars) go into the lexer
    // first, as they are keywords; ambiguities must resolve to these rules.
    // tool.log("grammar", "strings from parser: "+stringLiterals);
    int nextSlot = 0;
    for (String lit : stringLiterals) {
        // skip literals the lexer already has a rule for
        boolean alreadyCovered = false;
        if (litAliases != null) {
            for (Tuple2<GrammarAST, GrammarAST> alias : litAliases) {
                if (lit.equals(alias.getItem2().getText())) {
                    alreadyCovered = true;
                    break;
                }
            }
        }
        if (alreadyCovered) {
            continue;
        }

        // build (RULE <uniquename> (BLOCK (ALT <lit>))) by hand;
        // can't use wizard because special node types are needed
        String generatedName = combinedGrammar.getStringLiteralLexerRuleName(lit);
        AltAST alternative = new AltAST(ANTLRParser.ALT);
        alternative.addChild(new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, lit)));
        BlockAST block = new BlockAST(ANTLRParser.BLOCK);
        block.addChild(alternative);
        GrammarAST literalRule = new RuleAST(ANTLRParser.RULE);
        literalRule.addChild(new TerminalAST(new CommonToken(ANTLRParser.TOKEN_REF, generatedName)));
        literalRule.addChild(block);

        lexerRulesRoot.insertChild(nextSlot, literalRule);
        // reset indexes and set the new rule's parent
        lexerRulesRoot.freshenParentAndChildIndexes();
        // the next literal goes right after the one just added
        nextSlot++;
    }

    // TODO: take out after stable if slow
    lexerAST.sanityCheckParentAndChildIndexes();
    combinedAST.sanityCheckParentAndChildIndexes();
    // tool.log("grammar", combinedAST.toTokenString());
    combinedGrammar.tool.log("grammar", "after extract implicit lexer =" + combinedAST.toStringTree());
    combinedGrammar.tool.log("grammar", "lexer =" + lexerAST.toStringTree());

    // a lexer without any rule is reported as "no lexer"
    return lexerRulesRoot.getChildCount() == 0 ? null : lexerAST;
}
Aggregations