Search in sources :

Example 11 with TerminalAST

use of org.antlr.v4.tool.ast.TerminalAST in project antlr4 by antlr.

the class GrammarTransformPipeline method extractImplicitLexer.

/**
 * Builds the AST of the implicit lexer grammar for a combined grammar whose
 * AST looks like:
 *
 *  (COMBINED_GRAMMAR A
 *      (tokens { X (= Y 'y'))
 *      (OPTIONS (= x 'y'))
 *      (@ members {foo})
 *      (@ lexer header {package jj;})
 *      (RULES (RULE .+)))
 *
 *  The work is done in this order:
 *   1. create a new LEXER root whose ID is the combined grammar name + "Lexer";
 *   2. copy every option listed in Grammar.lexerOptions and not listed in
 *      Grammar.doNotCopyOptionsToLexer;
 *   3. duplicate every named action into the lexer, and delete from the
 *      combined tree those whose first child text is "lexer";
 *   4. duplicate every rule whose name satisfies Grammar.isTokenName into the
 *      lexer and delete it from the combined tree;
 *   5. for each string literal of the combined grammar that is not already
 *      aliased by a lexer rule, insert a synthesized rule at the front of the
 *      lexer rules (literals keep their iteration order).
 *
 *  Side-effects: children are removed from the GRAMMAR and RULES nodes of the
 *                combined AST.  Everything that is cut out is duplicated
 *                before being added to the lexer tree, so no node ends up
 *                with two parents.
 *
 * @param combinedGrammar the combined grammar whose AST is split
 * @return the lexer AST; the partially built lexer AST if the combined AST
 *         has no RULES child; or null if the lexer ends up with no rules
 */
public GrammarRootAST extractImplicitLexer(Grammar combinedGrammar) {
    GrammarRootAST combinedAST = combinedGrammar.ast;
    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.token.getInputStream());
    // Snapshot the top-level children now; some of them are deleted further down.
    GrammarAST[] elements = combinedAST.getChildren().toArray(new GrammarAST[0]);

    // --- grammar root and ID ---
    String lexerName = combinedAST.getChild(0).getText() + "Lexer";
    CommonToken rootToken = new CommonToken(ANTLRParser.GRAMMAR, "LEXER_GRAMMAR");
    GrammarRootAST lexerAST = new GrammarRootAST(rootToken, combinedGrammar.ast.tokenStream);
    lexerAST.grammarType = ANTLRParser.LEXER;
    lexerAST.token.setInputStream(combinedAST.token.getInputStream());
    lexerAST.addChild((GrammarAST) adaptor.create(ANTLRParser.ID, lexerName));

    // --- options ---
    GrammarAST optionsRoot = (GrammarAST) combinedAST.getFirstChildWithType(ANTLRParser.OPTIONS);
    boolean hasOptions = optionsRoot != null && optionsRoot.getChildCount() != 0;
    if (hasOptions) {
        GrammarAST lexerOptionsRoot = (GrammarAST) adaptor.dupNode(optionsRoot);
        lexerAST.addChild(lexerOptionsRoot);
        for (GrammarAST option : optionsRoot.getChildren().toArray(new GrammarAST[0])) {
            String optionName = option.getChild(0).getText();
            if (!Grammar.lexerOptions.contains(optionName)) {
                continue;
            }
            if (Grammar.doNotCopyOptionsToLexer.contains(optionName)) {
                continue;
            }
            GrammarAST optionCopy = (GrammarAST) adaptor.dupTree(option);
            lexerOptionsRoot.addChild(optionCopy);
            lexerAST.setOption(optionName, (GrammarAST) optionCopy.getChild(1));
        }
    }

    // --- named actions: all are copied, only lexer-scoped ones are removed from the parser ---
    List<GrammarAST> actionsWeMoved = new ArrayList<GrammarAST>();
    for (GrammarAST element : elements) {
        if (element.getType() != ANTLRParser.AT) {
            continue;
        }
        lexerAST.addChild((Tree) adaptor.dupTree(element));
        if (element.getChild(0).getText().equals("lexer")) {
            actionsWeMoved.add(element);
        }
    }
    for (GrammarAST moved : actionsWeMoved) {
        combinedAST.deleteChild(moved);
    }

    GrammarAST combinedRulesRoot = (GrammarAST) combinedAST.getFirstChildWithType(ANTLRParser.RULES);
    if (combinedRulesRoot == null) {
        return lexerAST;
    }

    // --- lexer rules ---
    GrammarAST lexerRulesRoot = (GrammarAST) adaptor.create(ANTLRParser.RULES, "RULES");
    lexerAST.addChild(lexerRulesRoot);
    List<GrammarAST> rulesWeMoved = new ArrayList<GrammarAST>();
    GrammarASTWithOptions[] rules = combinedRulesRoot.getChildCount() > 0
            ? combinedRulesRoot.getChildren().toArray(new GrammarASTWithOptions[0])
            : new GrammarASTWithOptions[0];
    for (GrammarASTWithOptions rule : rules) {
        String ruleName = rule.getChild(0).getText();
        if (!Grammar.isTokenName(ruleName)) {
            continue;
        }
        lexerRulesRoot.addChild((Tree) adaptor.dupTree(rule));
        rulesWeMoved.add(rule);
    }
    for (GrammarAST moved : rulesWeMoved) {
        combinedRulesRoot.deleteChild(moved);
    }

    // Literals such as 'if' that already have a rule like IF : 'if' ; must not get a second token.
    List<Pair<GrammarAST, GrammarAST>> litAliases = Grammar.getStringLiteralAliasesFromLexerRules(lexerAST);
    Set<String> stringLiterals = combinedGrammar.getStringLiterals();

    // Literal rules go in front of the moved rules: they are keywords and
    // ambiguities must resolve to them.
    int insertIndex = 0;
    for (String lit : stringLiterals) {
        boolean alreadyAliased = false;
        if (litAliases != null) {
            for (Pair<GrammarAST, GrammarAST> alias : litAliases) {
                if (lit.equals(alias.b.getText())) {
                    alreadyAliased = true;
                    break;
                }
            }
        }
        if (alreadyAliased) {
            continue;
        }

        // Build (RULE <uniquename> (BLOCK (ALT <lit>))) by hand, inside-out;
        // the specific node classes are required here.
        String rname = combinedGrammar.getStringLiteralLexerRuleName(lit);
        TerminalAST slit = new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, lit));
        AltAST alt = new AltAST(ANTLRParser.ALT);
        alt.addChild(slit);
        BlockAST blk = new BlockAST(ANTLRParser.BLOCK);
        blk.addChild(alt);
        GrammarAST litRule = new RuleAST(ANTLRParser.RULE);
        litRule.addChild(new TerminalAST(new CommonToken(ANTLRParser.TOKEN_REF, rname)));
        litRule.addChild(blk);

        // The next literal is placed right after this one.
        lexerRulesRoot.insertChild(insertIndex++, litRule);
        // Reset child indexes and make sure litRule knows its parent.
        lexerRulesRoot.freshenParentAndChildIndexes();
    }

    // TODO: take out after stable if slow
    lexerAST.sanityCheckParentAndChildIndexes();
    combinedAST.sanityCheckParentAndChildIndexes();
    combinedGrammar.tool.log("grammar", "after extract implicit lexer =" + combinedAST.toStringTree());
    combinedGrammar.tool.log("grammar", "lexer =" + lexerAST.toStringTree());

    return lexerRulesRoot.getChildCount() == 0 ? null : lexerAST;
}
Also used : RuleAST(org.antlr.v4.tool.ast.RuleAST) GrammarRootAST(org.antlr.v4.tool.ast.GrammarRootAST) GrammarAST(org.antlr.v4.tool.ast.GrammarAST) ArrayList(java.util.ArrayList) BlockAST(org.antlr.v4.tool.ast.BlockAST) AltAST(org.antlr.v4.tool.ast.AltAST) TerminalAST(org.antlr.v4.tool.ast.TerminalAST) GrammarASTAdaptor(org.antlr.v4.parse.GrammarASTAdaptor) CommonToken(org.antlr.runtime.CommonToken) GrammarASTWithOptions(org.antlr.v4.tool.ast.GrammarASTWithOptions) Pair(org.antlr.v4.runtime.misc.Pair)

Example 12 with TerminalAST

use of org.antlr.v4.tool.ast.TerminalAST in project antlr4 by antlr.

the class GrammarASTAdaptor method create.

/**
 * Creates a node for the given token type and text, and makes sure even
 * imaginary nodes know the input stream.
 *
 * RULE tokens become {@code RuleAST} nodes and STRING_LITERAL tokens become
 * {@code TerminalAST} nodes; every other type is delegated to the superclass.
 *
 * @param tokenType the token type of the node to create
 * @param text the token text
 * @return the new node, with its token's input stream set to this adaptor's {@code input}
 */
@Override
public Object create(int tokenType, String text) {
    GrammarAST node;
    boolean isRule = tokenType == ANTLRParser.RULE;
    if (isRule || tokenType == ANTLRParser.STRING_LITERAL) {
        // RULE: needed by TreeWizard to make RULE tree.
        // STRING_LITERAL: implicit lexer construction done with wizard needs this
        // node type, whereas grammar ANTLRParser.g can use token option to spec node type.
        CommonToken tok = new CommonToken(tokenType, text);
        node = isRule ? new RuleAST(tok) : new TerminalAST(tok);
    } else {
        node = (GrammarAST) super.create(tokenType, text);
    }
    node.token.setInputStream(input);
    return node;
}
Also used : RuleAST(org.antlr.v4.tool.ast.RuleAST) GrammarAST(org.antlr.v4.tool.ast.GrammarAST) CommonToken(org.antlr.runtime.CommonToken) TerminalAST(org.antlr.v4.tool.ast.TerminalAST)

Example 13 with TerminalAST

use of org.antlr.v4.tool.ast.TerminalAST in project antlr4 by antlr.

the class BasicSemanticChecks method checkElementOptions.

/**
 * Checks that an option is appropriate for {@code elem}; the parent of
 * {@code ID} is ELEMENT_OPTIONS.
 *
 * When assoc checking is enabled and the option is named "assoc" on anything
 * other than an ALT, an UNRECOGNIZED_ASSOC_OPTION error is reported first.
 * Rule references and terminals are then delegated to their dedicated checks.
 * For a SEMPRED, an option that has a value and is not in
 * {@code Grammar.semPredOptions} is reported as ILLEGAL_OPTION.
 *
 * @param elem the element carrying the option
 * @param ID the option name node
 * @param valueAST the option value node, may be null
 * @return the result of the delegated check for rule references and
 *         terminals; false for every other element type
 */
boolean checkElementOptions(GrammarASTWithOptions elem, GrammarAST ID, GrammarAST valueAST) {
    boolean isAssocOption = checkAssocElementOption && ID != null && "assoc".equals(ID.getText());
    if (isAssocOption && elem.getType() != ANTLRParser.ALT) {
        Token assocToken = ID.token;
        String assocFileName = assocToken.getInputStream().getSourceName();
        g.tool.errMgr.grammarError(ErrorType.UNRECOGNIZED_ASSOC_OPTION, assocFileName, assocToken, currentRuleName);
    }

    if (elem instanceof RuleRefAST) {
        return checkRuleRefOptions((RuleRefAST) elem, ID, valueAST);
    }
    if (elem instanceof TerminalAST) {
        return checkTokenOptions((TerminalAST) elem, ID, valueAST);
    }
    // Only semantic predicates get further checking; actions and all other
    // element types yield false.
    if (elem.getType() != ANTLRParser.SEMPRED) {
        return false;
    }

    Token optionID = ID.token;
    String fileName = optionID.getInputStream().getSourceName();
    boolean illegalOption = valueAST != null && !Grammar.semPredOptions.contains(optionID.getText());
    if (illegalOption) {
        g.tool.errMgr.grammarError(ErrorType.ILLEGAL_OPTION, fileName, optionID, optionID.getText());
    }
    return false;
}
Also used : RuleRefAST(org.antlr.v4.tool.ast.RuleRefAST) Token(org.antlr.runtime.Token) TerminalAST(org.antlr.v4.tool.ast.TerminalAST)

Example 14 with TerminalAST

use of org.antlr.v4.tool.ast.TerminalAST in project antlr4 by tunnelvisionlabs.

the class Tool method checkForRuleIssues.

/**
 * Checks a grammar for rule-level problems right after AST construction:
 * multiple definitions of the same rule, references to undefined rules, and
 * parser rule references inside lexer rules. Doing this early avoids
 * exceptions later.
 *
 * @param g the grammar to check
 * @return true if a rule is defined more than once, or if an undefined rule
 *         reference or a parser rule reference in a lexer rule was found
 */
public boolean checkForRuleIssues(final Grammar g) {
    // Gather every RULE node: those under RULES plus those under each MODE.
    GrammarAST RULES = (GrammarAST) g.ast.getFirstChildWithType(ANTLRParser.RULES);
    List<GrammarAST> rules = new ArrayList<GrammarAST>(RULES.getAllChildrenWithType(ANTLRParser.RULE));
    for (GrammarAST mode : g.ast.getAllChildrenWithType(ANTLRParser.MODE)) {
        rules.addAll(mode.getAllChildrenWithType(ANTLRParser.RULE));
    }

    // Pass 1: redefined rules. The first definition of a name wins.
    boolean redefinition = false;
    final Map<String, RuleAST> ruleToAST = new HashMap<String, RuleAST>();
    for (GrammarAST node : rules) {
        RuleAST ruleAST = (RuleAST) node;
        GrammarAST ID = (GrammarAST) ruleAST.getChild(0);
        String ruleName = ID.getText();
        RuleAST prev = ruleToAST.get(ruleName);
        if (prev == null) {
            ruleToAST.put(ruleName, ruleAST);
        } else {
            GrammarAST prevChild = (GrammarAST) prev.getChild(0);
            g.tool.errMgr.grammarError(ErrorType.RULE_REDEFINITION, g.fileName, ID.getToken(), ruleName, prevChild.getToken().getLine());
            redefinition = true;
        }
    }

    // Pass 2: undefined rules and parser rule refs inside lexer rules.
    class UndefChecker extends GrammarTreeVisitor {

        public boolean badref = false;

        @Override
        public void tokenRef(TerminalAST ref) {
            // EOF is a special predefined reference; token refs are only
            // treated as rule refs when the grammar is a lexer.
            boolean isEof = "EOF".equals(ref.getText());
            if (!isEof && g.isLexer()) {
                ruleRef(ref, null);
            }
        }

        @Override
        public void ruleRef(GrammarAST ref, ActionAST arg) {
            String refName = ref.getText();
            String fileName = ref.getToken().getInputStream().getSourceName();
            boolean parserRuleInLexerRule = Character.isUpperCase(currentRuleName.charAt(0))
                    && Character.isLowerCase(refName.charAt(0));
            if (parserRuleInLexerRule) {
                badref = true;
                errMgr.grammarError(ErrorType.PARSER_RULE_REF_IN_LEXER_RULE, fileName, ref.getToken(), refName, currentRuleName);
                return;
            }
            if (ruleToAST.get(refName) == null) {
                badref = true;
                errMgr.grammarError(ErrorType.UNDEFINED_RULE_REF, fileName, ref.token, refName);
            }
        }

        @Override
        public ErrorManager getErrorManager() {
            return errMgr;
        }
    }

    UndefChecker chk = new UndefChecker();
    chk.visitGrammar(g.ast);
    return redefinition || chk.badref;
}
Also used : RuleAST(org.antlr.v4.tool.ast.RuleAST) HashMap(java.util.HashMap) GrammarTreeVisitor(org.antlr.v4.parse.GrammarTreeVisitor) GrammarAST(org.antlr.v4.tool.ast.GrammarAST) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) TerminalAST(org.antlr.v4.tool.ast.TerminalAST) ActionAST(org.antlr.v4.tool.ast.ActionAST)

Example 15 with TerminalAST

use of org.antlr.v4.tool.ast.TerminalAST in project antlr4 by tunnelvisionlabs.

the class GrammarTransformPipeline method extractImplicitLexer.

/**
 * Builds the AST of the implicit lexer grammar for a combined grammar whose
 * AST looks like:
 *
 *  (COMBINED_GRAMMAR A
 *      (tokens { X (= Y 'y'))
 *      (OPTIONS (= x 'y'))
 *      (@ members {foo})
 *      (@ lexer header {package jj;})
 *      (RULES (RULE .+)))
 *
 *  The work is done in this order:
 *   1. create a new LEXER root whose ID is the combined grammar name + "Lexer";
 *   2. copy every option listed in Grammar.lexerOptions and not listed in
 *      Grammar.doNotCopyOptionsToLexer;
 *   3. duplicate every named action into the lexer, and delete from the
 *      combined tree those whose first child text is "lexer";
 *   4. duplicate every rule whose name satisfies Grammar.isTokenName into the
 *      lexer and delete it from the combined tree;
 *   5. for each string literal of the combined grammar that is not already
 *      aliased by a lexer rule, insert a synthesized rule at the front of the
 *      lexer rules (literals keep their iteration order).
 *
 *  Side-effects: children are removed from the GRAMMAR and RULES nodes of the
 *                combined AST.  Everything that is cut out is duplicated
 *                before being added to the lexer tree, so no node ends up
 *                with two parents.
 *
 * @param combinedGrammar the combined grammar whose AST is split
 * @return the lexer AST; the partially built lexer AST if the combined AST
 *         has no RULES child; or null if the lexer ends up with no rules
 */
public GrammarRootAST extractImplicitLexer(Grammar combinedGrammar) {
    GrammarRootAST combinedAST = combinedGrammar.ast;
    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.token.getInputStream());
    // Snapshot the top-level children now; some of them are deleted further down.
    GrammarAST[] elements = combinedAST.getChildren().toArray(new GrammarAST[0]);

    // --- grammar root and ID ---
    String lexerName = combinedAST.getChild(0).getText() + "Lexer";
    CommonToken rootToken = new CommonToken(ANTLRParser.GRAMMAR, "LEXER_GRAMMAR");
    GrammarRootAST lexerAST = new GrammarRootAST(rootToken, combinedGrammar.ast.tokenStream);
    lexerAST.grammarType = ANTLRParser.LEXER;
    lexerAST.token.setInputStream(combinedAST.token.getInputStream());
    lexerAST.addChild(adaptor.create(ANTLRParser.ID, lexerName));

    // --- options ---
    GrammarAST optionsRoot = (GrammarAST) combinedAST.getFirstChildWithType(ANTLRParser.OPTIONS);
    boolean hasOptions = optionsRoot != null && optionsRoot.getChildCount() != 0;
    if (hasOptions) {
        GrammarAST lexerOptionsRoot = adaptor.dupNode(optionsRoot);
        lexerAST.addChild(lexerOptionsRoot);
        for (GrammarAST option : optionsRoot.getChildren().toArray(new GrammarAST[0])) {
            String optionName = option.getChild(0).getText();
            if (!Grammar.lexerOptions.contains(optionName)) {
                continue;
            }
            if (Grammar.doNotCopyOptionsToLexer.contains(optionName)) {
                continue;
            }
            GrammarAST optionCopy = (GrammarAST) adaptor.dupTree(option);
            lexerOptionsRoot.addChild(optionCopy);
            lexerAST.setOption(optionName, (GrammarAST) optionCopy.getChild(1));
        }
    }

    // --- named actions: all are copied, only lexer-scoped ones are removed from the parser ---
    List<GrammarAST> actionsWeMoved = new ArrayList<GrammarAST>();
    for (GrammarAST element : elements) {
        if (element.getType() != ANTLRParser.AT) {
            continue;
        }
        lexerAST.addChild((Tree) adaptor.dupTree(element));
        if (element.getChild(0).getText().equals("lexer")) {
            actionsWeMoved.add(element);
        }
    }
    for (GrammarAST moved : actionsWeMoved) {
        combinedAST.deleteChild(moved);
    }

    GrammarAST combinedRulesRoot = (GrammarAST) combinedAST.getFirstChildWithType(ANTLRParser.RULES);
    if (combinedRulesRoot == null) {
        return lexerAST;
    }

    // --- lexer rules ---
    GrammarAST lexerRulesRoot = adaptor.create(ANTLRParser.RULES, "RULES");
    lexerAST.addChild(lexerRulesRoot);
    List<GrammarAST> rulesWeMoved = new ArrayList<GrammarAST>();
    GrammarASTWithOptions[] rules = combinedRulesRoot.getChildCount() > 0
            ? combinedRulesRoot.getChildren().toArray(new GrammarASTWithOptions[0])
            : new GrammarASTWithOptions[0];
    for (GrammarASTWithOptions rule : rules) {
        String ruleName = rule.getChild(0).getText();
        if (!Grammar.isTokenName(ruleName)) {
            continue;
        }
        lexerRulesRoot.addChild((Tree) adaptor.dupTree(rule));
        rulesWeMoved.add(rule);
    }
    for (GrammarAST moved : rulesWeMoved) {
        combinedRulesRoot.deleteChild(moved);
    }

    // Literals such as 'if' that already have a rule like IF : 'if' ; must not get a second token.
    List<Tuple2<GrammarAST, GrammarAST>> litAliases = Grammar.getStringLiteralAliasesFromLexerRules(lexerAST);
    Set<String> stringLiterals = combinedGrammar.getStringLiterals();

    // Literal rules go in front of the moved rules: they are keywords and
    // ambiguities must resolve to them.
    int insertIndex = 0;
    for (String lit : stringLiterals) {
        boolean alreadyAliased = false;
        if (litAliases != null) {
            for (Tuple2<GrammarAST, GrammarAST> alias : litAliases) {
                GrammarAST aliasedLiteral = alias.getItem2();
                if (lit.equals(aliasedLiteral.getText())) {
                    alreadyAliased = true;
                    break;
                }
            }
        }
        if (alreadyAliased) {
            continue;
        }

        // Build (RULE <uniquename> (BLOCK (ALT <lit>))) by hand, inside-out;
        // the specific node classes are required here.
        String rname = combinedGrammar.getStringLiteralLexerRuleName(lit);
        TerminalAST slit = new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, lit));
        AltAST alt = new AltAST(ANTLRParser.ALT);
        alt.addChild(slit);
        BlockAST blk = new BlockAST(ANTLRParser.BLOCK);
        blk.addChild(alt);
        GrammarAST litRule = new RuleAST(ANTLRParser.RULE);
        litRule.addChild(new TerminalAST(new CommonToken(ANTLRParser.TOKEN_REF, rname)));
        litRule.addChild(blk);

        // The next literal is placed right after this one.
        lexerRulesRoot.insertChild(insertIndex++, litRule);
        // Reset child indexes and make sure litRule knows its parent.
        lexerRulesRoot.freshenParentAndChildIndexes();
    }

    // TODO: take out after stable if slow
    lexerAST.sanityCheckParentAndChildIndexes();
    combinedAST.sanityCheckParentAndChildIndexes();
    combinedGrammar.tool.log("grammar", "after extract implicit lexer =" + combinedAST.toStringTree());
    combinedGrammar.tool.log("grammar", "lexer =" + lexerAST.toStringTree());

    return lexerRulesRoot.getChildCount() == 0 ? null : lexerAST;
}
Also used : RuleAST(org.antlr.v4.tool.ast.RuleAST) GrammarRootAST(org.antlr.v4.tool.ast.GrammarRootAST) GrammarAST(org.antlr.v4.tool.ast.GrammarAST) ArrayList(java.util.ArrayList) BlockAST(org.antlr.v4.tool.ast.BlockAST) AltAST(org.antlr.v4.tool.ast.AltAST) TerminalAST(org.antlr.v4.tool.ast.TerminalAST) Tuple2(org.antlr.v4.runtime.misc.Tuple2) GrammarASTAdaptor(org.antlr.v4.parse.GrammarASTAdaptor) CommonToken(org.antlr.runtime.CommonToken) GrammarASTWithOptions(org.antlr.v4.tool.ast.GrammarASTWithOptions)

Aggregations

TerminalAST (org.antlr.v4.tool.ast.TerminalAST)12 GrammarAST (org.antlr.v4.tool.ast.GrammarAST)8 ArrayList (java.util.ArrayList)6 ATNState (org.antlr.v4.runtime.atn.ATNState)6 RuleAST (org.antlr.v4.tool.ast.RuleAST)6 CommonToken (org.antlr.runtime.CommonToken)4 GrammarTreeVisitor (org.antlr.v4.parse.GrammarTreeVisitor)4 AtomTransition (org.antlr.v4.runtime.atn.AtomTransition)4 AltAST (org.antlr.v4.tool.ast.AltAST)4 HashMap (java.util.HashMap)2 LinkedHashSet (java.util.LinkedHashSet)2 CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)2 Token (org.antlr.runtime.Token)2 CommonTree (org.antlr.runtime.tree.CommonTree)2 Tree (org.antlr.runtime.tree.Tree)2 AddToLabelList (org.antlr.v4.codegen.model.AddToLabelList)2 LeftRecursiveRuleFunction (org.antlr.v4.codegen.model.LeftRecursiveRuleFunction)2 MatchToken (org.antlr.v4.codegen.model.MatchToken)2 RuleFunction (org.antlr.v4.codegen.model.RuleFunction)2 Decl (org.antlr.v4.codegen.model.decl.Decl)2