Search in sources :

Example 26 with Token

use of org.antlr.v4.runtime.Token in project antlr4 by antlr.

the class GrammarTransformPipeline method extractImplicitLexer.

/** Build lexer grammar from combined grammar that looks like:
	 *
	 *  (COMBINED_GRAMMAR A
	 *      (tokens { X (= Y 'y'))
	 *      (OPTIONS (= x 'y'))
	 *      (@ members {foo})
	 *      (@ lexer header {package jj;})
	 *      (RULES (RULE .+)))
	 *
	 *  Move rules and actions to new tree, don't dup. Split AST apart.
	 *  We'll have this Grammar share token symbols later; don't generate
	 *  tokenVocab or tokens{} section.  Copy over named actions.
	 *
	 *  Side-effects: it removes children from GRAMMAR & RULES nodes
	 *                in combined AST.  Anything cut out is dup'd before
	 *                adding to lexer to avoid "who's ur daddy" issues
	 */
public GrammarRootAST extractImplicitLexer(Grammar combinedGrammar) {
    GrammarRootAST combinedAST = combinedGrammar.ast;
    //tool.log("grammar", "before="+combinedAST.toStringTree());
    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.token.getInputStream());
    GrammarAST[] elements = combinedAST.getChildren().toArray(new GrammarAST[0]);
    // MAKE A GRAMMAR ROOT and ID
    String lexerName = combinedAST.getChild(0).getText() + "Lexer";
    GrammarRootAST lexerAST = new GrammarRootAST(new CommonToken(ANTLRParser.GRAMMAR, "LEXER_GRAMMAR"), combinedGrammar.ast.tokenStream);
    lexerAST.grammarType = ANTLRParser.LEXER;
    lexerAST.token.setInputStream(combinedAST.token.getInputStream());
    lexerAST.addChild((GrammarAST) adaptor.create(ANTLRParser.ID, lexerName));
    // COPY OPTIONS
    GrammarAST optionsRoot = (GrammarAST) combinedAST.getFirstChildWithType(ANTLRParser.OPTIONS);
    if (optionsRoot != null && optionsRoot.getChildCount() != 0) {
        GrammarAST lexerOptionsRoot = (GrammarAST) adaptor.dupNode(optionsRoot);
        lexerAST.addChild(lexerOptionsRoot);
        GrammarAST[] options = optionsRoot.getChildren().toArray(new GrammarAST[0]);
        for (GrammarAST o : options) {
            String optionName = o.getChild(0).getText();
            if (Grammar.lexerOptions.contains(optionName) && !Grammar.doNotCopyOptionsToLexer.contains(optionName)) {
                GrammarAST optionTree = (GrammarAST) adaptor.dupTree(o);
                lexerOptionsRoot.addChild(optionTree);
                lexerAST.setOption(optionName, (GrammarAST) optionTree.getChild(1));
            }
        }
    }
    // COPY all named actions, but only move those with lexer:: scope
    List<GrammarAST> actionsWeMoved = new ArrayList<GrammarAST>();
    for (GrammarAST e : elements) {
        if (e.getType() == ANTLRParser.AT) {
            lexerAST.addChild((Tree) adaptor.dupTree(e));
            if (e.getChild(0).getText().equals("lexer")) {
                actionsWeMoved.add(e);
            }
        }
    }
    for (GrammarAST r : actionsWeMoved) {
        combinedAST.deleteChild(r);
    }
    GrammarAST combinedRulesRoot = (GrammarAST) combinedAST.getFirstChildWithType(ANTLRParser.RULES);
    if (combinedRulesRoot == null)
        return lexerAST;
    // MOVE lexer rules
    GrammarAST lexerRulesRoot = (GrammarAST) adaptor.create(ANTLRParser.RULES, "RULES");
    lexerAST.addChild(lexerRulesRoot);
    List<GrammarAST> rulesWeMoved = new ArrayList<GrammarAST>();
    GrammarASTWithOptions[] rules;
    if (combinedRulesRoot.getChildCount() > 0) {
        rules = combinedRulesRoot.getChildren().toArray(new GrammarASTWithOptions[0]);
    } else {
        rules = new GrammarASTWithOptions[0];
    }
    for (GrammarASTWithOptions r : rules) {
        String ruleName = r.getChild(0).getText();
        if (Grammar.isTokenName(ruleName)) {
            lexerRulesRoot.addChild((Tree) adaptor.dupTree(r));
            rulesWeMoved.add(r);
        }
    }
    for (GrammarAST r : rulesWeMoved) {
        combinedRulesRoot.deleteChild(r);
    }
    // Will track 'if' from IF : 'if' ; rules to avoid defining new token for 'if'
    List<Pair<GrammarAST, GrammarAST>> litAliases = Grammar.getStringLiteralAliasesFromLexerRules(lexerAST);
    Set<String> stringLiterals = combinedGrammar.getStringLiterals();
    // add strings from combined grammar (and imported grammars) into lexer
    // put them first as they are keywords; must resolve ambigs to these rules
    //		tool.log("grammar", "strings from parser: "+stringLiterals);
    int insertIndex = 0;
    nextLit: for (String lit : stringLiterals) {
        // if lexer already has a rule for literal, continue
        if (litAliases != null) {
            for (Pair<GrammarAST, GrammarAST> pair : litAliases) {
                GrammarAST litAST = pair.b;
                if (lit.equals(litAST.getText()))
                    continue nextLit;
            }
        }
        // create for each literal: (RULE <uniquename> (BLOCK (ALT <lit>))
        String rname = combinedGrammar.getStringLiteralLexerRuleName(lit);
        // can't use wizard; need special node types
        GrammarAST litRule = new RuleAST(ANTLRParser.RULE);
        BlockAST blk = new BlockAST(ANTLRParser.BLOCK);
        AltAST alt = new AltAST(ANTLRParser.ALT);
        TerminalAST slit = new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, lit));
        alt.addChild(slit);
        blk.addChild(alt);
        CommonToken idToken = new CommonToken(ANTLRParser.TOKEN_REF, rname);
        litRule.addChild(new TerminalAST(idToken));
        litRule.addChild(blk);
        lexerRulesRoot.insertChild(insertIndex, litRule);
        //			lexerRulesRoot.getChildren().add(0, litRule);
        // reset indexes and set litRule parent
        lexerRulesRoot.freshenParentAndChildIndexes();
        // next literal will be added after the one just added
        insertIndex++;
    }
    // TODO: take out after stable if slow
    lexerAST.sanityCheckParentAndChildIndexes();
    combinedAST.sanityCheckParentAndChildIndexes();
    //		tool.log("grammar", combinedAST.toTokenString());
    combinedGrammar.tool.log("grammar", "after extract implicit lexer =" + combinedAST.toStringTree());
    combinedGrammar.tool.log("grammar", "lexer =" + lexerAST.toStringTree());
    if (lexerRulesRoot.getChildCount() == 0)
        return null;
    return lexerAST;
}
Also used : RuleAST(org.antlr.v4.tool.ast.RuleAST) GrammarRootAST(org.antlr.v4.tool.ast.GrammarRootAST) GrammarAST(org.antlr.v4.tool.ast.GrammarAST) ArrayList(java.util.ArrayList) BlockAST(org.antlr.v4.tool.ast.BlockAST) AltAST(org.antlr.v4.tool.ast.AltAST) TerminalAST(org.antlr.v4.tool.ast.TerminalAST) GrammarASTAdaptor(org.antlr.v4.parse.GrammarASTAdaptor) CommonToken(org.antlr.runtime.CommonToken) GrammarASTWithOptions(org.antlr.v4.tool.ast.GrammarASTWithOptions) Pair(org.antlr.v4.runtime.misc.Pair)

Example 27 with Token

use of org.antlr.v4.runtime.Token in project antlr4 by antlr.

the class GrammarTransformPipeline method expandParameterizedLoops.

/** Find and replace
     *      ID*[','] with ID (',' ID)*
     *      ID+[','] with ID (',' ID)+
     *      (x {action} y)+[','] with x {action} y (',' x {action} y)+
     *
     *  Parameter must be a token.
     *  todo: do we want?
     */
public void expandParameterizedLoops(GrammarAST root) {
    TreeVisitor v = new TreeVisitor(new GrammarASTAdaptor());
    v.visit(root, new TreeVisitorAction() {

        @Override
        public Object pre(Object t) {
            if (((GrammarAST) t).getType() == 3) {
                return expandParameterizedLoop((GrammarAST) t);
            }
            return t;
        }

        @Override
        public Object post(Object t) {
            return t;
        }
    });
}
Also used : TreeVisitor(org.antlr.runtime.tree.TreeVisitor) GrammarAST(org.antlr.v4.tool.ast.GrammarAST) GrammarASTAdaptor(org.antlr.v4.parse.GrammarASTAdaptor) TreeVisitorAction(org.antlr.runtime.tree.TreeVisitorAction)

Example 28 with Token

use of org.antlr.v4.runtime.Token in project antlr4 by antlr.

the class TestBufferedTokenStream method testCompleteBufferAfterConsuming.

@Test
public void testCompleteBufferAfterConsuming() throws Exception {
    LexerGrammar g = new LexerGrammar("lexer grammar t;\n" + "ID : 'a'..'z'+;\n" + "INT : '0'..'9'+;\n" + "SEMI : ';';\n" + "ASSIGN : '=';\n" + "PLUS : '+';\n" + "MULT : '*';\n" + "WS : ' '+;\n");
    // Tokens: 012345678901234567
    // Input:  x = 3 * 0 + 2 * 0;
    CharStream input = new ANTLRInputStream("x = 3 * 0 + 2 * 0;");
    LexerInterpreter lexEngine = g.createLexerInterpreter(input);
    TokenStream tokens = createTokenStream(lexEngine);
    Token t = tokens.LT(1);
    while (t.getType() != Token.EOF) {
        tokens.consume();
        t = tokens.LT(1);
    }
    String result = tokens.getText();
    String expecting = "x = 3 * 0 + 2 * 0;";
    assertEquals(expecting, result);
}
Also used : LexerInterpreter(org.antlr.v4.runtime.LexerInterpreter) TokenStream(org.antlr.v4.runtime.TokenStream) BufferedTokenStream(org.antlr.v4.runtime.BufferedTokenStream) Token(org.antlr.v4.runtime.Token) LexerGrammar(org.antlr.v4.tool.LexerGrammar) CharStream(org.antlr.v4.runtime.CharStream) ANTLRInputStream(org.antlr.v4.runtime.ANTLRInputStream) Test(org.junit.Test)

Example 29 with Token

use of org.antlr.v4.runtime.Token in project antlr4 by antlr.

the class TestBufferedTokenStream method testLookback.

@Test
public void testLookback() throws Exception {
    LexerGrammar g = new LexerGrammar("lexer grammar t;\n" + "ID : 'a'..'z'+;\n" + "INT : '0'..'9'+;\n" + "SEMI : ';';\n" + "ASSIGN : '=';\n" + "PLUS : '+';\n" + "MULT : '*';\n" + "WS : ' '+;\n");
    // Tokens: 012345678901234567
    // Input:  x = 3 * 0 + 2 * 0;
    CharStream input = new ANTLRInputStream("x = 3 * 0 + 2 * 0;");
    LexerInterpreter lexEngine = g.createLexerInterpreter(input);
    TokenStream tokens = createTokenStream(lexEngine);
    // get x into buffer
    tokens.consume();
    Token t = tokens.LT(-1);
    assertEquals("x", t.getText());
    tokens.consume();
    // consume '='
    tokens.consume();
    t = tokens.LT(-3);
    assertEquals("x", t.getText());
    t = tokens.LT(-2);
    assertEquals(" ", t.getText());
    t = tokens.LT(-1);
    assertEquals("=", t.getText());
}
Also used : LexerInterpreter(org.antlr.v4.runtime.LexerInterpreter) TokenStream(org.antlr.v4.runtime.TokenStream) BufferedTokenStream(org.antlr.v4.runtime.BufferedTokenStream) Token(org.antlr.v4.runtime.Token) LexerGrammar(org.antlr.v4.tool.LexerGrammar) CharStream(org.antlr.v4.runtime.CharStream) ANTLRInputStream(org.antlr.v4.runtime.ANTLRInputStream) Test(org.junit.Test)

Example 30 with Token

use of org.antlr.v4.runtime.Token in project antlr4 by antlr.

the class TestCommonTokenStream method testSingleEOF.

@Test
public void testSingleEOF() throws Exception {
    TokenSource lexer = new TokenSource() {

        @Override
        public Token nextToken() {
            return new CommonToken(Token.EOF);
        }

        @Override
        public int getLine() {
            return 0;
        }

        @Override
        public int getCharPositionInLine() {
            return 0;
        }

        @Override
        public CharStream getInputStream() {
            return null;
        }

        @Override
        public String getSourceName() {
            return IntStream.UNKNOWN_SOURCE_NAME;
        }

        @Override
        public TokenFactory<?> getTokenFactory() {
            throw new UnsupportedOperationException("Not supported yet.");
        }

        @Override
        public void setTokenFactory(TokenFactory<?> factory) {
            throw new UnsupportedOperationException("Not supported yet.");
        }
    };
    CommonTokenStream tokens = new CommonTokenStream(lexer);
    tokens.fill();
    assertEquals(Token.EOF, tokens.LA(1));
    assertEquals(0, tokens.index());
    assertEquals(1, tokens.size());
}
Also used : CommonTokenStream(org.antlr.v4.runtime.CommonTokenStream) TokenSource(org.antlr.v4.runtime.TokenSource) TokenFactory(org.antlr.v4.runtime.TokenFactory) CommonToken(org.antlr.v4.runtime.CommonToken) Test(org.junit.Test)

Aggregations

Token (org.antlr.v4.runtime.Token)37 Test (org.junit.Test)26 GrammarAST (org.antlr.v4.tool.ast.GrammarAST)18 IntervalSet (org.antlr.v4.runtime.misc.IntervalSet)16 ArrayList (java.util.ArrayList)14 ANTLRInputStream (org.antlr.v4.runtime.ANTLRInputStream)12 Grammar (org.antlr.v4.tool.Grammar)12 LexerGrammar (org.antlr.v4.tool.LexerGrammar)12 Token (org.antlr.runtime.Token)10 CommonTokenStream (org.antlr.v4.runtime.CommonTokenStream)10 TerminalNode (org.antlr.v4.runtime.tree.TerminalNode)10 CharStream (org.antlr.v4.runtime.CharStream)9 CommonToken (org.antlr.v4.runtime.CommonToken)8 ParserRuleContext (org.antlr.v4.runtime.ParserRuleContext)8 ParseTree (org.antlr.v4.runtime.tree.ParseTree)8 Rule (org.antlr.v4.tool.Rule)8 LexerInterpreter (org.antlr.v4.runtime.LexerInterpreter)7 TokenSource (org.antlr.v4.runtime.TokenSource)7 StringReader (java.io.StringReader)6 BaseRuntimeTest (org.antlr.v4.test.runtime.BaseRuntimeTest)6