Search in sources :

Example 21 with Token

use of org.antlr.v4.runtime.Token in project antlr4 by antlr.

the class ParserRuleContext method addChild.

/** Add a child to this node based upon matchedToken. It
	 *  creates a TerminalNodeImpl rather than using
	 *  {@link Parser#createTerminalNode(ParserRuleContext, Token)}. I'm leaving this
     *  in for compatibility but the parser doesn't use this anymore.
	 */
@Deprecated
public TerminalNode addChild(Token matchedToken) {
    TerminalNodeImpl t = new TerminalNodeImpl(matchedToken);
    addAnyChild(t);
    t.setParent(this);
    return t;
}
Also used : TerminalNodeImpl(org.antlr.v4.runtime.tree.TerminalNodeImpl)

Example 22 with Token

use of org.antlr.v4.runtime.Token in project antlr4 by antlr.

the class ATN method getExpectedTokens.

/**
	 * Computes the set of input symbols which could follow ATN state number
	 * {@code stateNumber} in the specified full {@code context}. This method
	 * considers the complete parser context, but does not evaluate semantic
	 * predicates (i.e. all predicates encountered during the calculation are
	 * assumed true). If a path in the ATN exists from the starting state to the
	 * {@link RuleStopState} of the outermost context without matching any
	 * symbols, {@link Token#EOF} is added to the returned set.
	 *
	 * <p>If {@code context} is {@code null}, it is treated as {@link ParserRuleContext#EMPTY}.</p>
	 *
	 * Note that this does NOT give you the set of all tokens that could
	 * appear at a given token position in the input phrase.  In other words,
	 * it does not answer:
	 *
	 *   "Given a specific partial input phrase, return the set of all tokens
	 *    that can follow the last token in the input phrase."
	 *
	 * The big difference is that with just the input, the parser could
	 * land right in the middle of a lookahead decision. Getting
     * all *possible* tokens given a partial input stream is a separate
     * computation. See https://github.com/antlr/antlr4/issues/1428
	 *
	 * For this function, we are specifying an ATN state and call stack to compute
	 * what token(s) can come next and specifically: outside of a lookahead decision.
	 * That is what you want for error reporting and recovery upon parse error.
	 *
	 * @param stateNumber the ATN state number
	 * @param context the full parse context
	 * @return The set of potentially valid input symbols which could follow the
	 * specified state in the specified context.
	 * @throws IllegalArgumentException if the ATN does not contain a state with
	 * number {@code stateNumber}
	 */
public IntervalSet getExpectedTokens(int stateNumber, RuleContext context) {
    if (stateNumber < 0 || stateNumber >= states.size()) {
        throw new IllegalArgumentException("Invalid state number.");
    }
    RuleContext ctx = context;
    ATNState s = states.get(stateNumber);
    IntervalSet following = nextTokens(s);
    if (!following.contains(Token.EPSILON)) {
        return following;
    }
    IntervalSet expected = new IntervalSet();
    expected.addAll(following);
    expected.remove(Token.EPSILON);
    while (ctx != null && ctx.invokingState >= 0 && following.contains(Token.EPSILON)) {
        ATNState invokingState = states.get(ctx.invokingState);
        RuleTransition rt = (RuleTransition) invokingState.transition(0);
        following = nextTokens(rt.followState);
        expected.addAll(following);
        expected.remove(Token.EPSILON);
        ctx = ctx.parent;
    }
    if (following.contains(Token.EPSILON)) {
        expected.add(Token.EOF);
    }
    return expected;
}
Also used : ParserRuleContext(org.antlr.v4.runtime.ParserRuleContext) RuleContext(org.antlr.v4.runtime.RuleContext) IntervalSet(org.antlr.v4.runtime.misc.IntervalSet)

Example 23 with Token

use of org.antlr.v4.runtime.Token in project antlr4 by antlr.

the class Grammar method getStateToGrammarRegionMap.

public static Map<Integer, Interval> getStateToGrammarRegionMap(GrammarRootAST ast, IntervalSet grammarTokenTypes) {
    Map<Integer, Interval> stateToGrammarRegionMap = new HashMap<Integer, Interval>();
    if (ast == null)
        return stateToGrammarRegionMap;
    List<GrammarAST> nodes = ast.getNodesWithType(grammarTokenTypes);
    for (GrammarAST n : nodes) {
        if (n.atnState != null) {
            Interval tokenRegion = Interval.of(n.getTokenStartIndex(), n.getTokenStopIndex());
            org.antlr.runtime.tree.Tree ruleNode = null;
            // RULEs, BLOCKs of transformed recursive rules point to original token interval
            switch(n.getType()) {
                case ANTLRParser.RULE:
                    ruleNode = n;
                    break;
                case ANTLRParser.BLOCK:
                case ANTLRParser.CLOSURE:
                    ruleNode = n.getAncestor(ANTLRParser.RULE);
                    break;
            }
            if (ruleNode instanceof RuleAST) {
                String ruleName = ((RuleAST) ruleNode).getRuleName();
                Rule r = ast.g.getRule(ruleName);
                if (r instanceof LeftRecursiveRule) {
                    RuleAST originalAST = ((LeftRecursiveRule) r).getOriginalAST();
                    tokenRegion = Interval.of(originalAST.getTokenStartIndex(), originalAST.getTokenStopIndex());
                }
            }
            stateToGrammarRegionMap.put(n.atnState.stateNumber, tokenRegion);
        }
    }
    return stateToGrammarRegionMap;
}
Also used : RuleAST(org.antlr.v4.tool.ast.RuleAST) OrderedHashMap(org.antlr.v4.misc.OrderedHashMap) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) GrammarAST(org.antlr.v4.tool.ast.GrammarAST) Interval(org.antlr.v4.runtime.misc.Interval)

Example 24 with Token

use of org.antlr.v4.runtime.Token in project antlr4 by antlr.

the class GrammarParserInterpreter method getAllPossibleParseTrees.

/** Given an ambiguous parse information, return the list of ambiguous parse trees.
	 *  An ambiguity occurs when a specific token sequence can be recognized
	 *  in more than one way by the grammar. These ambiguities are detected only
	 *  at decision points.
	 *
	 *  The list of trees includes the actual interpretation (that for
	 *  the minimum alternative number) and all ambiguous alternatives.
	 *  The actual interpretation is always first.
	 *
	 *  This method reuses the same physical input token stream used to
	 *  detect the ambiguity by the original parser in the first place.
	 *  This method resets/seeks within but does not alter originalParser.
	 *
	 *  The trees are rooted at the node whose start..stop token indices
	 *  include the start and stop indices of this ambiguity event. That is,
	 *  the trees returned will always include the complete ambiguous subphrase
	 *  identified by the ambiguity event.  The subtrees returned will
	 *  also always contain the node associated with the overridden decision.
	 *
	 *  Be aware that this method does NOT notify error or parse listeners as
	 *  it would trigger duplicate or otherwise unwanted events.
	 *
	 *  This uses a temporary ParserATNSimulator and a ParserInterpreter
	 *  so we don't mess up any statistics, event lists, etc...
	 *  The parse tree constructed while identifying/making ambiguityInfo is
	 *  not affected by this method as it creates a new parser interp to
	 *  get the ambiguous interpretations.
	 *
	 *  Nodes in the returned ambig trees are independent of the original parse
	 *  tree (constructed while identifying/creating ambiguityInfo).
	 *
	 *  @since 4.5.1
	 *
	 *  @param g              From which grammar should we drive alternative
	 *                        numbers and alternative labels.
	 *
	 *  @param originalParser The parser used to create ambiguityInfo; it
	 *                        is not modified by this routine and can be either
	 *                        a generated or interpreted parser. It's token
	 *                        stream *is* reset/seek()'d.
	 *  @param tokens		  A stream of tokens to use with the temporary parser.
	 *                        This will often be just the token stream within the
	 *                        original parser but here it is for flexibility.
	 *
	 *  @param decision       Which decision to try different alternatives for.
	 *
	 *  @param alts           The set of alternatives to try while re-parsing.
	 *
	 *  @param startIndex	  The index of the first token of the ambiguous
	 *                        input or other input of interest.
	 *
	 *  @param stopIndex      The index of the last token of the ambiguous input.
	 *                        The start and stop indexes are used primarily to
	 *                        identify how much of the resulting parse tree
	 *                        to return.
	 *
	 *  @param startRuleIndex The start rule for the entire grammar, not
	 *                        the ambiguous decision. We re-parse the entire input
	 *                        and so we need the original start rule.
	 *
	 *  @return               The list of all possible interpretations of
	 *                        the input for the decision in ambiguityInfo.
	 *                        The actual interpretation chosen by the parser
	 *                        is always given first because this method
	 *                        retests the input in alternative order and
	 *                        ANTLR always resolves ambiguities by choosing
	 *                        the first alternative that matches the input.
	 *                        The subtree returned
	 *
	 *  @throws RecognitionException Throws upon syntax error while matching
	 *                               ambig input.
	 */
public static List<ParserRuleContext> getAllPossibleParseTrees(Grammar g, Parser originalParser, TokenStream tokens, int decision, BitSet alts, int startIndex, int stopIndex, int startRuleIndex) throws RecognitionException {
    List<ParserRuleContext> trees = new ArrayList<ParserRuleContext>();
    // Create a new parser interpreter to parse the ambiguous subphrase
    ParserInterpreter parser = deriveTempParserInterpreter(g, originalParser, tokens);
    if (stopIndex >= (tokens.size() - 1)) {
        // if we are pointing at EOF token
        // EOF is not in tree, so must be 1 less than last non-EOF token
        stopIndex = tokens.size() - 2;
    }
    // get ambig trees
    int alt = alts.nextSetBit(0);
    while (alt >= 0) {
        // re-parse entire input for all ambiguous alternatives
        // (don't have to do first as it's been parsed, but do again for simplicity
        //  using this temp parser.)
        parser.reset();
        parser.addDecisionOverride(decision, startIndex, alt);
        ParserRuleContext t = parser.parse(startRuleIndex);
        GrammarInterpreterRuleContext ambigSubTree = (GrammarInterpreterRuleContext) Trees.getRootOfSubtreeEnclosingRegion(t, startIndex, stopIndex);
        // Use higher of overridden decision tree or tree enclosing all tokens
        if (Trees.isAncestorOf(parser.getOverrideDecisionRoot(), ambigSubTree)) {
            ambigSubTree = (GrammarInterpreterRuleContext) parser.getOverrideDecisionRoot();
        }
        trees.add(ambigSubTree);
        alt = alts.nextSetBit(alt + 1);
    }
    return trees;
}
Also used : ParserRuleContext(org.antlr.v4.runtime.ParserRuleContext) ParserInterpreter(org.antlr.v4.runtime.ParserInterpreter) ArrayList(java.util.ArrayList)

Example 25 with Token

use of org.antlr.v4.runtime.Token in project antlr4 by antlr.

the class GrammarParserInterpreter method getLookaheadParseTrees.

/** Return a list of parse trees, one for each alternative in a decision
	 *  given the same input.
	 *
	 *  Very similar to {@link #getAllPossibleParseTrees} except
	 *  that it re-parses the input for every alternative in a decision,
	 *  not just the ambiguous ones (there is no alts parameter here).
	 *  This method also tries to reduce the size of the parse trees
	 *  by stripping away children of the tree that are completely out of range
	 *  of startIndex..stopIndex. Also, because errors are expected, we
	 *  use a specialized error handler that more or less bails out
	 *  but that also consumes the first erroneous token at least. This
	 *  ensures that an error node will be in the parse tree for display.
	 *
	 *  NOTES:
    // we must parse the entire input now with decision overrides
	// we cannot parse a subset because it could be that a decision
	// above our decision of interest needs to read way past
	// lookaheadInfo.stopIndex. It seems like there is no escaping
	// the use of a full and complete token stream if we are
	// resetting to token index 0 and re-parsing from the start symbol.
	// It's not easy to restart parsing somewhere in the middle like a
	// continuation because our call stack does not match the
	// tree stack because of left recursive rule rewriting. grrrr!
	 *
	 * @since 4.5.1
	 */
public static List<ParserRuleContext> getLookaheadParseTrees(Grammar g, ParserInterpreter originalParser, TokenStream tokens, int startRuleIndex, int decision, int startIndex, int stopIndex) {
    List<ParserRuleContext> trees = new ArrayList<ParserRuleContext>();
    // Create a new parser interpreter to parse the ambiguous subphrase
    ParserInterpreter parser = deriveTempParserInterpreter(g, originalParser, tokens);
    DecisionState decisionState = originalParser.getATN().decisionToState.get(decision);
    for (int alt = 1; alt <= decisionState.getTransitions().length; alt++) {
        // re-parse entire input for all ambiguous alternatives
        // (don't have to do first as it's been parsed, but do again for simplicity
        //  using this temp parser.)
        GrammarParserInterpreter.BailButConsumeErrorStrategy errorHandler = new GrammarParserInterpreter.BailButConsumeErrorStrategy();
        parser.setErrorHandler(errorHandler);
        parser.reset();
        parser.addDecisionOverride(decision, startIndex, alt);
        ParserRuleContext tt = parser.parse(startRuleIndex);
        int stopTreeAt = stopIndex;
        if (errorHandler.firstErrorTokenIndex >= 0) {
            // cut off rest at first error
            stopTreeAt = errorHandler.firstErrorTokenIndex;
        }
        Interval overallRange = tt.getSourceInterval();
        if (stopTreeAt > overallRange.b) {
            // If we try to look beyond range of tree, stopTreeAt must be EOF
            // for which there is no EOF ref in grammar. That means tree
            // will not have node for stopTreeAt; limit to overallRange.b
            stopTreeAt = overallRange.b;
        }
        ParserRuleContext subtree = Trees.getRootOfSubtreeEnclosingRegion(tt, startIndex, stopTreeAt);
        // Use higher of overridden decision tree or tree enclosing all tokens
        if (Trees.isAncestorOf(parser.getOverrideDecisionRoot(), subtree)) {
            subtree = parser.getOverrideDecisionRoot();
        }
        Trees.stripChildrenOutOfRange(subtree, parser.getOverrideDecisionRoot(), startIndex, stopTreeAt);
        trees.add(subtree);
    }
    return trees;
}
Also used : ParserRuleContext(org.antlr.v4.runtime.ParserRuleContext) ParserInterpreter(org.antlr.v4.runtime.ParserInterpreter) ArrayList(java.util.ArrayList) DecisionState(org.antlr.v4.runtime.atn.DecisionState) Interval(org.antlr.v4.runtime.misc.Interval)

Aggregations

Token (org.antlr.v4.runtime.Token)37 Test (org.junit.Test)26 GrammarAST (org.antlr.v4.tool.ast.GrammarAST)18 IntervalSet (org.antlr.v4.runtime.misc.IntervalSet)16 ArrayList (java.util.ArrayList)14 ANTLRInputStream (org.antlr.v4.runtime.ANTLRInputStream)12 Grammar (org.antlr.v4.tool.Grammar)12 LexerGrammar (org.antlr.v4.tool.LexerGrammar)12 Token (org.antlr.runtime.Token)10 CommonTokenStream (org.antlr.v4.runtime.CommonTokenStream)10 TerminalNode (org.antlr.v4.runtime.tree.TerminalNode)10 CharStream (org.antlr.v4.runtime.CharStream)9 CommonToken (org.antlr.v4.runtime.CommonToken)8 ParserRuleContext (org.antlr.v4.runtime.ParserRuleContext)8 ParseTree (org.antlr.v4.runtime.tree.ParseTree)8 Rule (org.antlr.v4.tool.Rule)8 LexerInterpreter (org.antlr.v4.runtime.LexerInterpreter)7 TokenSource (org.antlr.v4.runtime.TokenSource)7 StringReader (java.io.StringReader)6 BaseRuntimeTest (org.antlr.v4.test.runtime.BaseRuntimeTest)6