Search in sources :

Example 6 with Parser

use of org.antlr.v4.runtime.Parser in project antlr4 by antlr.

the class LL1Analyzer method LOOK.

/**
	 * Compute set of tokens that can follow {@code s} in the ATN in the
	 * specified {@code ctx}.
	 *
	 * <p>If {@code ctx} is {@code null} and the end of the rule containing
	 * {@code s} is reached, {@link Token#EPSILON} is added to the result set.
	 * If {@code ctx} is not {@code null} and the end of the outermost rule is
	 * reached, {@link Token#EOF} is added to the result set.</p>
	 *
	 * @param s the ATN state
	 * @param stopState the ATN state to stop at. This can be a
	 * {@link BlockEndState} to detect epsilon paths through a closure.
	 * @param ctx the complete parser context, or {@code null} if the context
	 * should be ignored
	 *
	 * @return The set of tokens that can follow {@code s} in the ATN in the
	 * specified {@code ctx}.
	 */
public IntervalSet LOOK(ATNState s, ATNState stopState, RuleContext ctx) {
    // Accumulator that _LOOK fills in; it is returned to the caller as-is.
    IntervalSet lookahead = new IntervalSet();

    // A null ctx means the caller wants the context ignored, so no
    // PredictionContext is built in that case.
    PredictionContext lookContext;
    if (ctx == null) {
        lookContext = null;
    } else {
        lookContext = PredictionContext.fromRuleContext(s.atn, ctx);
    }

    // ignore preds; get all lookahead
    final boolean seeThruPreds = true;

    // Fresh, empty bookkeeping collections are handed to the recursive worker
    // on every call. NOTE(review): the trailing 'true' is presumably the
    // "add EOF" switch of _LOOK -- confirm against its declaration.
    _LOOK(s, stopState, lookContext, lookahead, new HashSet<ATNConfig>(), new BitSet(), seeThruPreds, true);
    return lookahead;
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet) BitSet(java.util.BitSet)

Example 7 with Parser

use of org.antlr.v4.runtime.Parser in project antlr4 by antlr.

the class Trees method getRootOfSubtreeEnclosingRegion.

/** Find the smallest subtree of {@code t} that encloses the token index range
	 *  startTokenIndex..stopTokenIndex (both ends inclusive) using a postorder traversal.  Recursive depth-first-search.
	 *
	 *  @since 4.5.1
	 */
public static ParserRuleContext getRootOfSubtreeEnclosingRegion(ParseTree t,
        int startTokenIndex, // inclusive
        int stopTokenIndex) { // inclusive
    // Postorder: every child subtree is searched before t itself is tested,
    // so the first (deepest) enclosing context found is the one returned.
    final int childCount = t.getChildCount();
    for (int idx = 0; idx < childCount; idx++) {
        ParserRuleContext found =
                getRootOfSubtreeEnclosingRegion(t.getChild(idx), startTokenIndex, stopTokenIndex);
        if (found != null) {
            return found;
        }
    }

    // Only ParserRuleContext nodes are candidates for the result.
    if (!(t instanceof ParserRuleContext)) {
        return null;
    }
    ParserRuleContext candidate = (ParserRuleContext) t;

    // The requested range must not begin before this context's first token.
    if (startTokenIndex < candidate.getStart().getTokenIndex()) {
        return null;
    }

    // note: getStop()==null likely implies that we bailed out of parser and
    // there's nothing to the right, so the range is treated as contained.
    boolean endsInside = candidate.getStop() == null
            || stopTokenIndex <= candidate.getStop().getTokenIndex();
    return endsInside ? candidate : null;
}
Also used : ParserRuleContext(org.antlr.v4.runtime.ParserRuleContext)

Example 8 with Parser

use of org.antlr.v4.runtime.Parser in project antlr4 by antlr.

the class DefaultErrorStrategy method reportUnwantedToken.

/**
	 * This method is called to report a syntax error which requires the removal
	 * of a token from the input stream. At the time this method is called, the
	 * erroneous symbol is current {@code LT(1)} symbol and has not yet been
	 * removed from the input stream. When this method returns,
	 * {@code recognizer} is in error recovery mode.
	 *
	 * <p>This method is called when {@link #singleTokenDeletion} identifies
	 * single-token deletion as a viable recovery strategy for a mismatched
	 * input error.</p>
	 *
	 * <p>The default implementation simply returns if the handler is already in
	 * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
	 * enter error recovery mode, followed by calling
	 * {@link Parser#notifyErrorListeners}.</p>
	 *
	 * @param recognizer the parser instance
	 */
protected void reportUnwantedToken(Parser recognizer) {
    // Nothing is reported while already recovering from an earlier error.
    if (!inErrorRecoveryMode(recognizer)) {
        beginErrorCondition(recognizer);

        // The offending token is still the current one; it has not been consumed.
        Token unwanted = recognizer.getCurrentToken();
        String shownToken = getTokenErrorDisplay(unwanted);
        IntervalSet expected = getExpectedTokens(recognizer);

        StringBuilder text = new StringBuilder();
        text.append("extraneous input ").append(shownToken);
        text.append(" expecting ").append(expected.toString(recognizer.getVocabulary()));

        // No exception object accompanies this report, hence the null argument.
        recognizer.notifyErrorListeners(unwanted, text.toString(), null);
    }
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet)

Example 9 with Parser

use of org.antlr.v4.runtime.Parser in project antlr4 by antlr.

the class DefaultErrorStrategy method recover.

/**
	 * {@inheritDoc}
	 *
	 * <p>The default implementation resynchronizes the parser by consuming tokens
	 * until we find one in the resynchronization set--loosely the set of tokens
	 * that can follow the current rule.</p>
	 */
@Override
public void recover(Parser recognizer, RecognitionException e) {
    // Another error at the same token index and in a previously-visited ATN
    // state: LT(1) must already be in the recovery set, so the last attempt
    // consumed nothing.
    boolean stuckAtSameSpot = lastErrorIndex == recognizer.getInputStream().index()
            && lastErrorStates != null
            && lastErrorStates.contains(recognizer.getState());
    if (stuckAtSameSpot) {
        // Failsafe: consume a single token at least to prevent an infinite loop.
        recognizer.consume();
    }

    // Remember where and in which state this error happened so the check
    // above can recognise a repeat on the next call.
    lastErrorIndex = recognizer.getInputStream().index();
    if (lastErrorStates == null) {
        lastErrorStates = new IntervalSet();
    }
    lastErrorStates.add(recognizer.getState());

    // Resynchronize: skip input until a token from the recovery set shows up.
    consumeUntil(recognizer, getErrorRecoverySet(recognizer));
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet)

Example 10 with Parser

use of org.antlr.v4.runtime.Parser in project antlr4 by antlr.

the class DefaultErrorStrategy method getErrorRecoverySet.

/*  Compute the error recovery set for the current rule.  During
	 *  rule invocation, the parser pushes the set of tokens that can
	 *  follow that rule reference on the stack; this amounts to
	 *  computing FIRST of what follows the rule reference in the
	 *  enclosing rule. See LinearApproximator.FIRST().
	 *  This local follow set only includes tokens
	 *  from within the rule; i.e., the FIRST computation done by
	 *  ANTLR stops at the end of a rule.
	 *
	 *  EXAMPLE
	 *
	 *  When you find a "no viable alt exception", the input is not
	 *  consistent with any of the alternatives for rule r.  The best
	 *  thing to do is to consume tokens until you see something that
	 *  can legally follow a call to r *or* any rule that called r.
	 *  You don't want the exact set of viable next tokens because the
	 *  input might just be missing a token--you might consume the
	 *  rest of the input looking for one of the missing tokens.
	 *
	 *  Consider grammar:
	 *
	 *  a : '[' b ']'
	 *    | '(' b ')'
	 *    ;
	 *  b : c '^' INT ;
	 *  c : ID
	 *    | INT
	 *    ;
	 *
	 *  At each rule invocation, the set of tokens that could follow
	 *  that rule is pushed on a stack.  Here are the various
	 *  context-sensitive follow sets:
	 *
	 *  FOLLOW(b1_in_a) = FIRST(']') = ']'
	 *  FOLLOW(b2_in_a) = FIRST(')') = ')'
	 *  FOLLOW(c_in_b) = FIRST('^') = '^'
	 *
	 *  Upon erroneous input "[]", the call chain is
	 *
	 *  a -> b -> c
	 *
	 *  and, hence, the follow context stack is:
	 *
	 *  depth     follow set       start of rule execution
	 *    0         <EOF>                    a (from main())
	 *    1          ']'                     b
	 *    2          '^'                     c
	 *
	 *  Notice that ')' is not included, because b would have to have
	 *  been called from a different context in rule a for ')' to be
	 *  included.
	 *
	 *  For error recovery, we cannot consider FOLLOW(c)
	 *  (context-sensitive or otherwise).  We need the combined set of
	 *  all context-sensitive FOLLOW sets--the set of all tokens that
	 *  could follow any reference in the call chain.  We need to
	 *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
	 *  we resync'd to that token, we'd consume until EOF.  We need to
	 *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
	 *  In this case, for input "[]", LA(1) is ']' and in the set, so we would
	 *  not consume anything. After printing an error, rule c would
	 *  return normally.  Rule b would not find the required '^' though.
	 *  At this point, it gets a mismatched token error and throws an
	 *  exception (since LA(1) is not in the viable following token
	 *  set).  The rule exception handler tries to recover, but finds
	 *  the same recovery set and doesn't consume anything.  Rule b
	 *  exits normally returning to rule a.  Now it finds the ']' (and
	 *  with the successful match exits errorRecovery mode).
	 *
	 *  So, you can see that the parser walks up the call chain looking
	 *  for the token that was a member of the recovery set.
	 *
	 *  Errors are not generated in errorRecovery mode.
	 *
	 *  ANTLR's error recovery mechanism is based upon original ideas:
	 *
	 *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
	 *
	 *  and
	 *
	 *  "A note on error recovery in recursive descent parsers":
	 *  http://portal.acm.org/citation.cfm?id=947902.947905
	 *
	 *  Later, Josef Grosch had some good ideas:
	 *
	 *  "Efficient and Comfortable Error Recovery in Recursive Descent
	 *  Parsers":
	 *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
	 *
	 *  Like Grosch I implement context-sensitive FOLLOW sets that are combined
	 *  at run-time upon error to avoid overhead during parsing.
	 */
protected IntervalSet getErrorRecoverySet(Parser recognizer) {
    final ATN atn = recognizer.getInterpreter().atn;
    final IntervalSet recoverSet = new IntervalSet();

    // Climb the invocation chain starting at the current context. The walk
    // ends at a null context or at one whose invokingState is negative
    // (presumably the outermost rule, which nobody invoked -- confirm).
    for (RuleContext frame = recognizer._ctx;
            frame != null && frame.invokingState >= 0;
            frame = frame.parent) {
        // compute what follows who invoked us
        ATNState caller = atn.states.get(frame.invokingState);
        RuleTransition ruleCall = (RuleTransition) caller.transition(0);
        recoverSet.addAll(atn.nextTokens(ruleCall.followState));
    }

    // EPSILON is dropped from the accumulated set before it is returned.
    recoverSet.remove(Token.EPSILON);
    return recoverSet;
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet) RuleTransition(org.antlr.v4.runtime.atn.RuleTransition) ATN(org.antlr.v4.runtime.atn.ATN) ATNState(org.antlr.v4.runtime.atn.ATNState)

Aggregations

Test (org.junit.Test)138 Grammar (org.antlr.v4.tool.Grammar)130 LexerGrammar (org.antlr.v4.tool.LexerGrammar)117 CommonTokenStream (org.antlr.v4.runtime.CommonTokenStream)39 ANTLRInputStream (org.antlr.v4.runtime.ANTLRInputStream)33 ParseTree (org.antlr.v4.runtime.tree.ParseTree)31 ATN (org.antlr.v4.runtime.atn.ATN)19 IntervalSet (org.antlr.v4.runtime.misc.IntervalSet)16 BaseRuntimeTest (org.antlr.v4.test.runtime.BaseRuntimeTest)14 ErrorQueue (org.antlr.v4.test.runtime.ErrorQueue)14 ArrayList (java.util.ArrayList)13 ParseCancellationException (org.antlr.v4.runtime.misc.ParseCancellationException)13 Parser (org.antlr.v4.runtime.Parser)10 RecognitionException (org.antlr.v4.runtime.RecognitionException)10 DecisionInfo (org.antlr.v4.runtime.atn.DecisionInfo)10 Lexer (org.antlr.v4.runtime.Lexer)9 ParserRuleContext (org.antlr.v4.runtime.ParserRuleContext)9 LexerInterpreter (org.antlr.v4.runtime.LexerInterpreter)8 ParserInterpreter (org.antlr.v4.runtime.ParserInterpreter)8 Token (org.antlr.v4.runtime.Token)8