Search in sources :

Example 6 with Rule

use of org.antlr.v4.tool.Rule in project antlr4 by antlr.

the class XPath method split.

// TODO: check for invalid token/rule names, bad syntax
public XPathElement[] split(String path) {
    ANTLRInputStream in;
    try {
        in = new ANTLRInputStream(new StringReader(path));
    } catch (IOException ioe) {
        throw new IllegalArgumentException("Could not read path: " + path, ioe);
    }
    XPathLexer lexer = new XPathLexer(in) {

        @Override
        public void recover(LexerNoViableAltException e) {
            throw e;
        }
    };
    lexer.removeErrorListeners();
    lexer.addErrorListener(new XPathLexerErrorListener());
    CommonTokenStream tokenStream = new CommonTokenStream(lexer);
    try {
        tokenStream.fill();
    } catch (LexerNoViableAltException e) {
        int pos = lexer.getCharPositionInLine();
        String msg = "Invalid tokens or characters at index " + pos + " in path '" + path + "'";
        throw new IllegalArgumentException(msg, e);
    }
    List<Token> tokens = tokenStream.getTokens();
    //		System.out.println("path="+path+"=>"+tokens);
    List<XPathElement> elements = new ArrayList<XPathElement>();
    int n = tokens.size();
    int i = 0;
    loop: while (i < n) {
        Token el = tokens.get(i);
        Token next = null;
        switch(el.getType()) {
            case XPathLexer.ROOT:
            case XPathLexer.ANYWHERE:
                boolean anywhere = el.getType() == XPathLexer.ANYWHERE;
                i++;
                next = tokens.get(i);
                boolean invert = next.getType() == XPathLexer.BANG;
                if (invert) {
                    i++;
                    next = tokens.get(i);
                }
                XPathElement pathElement = getXPathElement(next, anywhere);
                pathElement.invert = invert;
                elements.add(pathElement);
                i++;
                break;
            case XPathLexer.TOKEN_REF:
            case XPathLexer.RULE_REF:
            case XPathLexer.WILDCARD:
                elements.add(getXPathElement(el, false));
                i++;
                break;
            case Token.EOF:
                break loop;
            default:
                throw new IllegalArgumentException("Unknowth path element " + el);
        }
    }
    return elements.toArray(new XPathElement[0]);
}
Also used : CommonTokenStream(org.antlr.v4.runtime.CommonTokenStream) ArrayList(java.util.ArrayList) Token(org.antlr.v4.runtime.Token) IOException(java.io.IOException) LexerNoViableAltException(org.antlr.v4.runtime.LexerNoViableAltException) StringReader(java.io.StringReader) ANTLRInputStream(org.antlr.v4.runtime.ANTLRInputStream)

Example 7 with Rule

use of org.antlr.v4.tool.Rule in project antlr4 by antlr.

the class DefaultErrorStrategy method recover.

/**
	 * {@inheritDoc}
	 *
	 * <p>The default implementation resynchronizes the parser by consuming tokens
	 * until we find one in the resynchronization set--loosely the set of tokens
	 * that can follow the current rule.</p>
	 */
@Override
public void recover(Parser recognizer, RecognitionException e) {
    //						   ", states="+lastErrorStates);
    if (lastErrorIndex == recognizer.getInputStream().index() && lastErrorStates != null && lastErrorStates.contains(recognizer.getState())) {
        // uh oh, another error at same token index and previously-visited
        // state in ATN; must be a case where LT(1) is in the recovery
        // token set so nothing got consumed. Consume a single token
        // at least to prevent an infinite loop; this is a failsafe.
        //			System.err.println("seen error condition before index="+
        //							   lastErrorIndex+", states="+lastErrorStates);
        //			System.err.println("FAILSAFE consumes "+recognizer.getTokenNames()[recognizer.getInputStream().LA(1)]);
        recognizer.consume();
    }
    lastErrorIndex = recognizer.getInputStream().index();
    if (lastErrorStates == null)
        lastErrorStates = new IntervalSet();
    lastErrorStates.add(recognizer.getState());
    IntervalSet followSet = getErrorRecoverySet(recognizer);
    consumeUntil(recognizer, followSet);
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet)

Example 8 with Rule

use of org.antlr.v4.tool.Rule in project antlr4 by antlr.

the class DefaultErrorStrategy method getErrorRecoverySet.

/*  Compute the error recovery set for the current rule.  During
	 *  rule invocation, the parser pushes the set of tokens that can
	 *  follow that rule reference on the stack; this amounts to
	 *  computing FIRST of what follows the rule reference in the
	 *  enclosing rule. See LinearApproximator.FIRST().
	 *  This local follow set only includes tokens
	 *  from within the rule; i.e., the FIRST computation done by
	 *  ANTLR stops at the end of a rule.
	 *
	 *  EXAMPLE
	 *
	 *  When you find a "no viable alt exception", the input is not
	 *  consistent with any of the alternatives for rule r.  The best
	 *  thing to do is to consume tokens until you see something that
	 *  can legally follow a call to r *or* any rule that called r.
	 *  You don't want the exact set of viable next tokens because the
	 *  input might just be missing a token--you might consume the
	 *  rest of the input looking for one of the missing tokens.
	 *
	 *  Consider grammar:
	 *
	 *  a : '[' b ']'
	 *    | '(' b ')'
	 *    ;
	 *  b : c '^' INT ;
	 *  c : ID
	 *    | INT
	 *    ;
	 *
	 *  At each rule invocation, the set of tokens that could follow
	 *  that rule is pushed on a stack.  Here are the various
	 *  context-sensitive follow sets:
	 *
	 *  FOLLOW(b1_in_a) = FIRST(']') = ']'
	 *  FOLLOW(b2_in_a) = FIRST(')') = ')'
	 *  FOLLOW(c_in_b) = FIRST('^') = '^'
	 *
	 *  Upon erroneous input "[]", the call chain is
	 *
	 *  a -> b -> c
	 *
	 *  and, hence, the follow context stack is:
	 *
	 *  depth     follow set       start of rule execution
	 *    0         <EOF>                    a (from main())
	 *    1          ']'                     b
	 *    2          '^'                     c
	 *
	 *  Notice that ')' is not included, because b would have to have
	 *  been called from a different context in rule a for ')' to be
	 *  included.
	 *
	 *  For error recovery, we cannot consider FOLLOW(c)
	 *  (context-sensitive or otherwise).  We need the combined set of
	 *  all context-sensitive FOLLOW sets--the set of all tokens that
	 *  could follow any reference in the call chain.  We need to
	 *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
	 *  we resync'd to that token, we'd consume until EOF.  We need to
	 *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
	 *  In this case, for input "[]", LA(1) is ']' and in the set, so we would
	 *  not consume anything. After printing an error, rule c would
	 *  return normally.  Rule b would not find the required '^' though.
	 *  At this point, it gets a mismatched token error and throws an
	 *  exception (since LA(1) is not in the viable following token
	 *  set).  The rule exception handler tries to recover, but finds
	 *  the same recovery set and doesn't consume anything.  Rule b
	 *  exits normally returning to rule a.  Now it finds the ']' (and
	 *  with the successful match exits errorRecovery mode).
	 *
	 *  So, you can see that the parser walks up the call chain looking
	 *  for the token that was a member of the recovery set.
	 *
	 *  Errors are not generated in errorRecovery mode.
	 *
	 *  ANTLR's error recovery mechanism is based upon original ideas:
	 *
	 *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
	 *
	 *  and
	 *
	 *  "A note on error recovery in recursive descent parsers":
	 *  http://portal.acm.org/citation.cfm?id=947902.947905
	 *
	 *  Later, Josef Grosch had some good ideas:
	 *
	 *  "Efficient and Comfortable Error Recovery in Recursive Descent
	 *  Parsers":
	 *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
	 *
	 *  Like Grosch I implement context-sensitive FOLLOW sets that are combined
	 *  at run-time upon error to avoid overhead during parsing.
	 */
protected IntervalSet getErrorRecoverySet(Parser recognizer) {
    ATN atn = recognizer.getInterpreter().atn;
    RuleContext ctx = recognizer._ctx;
    IntervalSet recoverSet = new IntervalSet();
    while (ctx != null && ctx.invokingState >= 0) {
        // compute what follows who invoked us
        ATNState invokingState = atn.states.get(ctx.invokingState);
        RuleTransition rt = (RuleTransition) invokingState.transition(0);
        IntervalSet follow = atn.nextTokens(rt.followState);
        recoverSet.addAll(follow);
        ctx = ctx.parent;
    }
    recoverSet.remove(Token.EPSILON);
    //		System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames()));
    return recoverSet;
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet) RuleTransition(org.antlr.v4.runtime.atn.RuleTransition) ATN(org.antlr.v4.runtime.atn.ATN) ATNState(org.antlr.v4.runtime.atn.ATNState)

Example 9 with Rule

use of org.antlr.v4.tool.Rule in project antlr4 by antlr.

the class DefaultErrorStrategy method sync.

/**
	 * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
	 * that the current lookahead symbol is consistent with what were expecting
	 * at this point in the ATN. You can call this anytime but ANTLR only
	 * generates code to check before subrules/loops and each iteration.
	 *
	 * <p>Implements Jim Idle's magic sync mechanism in closures and optional
	 * subrules. E.g.,</p>
	 *
	 * <pre>
	 * a : sync ( stuff sync )* ;
	 * sync : {consume to what can follow sync} ;
	 * </pre>
	 *
	 * At the start of a sub rule upon error, {@link #sync} performs single
	 * token deletion, if possible. If it can't do that, it bails on the current
	 * rule and uses the default error recovery, which consumes until the
	 * resynchronization set of the current rule.
	 *
	 * <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
	 * with an empty alternative), then the expected set includes what follows
	 * the subrule.</p>
	 *
	 * <p>During loop iteration, it consumes until it sees a token that can start a
	 * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
	 * stay in the loop as long as possible.</p>
	 *
	 * <p><strong>ORIGINS</strong></p>
	 *
	 * <p>Previous versions of ANTLR did a poor job of their recovery within loops.
	 * A single mismatch token or missing token would force the parser to bail
	 * out of the entire rules surrounding the loop. So, for rule</p>
	 *
	 * <pre>
	 * classDef : 'class' ID '{' member* '}'
	 * </pre>
	 *
	 * input with an extra token between members would force the parser to
	 * consume until it found the next class definition rather than the next
	 * member definition of the current class.
	 *
	 * <p>This functionality cost a little bit of effort because the parser has to
	 * compare token set at the start of the loop and at each iteration. If for
	 * some reason speed is suffering for you, you can turn off this
	 * functionality by simply overriding this method as a blank { }.</p>
	 */
@Override
public void sync(Parser recognizer) throws RecognitionException {
    ATNState s = recognizer.getInterpreter().atn.states.get(recognizer.getState());
    // If already recovering, don't try to sync
    if (inErrorRecoveryMode(recognizer)) {
        return;
    }
    TokenStream tokens = recognizer.getInputStream();
    int la = tokens.LA(1);
    // try cheaper subset first; might get lucky. seems to shave a wee bit off
    IntervalSet nextTokens = recognizer.getATN().nextTokens(s);
    if (nextTokens.contains(Token.EPSILON) || nextTokens.contains(la)) {
        return;
    }
    switch(s.getStateType()) {
        case ATNState.BLOCK_START:
        case ATNState.STAR_BLOCK_START:
        case ATNState.PLUS_BLOCK_START:
        case ATNState.STAR_LOOP_ENTRY:
            // report error and recover if possible
            if (singleTokenDeletion(recognizer) != null) {
                return;
            }
            throw new InputMismatchException(recognizer);
        case ATNState.PLUS_LOOP_BACK:
        case ATNState.STAR_LOOP_BACK:
            //			System.err.println("at loop back: "+s.getClass().getSimpleName());
            reportUnwantedToken(recognizer);
            IntervalSet expecting = recognizer.getExpectedTokens();
            IntervalSet whatFollowsLoopIterationOrRule = expecting.or(getErrorRecoverySet(recognizer));
            consumeUntil(recognizer, whatFollowsLoopIterationOrRule);
            break;
        default:
            // do nothing if we can't identify the exact kind of ATN state
            break;
    }
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet) ATNState(org.antlr.v4.runtime.atn.ATNState)

Example 10 with Rule

use of org.antlr.v4.tool.Rule in project antlr4 by antlr.

the class ATN method nextTokens.

/** Compute the set of valid tokens that can occur starting in state {@code s}.
	 *  If {@code ctx} is null, the set of tokens will not include what can follow
	 *  the rule surrounding {@code s}. In other words, the set will be
	 *  restricted to tokens reachable staying within {@code s}'s rule.
	 */
public IntervalSet nextTokens(ATNState s, RuleContext ctx) {
    LL1Analyzer anal = new LL1Analyzer(this);
    IntervalSet next = anal.LOOK(s, ctx);
    return next;
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet)

Aggregations

LexerGrammar (org.antlr.v4.tool.LexerGrammar)50 ATN (org.antlr.v4.runtime.atn.ATN)47 Rule (org.antlr.v4.tool.Rule)47 Test (org.junit.Test)46 GrammarAST (org.antlr.v4.tool.ast.GrammarAST)31 Grammar (org.antlr.v4.tool.Grammar)30 ATNState (org.antlr.v4.runtime.atn.ATNState)26 LeftRecursiveRule (org.antlr.v4.tool.LeftRecursiveRule)24 ArrayList (java.util.ArrayList)20 IntervalSet (org.antlr.v4.runtime.misc.IntervalSet)17 DOTGenerator (org.antlr.v4.tool.DOTGenerator)9 HashMap (java.util.HashMap)8 ParserRuleContext (org.antlr.v4.runtime.ParserRuleContext)8 RuleStartState (org.antlr.v4.runtime.atn.RuleStartState)8 ActionAST (org.antlr.v4.tool.ast.ActionAST)8 ParserATNFactory (org.antlr.v4.automata.ParserATNFactory)7 GrammarASTAdaptor (org.antlr.v4.parse.GrammarASTAdaptor)7 DecisionState (org.antlr.v4.runtime.atn.DecisionState)7 RuleAST (org.antlr.v4.tool.ast.RuleAST)7 LinkedHashMap (java.util.LinkedHashMap)6