Search in sources :

Example 81 with ATNState

use of org.antlr.v4.runtime.atn.ATNState in project antlr4 by tunnelvisionlabs.

the class ATNSerializer method serialize.

/**
 * Serialize state descriptors, edge descriptors, and decision→state map
 *  into list of ints:
 *
 * 		grammar-type, (ANTLRParser.LEXER, ...)
 *  	max token type,
 *  	num states,
 *  	state-0-type ruleIndex, state-1-type ruleIndex, ... state-i-type ruleIndex optional-arg ...
 *  	num rules,
 *  	rule-1-start-state rule-1-args, rule-2-start-state  rule-2-args, ...
 *  	(args are token type,actionIndex in lexer else 0,0)
 *      num modes,
 *      mode-0-start-state, mode-1-start-state, ... (parser has 0 modes)
 *      num unicode-bmp-sets
 *      bmp-set-0-interval-count intervals, bmp-set-1-interval-count intervals, ...
 *      num unicode-smp-sets
 *      smp-set-0-interval-count intervals, smp-set-1-interval-count intervals, ...
 *	num total edges,
 *      src, trg, edge-type, edge arg1, optional edge arg2 (present always), ...
 *      num decisions,
 *      decision-0-start-state, decision-1-start-state, ...
 *
 *  Convenient to pack into unsigned shorts to make as Java string.
 */
public IntegerList serialize() {
    IntegerList data = new IntegerList();
    data.add(ATNDeserializer.SERIALIZED_VERSION);
    serializeUUID(data, ATNDeserializer.SERIALIZED_UUID);
    // convert grammar type to ATN const to avoid dependence on ANTLRParser
    data.add(atn.grammarType.ordinal());
    data.add(atn.maxTokenType);
    int nedges = 0;
    // Note that we use a LinkedHashMap as a set to
    // maintain insertion order while deduplicating
    // entries with the same key.
    Map<IntervalSet, Boolean> sets = new LinkedHashMap<IntervalSet, Boolean>();
    // dump states, count edges and collect sets while doing so
    IntegerList nonGreedyStates = new IntegerList();
    IntegerList sllStates = new IntegerList();
    IntegerList precedenceStates = new IntegerList();
    data.add(atn.states.size());
    for (ATNState s : atn.states) {
        if (s == null) {
            // might be optimized away
            data.add(ATNState.INVALID_TYPE);
            continue;
        }
        int stateType = s.getStateType();
        if (s instanceof DecisionState) {
            DecisionState decisionState = (DecisionState) s;
            if (decisionState.nonGreedy) {
                nonGreedyStates.add(s.stateNumber);
            }
            if (decisionState.sll) {
                sllStates.add(s.stateNumber);
            }
        }
        if (s instanceof RuleStartState && ((RuleStartState) s).isPrecedenceRule) {
            precedenceStates.add(s.stateNumber);
        }
        data.add(stateType);
        if (s.ruleIndex == -1) {
            data.add(Character.MAX_VALUE);
        } else {
            data.add(s.ruleIndex);
        }
        if (s.getStateType() == ATNState.LOOP_END) {
            data.add(((LoopEndState) s).loopBackState.stateNumber);
        } else if (s instanceof BlockStartState) {
            data.add(((BlockStartState) s).endState.stateNumber);
        }
        if (s.getStateType() != ATNState.RULE_STOP) {
            // the deserializer can trivially derive these edges, so there's no need to serialize them
            nedges += s.getNumberOfTransitions();
        }
        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
            Transition t = s.transition(i);
            int edgeType = Transition.serializationTypes.get(t.getClass());
            if (edgeType == Transition.SET || edgeType == Transition.NOT_SET) {
                SetTransition st = (SetTransition) t;
                sets.put(st.set, true);
            }
        }
    }
    // non-greedy states
    data.add(nonGreedyStates.size());
    for (int i = 0; i < nonGreedyStates.size(); i++) {
        data.add(nonGreedyStates.get(i));
    }
    // SLL decisions
    data.add(sllStates.size());
    for (int i = 0; i < sllStates.size(); i++) {
        data.add(sllStates.get(i));
    }
    // precedence states
    data.add(precedenceStates.size());
    for (int i = 0; i < precedenceStates.size(); i++) {
        data.add(precedenceStates.get(i));
    }
    int nrules = atn.ruleToStartState.length;
    data.add(nrules);
    for (int r = 0; r < nrules; r++) {
        ATNState ruleStartState = atn.ruleToStartState[r];
        data.add(ruleStartState.stateNumber);
        boolean leftFactored = ruleNames.get(ruleStartState.ruleIndex).indexOf(ATNSimulator.RULE_VARIANT_DELIMITER) >= 0;
        data.add(leftFactored ? 1 : 0);
        if (atn.grammarType == ATNType.LEXER) {
            if (atn.ruleToTokenType[r] == Token.EOF) {
                data.add(Character.MAX_VALUE);
            } else {
                data.add(atn.ruleToTokenType[r]);
            }
        }
    }
    int nmodes = atn.modeToStartState.size();
    data.add(nmodes);
    if (nmodes > 0) {
        for (ATNState modeStartState : atn.modeToStartState) {
            data.add(modeStartState.stateNumber);
        }
    }
    List<IntervalSet> bmpSets = new ArrayList<IntervalSet>();
    List<IntervalSet> smpSets = new ArrayList<IntervalSet>();
    for (IntervalSet set : sets.keySet()) {
        if (set.getMaxElement() <= Character.MAX_VALUE) {
            bmpSets.add(set);
        } else {
            smpSets.add(set);
        }
    }
    serializeSets(data, bmpSets, new CodePointSerializer() {

        @Override
        public void serializeCodePoint(IntegerList data, int cp) {
            data.add(cp);
        }
    });
    serializeSets(data, smpSets, new CodePointSerializer() {

        @Override
        public void serializeCodePoint(IntegerList data, int cp) {
            serializeInt(data, cp);
        }
    });
    Map<IntervalSet, Integer> setIndices = new HashMap<IntervalSet, Integer>();
    int setIndex = 0;
    for (IntervalSet bmpSet : bmpSets) {
        setIndices.put(bmpSet, setIndex++);
    }
    for (IntervalSet smpSet : smpSets) {
        setIndices.put(smpSet, setIndex++);
    }
    data.add(nedges);
    for (ATNState s : atn.states) {
        if (s == null) {
            // might be optimized away
            continue;
        }
        if (s.getStateType() == ATNState.RULE_STOP) {
            continue;
        }
        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
            Transition t = s.transition(i);
            if (atn.states.get(t.target.stateNumber) == null) {
                throw new IllegalStateException("Cannot serialize a transition to a removed state.");
            }
            int src = s.stateNumber;
            int trg = t.target.stateNumber;
            int edgeType = Transition.serializationTypes.get(t.getClass());
            int arg1 = 0;
            int arg2 = 0;
            int arg3 = 0;
            switch(edgeType) {
                case Transition.RULE:
                    trg = ((RuleTransition) t).followState.stateNumber;
                    arg1 = ((RuleTransition) t).target.stateNumber;
                    arg2 = ((RuleTransition) t).ruleIndex;
                    arg3 = ((RuleTransition) t).precedence;
                    break;
                case Transition.PRECEDENCE:
                    PrecedencePredicateTransition ppt = (PrecedencePredicateTransition) t;
                    arg1 = ppt.precedence;
                    break;
                case Transition.PREDICATE:
                    PredicateTransition pt = (PredicateTransition) t;
                    arg1 = pt.ruleIndex;
                    arg2 = pt.predIndex;
                    arg3 = pt.isCtxDependent ? 1 : 0;
                    break;
                case Transition.RANGE:
                    arg1 = ((RangeTransition) t).from;
                    arg2 = ((RangeTransition) t).to;
                    if (arg1 == Token.EOF) {
                        arg1 = 0;
                        arg3 = 1;
                    }
                    break;
                case Transition.ATOM:
                    arg1 = ((AtomTransition) t).label;
                    if (arg1 == Token.EOF) {
                        arg1 = 0;
                        arg3 = 1;
                    }
                    break;
                case Transition.ACTION:
                    ActionTransition at = (ActionTransition) t;
                    arg1 = at.ruleIndex;
                    arg2 = at.actionIndex;
                    if (arg2 == -1) {
                        arg2 = 0xFFFF;
                    }
                    arg3 = at.isCtxDependent ? 1 : 0;
                    break;
                case Transition.SET:
                    arg1 = setIndices.get(((SetTransition) t).set);
                    break;
                case Transition.NOT_SET:
                    arg1 = setIndices.get(((SetTransition) t).set);
                    break;
                case Transition.WILDCARD:
                    break;
            }
            data.add(src);
            data.add(trg);
            data.add(edgeType);
            data.add(arg1);
            data.add(arg2);
            data.add(arg3);
        }
    }
    int ndecisions = atn.decisionToState.size();
    data.add(ndecisions);
    for (DecisionState decStartState : atn.decisionToState) {
        data.add(decStartState.stateNumber);
    }
    // 
    if (atn.grammarType == ATNType.LEXER) {
        data.add(atn.lexerActions.length);
        for (LexerAction action : atn.lexerActions) {
            data.add(action.getActionType().ordinal());
            switch(action.getActionType()) {
                case CHANNEL:
                    int channel = ((LexerChannelAction) action).getChannel();
                    data.add(channel != -1 ? channel : 0xFFFF);
                    data.add(0);
                    break;
                case CUSTOM:
                    int ruleIndex = ((LexerCustomAction) action).getRuleIndex();
                    int actionIndex = ((LexerCustomAction) action).getActionIndex();
                    data.add(ruleIndex != -1 ? ruleIndex : 0xFFFF);
                    data.add(actionIndex != -1 ? actionIndex : 0xFFFF);
                    break;
                case MODE:
                    int mode = ((LexerModeAction) action).getMode();
                    data.add(mode != -1 ? mode : 0xFFFF);
                    data.add(0);
                    break;
                case MORE:
                    data.add(0);
                    data.add(0);
                    break;
                case POP_MODE:
                    data.add(0);
                    data.add(0);
                    break;
                case PUSH_MODE:
                    mode = ((LexerPushModeAction) action).getMode();
                    data.add(mode != -1 ? mode : 0xFFFF);
                    data.add(0);
                    break;
                case SKIP:
                    data.add(0);
                    data.add(0);
                    break;
                case TYPE:
                    int type = ((LexerTypeAction) action).getType();
                    data.add(type != -1 ? type : 0xFFFF);
                    data.add(0);
                    break;
                default:
                    String message = String.format(Locale.getDefault(), "The specified lexer action type %s is not valid.", action.getActionType());
                    throw new IllegalArgumentException(message);
            }
        }
    }
    // don't adjust the first value since that's the version number
    for (int i = 1; i < data.size(); i++) {
        if (data.get(i) < Character.MIN_VALUE || data.get(i) > Character.MAX_VALUE) {
            throw new UnsupportedOperationException("Serialized ATN data element " + data.get(i) + " element " + i + " out of range " + (int) Character.MIN_VALUE + ".." + (int) Character.MAX_VALUE);
        }
        int value = (data.get(i) + 2) & 0xFFFF;
        data.set(i, value);
    }
    return data;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) IntegerList(org.antlr.v4.runtime.misc.IntegerList) IntervalSet(org.antlr.v4.runtime.misc.IntervalSet)

Example 82 with ATNState

use of org.antlr.v4.runtime.atn.ATNState in project antlr4 by tunnelvisionlabs.

the class Parser method isExpectedToken.

/**
 * Checks whether or not {@code symbol} can follow the current state in the
 * ATN. The behavior of this method is equivalent to the following, but is
 * implemented such that the complete context-sensitive follow set does not
 * need to be explicitly constructed.
 *
 * <pre>
 * return getExpectedTokens().contains(symbol);
 * </pre>
 *
 * @param symbol the symbol type to check
 * @return {@code true} if {@code symbol} can follow the current state in
 * the ATN, otherwise {@code false}.
 */
public boolean isExpectedToken(int symbol) {
    // return getInterpreter().atn.nextTokens(_ctx);
    ATN atn = getInterpreter().atn;
    ParserRuleContext ctx = _ctx;
    ATNState s = atn.states.get(getState());
    IntervalSet following = atn.nextTokens(s);
    if (following.contains(symbol)) {
        return true;
    }
    // System.out.println("following "+s+"="+following);
    if (!following.contains(Token.EPSILON))
        return false;
    while (ctx != null && ctx.invokingState >= 0 && following.contains(Token.EPSILON)) {
        ATNState invokingState = atn.states.get(ctx.invokingState);
        RuleTransition rt = (RuleTransition) invokingState.transition(0);
        following = atn.nextTokens(rt.followState);
        if (following.contains(symbol)) {
            return true;
        }
        ctx = (ParserRuleContext) ctx.parent;
    }
    if (following.contains(Token.EPSILON) && symbol == Token.EOF) {
        return true;
    }
    return false;
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet) RuleTransition(org.antlr.v4.runtime.atn.RuleTransition) ATN(org.antlr.v4.runtime.atn.ATN) ATNState(org.antlr.v4.runtime.atn.ATNState)

Example 83 with ATNState

use of org.antlr.v4.runtime.atn.ATNState in project antlr4 by tunnelvisionlabs.

the class ParserInterpreter method parse.

/**
 * Begin parsing at startRuleIndex
 */
public ParserRuleContext parse(int startRuleIndex) {
    RuleStartState startRuleStartState = atn.ruleToStartState[startRuleIndex];
    rootContext = createInterpreterRuleContext(null, ATNState.INVALID_STATE_NUMBER, startRuleIndex);
    if (startRuleStartState.isPrecedenceRule) {
        enterRecursionRule(rootContext, startRuleStartState.stateNumber, startRuleIndex, 0);
    } else {
        enterRule(rootContext, startRuleStartState.stateNumber, startRuleIndex);
    }
    while (true) {
        ATNState p = getATNState();
        switch(p.getStateType()) {
            case ATNState.RULE_STOP:
                // pop; return from rule
                if (_ctx.isEmpty()) {
                    if (startRuleStartState.isPrecedenceRule) {
                        ParserRuleContext result = _ctx;
                        Tuple2<ParserRuleContext, Integer> parentContext = _parentContextStack.pop();
                        unrollRecursionContexts(parentContext.getItem1());
                        return result;
                    } else {
                        exitRule();
                        return rootContext;
                    }
                }
                visitRuleStopState(p);
                break;
            default:
                try {
                    visitState(p);
                } catch (RecognitionException e) {
                    setState(atn.ruleToStopState[p.ruleIndex].stateNumber);
                    getContext().exception = e;
                    getErrorHandler().reportError(this, e);
                    recover(e);
                }
                break;
        }
    }
}
Also used : RuleStartState(org.antlr.v4.runtime.atn.RuleStartState) ATNState(org.antlr.v4.runtime.atn.ATNState)

Example 84 with ATNState

use of org.antlr.v4.runtime.atn.ATNState in project antlr4 by tunnelvisionlabs.

the class ParserInterpreter method visitRuleStopState.

protected void visitRuleStopState(ATNState p) {
    RuleStartState ruleStartState = atn.ruleToStartState[p.ruleIndex];
    if (ruleStartState.isPrecedenceRule) {
        Tuple2<ParserRuleContext, Integer> parentContext = _parentContextStack.pop();
        unrollRecursionContexts(parentContext.getItem1());
        setState(parentContext.getItem2());
    } else {
        exitRule();
    }
    RuleTransition ruleTransition = (RuleTransition) atn.states.get(getState()).transition(0);
    setState(ruleTransition.followState.stateNumber);
}
Also used : RuleStartState(org.antlr.v4.runtime.atn.RuleStartState) RuleTransition(org.antlr.v4.runtime.atn.RuleTransition)

Example 85 with ATNState

use of org.antlr.v4.runtime.atn.ATNState in project antlr4 by tunnelvisionlabs.

the class DefaultErrorStrategy method sync.

/**
 * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
 * that the current lookahead symbol is consistent with what were expecting
 * at this point in the ATN. You can call this anytime but ANTLR only
 * generates code to check before subrules/loops and each iteration.
 *
 * <p>Implements Jim Idle's magic sync mechanism in closures and optional
 * subrules. E.g.,</p>
 *
 * <pre>
 * a : sync ( stuff sync )* ;
 * sync : {consume to what can follow sync} ;
 * </pre>
 *
 * At the start of a sub rule upon error, {@link #sync} performs single
 * token deletion, if possible. If it can't do that, it bails on the current
 * rule and uses the default error recovery, which consumes until the
 * resynchronization set of the current rule.
 *
 * <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
 * with an empty alternative), then the expected set includes what follows
 * the subrule.</p>
 *
 * <p>During loop iteration, it consumes until it sees a token that can start a
 * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
 * stay in the loop as long as possible.</p>
 *
 * <p><strong>ORIGINS</strong></p>
 *
 * <p>Previous versions of ANTLR did a poor job of their recovery within loops.
 * A single mismatch token or missing token would force the parser to bail
 * out of the entire rules surrounding the loop. So, for rule</p>
 *
 * <pre>
 * classDef : 'class' ID '{' member* '}'
 * </pre>
 *
 * input with an extra token between members would force the parser to
 * consume until it found the next class definition rather than the next
 * member definition of the current class.
 *
 * <p>This functionality cost a little bit of effort because the parser has to
 * compare token set at the start of the loop and at each iteration. If for
 * some reason speed is suffering for you, you can turn off this
 * functionality by simply overriding this method as a blank { }.</p>
 */
@Override
public void sync(Parser recognizer) throws RecognitionException {
    ATNState s = recognizer.getInterpreter().atn.states.get(recognizer.getState());
    // If already recovering, don't try to sync
    if (inErrorRecoveryMode(recognizer)) {
        return;
    }
    TokenStream tokens = recognizer.getInputStream();
    int la = tokens.LA(1);
    // try cheaper subset first; might get lucky. seems to shave a wee bit off
    IntervalSet nextTokens = recognizer.getATN().nextTokens(s);
    if (nextTokens.contains(la)) {
        // We are sure the token matches
        nextTokensContext = null;
        nextTokensState = ATNState.INVALID_STATE_NUMBER;
        return;
    }
    if (nextTokens.contains(Token.EPSILON)) {
        if (nextTokensContext == null) {
            // It's possible the next token won't match; information tracked
            // by sync is restricted for performance.
            nextTokensContext = recognizer.getContext();
            nextTokensState = recognizer.getState();
        }
        return;
    }
    switch(s.getStateType()) {
        case ATNState.BLOCK_START:
        case ATNState.STAR_BLOCK_START:
        case ATNState.PLUS_BLOCK_START:
        case ATNState.STAR_LOOP_ENTRY:
            // report error and recover if possible
            if (singleTokenDeletion(recognizer) != null) {
                return;
            }
            throw new InputMismatchException(recognizer);
        case ATNState.PLUS_LOOP_BACK:
        case ATNState.STAR_LOOP_BACK:
            // System.err.println("at loop back: "+s.getClass().getSimpleName());
            reportUnwantedToken(recognizer);
            IntervalSet expecting = recognizer.getExpectedTokens();
            IntervalSet whatFollowsLoopIterationOrRule = expecting.or(getErrorRecoverySet(recognizer));
            consumeUntil(recognizer, whatFollowsLoopIterationOrRule);
            break;
        default:
            // do nothing if we can't identify the exact kind of ATN state
            break;
    }
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet) ATNState(org.antlr.v4.runtime.atn.ATNState)

Aggregations

ATNState (org.antlr.v4.runtime.atn.ATNState)94 IntervalSet (org.antlr.v4.runtime.misc.IntervalSet)34 ATN (org.antlr.v4.runtime.atn.ATN)25 RuleTransition (org.antlr.v4.runtime.atn.RuleTransition)23 Rule (org.antlr.v4.tool.Rule)22 Transition (org.antlr.v4.runtime.atn.Transition)21 NotNull (org.antlr.v4.runtime.misc.NotNull)19 AtomTransition (org.antlr.v4.runtime.atn.AtomTransition)18 RuleStartState (org.antlr.v4.runtime.atn.RuleStartState)17 ActionTransition (org.antlr.v4.runtime.atn.ActionTransition)16 SetTransition (org.antlr.v4.runtime.atn.SetTransition)16 NotSetTransition (org.antlr.v4.runtime.atn.NotSetTransition)15 DecisionState (org.antlr.v4.runtime.atn.DecisionState)11 DOTGenerator (org.antlr.v4.tool.DOTGenerator)10 GrammarAST (org.antlr.v4.tool.ast.GrammarAST)10 ParserATNFactory (org.antlr.v4.automata.ParserATNFactory)9 LeftRecursiveRule (org.antlr.v4.tool.LeftRecursiveRule)9 ArrayList (java.util.ArrayList)8 EpsilonTransition (org.antlr.v4.runtime.atn.EpsilonTransition)8 PrecedencePredicateTransition (org.antlr.v4.runtime.atn.PrecedencePredicateTransition)8