Search in sources :

Example 31 with ATNState

use of org.antlr.v4.runtime.atn.ATNState in project antlr4 by tunnelvisionlabs.

the class LL1Analyzer method getDecisionLookahead.

/**
 * Calculates the SLL(1) expected lookahead set for each outgoing transition
 * of an {@link ATNState}. The returned array has one element for each
 * outgoing transition in {@code s}. If the closure from transition
 * <em>i</em> leads to a semantic predicate before matching a symbol, the
 * element at index <em>i</em> of the result will be {@code null}.
 *
 * @param s the ATN state
 * @return the expected symbols for each outgoing transition of {@code s}.
 */
@Nullable
public IntervalSet[] getDecisionLookahead(@Nullable ATNState s) {
    // System.out.println("LOOK("+s.stateNumber+")");
    if (s == null) {
        return null;
    }
    IntervalSet[] look = new IntervalSet[s.getNumberOfTransitions()];
    for (int alt = 0; alt < s.getNumberOfTransitions(); alt++) {
        look[alt] = new IntervalSet();
        Set<ATNConfig> lookBusy = new HashSet<ATNConfig>();
        // fail to get lookahead upon pred
        boolean seeThruPreds = false;
        _LOOK(s.transition(alt).target, null, PredictionContext.EMPTY_LOCAL, look[alt], lookBusy, new BitSet(), seeThruPreds, false);
        // or we had a predicate when we !seeThruPreds
        if (look[alt].size() == 0 || look[alt].contains(HIT_PRED)) {
            look[alt] = null;
        }
    }
    return look;
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet) BitSet(java.util.BitSet) HashSet(java.util.HashSet) Nullable(org.antlr.v4.runtime.misc.Nullable)

Example 32 with ATNState

use of org.antlr.v4.runtime.atn.ATNState in project antlr4 by tunnelvisionlabs.

the class DFASerializer method getContextLabel.

protected String getContextLabel(int i) {
    if (i == PredictionContext.EMPTY_FULL_STATE_KEY) {
        return "ctx:EMPTY_FULL";
    } else if (i == PredictionContext.EMPTY_LOCAL_STATE_KEY) {
        return "ctx:EMPTY_LOCAL";
    }
    if (atn != null && i > 0 && i <= atn.states.size()) {
        ATNState state = atn.states.get(i);
        int ruleIndex = state.ruleIndex;
        if (ruleNames != null && ruleIndex >= 0 && ruleIndex < ruleNames.length) {
            return "ctx:" + String.valueOf(i) + "(" + ruleNames[ruleIndex] + ")";
        }
    }
    return "ctx:" + String.valueOf(i);
}
Also used : ATNState(org.antlr.v4.runtime.atn.ATNState)

Example 33 with ATNState

use of org.antlr.v4.runtime.atn.ATNState in project antlr4 by antlr.

the class ATNSerializer method serialize.

/**
 * Serialize state descriptors, edge descriptors, and decision&rarr;state map
 *  into list of ints:
 *
 * 		grammar-type, (ANTLRParser.LEXER, ...)
 *  	max token type,
 *  	num states,
 *  	state-0-type ruleIndex, state-1-type ruleIndex, ... state-i-type ruleIndex optional-arg ...
 *  	num rules,
 *  	rule-1-start-state rule-1-args, rule-2-start-state  rule-2-args, ...
 *  	(args are token type,actionIndex in lexer else 0,0)
 *      num modes,
 *      mode-0-start-state, mode-1-start-state, ... (parser has 0 modes)
 *      num unicode-bmp-sets
 *      bmp-set-0-interval-count intervals, bmp-set-1-interval-count intervals, ...
 *      num unicode-smp-sets
 *      smp-set-0-interval-count intervals, smp-set-1-interval-count intervals, ...
 *	num total edges,
 *      src, trg, edge-type, edge arg1, optional edge arg2 (present always), ...
 *      num decisions,
 *      decision-0-start-state, decision-1-start-state, ...
 *
 *  Convenient to pack into unsigned shorts to make as Java string.
 */
public IntegerList serialize() {
    IntegerList data = new IntegerList();
    data.add(ATNDeserializer.SERIALIZED_VERSION);
    serializeUUID(data, ATNDeserializer.SERIALIZED_UUID);
    // convert grammar type to ATN const to avoid dependence on ANTLRParser
    data.add(atn.grammarType.ordinal());
    data.add(atn.maxTokenType);
    int nedges = 0;
    // Note that we use a LinkedHashMap as a set to
    // maintain insertion order while deduplicating
    // entries with the same key.
    Map<IntervalSet, Boolean> sets = new LinkedHashMap<>();
    // dump states, count edges and collect sets while doing so
    IntegerList nonGreedyStates = new IntegerList();
    IntegerList precedenceStates = new IntegerList();
    data.add(atn.states.size());
    for (ATNState s : atn.states) {
        if (s == null) {
            // might be optimized away
            data.add(ATNState.INVALID_TYPE);
            continue;
        }
        int stateType = s.getStateType();
        if (s instanceof DecisionState && ((DecisionState) s).nonGreedy) {
            nonGreedyStates.add(s.stateNumber);
        }
        if (s instanceof RuleStartState && ((RuleStartState) s).isLeftRecursiveRule) {
            precedenceStates.add(s.stateNumber);
        }
        data.add(stateType);
        if (s.ruleIndex == -1) {
            data.add(Character.MAX_VALUE);
        } else {
            data.add(s.ruleIndex);
        }
        if (s.getStateType() == ATNState.LOOP_END) {
            data.add(((LoopEndState) s).loopBackState.stateNumber);
        } else if (s instanceof BlockStartState) {
            data.add(((BlockStartState) s).endState.stateNumber);
        }
        if (s.getStateType() != ATNState.RULE_STOP) {
            // the deserializer can trivially derive these edges, so there's no need to serialize them
            nedges += s.getNumberOfTransitions();
        }
        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
            Transition t = s.transition(i);
            int edgeType = Transition.serializationTypes.get(t.getClass());
            if (edgeType == Transition.SET || edgeType == Transition.NOT_SET) {
                SetTransition st = (SetTransition) t;
                sets.put(st.set, true);
            }
        }
    }
    // non-greedy states
    data.add(nonGreedyStates.size());
    for (int i = 0; i < nonGreedyStates.size(); i++) {
        data.add(nonGreedyStates.get(i));
    }
    // precedence states
    data.add(precedenceStates.size());
    for (int i = 0; i < precedenceStates.size(); i++) {
        data.add(precedenceStates.get(i));
    }
    int nrules = atn.ruleToStartState.length;
    data.add(nrules);
    for (int r = 0; r < nrules; r++) {
        ATNState ruleStartState = atn.ruleToStartState[r];
        data.add(ruleStartState.stateNumber);
        if (atn.grammarType == ATNType.LEXER) {
            if (atn.ruleToTokenType[r] == Token.EOF) {
                data.add(Character.MAX_VALUE);
            } else {
                data.add(atn.ruleToTokenType[r]);
            }
        }
    }
    int nmodes = atn.modeToStartState.size();
    data.add(nmodes);
    if (nmodes > 0) {
        for (ATNState modeStartState : atn.modeToStartState) {
            data.add(modeStartState.stateNumber);
        }
    }
    List<IntervalSet> bmpSets = new ArrayList<>();
    List<IntervalSet> smpSets = new ArrayList<>();
    for (IntervalSet set : sets.keySet()) {
        if (!set.isNil() && set.getMaxElement() <= Character.MAX_VALUE) {
            bmpSets.add(set);
        } else {
            smpSets.add(set);
        }
    }
    serializeSets(data, bmpSets, new CodePointSerializer() {

        @Override
        public void serializeCodePoint(IntegerList data, int cp) {
            data.add(cp);
        }
    });
    serializeSets(data, smpSets, new CodePointSerializer() {

        @Override
        public void serializeCodePoint(IntegerList data, int cp) {
            serializeInt(data, cp);
        }
    });
    Map<IntervalSet, Integer> setIndices = new HashMap<>();
    int setIndex = 0;
    for (IntervalSet bmpSet : bmpSets) {
        setIndices.put(bmpSet, setIndex++);
    }
    for (IntervalSet smpSet : smpSets) {
        setIndices.put(smpSet, setIndex++);
    }
    data.add(nedges);
    for (ATNState s : atn.states) {
        if (s == null) {
            // might be optimized away
            continue;
        }
        if (s.getStateType() == ATNState.RULE_STOP) {
            continue;
        }
        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
            Transition t = s.transition(i);
            if (atn.states.get(t.target.stateNumber) == null) {
                throw new IllegalStateException("Cannot serialize a transition to a removed state.");
            }
            int src = s.stateNumber;
            int trg = t.target.stateNumber;
            int edgeType = Transition.serializationTypes.get(t.getClass());
            int arg1 = 0;
            int arg2 = 0;
            int arg3 = 0;
            switch(edgeType) {
                case Transition.RULE:
                    trg = ((RuleTransition) t).followState.stateNumber;
                    arg1 = ((RuleTransition) t).target.stateNumber;
                    arg2 = ((RuleTransition) t).ruleIndex;
                    arg3 = ((RuleTransition) t).precedence;
                    break;
                case Transition.PRECEDENCE:
                    PrecedencePredicateTransition ppt = (PrecedencePredicateTransition) t;
                    arg1 = ppt.precedence;
                    break;
                case Transition.PREDICATE:
                    PredicateTransition pt = (PredicateTransition) t;
                    arg1 = pt.ruleIndex;
                    arg2 = pt.predIndex;
                    arg3 = pt.isCtxDependent ? 1 : 0;
                    break;
                case Transition.RANGE:
                    arg1 = ((RangeTransition) t).from;
                    arg2 = ((RangeTransition) t).to;
                    if (arg1 == Token.EOF) {
                        arg1 = 0;
                        arg3 = 1;
                    }
                    break;
                case Transition.ATOM:
                    arg1 = ((AtomTransition) t).label;
                    if (arg1 == Token.EOF) {
                        arg1 = 0;
                        arg3 = 1;
                    }
                    break;
                case Transition.ACTION:
                    ActionTransition at = (ActionTransition) t;
                    arg1 = at.ruleIndex;
                    arg2 = at.actionIndex;
                    if (arg2 == -1) {
                        arg2 = 0xFFFF;
                    }
                    arg3 = at.isCtxDependent ? 1 : 0;
                    break;
                case Transition.SET:
                    arg1 = setIndices.get(((SetTransition) t).set);
                    break;
                case Transition.NOT_SET:
                    arg1 = setIndices.get(((SetTransition) t).set);
                    break;
                case Transition.WILDCARD:
                    break;
            }
            data.add(src);
            data.add(trg);
            data.add(edgeType);
            data.add(arg1);
            data.add(arg2);
            data.add(arg3);
        }
    }
    int ndecisions = atn.decisionToState.size();
    data.add(ndecisions);
    for (DecisionState decStartState : atn.decisionToState) {
        data.add(decStartState.stateNumber);
    }
    // 
    if (atn.grammarType == ATNType.LEXER) {
        data.add(atn.lexerActions.length);
        for (LexerAction action : atn.lexerActions) {
            data.add(action.getActionType().ordinal());
            switch(action.getActionType()) {
                case CHANNEL:
                    int channel = ((LexerChannelAction) action).getChannel();
                    data.add(channel != -1 ? channel : 0xFFFF);
                    data.add(0);
                    break;
                case CUSTOM:
                    int ruleIndex = ((LexerCustomAction) action).getRuleIndex();
                    int actionIndex = ((LexerCustomAction) action).getActionIndex();
                    data.add(ruleIndex != -1 ? ruleIndex : 0xFFFF);
                    data.add(actionIndex != -1 ? actionIndex : 0xFFFF);
                    break;
                case MODE:
                    int mode = ((LexerModeAction) action).getMode();
                    data.add(mode != -1 ? mode : 0xFFFF);
                    data.add(0);
                    break;
                case MORE:
                    data.add(0);
                    data.add(0);
                    break;
                case POP_MODE:
                    data.add(0);
                    data.add(0);
                    break;
                case PUSH_MODE:
                    mode = ((LexerPushModeAction) action).getMode();
                    data.add(mode != -1 ? mode : 0xFFFF);
                    data.add(0);
                    break;
                case SKIP:
                    data.add(0);
                    data.add(0);
                    break;
                case TYPE:
                    int type = ((LexerTypeAction) action).getType();
                    data.add(type != -1 ? type : 0xFFFF);
                    data.add(0);
                    break;
                default:
                    String message = String.format(Locale.getDefault(), "The specified lexer action type %s is not valid.", action.getActionType());
                    throw new IllegalArgumentException(message);
            }
        }
    }
    // don't adjust the first value since that's the version number
    for (int i = 1; i < data.size(); i++) {
        if (data.get(i) < Character.MIN_VALUE || data.get(i) > Character.MAX_VALUE) {
            throw new UnsupportedOperationException("Serialized ATN data element " + data.get(i) + " element " + i + " out of range " + (int) Character.MIN_VALUE + ".." + (int) Character.MAX_VALUE);
        }
        int value = (data.get(i) + 2) & 0xFFFF;
        data.set(i, value);
    }
    return data;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) IntegerList(org.antlr.v4.runtime.misc.IntegerList) IntervalSet(org.antlr.v4.runtime.misc.IntervalSet)

Example 34 with ATNState

use of org.antlr.v4.runtime.atn.ATNState in project antlr4 by antlr.

the class LexerATNSimulator method matchATN.

protected int matchATN(CharStream input) {
    ATNState startState = atn.modeToStartState.get(mode);
    if (debug) {
        System.out.format(Locale.getDefault(), "matchATN mode %d start: %s\n", mode, startState);
    }
    int old_mode = mode;
    ATNConfigSet s0_closure = computeStartState(input, startState);
    boolean suppressEdge = s0_closure.hasSemanticContext;
    s0_closure.hasSemanticContext = false;
    DFAState next = addDFAState(s0_closure);
    if (!suppressEdge) {
        decisionToDFA[mode].s0 = next;
    }
    int predict = execATN(input, next);
    if (debug) {
        System.out.format(Locale.getDefault(), "DFA after matchATN: %s\n", decisionToDFA[old_mode].toLexerString());
    }
    return predict;
}
Also used : DFAState(org.antlr.v4.runtime.dfa.DFAState)

Example 35 with ATNState

use of org.antlr.v4.runtime.atn.ATNState in project antlr4 by antlr.

the class ATN method getExpectedTokens.

/**
 * Computes the set of input symbols which could follow ATN state number
 * {@code stateNumber} in the specified full {@code context}. This method
 * considers the complete parser context, but does not evaluate semantic
 * predicates (i.e. all predicates encountered during the calculation are
 * assumed true). If a path in the ATN exists from the starting state to the
 * {@link RuleStopState} of the outermost context without matching any
 * symbols, {@link Token#EOF} is added to the returned set.
 *
 * <p>If {@code context} is {@code null}, it is treated as {@link ParserRuleContext#EMPTY}.</p>
 *
 * Note that this does NOT give you the set of all tokens that could
 * appear at a given token position in the input phrase.  In other words,
 * it does not answer:
 *
 *   "Given a specific partial input phrase, return the set of all tokens
 *    that can follow the last token in the input phrase."
 *
 * The big difference is that with just the input, the parser could
 * land right in the middle of a lookahead decision. Getting
 * all *possible* tokens given a partial input stream is a separate
 * computation. See https://github.com/antlr/antlr4/issues/1428
 *
 * For this function, we are specifying an ATN state and call stack to compute
 * what token(s) can come next and specifically: outside of a lookahead decision.
 * That is what you want for error reporting and recovery upon parse error.
 *
 * @param stateNumber the ATN state number
 * @param context the full parse context
 * @return The set of potentially valid input symbols which could follow the
 * specified state in the specified context.
 * @throws IllegalArgumentException if the ATN does not contain a state with
 * number {@code stateNumber}
 */
public IntervalSet getExpectedTokens(int stateNumber, RuleContext context) {
    if (stateNumber < 0 || stateNumber >= states.size()) {
        throw new IllegalArgumentException("Invalid state number.");
    }
    RuleContext ctx = context;
    ATNState s = states.get(stateNumber);
    IntervalSet following = nextTokens(s);
    if (!following.contains(Token.EPSILON)) {
        return following;
    }
    IntervalSet expected = new IntervalSet();
    expected.addAll(following);
    expected.remove(Token.EPSILON);
    while (ctx != null && ctx.invokingState >= 0 && following.contains(Token.EPSILON)) {
        ATNState invokingState = states.get(ctx.invokingState);
        RuleTransition rt = (RuleTransition) invokingState.transition(0);
        following = nextTokens(rt.followState);
        expected.addAll(following);
        expected.remove(Token.EPSILON);
        ctx = ctx.parent;
    }
    if (following.contains(Token.EPSILON)) {
        expected.add(Token.EOF);
    }
    return expected;
}
Also used : ParserRuleContext(org.antlr.v4.runtime.ParserRuleContext) RuleContext(org.antlr.v4.runtime.RuleContext) IntervalSet(org.antlr.v4.runtime.misc.IntervalSet)

Aggregations

ATNState (org.antlr.v4.runtime.atn.ATNState)94 IntervalSet (org.antlr.v4.runtime.misc.IntervalSet)34 ATN (org.antlr.v4.runtime.atn.ATN)25 RuleTransition (org.antlr.v4.runtime.atn.RuleTransition)23 Rule (org.antlr.v4.tool.Rule)22 Transition (org.antlr.v4.runtime.atn.Transition)21 NotNull (org.antlr.v4.runtime.misc.NotNull)19 AtomTransition (org.antlr.v4.runtime.atn.AtomTransition)18 RuleStartState (org.antlr.v4.runtime.atn.RuleStartState)17 ActionTransition (org.antlr.v4.runtime.atn.ActionTransition)16 SetTransition (org.antlr.v4.runtime.atn.SetTransition)16 NotSetTransition (org.antlr.v4.runtime.atn.NotSetTransition)15 DecisionState (org.antlr.v4.runtime.atn.DecisionState)11 DOTGenerator (org.antlr.v4.tool.DOTGenerator)10 GrammarAST (org.antlr.v4.tool.ast.GrammarAST)10 ParserATNFactory (org.antlr.v4.automata.ParserATNFactory)9 LeftRecursiveRule (org.antlr.v4.tool.LeftRecursiveRule)9 ArrayList (java.util.ArrayList)8 EpsilonTransition (org.antlr.v4.runtime.atn.EpsilonTransition)8 PrecedencePredicateTransition (org.antlr.v4.runtime.atn.PrecedencePredicateTransition)8