Search in sources :

Example 31 with IntegerList

use of org.antlr.v4.runtime.misc.IntegerList in project antlr4 by tunnelvisionlabs.

the class ParserATNSimulator method computeTargetState.

/**
 * Compute a target state for an edge in the DFA, and attempt to add the
 * computed state and corresponding edge to the DFA.
 *
 * <p>The reach set for symbol {@code t} is computed from the configurations
 * of {@code s}. When {@code useContext} is {@code true} and the computed
 * reach set dips into the outer context, the partial result is discarded,
 * the next return state is taken from {@code remainingGlobalContext},
 * appended to every starting configuration, and the computation is
 * repeated.</p>
 *
 * @param dfa The DFA to which the computed edge and target state are added
 * @param s The current DFA state; its configurations seed the reach
 * computation
 * @param remainingGlobalContext The part of the outer (global) context that
 * has not been consumed yet. It is only walked when {@code useContext} is
 * {@code true}; {@code null} then means there is no more context to step
 * into.
 * @param t The next input symbol
 * @param useContext {@code true} to perform a full-context reach operation
 * that may step into {@code remainingGlobalContext}; {@code false} for a
 * local-context operation that executes the loop body exactly once
 * @param contextCache The cache passed along to every operation that adds
 * or rewrites configurations (set insertion, closure, context append)
 *
 * @return A tuple holding the computed target DFA state for the given input
 * symbol {@code t} together with the value of
 * {@code remainingGlobalContext} after any context elements were consumed.
 * If {@code t} does not lead to a valid DFA state (the reach set is empty),
 * the state component is {@link #ERROR}.
 */
@NotNull
protected Tuple2<DFAState, ParserRuleContext> computeTargetState(@NotNull DFA dfa, @NotNull DFAState s, ParserRuleContext remainingGlobalContext, int t, boolean useContext, PredictionContextCache contextCache) {
    // Working copy of the start configurations; entries are replaced in place
    // each time a context element is appended at the bottom of the loop.
    List<ATNConfig> closureConfigs = new ArrayList<ATNConfig>(s.configs);
    // Return states taken from the global context so far; created lazily and
    // handed to addDFAEdge at the end (stays null if no context was consumed).
    IntegerList contextElements = null;
    ATNConfigSet reach = new ATNConfigSet();
    // Assigned after closure in every iteration that reaches the loop
    // condition; the early break below leaves the loop without reading it.
    boolean stepIntoGlobal;
    do {
        // Local-context operations always count as having more context; for
        // full-context operations this is true only while global context remains.
        boolean hasMoreContext = !useContext || remainingGlobalContext != null;
        if (!hasMoreContext) {
            reach.setOutermostConfigSet(true);
        }
        ATNConfigSet reachIntermediate = new ATNConfigSet();
        /* Configurations already in a rule stop state indicate reaching the end
         * of the decision rule (local context) or end of the start rule (full
         * context). Once reached, these configurations are never updated by a
         * closure operation, so they are handled separately for the performance
         * advantage of having a smaller intermediate set when calling closure.
         *
         * For full-context reach operations, separate handling is required to
         * ensure that the alternative matching the longest overall sequence is
         * chosen when multiple such configurations can match the input.
         */
        List<ATNConfig> skippedStopStates = null;
        for (ATNConfig c : closureConfigs) {
            if (debug)
                System.out.println("testing " + getTokenName(t) + " at " + c.toString());
            if (c.getState() instanceof RuleStopState) {
                assert c.getContext().isEmpty();
                // Withhold the configuration when it is a full-context config that
                // does not reach into the outer context, or when the symbol is EOF.
                // Either way it takes no part in the transition loop below.
                if (useContext && !c.getReachesIntoOuterContext() || t == IntStream.EOF) {
                    if (skippedStopStates == null) {
                        skippedStopStates = new ArrayList<ATNConfig>();
                    }
                    skippedStopStates.add(c);
                }
                continue;
            }
            int n = c.getState().getNumberOfOptimizedTransitions();
            for (int ti = 0; ti < n; ti++) {
                // for each optimized transition
                Transition trans = c.getState().getOptimizedTransition(ti);
                // A null target means this transition does not match symbol t.
                ATNState target = getReachableTarget(c, trans, t);
                if (target != null) {
                    reachIntermediate.add(c.transform(target, false), contextCache);
                }
            }
        }
        /* This block optimizes the reach operation for intermediate sets which
         * trivially indicate a termination state for the overall
         * adaptivePredict operation.
         *
         * The conditions assume that intermediate
         * contains all configurations relevant to the reach set, but this
         * condition is not true when one or more configurations have been
         * withheld in skippedStopStates, or when the current symbol is EOF.
         */
        if (optimize_unique_closure && skippedStopStates == null && t != Token.EOF && reachIntermediate.getUniqueAlt() != ATN.INVALID_ALT_NUMBER) {
            // Carry over the outermost flag, then use the intermediate set as the
            // final reach set without running closure.
            reachIntermediate.setOutermostConfigSet(reach.isOutermostConfigSet());
            reach = reachIntermediate;
            break;
        }
        /* If the reach set could not be trivially determined, perform a closure
         * operation on the intermediate set to compute its initial value.
         */
        final boolean collectPredicates = false;
        boolean treatEofAsEpsilon = t == Token.EOF;
        closure(reachIntermediate, reach, collectPredicates, hasMoreContext, contextCache, treatEofAsEpsilon);
        // Captured right after closure: the EOF filtering and the re-adding of
        // skipped stop states below do not influence the decision to step out.
        stepIntoGlobal = reach.getDipsIntoOuterContext();
        if (t == IntStream.EOF) {
            /* After consuming EOF no additional input is possible, so we are
             * only interested in configurations which reached the end of the
             * decision rule (local context) or end of the start rule (full
             * context). Update reach to contain only these configurations. This
             * handles both explicit EOF transitions in the grammar and implicit
             * EOF transitions following the end of the decision or start rule.
             *
             * This is handled before the configurations in skippedStopStates,
             * because any configurations potentially added from that list are
             * already guaranteed to meet this condition whether or not it's
             * required.
             */
            reach = removeAllConfigsNotInRuleStopState(reach, contextCache);
        }
        /* If skippedStopStates is not null, then it contains at least one
         * configuration. For full-context reach operations, these
         * configurations reached the end of the start rule, in which case we
         * only add them back to reach if no configuration during the current
         * closure operation reached such a state. This ensures adaptivePredict
         * chooses an alternative matching the longest overall sequence when
         * multiple alternatives are viable.
         */
        if (skippedStopStates != null && (!useContext || !PredictionMode.hasConfigInRuleStopState(reach))) {
            assert !skippedStopStates.isEmpty();
            for (ATNConfig c : skippedStopStates) {
                reach.add(c, contextCache);
            }
        }
        if (useContext && stepIntoGlobal) {
            // The reach set dipped into the outer context: throw the partial
            // result away and redo the computation with one more element of the
            // global context appended to every starting configuration.
            reach.clear();
            remainingGlobalContext = skipTailCalls(remainingGlobalContext);
            int nextContextElement = getReturnState(remainingGlobalContext);
            if (contextElements == null) {
                contextElements = new IntegerList();
            }
            // Move one level up the global context; null marks it as exhausted
            // for the hasMoreContext check of the next iteration.
            if (remainingGlobalContext.isEmpty()) {
                remainingGlobalContext = null;
            } else {
                remainingGlobalContext = remainingGlobalContext.getParent();
            }
            contextElements.add(nextContextElement);
            // The EMPTY_FULL_STATE_KEY element is recorded in contextElements
            // above but is not appended to the configurations.
            if (nextContextElement != PredictionContext.EMPTY_FULL_STATE_KEY) {
                for (int i = 0; i < closureConfigs.size(); i++) {
                    closureConfigs.set(i, closureConfigs.get(i).appendContext(nextContextElement, contextCache));
                }
            }
        }
    } while (useContext && stepIntoGlobal);
    if (reach.isEmpty()) {
        // No configuration survives symbol t: record an error edge from s.
        addDFAEdge(s, t, ERROR);
        return Tuple.create(ERROR, remainingGlobalContext);
    }
    DFAState result = addDFAEdge(dfa, s, t, contextElements, reach, contextCache);
    return Tuple.create(result, remainingGlobalContext);
}
Also used : DFAState(org.antlr.v4.runtime.dfa.DFAState) ArrayList(java.util.ArrayList) IntegerList(org.antlr.v4.runtime.misc.IntegerList) NotNull(org.antlr.v4.runtime.misc.NotNull)

Example 32 with IntegerList

use of org.antlr.v4.runtime.misc.IntegerList in project antlr4 by tunnelvisionlabs.

the class BaseTest method getTokenTypesViaATN.

/**
 * Runs the lexer simulator over {@code input} in the default mode and
 * collects the type of every matched token.
 *
 * @param input the text to tokenize
 * @param lexerATN the lexer simulator used to match tokens
 * @return the matched token types in input order; the final element is
 * always {@code Token.EOF}
 */
public IntegerList getTokenTypesViaATN(String input, LexerATNSimulator lexerATN) {
    CharStream chars = CharStreams.fromString(input);
    IntegerList collected = new IntegerList();
    while (true) {
        int type = lexerATN.match(chars, Lexer.DEFAULT_MODE);
        collected.add(type);
        // EOF is recorded like any other token type before the loop ends.
        if (type == Token.EOF) {
            return collected;
        }
    }
}
Also used : IntegerList(org.antlr.v4.runtime.misc.IntegerList) CharStream(org.antlr.v4.runtime.CharStream)

Example 33 with IntegerList

use of org.antlr.v4.runtime.misc.IntegerList in project antlr4 by tunnelvisionlabs.

the class ATNSerializer method serialize.

/**
 * Serialize state descriptors, edge descriptors, and decision&rarr;state map
 *  into list of ints, in this order:
 *
 *      serialized version,
 *      serialized UUID (written by serializeUUID),
 *      grammar-type (ordinal of the ATNType),
 *      max token type,
 *      num states,
 *      state-0-type ruleIndex optional-arg, state-1-type ruleIndex optional-arg, ...
 *      (a removed state is written as the single value ATNState.INVALID_TYPE;
 *      optional-arg is the loop-back state for LOOP_END states and the end
 *      state for block start states)
 *      num non-greedy states, state numbers ...
 *      num SLL decision states, state numbers ...
 *      num precedence rule start states, state numbers ...
 *      num rules,
 *      rule-0-start-state rule-0-left-factored-flag [rule-0-token-type], ...
 *      (the token type is only present for lexer grammars)
 *      num modes,
 *      mode-0-start-state, mode-1-start-state, ... (nothing when there are 0 modes)
 *      num unicode-bmp-sets
 *      bmp-set-0-interval-count contains-eof intervals, ...
 *      num unicode-smp-sets
 *      smp-set-0-interval-count contains-eof intervals, ...
 *      num total edges,
 *      src, trg, edge-type, edge arg1, edge arg2, edge arg3 (all always present), ...
 *      num decisions,
 *      decision-0-start-state, decision-1-start-state, ...
 *      num lexer actions (lexer grammars only),
 *      action-0-type data1 data2, action-1-type data1 data2, ...
 *
 *  Every element except the first (the version) must lie in the range of a
 *  Java {@code char}; as a last step each of them is replaced by
 *  {@code (value + 2) & 0xFFFF}.
 *
 *  Convenient to pack into unsigned shorts to make as Java string.
 *
 * @return the serialized ATN as a list of integers
 * @throws IllegalStateException if a transition targets a state that has been
 * removed from the ATN
 * @throws IllegalArgumentException if a lexer action has an unknown action type
 * @throws UnsupportedOperationException if any element other than the version
 * falls outside {@code Character.MIN_VALUE..Character.MAX_VALUE}
 */
public IntegerList serialize() {
    IntegerList data = new IntegerList();
    data.add(ATNDeserializer.SERIALIZED_VERSION);
    serializeUUID(data, ATNDeserializer.SERIALIZED_UUID);
    // convert grammar type to ATN const to avoid dependence on ANTLRParser
    data.add(atn.grammarType.ordinal());
    data.add(atn.maxTokenType);
    // Running total of transitions that will be written in the edge section.
    int nedges = 0;
    // Note that we use a LinkedHashMap as a set to
    // maintain insertion order while deduplicating
    // entries with the same key.
    Map<IntervalSet, Boolean> sets = new LinkedHashMap<IntervalSet, Boolean>();
    // dump states, count edges and collect sets while doing so
    IntegerList nonGreedyStates = new IntegerList();
    IntegerList sllStates = new IntegerList();
    IntegerList precedenceStates = new IntegerList();
    data.add(atn.states.size());
    for (ATNState s : atn.states) {
        if (s == null) {
            // might be optimized away
            data.add(ATNState.INVALID_TYPE);
            continue;
        }
        int stateType = s.getStateType();
        // Remember the state numbers that are written as separate lists after
        // the state section.
        if (s instanceof DecisionState) {
            DecisionState decisionState = (DecisionState) s;
            if (decisionState.nonGreedy) {
                nonGreedyStates.add(s.stateNumber);
            }
            if (decisionState.sll) {
                sllStates.add(s.stateNumber);
            }
        }
        if (s instanceof RuleStartState && ((RuleStartState) s).isPrecedenceRule) {
            precedenceStates.add(s.stateNumber);
        }
        data.add(stateType);
        // A rule index of -1 is written as Character.MAX_VALUE (0xFFFF) so it
        // stays inside the char range checked at the end of this method.
        if (s.ruleIndex == -1) {
            data.add(Character.MAX_VALUE);
        } else {
            data.add(s.ruleIndex);
        }
        // Optional extra argument: loop-back state for loop ends, end state for
        // block starts.
        if (s.getStateType() == ATNState.LOOP_END) {
            data.add(((LoopEndState) s).loopBackState.stateNumber);
        } else if (s instanceof BlockStartState) {
            data.add(((BlockStartState) s).endState.stateNumber);
        }
        if (s.getStateType() != ATNState.RULE_STOP) {
            // the deserializer can trivially derive these edges, so there's no need to serialize them
            nedges += s.getNumberOfTransitions();
        }
        // Collect the interval sets used by SET / NOT_SET transitions of this
        // state; the map deduplicates sets that are equal.
        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
            Transition t = s.transition(i);
            int edgeType = Transition.serializationTypes.get(t.getClass());
            if (edgeType == Transition.SET || edgeType == Transition.NOT_SET) {
                SetTransition st = (SetTransition) t;
                sets.put(st.set, true);
            }
        }
    }
    // non-greedy states
    data.add(nonGreedyStates.size());
    for (int i = 0; i < nonGreedyStates.size(); i++) {
        data.add(nonGreedyStates.get(i));
    }
    // SLL decisions
    data.add(sllStates.size());
    for (int i = 0; i < sllStates.size(); i++) {
        data.add(sllStates.get(i));
    }
    // precedence states
    data.add(precedenceStates.size());
    for (int i = 0; i < precedenceStates.size(); i++) {
        data.add(precedenceStates.get(i));
    }
    // rules: start state, left-factored flag and (lexer only) token type
    int nrules = atn.ruleToStartState.length;
    data.add(nrules);
    for (int r = 0; r < nrules; r++) {
        ATNState ruleStartState = atn.ruleToStartState[r];
        data.add(ruleStartState.stateNumber);
        // The flag is 1 when the rule name contains RULE_VARIANT_DELIMITER.
        boolean leftFactored = ruleNames.get(ruleStartState.ruleIndex).indexOf(ATNSimulator.RULE_VARIANT_DELIMITER) >= 0;
        data.add(leftFactored ? 1 : 0);
        if (atn.grammarType == ATNType.LEXER) {
            // A token type of EOF is written as Character.MAX_VALUE.
            if (atn.ruleToTokenType[r] == Token.EOF) {
                data.add(Character.MAX_VALUE);
            } else {
                data.add(atn.ruleToTokenType[r]);
            }
        }
    }
    // modes: count followed by each mode's start state
    int nmodes = atn.modeToStartState.size();
    data.add(nmodes);
    if (nmodes > 0) {
        for (ATNState modeStartState : atn.modeToStartState) {
            data.add(modeStartState.stateNumber);
        }
    }
    // Split the collected sets by their largest element: sets that fit in a
    // char go to the BMP list, all others to the SMP list.
    List<IntervalSet> bmpSets = new ArrayList<IntervalSet>();
    List<IntervalSet> smpSets = new ArrayList<IntervalSet>();
    for (IntervalSet set : sets.keySet()) {
        if (set.getMaxElement() <= Character.MAX_VALUE) {
            bmpSets.add(set);
        } else {
            smpSets.add(set);
        }
    }
    // BMP code points are written as one element each.
    serializeSets(data, bmpSets, new CodePointSerializer() {

        @Override
        public void serializeCodePoint(IntegerList data, int cp) {
            data.add(cp);
        }
    });
    // SMP code points are written through serializeInt (defined elsewhere).
    serializeSets(data, smpSets, new CodePointSerializer() {

        @Override
        public void serializeCodePoint(IntegerList data, int cp) {
            serializeInt(data, cp);
        }
    });
    // Index of every set in serialization order (all BMP sets first, then the
    // SMP sets); SET / NOT_SET edges refer to their set by this index.
    Map<IntervalSet, Integer> setIndices = new HashMap<IntervalSet, Integer>();
    int setIndex = 0;
    for (IntervalSet bmpSet : bmpSets) {
        setIndices.put(bmpSet, setIndex++);
    }
    for (IntervalSet smpSet : smpSets) {
        setIndices.put(smpSet, setIndex++);
    }
    // edges: count followed by six values per transition
    data.add(nedges);
    for (ATNState s : atn.states) {
        if (s == null) {
            // might be optimized away
            continue;
        }
        // Rule stop states were excluded from nedges above, so their
        // transitions are skipped here as well.
        if (s.getStateType() == ATNState.RULE_STOP) {
            continue;
        }
        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
            Transition t = s.transition(i);
            if (atn.states.get(t.target.stateNumber) == null) {
                throw new IllegalStateException("Cannot serialize a transition to a removed state.");
            }
            int src = s.stateNumber;
            int trg = t.target.stateNumber;
            int edgeType = Transition.serializationTypes.get(t.getClass());
            int arg1 = 0;
            int arg2 = 0;
            int arg3 = 0;
            switch(edgeType) {
                case Transition.RULE:
                    // For rule transitions the written target is the follow state;
                    // the invoked rule's start state goes into arg1.
                    trg = ((RuleTransition) t).followState.stateNumber;
                    arg1 = ((RuleTransition) t).target.stateNumber;
                    arg2 = ((RuleTransition) t).ruleIndex;
                    arg3 = ((RuleTransition) t).precedence;
                    break;
                case Transition.PRECEDENCE:
                    PrecedencePredicateTransition ppt = (PrecedencePredicateTransition) t;
                    arg1 = ppt.precedence;
                    break;
                case Transition.PREDICATE:
                    PredicateTransition pt = (PredicateTransition) t;
                    arg1 = pt.ruleIndex;
                    arg2 = pt.predIndex;
                    arg3 = pt.isCtxDependent ? 1 : 0;
                    break;
                case Transition.RANGE:
                    arg1 = ((RangeTransition) t).from;
                    arg2 = ((RangeTransition) t).to;
                    // A lower bound of EOF is written as 0 with arg3 = 1 as marker.
                    if (arg1 == Token.EOF) {
                        arg1 = 0;
                        arg3 = 1;
                    }
                    break;
                case Transition.ATOM:
                    arg1 = ((AtomTransition) t).label;
                    // An EOF label is written as 0 with arg3 = 1 as marker.
                    if (arg1 == Token.EOF) {
                        arg1 = 0;
                        arg3 = 1;
                    }
                    break;
                case Transition.ACTION:
                    ActionTransition at = (ActionTransition) t;
                    arg1 = at.ruleIndex;
                    arg2 = at.actionIndex;
                    // An action index of -1 is written as 0xFFFF.
                    if (arg2 == -1) {
                        arg2 = 0xFFFF;
                    }
                    arg3 = at.isCtxDependent ? 1 : 0;
                    break;
                case Transition.SET:
                    arg1 = setIndices.get(((SetTransition) t).set);
                    break;
                case Transition.NOT_SET:
                    arg1 = setIndices.get(((SetTransition) t).set);
                    break;
                case Transition.WILDCARD:
                    break;
            }
            data.add(src);
            data.add(trg);
            data.add(edgeType);
            data.add(arg1);
            data.add(arg2);
            data.add(arg3);
        }
    }
    // decisions: count followed by each decision's start state
    int ndecisions = atn.decisionToState.size();
    data.add(ndecisions);
    for (DecisionState decStartState : atn.decisionToState) {
        data.add(decStartState.stateNumber);
    }
    // lexer actions (lexer grammars only): count, then the action type ordinal
    // and two data values per action; unused data values are written as 0 and
    // a value of -1 is written as 0xFFFF
    if (atn.grammarType == ATNType.LEXER) {
        data.add(atn.lexerActions.length);
        for (LexerAction action : atn.lexerActions) {
            data.add(action.getActionType().ordinal());
            switch(action.getActionType()) {
                case CHANNEL:
                    int channel = ((LexerChannelAction) action).getChannel();
                    data.add(channel != -1 ? channel : 0xFFFF);
                    data.add(0);
                    break;
                case CUSTOM:
                    int ruleIndex = ((LexerCustomAction) action).getRuleIndex();
                    int actionIndex = ((LexerCustomAction) action).getActionIndex();
                    data.add(ruleIndex != -1 ? ruleIndex : 0xFFFF);
                    data.add(actionIndex != -1 ? actionIndex : 0xFFFF);
                    break;
                case MODE:
                    int mode = ((LexerModeAction) action).getMode();
                    data.add(mode != -1 ? mode : 0xFFFF);
                    data.add(0);
                    break;
                case MORE:
                    data.add(0);
                    data.add(0);
                    break;
                case POP_MODE:
                    data.add(0);
                    data.add(0);
                    break;
                case PUSH_MODE:
                    // Reuses the local declared in the MODE case above.
                    mode = ((LexerPushModeAction) action).getMode();
                    data.add(mode != -1 ? mode : 0xFFFF);
                    data.add(0);
                    break;
                case SKIP:
                    data.add(0);
                    data.add(0);
                    break;
                case TYPE:
                    int type = ((LexerTypeAction) action).getType();
                    data.add(type != -1 ? type : 0xFFFF);
                    data.add(0);
                    break;
                default:
                    String message = String.format(Locale.getDefault(), "The specified lexer action type %s is not valid.", action.getActionType());
                    throw new IllegalArgumentException(message);
            }
        }
    }
    // don't adjust the first value since that's the version number
    for (int i = 1; i < data.size(); i++) {
        if (data.get(i) < Character.MIN_VALUE || data.get(i) > Character.MAX_VALUE) {
            throw new UnsupportedOperationException("Serialized ATN data element " + data.get(i) + " element " + i + " out of range " + (int) Character.MIN_VALUE + ".." + (int) Character.MAX_VALUE);
        }
        // Shift by 2 modulo 2^16: 0xFFFF becomes 1 and 0xFFFE becomes 0.
        // NOTE(review): the reader presumably undoes this offset -- confirm
        // against ATNDeserializer.
        int value = (data.get(i) + 2) & 0xFFFF;
        data.set(i, value);
    }
    return data;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) IntegerList(org.antlr.v4.runtime.misc.IntegerList) IntervalSet(org.antlr.v4.runtime.misc.IntervalSet)

Example 34 with IntegerList

use of org.antlr.v4.runtime.misc.IntegerList in project antlr4 by tunnelvisionlabs.

the class ATNSerializer method serializeSets.

/**
 * Appends the given interval sets to {@code data}.
 *
 * <p>Written layout: the number of sets, then for each set the number of
 * serialized intervals, a 0/1 flag telling whether the set contains
 * {@code Token.EOF}, and the two bounds of each serialized interval. An
 * interval consisting only of EOF is represented by the flag alone; an
 * interval that starts at EOF but extends further is written with a lower
 * bound of 0.</p>
 *
 * @param data the list the serialized values are appended to
 * @param sets the interval sets to write, in iteration order
 * @param codePointSerializer strategy used to write each interval bound
 */
private static void serializeSets(IntegerList data, Collection<IntervalSet> sets, CodePointSerializer codePointSerializer) {
    data.add(sets.size());
    for (IntervalSet set : sets) {
        boolean hasEof = set.contains(Token.EOF);
        List<Interval> intervals = set.getIntervals();

        // An EOF-only leading interval is dropped from the count because it is
        // not written as an interval below.
        int writtenIntervals = intervals.size();
        if (hasEof && intervals.get(0).b == Token.EOF) {
            writtenIntervals--;
        }
        data.add(writtenIntervals);
        data.add(hasEof ? 1 : 0);

        for (Interval interval : intervals) {
            boolean startsAtEof = interval.a == Token.EOF;
            if (startsAtEof && interval.b == Token.EOF) {
                continue;
            }
            int lowerBound = startsAtEof ? 0 : interval.a;
            codePointSerializer.serializeCodePoint(data, lowerBound);
            codePointSerializer.serializeCodePoint(data, interval.b);
        }
    }
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet) Interval(org.antlr.v4.runtime.misc.Interval)

Example 35 with IntegerList

use of org.antlr.v4.runtime.misc.IntegerList in project antlr4 by tunnelvisionlabs.

the class TestATNParserPrediction method checkPredictedAlt.

/**
 * Checks that prediction for {@code decision} on {@code inputString} yields
 * {@code expectedAlt}: first with an explicit empty outer context, then
 * twice more with a {@code null} outer context.
 *
 * <p>Along the way the token types and the DOT rendering of a few well-known
 * rules (if present in the grammar) are printed to standard output.</p>
 *
 * @param lg the lexer grammar used to tokenize {@code inputString}
 * @param g the parser grammar containing the decision
 * @param decision the decision number to predict
 * @param inputString the raw input text
 * @param expectedAlt the alternative every prediction must return
 */
public void checkPredictedAlt(LexerGrammar lg, Grammar g, int decision, String inputString, int expectedAlt) {
    Tool.internalOption_ShowATNConfigsInDFA = true;

    // Tokenize the input through the lexer grammar's ATN.
    ATN lexerAtn = createATN(lg, true);
    LexerATNSimulator lexerSimulator = new LexerATNSimulator(lexerAtn);
    IntegerList tokenTypes = getTokenTypesViaATN(inputString, lexerSimulator);
    System.out.println(tokenTypes);

    semanticProcess(lg);
    g.importVocab(lg);
    semanticProcess(g);

    ParserATNFactory factory = new ParserATNFactory(g);
    ATN atn = factory.createATN();
    DOTGenerator dot = new DOTGenerator(g);

    // Dump the ATN of each of these rules when the grammar defines it.
    String[] rulesToDump = { "a", "b", "e", "ifstat", "block" };
    for (String ruleName : rulesToDump) {
        Rule rule = g.getRule(ruleName);
        if (rule != null) {
            System.out.println(dot.getDOT(atn.ruleToStartState[rule.index]));
        }
    }

    // Check ATN prediction
    TokenStream input = new IntTokenStream(tokenTypes);
    ParserInterpreterForTesting interp = new ParserInterpreterForTesting(g, input);
    DecisionState startState = atn.decisionToState.get(decision);
    DFA dfa = new DFA(startState, decision);
    int predicted = interp.adaptivePredict(input, decision, ParserRuleContext.emptyContext());
    System.out.println(dot.getDOT(dfa, false));
    assertEquals(expectedAlt, predicted);

    // Check adaptive prediction; run 2x, the first time creates the DFA in the atn
    for (int pass = 0; pass < 2; pass++) {
        input.seek(0);
        predicted = interp.adaptivePredict(input, decision, null);
        assertEquals(expectedAlt, predicted);
    }
}
Also used : ParserATNFactory(org.antlr.v4.automata.ParserATNFactory) DOTGenerator(org.antlr.v4.tool.DOTGenerator) TokenStream(org.antlr.v4.runtime.TokenStream) LexerATNSimulator(org.antlr.v4.runtime.atn.LexerATNSimulator) IntegerList(org.antlr.v4.runtime.misc.IntegerList) ATN(org.antlr.v4.runtime.atn.ATN) Rule(org.antlr.v4.tool.Rule) LeftRecursiveRule(org.antlr.v4.tool.LeftRecursiveRule) DecisionState(org.antlr.v4.runtime.atn.DecisionState) DFA(org.antlr.v4.runtime.dfa.DFA)

Aggregations

IntegerList (org.antlr.v4.runtime.misc.IntegerList)42 Test (org.junit.Test)12 STGroupString (org.stringtemplate.v4.STGroupString)7 ANTLRInputStream (org.antlr.v4.runtime.ANTLRInputStream)6 ATN (org.antlr.v4.runtime.atn.ATN)6 LexerATNSimulator (org.antlr.v4.runtime.atn.LexerATNSimulator)6 ArrayList (java.util.ArrayList)5 TokenStream (org.antlr.v4.runtime.TokenStream)5 DFA (org.antlr.v4.runtime.dfa.DFA)5 IntervalSet (org.antlr.v4.runtime.misc.IntervalSet)5 BaseRuntimeTest.antlrOnString (org.antlr.v4.test.runtime.BaseRuntimeTest.antlrOnString)5 ParserATNFactory (org.antlr.v4.automata.ParserATNFactory)4 DOTGenerator (org.antlr.v4.tool.DOTGenerator)4 Rule (org.antlr.v4.tool.Rule)4 HashMap (java.util.HashMap)3 ATNState (org.antlr.v4.runtime.atn.ATNState)3 BlockStartState (org.antlr.v4.runtime.atn.BlockStartState)3 MockIntTokenStream (org.antlr.v4.test.runtime.MockIntTokenStream)3 RuntimeTestUtils.getTokenTypesViaATN (org.antlr.v4.test.runtime.RuntimeTestUtils.getTokenTypesViaATN)3 BufferedWriter (java.io.BufferedWriter)2