Search in sources :

Example 1 with IntervalSet

use of org.antlr.v4.runtime.misc.IntervalSet in project antlr4 by antlr.

the class ATNDeserializer method deserializeSets.

private int deserializeSets(char[] data, int p, List<IntervalSet> sets, UnicodeDeserializer unicodeDeserializer) {
    int nsets = toInt(data[p++]);
    for (int i = 0; i < nsets; i++) {
        int nintervals = toInt(data[p]);
        p++;
        IntervalSet set = new IntervalSet();
        sets.add(set);
        boolean containsEof = toInt(data[p++]) != 0;
        if (containsEof) {
            set.add(-1);
        }
        for (int j = 0; j < nintervals; j++) {
            int a = unicodeDeserializer.readUnicode(data, p);
            p += unicodeDeserializer.size();
            int b = unicodeDeserializer.readUnicode(data, p);
            p += unicodeDeserializer.size();
            set.add(a, b);
        }
    }
    return p;
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet)

Example 2 with IntervalSet

use of org.antlr.v4.runtime.misc.IntervalSet in project antlr4 by antlr.

the class ATNSerializer method serialize.

/** Serialize state descriptors, edge descriptors, and decision&rarr;state map
	 *  into list of ints:
	 *
	 * 		grammar-type, (ANTLRParser.LEXER, ...)
	 *  	max token type,
	 *  	num states,
	 *  	state-0-type ruleIndex, state-1-type ruleIndex, ... state-i-type ruleIndex optional-arg ...
	 *  	num rules,
	 *  	rule-1-start-state rule-1-args, rule-2-start-state  rule-2-args, ...
	 *  	(args are token type,actionIndex in lexer else 0,0)
	 *      num modes,
	 *      mode-0-start-state, mode-1-start-state, ... (parser has 0 modes)
	 *      num unicode-bmp-sets
	 *      bmp-set-0-interval-count intervals, bmp-set-1-interval-count intervals, ...
	 *      num unicode-smp-sets
	 *      smp-set-0-interval-count intervals, smp-set-1-interval-count intervals, ...
	 *	num total edges,
	 *      src, trg, edge-type, edge arg1, optional edge arg2 (present always), ...
	 *      num decisions,
	 *      decision-0-start-state, decision-1-start-state, ...
	 *
	 *  Convenient to pack into unsigned shorts to make as Java string.
	 */
public IntegerList serialize() {
    IntegerList data = new IntegerList();
    data.add(ATNDeserializer.SERIALIZED_VERSION);
    serializeUUID(data, ATNDeserializer.SERIALIZED_UUID);
    // convert grammar type to ATN const to avoid dependence on ANTLRParser
    data.add(atn.grammarType.ordinal());
    data.add(atn.maxTokenType);
    int nedges = 0;
    // Note that we use a LinkedHashMap as a set to
    // maintain insertion order while deduplicating
    // entries with the same key.
    Map<IntervalSet, Boolean> sets = new LinkedHashMap<>();
    // dump states, count edges and collect sets while doing so
    IntegerList nonGreedyStates = new IntegerList();
    IntegerList precedenceStates = new IntegerList();
    data.add(atn.states.size());
    for (ATNState s : atn.states) {
        if (s == null) {
            // might be optimized away
            data.add(ATNState.INVALID_TYPE);
            continue;
        }
        int stateType = s.getStateType();
        if (s instanceof DecisionState && ((DecisionState) s).nonGreedy) {
            nonGreedyStates.add(s.stateNumber);
        }
        if (s instanceof RuleStartState && ((RuleStartState) s).isLeftRecursiveRule) {
            precedenceStates.add(s.stateNumber);
        }
        data.add(stateType);
        if (s.ruleIndex == -1) {
            data.add(Character.MAX_VALUE);
        } else {
            data.add(s.ruleIndex);
        }
        if (s.getStateType() == ATNState.LOOP_END) {
            data.add(((LoopEndState) s).loopBackState.stateNumber);
        } else if (s instanceof BlockStartState) {
            data.add(((BlockStartState) s).endState.stateNumber);
        }
        if (s.getStateType() != ATNState.RULE_STOP) {
            // the deserializer can trivially derive these edges, so there's no need to serialize them
            nedges += s.getNumberOfTransitions();
        }
        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
            Transition t = s.transition(i);
            int edgeType = Transition.serializationTypes.get(t.getClass());
            if (edgeType == Transition.SET || edgeType == Transition.NOT_SET) {
                SetTransition st = (SetTransition) t;
                sets.put(st.set, true);
            }
        }
    }
    // non-greedy states
    data.add(nonGreedyStates.size());
    for (int i = 0; i < nonGreedyStates.size(); i++) {
        data.add(nonGreedyStates.get(i));
    }
    // precedence states
    data.add(precedenceStates.size());
    for (int i = 0; i < precedenceStates.size(); i++) {
        data.add(precedenceStates.get(i));
    }
    int nrules = atn.ruleToStartState.length;
    data.add(nrules);
    for (int r = 0; r < nrules; r++) {
        ATNState ruleStartState = atn.ruleToStartState[r];
        data.add(ruleStartState.stateNumber);
        if (atn.grammarType == ATNType.LEXER) {
            if (atn.ruleToTokenType[r] == Token.EOF) {
                data.add(Character.MAX_VALUE);
            } else {
                data.add(atn.ruleToTokenType[r]);
            }
        }
    }
    int nmodes = atn.modeToStartState.size();
    data.add(nmodes);
    if (nmodes > 0) {
        for (ATNState modeStartState : atn.modeToStartState) {
            data.add(modeStartState.stateNumber);
        }
    }
    List<IntervalSet> bmpSets = new ArrayList<>();
    List<IntervalSet> smpSets = new ArrayList<>();
    for (IntervalSet set : sets.keySet()) {
        if (set.getMaxElement() <= Character.MAX_VALUE) {
            bmpSets.add(set);
        } else {
            smpSets.add(set);
        }
    }
    serializeSets(data, bmpSets, new CodePointSerializer() {

        @Override
        public void serializeCodePoint(IntegerList data, int cp) {
            data.add(cp);
        }
    });
    serializeSets(data, smpSets, new CodePointSerializer() {

        @Override
        public void serializeCodePoint(IntegerList data, int cp) {
            serializeInt(data, cp);
        }
    });
    Map<IntervalSet, Integer> setIndices = new HashMap<>();
    int setIndex = 0;
    for (IntervalSet bmpSet : bmpSets) {
        setIndices.put(bmpSet, setIndex++);
    }
    for (IntervalSet smpSet : smpSets) {
        setIndices.put(smpSet, setIndex++);
    }
    data.add(nedges);
    for (ATNState s : atn.states) {
        if (s == null) {
            // might be optimized away
            continue;
        }
        if (s.getStateType() == ATNState.RULE_STOP) {
            continue;
        }
        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
            Transition t = s.transition(i);
            if (atn.states.get(t.target.stateNumber) == null) {
                throw new IllegalStateException("Cannot serialize a transition to a removed state.");
            }
            int src = s.stateNumber;
            int trg = t.target.stateNumber;
            int edgeType = Transition.serializationTypes.get(t.getClass());
            int arg1 = 0;
            int arg2 = 0;
            int arg3 = 0;
            switch(edgeType) {
                case Transition.RULE:
                    trg = ((RuleTransition) t).followState.stateNumber;
                    arg1 = ((RuleTransition) t).target.stateNumber;
                    arg2 = ((RuleTransition) t).ruleIndex;
                    arg3 = ((RuleTransition) t).precedence;
                    break;
                case Transition.PRECEDENCE:
                    PrecedencePredicateTransition ppt = (PrecedencePredicateTransition) t;
                    arg1 = ppt.precedence;
                    break;
                case Transition.PREDICATE:
                    PredicateTransition pt = (PredicateTransition) t;
                    arg1 = pt.ruleIndex;
                    arg2 = pt.predIndex;
                    arg3 = pt.isCtxDependent ? 1 : 0;
                    break;
                case Transition.RANGE:
                    arg1 = ((RangeTransition) t).from;
                    arg2 = ((RangeTransition) t).to;
                    if (arg1 == Token.EOF) {
                        arg1 = 0;
                        arg3 = 1;
                    }
                    break;
                case Transition.ATOM:
                    arg1 = ((AtomTransition) t).label;
                    if (arg1 == Token.EOF) {
                        arg1 = 0;
                        arg3 = 1;
                    }
                    break;
                case Transition.ACTION:
                    ActionTransition at = (ActionTransition) t;
                    arg1 = at.ruleIndex;
                    arg2 = at.actionIndex;
                    if (arg2 == -1) {
                        arg2 = 0xFFFF;
                    }
                    arg3 = at.isCtxDependent ? 1 : 0;
                    break;
                case Transition.SET:
                    arg1 = setIndices.get(((SetTransition) t).set);
                    break;
                case Transition.NOT_SET:
                    arg1 = setIndices.get(((SetTransition) t).set);
                    break;
                case Transition.WILDCARD:
                    break;
            }
            data.add(src);
            data.add(trg);
            data.add(edgeType);
            data.add(arg1);
            data.add(arg2);
            data.add(arg3);
        }
    }
    int ndecisions = atn.decisionToState.size();
    data.add(ndecisions);
    for (DecisionState decStartState : atn.decisionToState) {
        data.add(decStartState.stateNumber);
    }
    //
    if (atn.grammarType == ATNType.LEXER) {
        data.add(atn.lexerActions.length);
        for (LexerAction action : atn.lexerActions) {
            data.add(action.getActionType().ordinal());
            switch(action.getActionType()) {
                case CHANNEL:
                    int channel = ((LexerChannelAction) action).getChannel();
                    data.add(channel != -1 ? channel : 0xFFFF);
                    data.add(0);
                    break;
                case CUSTOM:
                    int ruleIndex = ((LexerCustomAction) action).getRuleIndex();
                    int actionIndex = ((LexerCustomAction) action).getActionIndex();
                    data.add(ruleIndex != -1 ? ruleIndex : 0xFFFF);
                    data.add(actionIndex != -1 ? actionIndex : 0xFFFF);
                    break;
                case MODE:
                    int mode = ((LexerModeAction) action).getMode();
                    data.add(mode != -1 ? mode : 0xFFFF);
                    data.add(0);
                    break;
                case MORE:
                    data.add(0);
                    data.add(0);
                    break;
                case POP_MODE:
                    data.add(0);
                    data.add(0);
                    break;
                case PUSH_MODE:
                    mode = ((LexerPushModeAction) action).getMode();
                    data.add(mode != -1 ? mode : 0xFFFF);
                    data.add(0);
                    break;
                case SKIP:
                    data.add(0);
                    data.add(0);
                    break;
                case TYPE:
                    int type = ((LexerTypeAction) action).getType();
                    data.add(type != -1 ? type : 0xFFFF);
                    data.add(0);
                    break;
                default:
                    String message = String.format(Locale.getDefault(), "The specified lexer action type %s is not valid.", action.getActionType());
                    throw new IllegalArgumentException(message);
            }
        }
    }
    // don't adjust the first value since that's the version number
    for (int i = 1; i < data.size(); i++) {
        if (data.get(i) < Character.MIN_VALUE || data.get(i) > Character.MAX_VALUE) {
            throw new UnsupportedOperationException("Serialized ATN data element " + data.get(i) + " element " + i + " out of range " + (int) Character.MIN_VALUE + ".." + (int) Character.MAX_VALUE);
        }
        int value = (data.get(i) + 2) & 0xFFFF;
        data.set(i, value);
    }
    return data;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) IntegerList(org.antlr.v4.runtime.misc.IntegerList) IntervalSet(org.antlr.v4.runtime.misc.IntervalSet)

Example 3 with IntervalSet

use of org.antlr.v4.runtime.misc.IntervalSet in project antlr4 by antlr.

the class LL1Analyzer method getDecisionLookahead.

/**
	 * Calculates the SLL(1) expected lookahead set for each outgoing transition
	 * of an {@link ATNState}. The returned array has one element for each
	 * outgoing transition in {@code s}. If the closure from transition
	 * <em>i</em> leads to a semantic predicate before matching a symbol, the
	 * element at index <em>i</em> of the result will be {@code null}.
	 *
	 * @param s the ATN state
	 * @return the expected symbols for each outgoing transition of {@code s}.
	 */
public IntervalSet[] getDecisionLookahead(ATNState s) {
    //		System.out.println("LOOK("+s.stateNumber+")");
    if (s == null) {
        return null;
    }
    IntervalSet[] look = new IntervalSet[s.getNumberOfTransitions()];
    for (int alt = 0; alt < s.getNumberOfTransitions(); alt++) {
        look[alt] = new IntervalSet();
        Set<ATNConfig> lookBusy = new HashSet<ATNConfig>();
        // fail to get lookahead upon pred
        boolean seeThruPreds = false;
        _LOOK(s.transition(alt).target, null, PredictionContext.EMPTY, look[alt], lookBusy, new BitSet(), seeThruPreds, false);
        // or we had a predicate when we !seeThruPreds
        if (look[alt].size() == 0 || look[alt].contains(HIT_PRED)) {
            look[alt] = null;
        }
    }
    return look;
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet) BitSet(java.util.BitSet) HashSet(java.util.HashSet)

Example 4 with IntervalSet

use of org.antlr.v4.runtime.misc.IntervalSet in project antlr4 by antlr.

the class LL1Analyzer method LOOK.

/**
	 * Compute set of tokens that can follow {@code s} in the ATN in the
	 * specified {@code ctx}.
	 *
	 * <p>If {@code ctx} is {@code null} and the end of the rule containing
	 * {@code s} is reached, {@link Token#EPSILON} is added to the result set.
	 * If {@code ctx} is not {@code null} and the end of the outermost rule is
	 * reached, {@link Token#EOF} is added to the result set.</p>
	 *
	 * @param s the ATN state
	 * @param stopState the ATN state to stop at. This can be a
	 * {@link BlockEndState} to detect epsilon paths through a closure.
	 * @param ctx the complete parser context, or {@code null} if the context
	 * should be ignored
	 *
	 * @return The set of tokens that can follow {@code s} in the ATN in the
	 * specified {@code ctx}.
	 */
public IntervalSet LOOK(ATNState s, ATNState stopState, RuleContext ctx) {
    IntervalSet r = new IntervalSet();
    // ignore preds; get all lookahead
    boolean seeThruPreds = true;
    PredictionContext lookContext = ctx != null ? PredictionContext.fromRuleContext(s.atn, ctx) : null;
    _LOOK(s, stopState, lookContext, r, new HashSet<ATNConfig>(), new BitSet(), seeThruPreds, true);
    return r;
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet) BitSet(java.util.BitSet)

Example 5 with IntervalSet

use of org.antlr.v4.runtime.misc.IntervalSet in project antlr4 by antlr.

the class DefaultErrorStrategy method reportUnwantedToken.

/**
	 * This method is called to report a syntax error which requires the removal
	 * of a token from the input stream. At the time this method is called, the
	 * erroneous symbol is current {@code LT(1)} symbol and has not yet been
	 * removed from the input stream. When this method returns,
	 * {@code recognizer} is in error recovery mode.
	 *
	 * <p>This method is called when {@link #singleTokenDeletion} identifies
	 * single-token deletion as a viable recovery strategy for a mismatched
	 * input error.</p>
	 *
	 * <p>The default implementation simply returns if the handler is already in
	 * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
	 * enter error recovery mode, followed by calling
	 * {@link Parser#notifyErrorListeners}.</p>
	 *
	 * @param recognizer the parser instance
	 */
protected void reportUnwantedToken(Parser recognizer) {
    if (inErrorRecoveryMode(recognizer)) {
        return;
    }
    beginErrorCondition(recognizer);
    Token t = recognizer.getCurrentToken();
    String tokenName = getTokenErrorDisplay(t);
    IntervalSet expecting = getExpectedTokens(recognizer);
    String msg = "extraneous input " + tokenName + " expecting " + expecting.toString(recognizer.getVocabulary());
    recognizer.notifyErrorListeners(t, msg, null);
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet)

Aggregations

IntervalSet (org.antlr.v4.runtime.misc.IntervalSet)84 Test (org.junit.Test)48 ATNState (org.antlr.v4.runtime.atn.ATNState)11 GrammarAST (org.antlr.v4.tool.ast.GrammarAST)10 ATN (org.antlr.v4.runtime.atn.ATN)8 ArrayList (java.util.ArrayList)7 Grammar (org.antlr.v4.tool.Grammar)7 Interval (org.antlr.v4.runtime.misc.Interval)6 SetTransition (org.antlr.v4.runtime.atn.SetTransition)5 UnicodeSet (com.ibm.icu.text.UnicodeSet)4 HashMap (java.util.HashMap)4 Token (org.antlr.runtime.Token)4 NotSetTransition (org.antlr.v4.runtime.atn.NotSetTransition)4 BaseJavaTest (org.antlr.v4.test.runtime.java.BaseJavaTest)4 LinkedHashMap (java.util.LinkedHashMap)3 ParserRuleContext (org.antlr.v4.runtime.ParserRuleContext)3 AtomTransition (org.antlr.v4.runtime.atn.AtomTransition)3 DecisionState (org.antlr.v4.runtime.atn.DecisionState)3 RuleTransition (org.antlr.v4.runtime.atn.RuleTransition)3 Transition (org.antlr.v4.runtime.atn.Transition)3