Search in sources :

Example 1 with SetTransition

use of org.antlr.v4.runtime.atn.SetTransition in project antlr4 by tunnelvisionlabs.

the class ATNOptimizer method optimizeSets.

private static void optimizeSets(Grammar g, ATN atn) {
    if (g.isParser()) {
        // parser codegen doesn't currently support SetTransition
        return;
    }
    int removedStates = 0;
    List<DecisionState> decisions = atn.decisionToState;
    for (DecisionState decision : decisions) {
        if (decision.ruleIndex >= 0) {
            Rule rule = g.getRule(decision.ruleIndex);
            if (Character.isLowerCase(rule.name.charAt(0))) {
                // parser codegen doesn't currently support SetTransition
                continue;
            }
        }
        IntervalSet setTransitions = new IntervalSet();
        for (int i = 0; i < decision.getNumberOfTransitions(); i++) {
            Transition epsTransition = decision.transition(i);
            if (!(epsTransition instanceof EpsilonTransition)) {
                continue;
            }
            if (epsTransition.target.getNumberOfTransitions() != 1) {
                continue;
            }
            Transition transition = epsTransition.target.transition(0);
            if (!(transition.target instanceof BlockEndState)) {
                continue;
            }
            if (transition instanceof NotSetTransition) {
                // TODO: not yet implemented
                continue;
            }
            if (transition instanceof AtomTransition || transition instanceof RangeTransition || transition instanceof SetTransition) {
                setTransitions.add(i);
            }
        }
        // due to min alt resolution policies, can only collapse sequential alts
        for (int i = setTransitions.getIntervals().size() - 1; i >= 0; i--) {
            Interval interval = setTransitions.getIntervals().get(i);
            if (interval.length() <= 1) {
                continue;
            }
            ATNState blockEndState = decision.transition(interval.a).target.transition(0).target;
            IntervalSet matchSet = new IntervalSet();
            for (int j = interval.a; j <= interval.b; j++) {
                Transition matchTransition = decision.transition(j).target.transition(0);
                if (matchTransition instanceof NotSetTransition) {
                    throw new UnsupportedOperationException("Not yet implemented.");
                }
                IntervalSet set = matchTransition.label();
                List<Interval> intervals = set.getIntervals();
                int n = intervals.size();
                for (int k = 0; k < n; k++) {
                    Interval setInterval = intervals.get(k);
                    int a = setInterval.a;
                    int b = setInterval.b;
                    if (a != -1 && b != -1) {
                        for (int v = a; v <= b; v++) {
                            if (matchSet.contains(v)) {
                                // TODO: Token is missing (i.e. position in source will not be displayed).
                                g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, null, CharSupport.getANTLRCharLiteralForChar(v), CharSupport.getIntervalSetEscapedString(matchSet));
                                break;
                            }
                        }
                    }
                }
                matchSet.addAll(set);
            }
            Transition newTransition;
            if (matchSet.getIntervals().size() == 1) {
                if (matchSet.size() == 1) {
                    newTransition = CodePointTransitions.createWithCodePoint(blockEndState, matchSet.getMinElement());
                } else {
                    Interval matchInterval = matchSet.getIntervals().get(0);
                    newTransition = CodePointTransitions.createWithCodePointRange(blockEndState, matchInterval.a, matchInterval.b);
                }
            } else {
                newTransition = new SetTransition(blockEndState, matchSet);
            }
            decision.transition(interval.a).target.setTransition(0, newTransition);
            for (int j = interval.a + 1; j <= interval.b; j++) {
                Transition removed = decision.removeTransition(interval.a + 1);
                atn.removeState(removed.target);
                removedStates++;
            }
        }
    }
// System.out.println("ATN optimizer removed " + removedStates + " states by collapsing sets.");
}
Also used : AtomTransition(org.antlr.v4.runtime.atn.AtomTransition) NotSetTransition(org.antlr.v4.runtime.atn.NotSetTransition) SetTransition(org.antlr.v4.runtime.atn.SetTransition) DecisionState(org.antlr.v4.runtime.atn.DecisionState) BlockEndState(org.antlr.v4.runtime.atn.BlockEndState) RangeTransition(org.antlr.v4.runtime.atn.RangeTransition) ATNState(org.antlr.v4.runtime.atn.ATNState) IntervalSet(org.antlr.v4.runtime.misc.IntervalSet) NotSetTransition(org.antlr.v4.runtime.atn.NotSetTransition) NotSetTransition(org.antlr.v4.runtime.atn.NotSetTransition) AtomTransition(org.antlr.v4.runtime.atn.AtomTransition) EpsilonTransition(org.antlr.v4.runtime.atn.EpsilonTransition) SetTransition(org.antlr.v4.runtime.atn.SetTransition) RangeTransition(org.antlr.v4.runtime.atn.RangeTransition) Transition(org.antlr.v4.runtime.atn.Transition) Rule(org.antlr.v4.tool.Rule) EpsilonTransition(org.antlr.v4.runtime.atn.EpsilonTransition) Interval(org.antlr.v4.runtime.misc.Interval)

Example 2 with SetTransition

use of org.antlr.v4.runtime.atn.SetTransition in project antlr4 by tunnelvisionlabs.

the class LexerATNFactory method charSetLiteral.

/**
 * [Aa\t \u1234a-z\]\p{Letter}\-] char sets
 */
@Override
public Handle charSetLiteral(GrammarAST charSetAST) {
    ATNState left = newState(charSetAST);
    ATNState right = newState(charSetAST);
    IntervalSet set = getSetFromCharSetLiteral(charSetAST);
    left.addTransition(new SetTransition(right, set));
    charSetAST.atnState = left;
    return new Handle(left, right);
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet) NotSetTransition(org.antlr.v4.runtime.atn.NotSetTransition) SetTransition(org.antlr.v4.runtime.atn.SetTransition) ATNState(org.antlr.v4.runtime.atn.ATNState)

Example 3 with SetTransition

use of org.antlr.v4.runtime.atn.SetTransition in project antlr4 by tunnelvisionlabs.

the class LexerATNFactory method set.

@Override
public Handle set(GrammarAST associatedAST, List<GrammarAST> alts, boolean invert) {
    ATNState left = newState(associatedAST);
    ATNState right = newState(associatedAST);
    IntervalSet set = new IntervalSet();
    for (GrammarAST t : alts) {
        if (t.getType() == ANTLRParser.RANGE) {
            int a = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(0).getText());
            int b = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(1).getText());
            if (checkRange((GrammarAST) t.getChild(0), (GrammarAST) t.getChild(1), a, b)) {
                checkSetCollision(associatedAST, set, a, b);
                set.add(a, b);
            }
        } else if (t.getType() == ANTLRParser.LEXER_CHAR_SET) {
            set.addAll(getSetFromCharSetLiteral(t));
        } else if (t.getType() == ANTLRParser.STRING_LITERAL) {
            int c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText());
            if (c != -1) {
                checkSetCollision(associatedAST, set, c);
                set.add(c);
            } else {
                g.tool.errMgr.grammarError(ErrorType.INVALID_LITERAL_IN_LEXER_SET, g.fileName, t.getToken(), t.getText());
            }
        } else if (t.getType() == ANTLRParser.TOKEN_REF) {
            g.tool.errMgr.grammarError(ErrorType.UNSUPPORTED_REFERENCE_IN_LEXER_SET, g.fileName, t.getToken(), t.getText());
        }
    }
    if (invert) {
        left.addTransition(new NotSetTransition(right, set));
    } else {
        Transition transition;
        if (set.getIntervals().size() == 1) {
            Interval interval = set.getIntervals().get(0);
            transition = CodePointTransitions.createWithCodePointRange(right, interval.a, interval.b);
        } else {
            transition = new SetTransition(right, set);
        }
        left.addTransition(transition);
    }
    associatedAST.atnState = left;
    return new Handle(left, right);
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet) GrammarAST(org.antlr.v4.tool.ast.GrammarAST) NotSetTransition(org.antlr.v4.runtime.atn.NotSetTransition) NotSetTransition(org.antlr.v4.runtime.atn.NotSetTransition) ActionTransition(org.antlr.v4.runtime.atn.ActionTransition) Transition(org.antlr.v4.runtime.atn.Transition) AtomTransition(org.antlr.v4.runtime.atn.AtomTransition) SetTransition(org.antlr.v4.runtime.atn.SetTransition) NotSetTransition(org.antlr.v4.runtime.atn.NotSetTransition) SetTransition(org.antlr.v4.runtime.atn.SetTransition) ATNState(org.antlr.v4.runtime.atn.ATNState) Interval(org.antlr.v4.runtime.misc.Interval)

Example 4 with SetTransition

use of org.antlr.v4.runtime.atn.SetTransition in project antlr4 by tunnelvisionlabs.

the class ParserATNFactory method set.

/**
 * From set build single edge graph {@code o->o-set->o}.  To conform to
 *  what an alt block looks like, must have extra state on left.
 *  This also handles {@code ~A}, converted to {@code ~{A}} set.
 */
@NotNull
@Override
public Handle set(@NotNull GrammarAST associatedAST, @NotNull List<GrammarAST> terminals, boolean invert) {
    ATNState left = newState(associatedAST);
    ATNState right = newState(associatedAST);
    IntervalSet set = new IntervalSet();
    for (GrammarAST t : terminals) {
        int ttype = g.getTokenType(t.getText());
        set.add(ttype);
    }
    if (invert) {
        left.addTransition(new NotSetTransition(right, set));
    } else {
        left.addTransition(new SetTransition(right, set));
    }
    associatedAST.atnState = left;
    return new Handle(left, right);
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet) GrammarAST(org.antlr.v4.tool.ast.GrammarAST) NotSetTransition(org.antlr.v4.runtime.atn.NotSetTransition) NotSetTransition(org.antlr.v4.runtime.atn.NotSetTransition) SetTransition(org.antlr.v4.runtime.atn.SetTransition) ATNState(org.antlr.v4.runtime.atn.ATNState) NotNull(org.antlr.v4.runtime.misc.NotNull)

Example 5 with SetTransition

use of org.antlr.v4.runtime.atn.SetTransition in project antlr4 by antlr.

the class ATNSerializer method serialize.

/**
 * Serialize state descriptors, edge descriptors, and decision&rarr;state map
 *  into list of ints:
 *
 * 		grammar-type, (ANTLRParser.LEXER, ...)
 *  	max token type,
 *  	num states,
 *  	state-0-type ruleIndex, state-1-type ruleIndex, ... state-i-type ruleIndex optional-arg ...
 *  	num rules,
 *  	rule-1-start-state rule-1-args, rule-2-start-state  rule-2-args, ...
 *  	(args are token type,actionIndex in lexer else 0,0)
 *      num modes,
 *      mode-0-start-state, mode-1-start-state, ... (parser has 0 modes)
 *      num unicode-bmp-sets
 *      bmp-set-0-interval-count intervals, bmp-set-1-interval-count intervals, ...
 *      num unicode-smp-sets
 *      smp-set-0-interval-count intervals, smp-set-1-interval-count intervals, ...
 *	num total edges,
 *      src, trg, edge-type, edge arg1, optional edge arg2 (present always), ...
 *      num decisions,
 *      decision-0-start-state, decision-1-start-state, ...
 *
 *  Convenient to pack into unsigned shorts to make as Java string.
 */
public IntegerList serialize() {
    IntegerList data = new IntegerList();
    data.add(ATNDeserializer.SERIALIZED_VERSION);
    serializeUUID(data, ATNDeserializer.SERIALIZED_UUID);
    // convert grammar type to ATN const to avoid dependence on ANTLRParser
    data.add(atn.grammarType.ordinal());
    data.add(atn.maxTokenType);
    int nedges = 0;
    // Note that we use a LinkedHashMap as a set to
    // maintain insertion order while deduplicating
    // entries with the same key.
    Map<IntervalSet, Boolean> sets = new LinkedHashMap<>();
    // dump states, count edges and collect sets while doing so
    IntegerList nonGreedyStates = new IntegerList();
    IntegerList precedenceStates = new IntegerList();
    data.add(atn.states.size());
    for (ATNState s : atn.states) {
        if (s == null) {
            // might be optimized away
            data.add(ATNState.INVALID_TYPE);
            continue;
        }
        int stateType = s.getStateType();
        if (s instanceof DecisionState && ((DecisionState) s).nonGreedy) {
            nonGreedyStates.add(s.stateNumber);
        }
        if (s instanceof RuleStartState && ((RuleStartState) s).isLeftRecursiveRule) {
            precedenceStates.add(s.stateNumber);
        }
        data.add(stateType);
        if (s.ruleIndex == -1) {
            data.add(Character.MAX_VALUE);
        } else {
            data.add(s.ruleIndex);
        }
        if (s.getStateType() == ATNState.LOOP_END) {
            data.add(((LoopEndState) s).loopBackState.stateNumber);
        } else if (s instanceof BlockStartState) {
            data.add(((BlockStartState) s).endState.stateNumber);
        }
        if (s.getStateType() != ATNState.RULE_STOP) {
            // the deserializer can trivially derive these edges, so there's no need to serialize them
            nedges += s.getNumberOfTransitions();
        }
        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
            Transition t = s.transition(i);
            int edgeType = Transition.serializationTypes.get(t.getClass());
            if (edgeType == Transition.SET || edgeType == Transition.NOT_SET) {
                SetTransition st = (SetTransition) t;
                sets.put(st.set, true);
            }
        }
    }
    // non-greedy states
    data.add(nonGreedyStates.size());
    for (int i = 0; i < nonGreedyStates.size(); i++) {
        data.add(nonGreedyStates.get(i));
    }
    // precedence states
    data.add(precedenceStates.size());
    for (int i = 0; i < precedenceStates.size(); i++) {
        data.add(precedenceStates.get(i));
    }
    int nrules = atn.ruleToStartState.length;
    data.add(nrules);
    for (int r = 0; r < nrules; r++) {
        ATNState ruleStartState = atn.ruleToStartState[r];
        data.add(ruleStartState.stateNumber);
        if (atn.grammarType == ATNType.LEXER) {
            if (atn.ruleToTokenType[r] == Token.EOF) {
                data.add(Character.MAX_VALUE);
            } else {
                data.add(atn.ruleToTokenType[r]);
            }
        }
    }
    int nmodes = atn.modeToStartState.size();
    data.add(nmodes);
    if (nmodes > 0) {
        for (ATNState modeStartState : atn.modeToStartState) {
            data.add(modeStartState.stateNumber);
        }
    }
    List<IntervalSet> bmpSets = new ArrayList<>();
    List<IntervalSet> smpSets = new ArrayList<>();
    for (IntervalSet set : sets.keySet()) {
        if (!set.isNil() && set.getMaxElement() <= Character.MAX_VALUE) {
            bmpSets.add(set);
        } else {
            smpSets.add(set);
        }
    }
    serializeSets(data, bmpSets, new CodePointSerializer() {

        @Override
        public void serializeCodePoint(IntegerList data, int cp) {
            data.add(cp);
        }
    });
    serializeSets(data, smpSets, new CodePointSerializer() {

        @Override
        public void serializeCodePoint(IntegerList data, int cp) {
            serializeInt(data, cp);
        }
    });
    Map<IntervalSet, Integer> setIndices = new HashMap<>();
    int setIndex = 0;
    for (IntervalSet bmpSet : bmpSets) {
        setIndices.put(bmpSet, setIndex++);
    }
    for (IntervalSet smpSet : smpSets) {
        setIndices.put(smpSet, setIndex++);
    }
    data.add(nedges);
    for (ATNState s : atn.states) {
        if (s == null) {
            // might be optimized away
            continue;
        }
        if (s.getStateType() == ATNState.RULE_STOP) {
            continue;
        }
        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
            Transition t = s.transition(i);
            if (atn.states.get(t.target.stateNumber) == null) {
                throw new IllegalStateException("Cannot serialize a transition to a removed state.");
            }
            int src = s.stateNumber;
            int trg = t.target.stateNumber;
            int edgeType = Transition.serializationTypes.get(t.getClass());
            int arg1 = 0;
            int arg2 = 0;
            int arg3 = 0;
            switch(edgeType) {
                case Transition.RULE:
                    trg = ((RuleTransition) t).followState.stateNumber;
                    arg1 = ((RuleTransition) t).target.stateNumber;
                    arg2 = ((RuleTransition) t).ruleIndex;
                    arg3 = ((RuleTransition) t).precedence;
                    break;
                case Transition.PRECEDENCE:
                    PrecedencePredicateTransition ppt = (PrecedencePredicateTransition) t;
                    arg1 = ppt.precedence;
                    break;
                case Transition.PREDICATE:
                    PredicateTransition pt = (PredicateTransition) t;
                    arg1 = pt.ruleIndex;
                    arg2 = pt.predIndex;
                    arg3 = pt.isCtxDependent ? 1 : 0;
                    break;
                case Transition.RANGE:
                    arg1 = ((RangeTransition) t).from;
                    arg2 = ((RangeTransition) t).to;
                    if (arg1 == Token.EOF) {
                        arg1 = 0;
                        arg3 = 1;
                    }
                    break;
                case Transition.ATOM:
                    arg1 = ((AtomTransition) t).label;
                    if (arg1 == Token.EOF) {
                        arg1 = 0;
                        arg3 = 1;
                    }
                    break;
                case Transition.ACTION:
                    ActionTransition at = (ActionTransition) t;
                    arg1 = at.ruleIndex;
                    arg2 = at.actionIndex;
                    if (arg2 == -1) {
                        arg2 = 0xFFFF;
                    }
                    arg3 = at.isCtxDependent ? 1 : 0;
                    break;
                case Transition.SET:
                    arg1 = setIndices.get(((SetTransition) t).set);
                    break;
                case Transition.NOT_SET:
                    arg1 = setIndices.get(((SetTransition) t).set);
                    break;
                case Transition.WILDCARD:
                    break;
            }
            data.add(src);
            data.add(trg);
            data.add(edgeType);
            data.add(arg1);
            data.add(arg2);
            data.add(arg3);
        }
    }
    int ndecisions = atn.decisionToState.size();
    data.add(ndecisions);
    for (DecisionState decStartState : atn.decisionToState) {
        data.add(decStartState.stateNumber);
    }
    // 
    if (atn.grammarType == ATNType.LEXER) {
        data.add(atn.lexerActions.length);
        for (LexerAction action : atn.lexerActions) {
            data.add(action.getActionType().ordinal());
            switch(action.getActionType()) {
                case CHANNEL:
                    int channel = ((LexerChannelAction) action).getChannel();
                    data.add(channel != -1 ? channel : 0xFFFF);
                    data.add(0);
                    break;
                case CUSTOM:
                    int ruleIndex = ((LexerCustomAction) action).getRuleIndex();
                    int actionIndex = ((LexerCustomAction) action).getActionIndex();
                    data.add(ruleIndex != -1 ? ruleIndex : 0xFFFF);
                    data.add(actionIndex != -1 ? actionIndex : 0xFFFF);
                    break;
                case MODE:
                    int mode = ((LexerModeAction) action).getMode();
                    data.add(mode != -1 ? mode : 0xFFFF);
                    data.add(0);
                    break;
                case MORE:
                    data.add(0);
                    data.add(0);
                    break;
                case POP_MODE:
                    data.add(0);
                    data.add(0);
                    break;
                case PUSH_MODE:
                    mode = ((LexerPushModeAction) action).getMode();
                    data.add(mode != -1 ? mode : 0xFFFF);
                    data.add(0);
                    break;
                case SKIP:
                    data.add(0);
                    data.add(0);
                    break;
                case TYPE:
                    int type = ((LexerTypeAction) action).getType();
                    data.add(type != -1 ? type : 0xFFFF);
                    data.add(0);
                    break;
                default:
                    String message = String.format(Locale.getDefault(), "The specified lexer action type %s is not valid.", action.getActionType());
                    throw new IllegalArgumentException(message);
            }
        }
    }
    // don't adjust the first value since that's the version number
    for (int i = 1; i < data.size(); i++) {
        if (data.get(i) < Character.MIN_VALUE || data.get(i) > Character.MAX_VALUE) {
            throw new UnsupportedOperationException("Serialized ATN data element " + data.get(i) + " element " + i + " out of range " + (int) Character.MIN_VALUE + ".." + (int) Character.MAX_VALUE);
        }
        int value = (data.get(i) + 2) & 0xFFFF;
        data.set(i, value);
    }
    return data;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) IntegerList(org.antlr.v4.runtime.misc.IntegerList) IntervalSet(org.antlr.v4.runtime.misc.IntervalSet)

Aggregations

ATNState (org.antlr.v4.runtime.atn.ATNState)13 SetTransition (org.antlr.v4.runtime.atn.SetTransition)13 NotSetTransition (org.antlr.v4.runtime.atn.NotSetTransition)12 IntervalSet (org.antlr.v4.runtime.misc.IntervalSet)12 AtomTransition (org.antlr.v4.runtime.atn.AtomTransition)9 Transition (org.antlr.v4.runtime.atn.Transition)9 ActionTransition (org.antlr.v4.runtime.atn.ActionTransition)7 Interval (org.antlr.v4.runtime.misc.Interval)6 RangeTransition (org.antlr.v4.runtime.atn.RangeTransition)5 RuleTransition (org.antlr.v4.runtime.atn.RuleTransition)5 ArrayList (java.util.ArrayList)4 EpsilonTransition (org.antlr.v4.runtime.atn.EpsilonTransition)4 RuleStopState (org.antlr.v4.runtime.atn.RuleStopState)4 GrammarAST (org.antlr.v4.tool.ast.GrammarAST)4 HashMap (java.util.HashMap)3 DecisionState (org.antlr.v4.runtime.atn.DecisionState)3 RuleStartState (org.antlr.v4.runtime.atn.RuleStartState)3 IntegerList (org.antlr.v4.runtime.misc.IntegerList)3 HashSet (java.util.HashSet)2 LinkedHashMap (java.util.LinkedHashMap)2