Use of org.antlr.v4.runtime.atn.ATNState in the project antlr4 by tunnelvisionlabs —
class ATNSerializer, method serialize().
/**
 * Serialize state descriptors, edge descriptors, and decision→state map
 * into a list of ints. The layout written by this method (the matching
 * reader is ATNDeserializer) is:
 *
 * serialized-version, serialized-uuid,
 * grammar-type, (ANTLRParser.LEXER, ...)
 * max token type,
 * num states,
 * state-0-type ruleIndex, state-1-type ruleIndex, ... state-i-type ruleIndex optional-arg ...
 * num non-greedy states, non-greedy state numbers, ...
 * num SLL decision states, SLL decision state numbers, ...
 * num precedence rule start states, their state numbers, ...
 * num rules,
 * rule-1-start-state rule-1-args, rule-2-start-state rule-2-args, ...
 * (args are: left-factored flag, plus token type in a lexer)
 * num modes,
 * mode-0-start-state, mode-1-start-state, ... (parser has 0 modes)
 * num unicode-bmp-sets
 * bmp-set-0-interval-count intervals, bmp-set-1-interval-count intervals, ...
 * num unicode-smp-sets
 * smp-set-0-interval-count intervals, smp-set-1-interval-count intervals, ...
 * num total edges,
 * src, trg, edge-type, edge arg1, edge arg2, edge arg3, ...
 * num decisions,
 * decision-0-start-state, decision-1-start-state, ...
 * (lexer only) num lexer actions, action-type arg1 arg2, ...
 *
 * Convenient to pack into unsigned shorts to make as Java string.
 */
public IntegerList serialize() {
IntegerList data = new IntegerList();
// The version is always the first element and is exempt from the +2 value
// shift applied at the end of this method.
data.add(ATNDeserializer.SERIALIZED_VERSION);
serializeUUID(data, ATNDeserializer.SERIALIZED_UUID);
// convert grammar type to ATN const to avoid dependence on ANTLRParser
data.add(atn.grammarType.ordinal());
data.add(atn.maxTokenType);
int nedges = 0;
// Note that we use a LinkedHashMap as a set to
// maintain insertion order while deduplicating
// entries with the same key.
Map<IntervalSet, Boolean> sets = new LinkedHashMap<IntervalSet, Boolean>();
// dump states, count edges and collect sets while doing so
IntegerList nonGreedyStates = new IntegerList();
IntegerList sllStates = new IntegerList();
IntegerList precedenceStates = new IntegerList();
data.add(atn.states.size());
for (ATNState s : atn.states) {
if (s == null) {
// might be optimized away
data.add(ATNState.INVALID_TYPE);
continue;
}
int stateType = s.getStateType();
// Record decision-state flags separately; they are serialized as the
// three state-number lists written right after the state table.
if (s instanceof DecisionState) {
DecisionState decisionState = (DecisionState) s;
if (decisionState.nonGreedy) {
nonGreedyStates.add(s.stateNumber);
}
if (decisionState.sll) {
sllStates.add(s.stateNumber);
}
}
if (s instanceof RuleStartState && ((RuleStartState) s).isPrecedenceRule) {
precedenceStates.add(s.stateNumber);
}
data.add(stateType);
// -1 (no rule) is encoded as 0xFFFF so it fits in an unsigned short.
if (s.ruleIndex == -1) {
data.add(Character.MAX_VALUE);
} else {
data.add(s.ruleIndex);
}
// Optional per-state argument: loop-back state for LOOP_END states,
// matching end state for block start states.
if (s.getStateType() == ATNState.LOOP_END) {
data.add(((LoopEndState) s).loopBackState.stateNumber);
} else if (s instanceof BlockStartState) {
data.add(((BlockStartState) s).endState.stateNumber);
}
if (s.getStateType() != ATNState.RULE_STOP) {
// the deserializer can trivially derive rule-stop edges, so there's
// no need to serialize them; only count edges of other states
nedges += s.getNumberOfTransitions();
}
// Collect every (NOT_)SET transition label now so the interval sets can
// be serialized (deduplicated) before the edge table that indexes them.
for (int i = 0; i < s.getNumberOfTransitions(); i++) {
Transition t = s.transition(i);
int edgeType = Transition.serializationTypes.get(t.getClass());
if (edgeType == Transition.SET || edgeType == Transition.NOT_SET) {
SetTransition st = (SetTransition) t;
sets.put(st.set, true);
}
}
}
// non-greedy states
data.add(nonGreedyStates.size());
for (int i = 0; i < nonGreedyStates.size(); i++) {
data.add(nonGreedyStates.get(i));
}
// SLL decisions
data.add(sllStates.size());
for (int i = 0; i < sllStates.size(); i++) {
data.add(sllStates.get(i));
}
// precedence states
data.add(precedenceStates.size());
for (int i = 0; i < precedenceStates.size(); i++) {
data.add(precedenceStates.get(i));
}
int nrules = atn.ruleToStartState.length;
data.add(nrules);
for (int r = 0; r < nrules; r++) {
ATNState ruleStartState = atn.ruleToStartState[r];
data.add(ruleStartState.stateNumber);
// A rule is "left factored" when its name contains the variant
// delimiter introduced by the left-factoring transformation.
boolean leftFactored = ruleNames.get(ruleStartState.ruleIndex).indexOf(ATNSimulator.RULE_VARIANT_DELIMITER) >= 0;
data.add(leftFactored ? 1 : 0);
if (atn.grammarType == ATNType.LEXER) {
// Token.EOF (-1) is encoded as 0xFFFF, same sentinel as above.
if (atn.ruleToTokenType[r] == Token.EOF) {
data.add(Character.MAX_VALUE);
} else {
data.add(atn.ruleToTokenType[r]);
}
}
}
int nmodes = atn.modeToStartState.size();
data.add(nmodes);
if (nmodes > 0) {
for (ATNState modeStartState : atn.modeToStartState) {
data.add(modeStartState.stateNumber);
}
}
// Split the collected sets: BMP sets fit in 16-bit code units; SMP sets
// (max element above 0xFFFF) need the wider serializeInt encoding.
List<IntervalSet> bmpSets = new ArrayList<IntervalSet>();
List<IntervalSet> smpSets = new ArrayList<IntervalSet>();
for (IntervalSet set : sets.keySet()) {
if (set.getMaxElement() <= Character.MAX_VALUE) {
bmpSets.add(set);
} else {
smpSets.add(set);
}
}
serializeSets(data, bmpSets, new CodePointSerializer() {
@Override
public void serializeCodePoint(IntegerList data, int cp) {
data.add(cp);
}
});
serializeSets(data, smpSets, new CodePointSerializer() {
@Override
public void serializeCodePoint(IntegerList data, int cp) {
serializeInt(data, cp);
}
});
// Edge records reference sets by index: BMP sets first, then SMP sets,
// in the same order they were serialized above.
Map<IntervalSet, Integer> setIndices = new HashMap<IntervalSet, Integer>();
int setIndex = 0;
for (IntervalSet bmpSet : bmpSets) {
setIndices.put(bmpSet, setIndex++);
}
for (IntervalSet smpSet : smpSets) {
setIndices.put(smpSet, setIndex++);
}
data.add(nedges);
for (ATNState s : atn.states) {
if (s == null) {
// might be optimized away
continue;
}
// rule-stop edges were excluded from nedges above; skip them here too
if (s.getStateType() == ATNState.RULE_STOP) {
continue;
}
for (int i = 0; i < s.getNumberOfTransitions(); i++) {
Transition t = s.transition(i);
if (atn.states.get(t.target.stateNumber) == null) {
throw new IllegalStateException("Cannot serialize a transition to a removed state.");
}
int src = s.stateNumber;
int trg = t.target.stateNumber;
int edgeType = Transition.serializationTypes.get(t.getClass());
// Every edge record carries three args; unused ones stay 0.
int arg1 = 0;
int arg2 = 0;
int arg3 = 0;
switch(edgeType) {
case Transition.RULE:
// For rule invocations the serialized target is the follow state;
// the invoked rule's start state travels in arg1.
trg = ((RuleTransition) t).followState.stateNumber;
arg1 = ((RuleTransition) t).target.stateNumber;
arg2 = ((RuleTransition) t).ruleIndex;
arg3 = ((RuleTransition) t).precedence;
break;
case Transition.PRECEDENCE:
PrecedencePredicateTransition ppt = (PrecedencePredicateTransition) t;
arg1 = ppt.precedence;
break;
case Transition.PREDICATE:
PredicateTransition pt = (PredicateTransition) t;
arg1 = pt.ruleIndex;
arg2 = pt.predIndex;
arg3 = pt.isCtxDependent ? 1 : 0;
break;
case Transition.RANGE:
arg1 = ((RangeTransition) t).from;
arg2 = ((RangeTransition) t).to;
// EOF (-1) cannot be stored directly; arg3 == 1 flags "from is EOF".
if (arg1 == Token.EOF) {
arg1 = 0;
arg3 = 1;
}
break;
case Transition.ATOM:
arg1 = ((AtomTransition) t).label;
// same EOF flagging as RANGE
if (arg1 == Token.EOF) {
arg1 = 0;
arg3 = 1;
}
break;
case Transition.ACTION:
ActionTransition at = (ActionTransition) t;
arg1 = at.ruleIndex;
arg2 = at.actionIndex;
// -1 sentinel widened to the unsigned-short sentinel 0xFFFF
if (arg2 == -1) {
arg2 = 0xFFFF;
}
arg3 = at.isCtxDependent ? 1 : 0;
break;
case Transition.SET:
arg1 = setIndices.get(((SetTransition) t).set);
break;
case Transition.NOT_SET:
arg1 = setIndices.get(((SetTransition) t).set);
break;
case Transition.WILDCARD:
break;
}
data.add(src);
data.add(trg);
data.add(edgeType);
data.add(arg1);
data.add(arg2);
data.add(arg3);
}
}
int ndecisions = atn.decisionToState.size();
data.add(ndecisions);
for (DecisionState decStartState : atn.decisionToState) {
data.add(decStartState.stateNumber);
}
// lexer actions (lexer ATNs only): one (type, arg1, arg2) triple per action
if (atn.grammarType == ATNType.LEXER) {
data.add(atn.lexerActions.length);
for (LexerAction action : atn.lexerActions) {
data.add(action.getActionType().ordinal());
switch(action.getActionType()) {
case CHANNEL:
int channel = ((LexerChannelAction) action).getChannel();
data.add(channel != -1 ? channel : 0xFFFF);
data.add(0);
break;
case CUSTOM:
int ruleIndex = ((LexerCustomAction) action).getRuleIndex();
int actionIndex = ((LexerCustomAction) action).getActionIndex();
data.add(ruleIndex != -1 ? ruleIndex : 0xFFFF);
data.add(actionIndex != -1 ? actionIndex : 0xFFFF);
break;
case MODE:
int mode = ((LexerModeAction) action).getMode();
data.add(mode != -1 ? mode : 0xFFFF);
data.add(0);
break;
case MORE:
data.add(0);
data.add(0);
break;
case POP_MODE:
data.add(0);
data.add(0);
break;
case PUSH_MODE:
mode = ((LexerPushModeAction) action).getMode();
data.add(mode != -1 ? mode : 0xFFFF);
data.add(0);
break;
case SKIP:
data.add(0);
data.add(0);
break;
case TYPE:
int type = ((LexerTypeAction) action).getType();
data.add(type != -1 ? type : 0xFFFF);
data.add(0);
break;
default:
String message = String.format(Locale.getDefault(), "The specified lexer action type %s is not valid.", action.getActionType());
throw new IllegalArgumentException(message);
}
}
}
// Range-check every value and shift it by +2 modulo 0x10000; the
// deserializer reverses this shift. NOTE(review): presumably this avoids
// char values that are awkward to embed in a Java string literal —
// confirm against ATNDeserializer before relying on the rationale.
// don't adjust the first value since that's the version number
for (int i = 1; i < data.size(); i++) {
if (data.get(i) < Character.MIN_VALUE || data.get(i) > Character.MAX_VALUE) {
throw new UnsupportedOperationException("Serialized ATN data element " + data.get(i) + " element " + i + " out of range " + (int) Character.MIN_VALUE + ".." + (int) Character.MAX_VALUE);
}
int value = (data.get(i) + 2) & 0xFFFF;
data.set(i, value);
}
return data;
}
Use of org.antlr.v4.runtime.atn.ATNState in the project antlr4 by tunnelvisionlabs —
class Parser, method isExpectedToken().
/**
 * Checks whether or not {@code symbol} can follow the current state in the
 * ATN. Equivalent to the following, but implemented so the complete
 * context-sensitive follow set never has to be materialized:
 *
 * <pre>
 * return getExpectedTokens().contains(symbol);
 * </pre>
 *
 * <p>The follow set of the current state is consulted first; as long as it
 * contains {@link Token#EPSILON}, the chain of invoking contexts is walked
 * and each invoking rule's follow set is checked in turn.</p>
 *
 * @param symbol the symbol type to check
 * @return {@code true} if {@code symbol} can follow the current state in
 * the ATN, otherwise {@code false}.
 */
public boolean isExpectedToken(int symbol) {
    ATN atn = getInterpreter().atn;
    ATNState currentState = atn.states.get(getState());
    IntervalSet follow = atn.nextTokens(currentState);
    if (follow.contains(symbol)) {
        return true;
    }
    // No epsilon means the rule cannot complete here, so nothing outside
    // this rule can be expected either.
    if (!follow.contains(Token.EPSILON)) {
        return false;
    }
    // Walk outward through invoking rules while completion is possible.
    for (ParserRuleContext context = _ctx;
            context != null && context.invokingState >= 0 && follow.contains(Token.EPSILON);
            context = (ParserRuleContext) context.parent) {
        ATNState invokingState = atn.states.get(context.invokingState);
        RuleTransition ruleTransition = (RuleTransition) invokingState.transition(0);
        follow = atn.nextTokens(ruleTransition.followState);
        if (follow.contains(symbol)) {
            return true;
        }
    }
    // Ran out of contexts while the grammar still allows completion: only
    // EOF can be expected at that point.
    return follow.contains(Token.EPSILON) && symbol == Token.EOF;
}
Use of org.antlr.v4.runtime.atn.ATNState in the project antlr4 by tunnelvisionlabs —
class ParserInterpreter, method parse().
/**
 * Begin parsing at startRuleIndex, driving the ATN interpretively until the
 * start rule's stop state is reached, and return the resulting parse tree
 * root context.
 */
public ParserRuleContext parse(int startRuleIndex) {
RuleStartState startRuleStartState = atn.ruleToStartState[startRuleIndex];
// Root context has no parent and no invoking state.
rootContext = createInterpreterRuleContext(null, ATNState.INVALID_STATE_NUMBER, startRuleIndex);
// Left-recursive (precedence) rules need the recursion-aware entry path.
if (startRuleStartState.isPrecedenceRule) {
enterRecursionRule(rootContext, startRuleStartState.stateNumber, startRuleIndex, 0);
} else {
enterRule(rootContext, startRuleStartState.stateNumber, startRuleIndex);
}
// Main interpreter loop: dispatch on the current ATN state type until the
// start rule completes.
while (true) {
ATNState p = getATNState();
switch(p.getStateType()) {
case ATNState.RULE_STOP:
// pop; return from rule
if (_ctx.isEmpty()) {
// The start rule itself finished — unwind and return the root.
if (startRuleStartState.isPrecedenceRule) {
ParserRuleContext result = _ctx;
Tuple2<ParserRuleContext, Integer> parentContext = _parentContextStack.pop();
unrollRecursionContexts(parentContext.getItem1());
return result;
} else {
exitRule();
return rootContext;
}
}
// A nested rule finished — return to its invoking rule.
visitRuleStopState(p);
break;
default:
try {
visitState(p);
} catch (RecognitionException e) {
// On a recognition error, jump to the rule's stop state, record
// the exception, report it, and let the error handler recover.
setState(atn.ruleToStopState[p.ruleIndex].stateNumber);
getContext().exception = e;
getErrorHandler().reportError(this, e);
recover(e);
}
break;
}
}
}
Use of org.antlr.v4.runtime.atn.ATNState in the project antlr4 by tunnelvisionlabs —
class ParserInterpreter, method visitRuleStopState().
/**
 * Completes the rule whose stop state {@code p} was reached: exits the rule
 * (unrolling recursion contexts for precedence rules), then moves the
 * interpreter to the follow state of the invoking rule transition.
 *
 * @param p the rule stop state that was reached
 */
protected void visitRuleStopState(ATNState p) {
    RuleStartState startState = atn.ruleToStartState[p.ruleIndex];
    if (startState.isPrecedenceRule) {
        // Precedence rules track their invoking frame on an explicit stack.
        Tuple2<ParserRuleContext, Integer> parentFrame = _parentContextStack.pop();
        unrollRecursionContexts(parentFrame.getItem1());
        setState(parentFrame.getItem2());
    } else {
        exitRule();
    }
    // The current state now belongs to the invoking rule; its transition 0
    // is the rule invocation whose follow state we resume at.
    ATNState invokingState = atn.states.get(getState());
    RuleTransition callEdge = (RuleTransition) invokingState.transition(0);
    setState(callEdge.followState.stateNumber);
}
Use of org.antlr.v4.runtime.atn.ATNState in the project antlr4 by tunnelvisionlabs —
class DefaultErrorStrategy, method sync().
/**
 * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
 * that the current lookahead symbol is consistent with what we were expecting
 * at this point in the ATN. You can call this anytime but ANTLR only
 * generates code to check before subrules/loops and each iteration.
 *
 * <p>Implements Jim Idle's magic sync mechanism in closures and optional
 * subrules. E.g.,</p>
 *
 * <pre>
 * a : sync ( stuff sync )* ;
 * sync : {consume to what can follow sync} ;
 * </pre>
 *
 * At the start of a sub rule upon error, {@link #sync} performs single
 * token deletion, if possible. If it can't do that, it bails on the current
 * rule and uses the default error recovery, which consumes until the
 * resynchronization set of the current rule.
 *
 * <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
 * with an empty alternative), then the expected set includes what follows
 * the subrule.</p>
 *
 * <p>During loop iteration, it consumes until it sees a token that can start a
 * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
 * stay in the loop as long as possible.</p>
 *
 * <p><strong>ORIGINS</strong></p>
 *
 * <p>Previous versions of ANTLR did a poor job of their recovery within loops.
 * A single mismatched token or missing token would force the parser to bail
 * out of the entire rules surrounding the loop. So, for rule</p>
 *
 * <pre>
 * classDef : 'class' ID '{' member* '}'
 * </pre>
 *
 * input with an extra token between members would force the parser to
 * consume until it found the next class definition rather than the next
 * member definition of the current class.
 *
 * <p>This functionality costs a little bit of effort because the parser has to
 * compare the token set at the start of the loop and at each iteration. If for
 * some reason speed is suffering for you, you can turn off this
 * functionality by simply overriding this method as a blank { }.</p>
 */
@Override
public void sync(Parser recognizer) throws RecognitionException {
ATNState s = recognizer.getInterpreter().atn.states.get(recognizer.getState());
// If already recovering, don't try to sync
if (inErrorRecoveryMode(recognizer)) {
return;
}
TokenStream tokens = recognizer.getInputStream();
int la = tokens.LA(1);
// try cheaper subset first; might get lucky. seems to shave a wee bit off
IntervalSet nextTokens = recognizer.getATN().nextTokens(s);
if (nextTokens.contains(la)) {
// We are sure the token matches
nextTokensContext = null;
nextTokensState = ATNState.INVALID_STATE_NUMBER;
return;
}
if (nextTokens.contains(Token.EPSILON)) {
// The state can be exited via epsilon, so the lookahead may still match
// something that follows; remember where we were for later diagnostics.
if (nextTokensContext == null) {
// It's possible the next token won't match; information tracked
// by sync is restricted for performance.
nextTokensContext = recognizer.getContext();
nextTokensState = recognizer.getState();
}
return;
}
// The lookahead definitely does not match here; how we recover depends on
// the kind of decision state we are sitting at.
switch(s.getStateType()) {
case ATNState.BLOCK_START:
case ATNState.STAR_BLOCK_START:
case ATNState.PLUS_BLOCK_START:
case ATNState.STAR_LOOP_ENTRY:
// report error and recover if possible
if (singleTokenDeletion(recognizer) != null) {
return;
}
throw new InputMismatchException(recognizer);
case ATNState.PLUS_LOOP_BACK:
case ATNState.STAR_LOOP_BACK:
// System.err.println("at loop back: "+s.getClass().getSimpleName());
// Inside a loop: report the stray token and consume until something
// that can continue the loop or follow it.
reportUnwantedToken(recognizer);
IntervalSet expecting = recognizer.getExpectedTokens();
IntervalSet whatFollowsLoopIterationOrRule = expecting.or(getErrorRecoverySet(recognizer));
consumeUntil(recognizer, whatFollowsLoopIterationOrRule);
break;
default:
// do nothing if we can't identify the exact kind of ATN state
break;
}
}
Aggregations