use of org.antlr.v4.runtime.misc.IntegerList in project antlr4 by tunnelvisionlabs.
the class ParserATNSimulator method computeTargetState.
/**
* Compute a target state for an edge in the DFA, and attempt to add the
* computed state and corresponding edge to the DFA.
*
* @param dfa
* @param s The current DFA state
* @param remainingGlobalContext
* @param t The next input symbol
* @param useContext
* @param contextCache
*
* @return The computed target DFA state for the given input symbol
* {@code t}. If {@code t} does not lead to a valid DFA state, this method
* returns {@link #ERROR}.
*/
@NotNull
protected Tuple2<DFAState, ParserRuleContext> computeTargetState(@NotNull DFA dfa, @NotNull DFAState s, ParserRuleContext remainingGlobalContext, int t, boolean useContext, PredictionContextCache contextCache) {
List<ATNConfig> closureConfigs = new ArrayList<ATNConfig>(s.configs);
IntegerList contextElements = null;
ATNConfigSet reach = new ATNConfigSet();
boolean stepIntoGlobal;
do {
boolean hasMoreContext = !useContext || remainingGlobalContext != null;
if (!hasMoreContext) {
reach.setOutermostConfigSet(true);
}
ATNConfigSet reachIntermediate = new ATNConfigSet();
/* Configurations already in a rule stop state indicate reaching the end
* of the decision rule (local context) or end of the start rule (full
* context). Once reached, these configurations are never updated by a
* closure operation, so they are handled separately for the performance
* advantage of having a smaller intermediate set when calling closure.
*
* For full-context reach operations, separate handling is required to
* ensure that the alternative matching the longest overall sequence is
* chosen when multiple such configurations can match the input.
*/
List<ATNConfig> skippedStopStates = null;
for (ATNConfig c : closureConfigs) {
if (debug)
System.out.println("testing " + getTokenName(t) + " at " + c.toString());
if (c.getState() instanceof RuleStopState) {
assert c.getContext().isEmpty();
if (useContext && !c.getReachesIntoOuterContext() || t == IntStream.EOF) {
if (skippedStopStates == null) {
skippedStopStates = new ArrayList<ATNConfig>();
}
skippedStopStates.add(c);
}
continue;
}
int n = c.getState().getNumberOfOptimizedTransitions();
for (int ti = 0; ti < n; ti++) {
// for each optimized transition
Transition trans = c.getState().getOptimizedTransition(ti);
ATNState target = getReachableTarget(c, trans, t);
if (target != null) {
reachIntermediate.add(c.transform(target, false), contextCache);
}
}
}
/* This block optimizes the reach operation for intermediate sets which
* trivially indicate a termination state for the overall
* adaptivePredict operation.
*
* The conditions assume that intermediate
* contains all configurations relevant to the reach set, but this
* condition is not true when one or more configurations have been
* withheld in skippedStopStates, or when the current symbol is EOF.
*/
if (optimize_unique_closure && skippedStopStates == null && t != Token.EOF && reachIntermediate.getUniqueAlt() != ATN.INVALID_ALT_NUMBER) {
reachIntermediate.setOutermostConfigSet(reach.isOutermostConfigSet());
reach = reachIntermediate;
break;
}
/* If the reach set could not be trivially determined, perform a closure
* operation on the intermediate set to compute its initial value.
*/
final boolean collectPredicates = false;
boolean treatEofAsEpsilon = t == Token.EOF;
closure(reachIntermediate, reach, collectPredicates, hasMoreContext, contextCache, treatEofAsEpsilon);
stepIntoGlobal = reach.getDipsIntoOuterContext();
if (t == IntStream.EOF) {
/* After consuming EOF no additional input is possible, so we are
* only interested in configurations which reached the end of the
* decision rule (local context) or end of the start rule (full
* context). Update reach to contain only these configurations. This
* handles both explicit EOF transitions in the grammar and implicit
* EOF transitions following the end of the decision or start rule.
*
* This is handled before the configurations in skippedStopStates,
* because any configurations potentially added from that list are
* already guaranteed to meet this condition whether or not it's
* required.
*/
reach = removeAllConfigsNotInRuleStopState(reach, contextCache);
}
/* If skippedStopStates is not null, then it contains at least one
* configuration. For full-context reach operations, these
* configurations reached the end of the start rule, in which case we
* only add them back to reach if no configuration during the current
* closure operation reached such a state. This ensures adaptivePredict
* chooses an alternative matching the longest overall sequence when
* multiple alternatives are viable.
*/
if (skippedStopStates != null && (!useContext || !PredictionMode.hasConfigInRuleStopState(reach))) {
assert !skippedStopStates.isEmpty();
for (ATNConfig c : skippedStopStates) {
reach.add(c, contextCache);
}
}
if (useContext && stepIntoGlobal) {
reach.clear();
remainingGlobalContext = skipTailCalls(remainingGlobalContext);
int nextContextElement = getReturnState(remainingGlobalContext);
if (contextElements == null) {
contextElements = new IntegerList();
}
if (remainingGlobalContext.isEmpty()) {
remainingGlobalContext = null;
} else {
remainingGlobalContext = remainingGlobalContext.getParent();
}
contextElements.add(nextContextElement);
if (nextContextElement != PredictionContext.EMPTY_FULL_STATE_KEY) {
for (int i = 0; i < closureConfigs.size(); i++) {
closureConfigs.set(i, closureConfigs.get(i).appendContext(nextContextElement, contextCache));
}
}
}
} while (useContext && stepIntoGlobal);
if (reach.isEmpty()) {
addDFAEdge(s, t, ERROR);
return Tuple.create(ERROR, remainingGlobalContext);
}
DFAState result = addDFAEdge(dfa, s, t, contextElements, reach, contextCache);
return Tuple.create(result, remainingGlobalContext);
}
use of org.antlr.v4.runtime.misc.IntegerList in project antlr4 by tunnelvisionlabs.
the class BaseTest method getTokenTypesViaATN.
public IntegerList getTokenTypesViaATN(String input, LexerATNSimulator lexerATN) {
CharStream in = CharStreams.fromString(input);
IntegerList tokenTypes = new IntegerList();
int ttype;
do {
ttype = lexerATN.match(in, Lexer.DEFAULT_MODE);
tokenTypes.add(ttype);
} while (ttype != Token.EOF);
return tokenTypes;
}
use of org.antlr.v4.runtime.misc.IntegerList in project antlr4 by tunnelvisionlabs.
the class ATNSerializer method serialize.
/**
* Serialize state descriptors, edge descriptors, and decision→state map
* into list of ints:
*
* grammar-type, (ANTLRParser.LEXER, ...)
* max token type,
* num states,
* state-0-type ruleIndex, state-1-type ruleIndex, ... state-i-type ruleIndex optional-arg ...
* num rules,
* rule-1-start-state rule-1-args, rule-2-start-state rule-2-args, ...
* (args are token type,actionIndex in lexer else 0,0)
* num modes,
* mode-0-start-state, mode-1-start-state, ... (parser has 0 modes)
* num unicode-bmp-sets
* bmp-set-0-interval-count intervals, bmp-set-1-interval-count intervals, ...
* num unicode-smp-sets
* smp-set-0-interval-count intervals, smp-set-1-interval-count intervals, ...
* num total edges,
* src, trg, edge-type, edge arg1, optional edge arg2 (present always), ...
* num decisions,
* decision-0-start-state, decision-1-start-state, ...
*
* Convenient to pack into unsigned shorts to make as Java string.
*/
public IntegerList serialize() {
IntegerList data = new IntegerList();
data.add(ATNDeserializer.SERIALIZED_VERSION);
serializeUUID(data, ATNDeserializer.SERIALIZED_UUID);
// convert grammar type to ATN const to avoid dependence on ANTLRParser
data.add(atn.grammarType.ordinal());
data.add(atn.maxTokenType);
int nedges = 0;
// Note that we use a LinkedHashMap as a set to
// maintain insertion order while deduplicating
// entries with the same key.
Map<IntervalSet, Boolean> sets = new LinkedHashMap<IntervalSet, Boolean>();
// dump states, count edges and collect sets while doing so
IntegerList nonGreedyStates = new IntegerList();
IntegerList sllStates = new IntegerList();
IntegerList precedenceStates = new IntegerList();
data.add(atn.states.size());
for (ATNState s : atn.states) {
if (s == null) {
// might be optimized away
data.add(ATNState.INVALID_TYPE);
continue;
}
int stateType = s.getStateType();
if (s instanceof DecisionState) {
DecisionState decisionState = (DecisionState) s;
if (decisionState.nonGreedy) {
nonGreedyStates.add(s.stateNumber);
}
if (decisionState.sll) {
sllStates.add(s.stateNumber);
}
}
if (s instanceof RuleStartState && ((RuleStartState) s).isPrecedenceRule) {
precedenceStates.add(s.stateNumber);
}
data.add(stateType);
if (s.ruleIndex == -1) {
data.add(Character.MAX_VALUE);
} else {
data.add(s.ruleIndex);
}
if (s.getStateType() == ATNState.LOOP_END) {
data.add(((LoopEndState) s).loopBackState.stateNumber);
} else if (s instanceof BlockStartState) {
data.add(((BlockStartState) s).endState.stateNumber);
}
if (s.getStateType() != ATNState.RULE_STOP) {
// the deserializer can trivially derive these edges, so there's no need to serialize them
nedges += s.getNumberOfTransitions();
}
for (int i = 0; i < s.getNumberOfTransitions(); i++) {
Transition t = s.transition(i);
int edgeType = Transition.serializationTypes.get(t.getClass());
if (edgeType == Transition.SET || edgeType == Transition.NOT_SET) {
SetTransition st = (SetTransition) t;
sets.put(st.set, true);
}
}
}
// non-greedy states
data.add(nonGreedyStates.size());
for (int i = 0; i < nonGreedyStates.size(); i++) {
data.add(nonGreedyStates.get(i));
}
// SLL decisions
data.add(sllStates.size());
for (int i = 0; i < sllStates.size(); i++) {
data.add(sllStates.get(i));
}
// precedence states
data.add(precedenceStates.size());
for (int i = 0; i < precedenceStates.size(); i++) {
data.add(precedenceStates.get(i));
}
int nrules = atn.ruleToStartState.length;
data.add(nrules);
for (int r = 0; r < nrules; r++) {
ATNState ruleStartState = atn.ruleToStartState[r];
data.add(ruleStartState.stateNumber);
boolean leftFactored = ruleNames.get(ruleStartState.ruleIndex).indexOf(ATNSimulator.RULE_VARIANT_DELIMITER) >= 0;
data.add(leftFactored ? 1 : 0);
if (atn.grammarType == ATNType.LEXER) {
if (atn.ruleToTokenType[r] == Token.EOF) {
data.add(Character.MAX_VALUE);
} else {
data.add(atn.ruleToTokenType[r]);
}
}
}
int nmodes = atn.modeToStartState.size();
data.add(nmodes);
if (nmodes > 0) {
for (ATNState modeStartState : atn.modeToStartState) {
data.add(modeStartState.stateNumber);
}
}
List<IntervalSet> bmpSets = new ArrayList<IntervalSet>();
List<IntervalSet> smpSets = new ArrayList<IntervalSet>();
for (IntervalSet set : sets.keySet()) {
if (set.getMaxElement() <= Character.MAX_VALUE) {
bmpSets.add(set);
} else {
smpSets.add(set);
}
}
serializeSets(data, bmpSets, new CodePointSerializer() {
@Override
public void serializeCodePoint(IntegerList data, int cp) {
data.add(cp);
}
});
serializeSets(data, smpSets, new CodePointSerializer() {
@Override
public void serializeCodePoint(IntegerList data, int cp) {
serializeInt(data, cp);
}
});
Map<IntervalSet, Integer> setIndices = new HashMap<IntervalSet, Integer>();
int setIndex = 0;
for (IntervalSet bmpSet : bmpSets) {
setIndices.put(bmpSet, setIndex++);
}
for (IntervalSet smpSet : smpSets) {
setIndices.put(smpSet, setIndex++);
}
data.add(nedges);
for (ATNState s : atn.states) {
if (s == null) {
// might be optimized away
continue;
}
if (s.getStateType() == ATNState.RULE_STOP) {
continue;
}
for (int i = 0; i < s.getNumberOfTransitions(); i++) {
Transition t = s.transition(i);
if (atn.states.get(t.target.stateNumber) == null) {
throw new IllegalStateException("Cannot serialize a transition to a removed state.");
}
int src = s.stateNumber;
int trg = t.target.stateNumber;
int edgeType = Transition.serializationTypes.get(t.getClass());
int arg1 = 0;
int arg2 = 0;
int arg3 = 0;
switch(edgeType) {
case Transition.RULE:
trg = ((RuleTransition) t).followState.stateNumber;
arg1 = ((RuleTransition) t).target.stateNumber;
arg2 = ((RuleTransition) t).ruleIndex;
arg3 = ((RuleTransition) t).precedence;
break;
case Transition.PRECEDENCE:
PrecedencePredicateTransition ppt = (PrecedencePredicateTransition) t;
arg1 = ppt.precedence;
break;
case Transition.PREDICATE:
PredicateTransition pt = (PredicateTransition) t;
arg1 = pt.ruleIndex;
arg2 = pt.predIndex;
arg3 = pt.isCtxDependent ? 1 : 0;
break;
case Transition.RANGE:
arg1 = ((RangeTransition) t).from;
arg2 = ((RangeTransition) t).to;
if (arg1 == Token.EOF) {
arg1 = 0;
arg3 = 1;
}
break;
case Transition.ATOM:
arg1 = ((AtomTransition) t).label;
if (arg1 == Token.EOF) {
arg1 = 0;
arg3 = 1;
}
break;
case Transition.ACTION:
ActionTransition at = (ActionTransition) t;
arg1 = at.ruleIndex;
arg2 = at.actionIndex;
if (arg2 == -1) {
arg2 = 0xFFFF;
}
arg3 = at.isCtxDependent ? 1 : 0;
break;
case Transition.SET:
arg1 = setIndices.get(((SetTransition) t).set);
break;
case Transition.NOT_SET:
arg1 = setIndices.get(((SetTransition) t).set);
break;
case Transition.WILDCARD:
break;
}
data.add(src);
data.add(trg);
data.add(edgeType);
data.add(arg1);
data.add(arg2);
data.add(arg3);
}
}
int ndecisions = atn.decisionToState.size();
data.add(ndecisions);
for (DecisionState decStartState : atn.decisionToState) {
data.add(decStartState.stateNumber);
}
//
if (atn.grammarType == ATNType.LEXER) {
data.add(atn.lexerActions.length);
for (LexerAction action : atn.lexerActions) {
data.add(action.getActionType().ordinal());
switch(action.getActionType()) {
case CHANNEL:
int channel = ((LexerChannelAction) action).getChannel();
data.add(channel != -1 ? channel : 0xFFFF);
data.add(0);
break;
case CUSTOM:
int ruleIndex = ((LexerCustomAction) action).getRuleIndex();
int actionIndex = ((LexerCustomAction) action).getActionIndex();
data.add(ruleIndex != -1 ? ruleIndex : 0xFFFF);
data.add(actionIndex != -1 ? actionIndex : 0xFFFF);
break;
case MODE:
int mode = ((LexerModeAction) action).getMode();
data.add(mode != -1 ? mode : 0xFFFF);
data.add(0);
break;
case MORE:
data.add(0);
data.add(0);
break;
case POP_MODE:
data.add(0);
data.add(0);
break;
case PUSH_MODE:
mode = ((LexerPushModeAction) action).getMode();
data.add(mode != -1 ? mode : 0xFFFF);
data.add(0);
break;
case SKIP:
data.add(0);
data.add(0);
break;
case TYPE:
int type = ((LexerTypeAction) action).getType();
data.add(type != -1 ? type : 0xFFFF);
data.add(0);
break;
default:
String message = String.format(Locale.getDefault(), "The specified lexer action type %s is not valid.", action.getActionType());
throw new IllegalArgumentException(message);
}
}
}
// don't adjust the first value since that's the version number
for (int i = 1; i < data.size(); i++) {
if (data.get(i) < Character.MIN_VALUE || data.get(i) > Character.MAX_VALUE) {
throw new UnsupportedOperationException("Serialized ATN data element " + data.get(i) + " element " + i + " out of range " + (int) Character.MIN_VALUE + ".." + (int) Character.MAX_VALUE);
}
int value = (data.get(i) + 2) & 0xFFFF;
data.set(i, value);
}
return data;
}
use of org.antlr.v4.runtime.misc.IntegerList in project antlr4 by tunnelvisionlabs.
the class ATNSerializer method serializeSets.
private static void serializeSets(IntegerList data, Collection<IntervalSet> sets, CodePointSerializer codePointSerializer) {
int nSets = sets.size();
data.add(nSets);
for (IntervalSet set : sets) {
boolean containsEof = set.contains(Token.EOF);
if (containsEof && set.getIntervals().get(0).b == Token.EOF) {
data.add(set.getIntervals().size() - 1);
} else {
data.add(set.getIntervals().size());
}
data.add(containsEof ? 1 : 0);
for (Interval I : set.getIntervals()) {
if (I.a == Token.EOF) {
if (I.b == Token.EOF) {
continue;
} else {
codePointSerializer.serializeCodePoint(data, 0);
}
} else {
codePointSerializer.serializeCodePoint(data, I.a);
}
codePointSerializer.serializeCodePoint(data, I.b);
}
}
}
use of org.antlr.v4.runtime.misc.IntegerList in project antlr4 by tunnelvisionlabs.
the class TestATNParserPrediction method checkPredictedAlt.
/**
* first check that the ATN predicts right alt.
* Then check adaptive prediction.
*/
public void checkPredictedAlt(LexerGrammar lg, Grammar g, int decision, String inputString, int expectedAlt) {
Tool.internalOption_ShowATNConfigsInDFA = true;
ATN lexatn = createATN(lg, true);
LexerATNSimulator lexInterp = new LexerATNSimulator(lexatn);
IntegerList types = getTokenTypesViaATN(inputString, lexInterp);
System.out.println(types);
semanticProcess(lg);
g.importVocab(lg);
semanticProcess(g);
ParserATNFactory f = new ParserATNFactory(g);
ATN atn = f.createATN();
DOTGenerator dot = new DOTGenerator(g);
Rule r = g.getRule("a");
if (r != null)
System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
r = g.getRule("b");
if (r != null)
System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
r = g.getRule("e");
if (r != null)
System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
r = g.getRule("ifstat");
if (r != null)
System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
r = g.getRule("block");
if (r != null)
System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
// Check ATN prediction
// ParserATNSimulator<Token> interp = new ParserATNSimulator<Token>(atn);
TokenStream input = new IntTokenStream(types);
ParserInterpreterForTesting interp = new ParserInterpreterForTesting(g, input);
DecisionState startState = atn.decisionToState.get(decision);
DFA dfa = new DFA(startState, decision);
int alt = interp.adaptivePredict(input, decision, ParserRuleContext.emptyContext());
System.out.println(dot.getDOT(dfa, false));
assertEquals(expectedAlt, alt);
// Check adaptive prediction
input.seek(0);
alt = interp.adaptivePredict(input, decision, null);
assertEquals(expectedAlt, alt);
// run 2x; first time creates DFA in atn
input.seek(0);
alt = interp.adaptivePredict(input, decision, null);
assertEquals(expectedAlt, alt);
}
Aggregations