use of org.antlr.v4.tool.Rule in project antlr4 by tunnelvisionlabs.
the class DefaultErrorStrategy method getErrorRecoverySet.
/* Compute the error recovery set for the current rule. During
* rule invocation, the parser pushes the set of tokens that can
* follow that rule reference on the stack; this amounts to
* computing FIRST of what follows the rule reference in the
* enclosing rule. See LinearApproximator.FIRST().
* This local follow set only includes tokens
* from within the rule; i.e., the FIRST computation done by
* ANTLR stops at the end of a rule.
*
* EXAMPLE
*
* When you find a "no viable alt exception", the input is not
* consistent with any of the alternatives for rule r. The best
* thing to do is to consume tokens until you see something that
* can legally follow a call to r *or* any rule that called r.
* You don't want the exact set of viable next tokens because the
* input might just be missing a token--you might consume the
* rest of the input looking for one of the missing tokens.
*
* Consider grammar:
*
* a : '[' b ']'
* | '(' b ')'
* ;
* b : c '^' INT ;
* c : ID
* | INT
* ;
*
* At each rule invocation, the set of tokens that could follow
* that rule is pushed on a stack. Here are the various
* context-sensitive follow sets:
*
* FOLLOW(b1_in_a) = FIRST(']') = ']'
* FOLLOW(b2_in_a) = FIRST(')') = ')'
* FOLLOW(c_in_b) = FIRST('^') = '^'
*
* Upon erroneous input "[]", the call chain is
*
* a -> b -> c
*
* and, hence, the follow context stack is:
*
* depth follow set start of rule execution
* 0 <EOF> a (from main())
* 1 ']' b
* 2 '^' c
*
* Notice that ')' is not included, because b would have to have
* been called from a different context in rule a for ')' to be
* included.
*
* For error recovery, we cannot consider FOLLOW(c)
* (context-sensitive or otherwise). We need the combined set of
* all context-sensitive FOLLOW sets--the set of all tokens that
* could follow any reference in the call chain. We need to
* resync to one of those tokens. Note that FOLLOW(c)='^' and if
* we resync'd to that token, we'd consume until EOF. We need to
* sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
* In this case, for input "[]", LA(1) is ']' and in the set, so we would
* not consume anything. After printing an error, rule c would
* return normally. Rule b would not find the required '^' though.
* At this point, it gets a mismatched token error and throws an
* exception (since LA(1) is not in the viable following token
* set). The rule exception handler tries to recover, but finds
* the same recovery set and doesn't consume anything. Rule b
* exits normally returning to rule a. Now it finds the ']' (and
* with the successful match exits errorRecovery mode).
*
* So, you can see that the parser walks up the call chain looking
* for the token that was a member of the recovery set.
*
* Errors are not generated in errorRecovery mode.
*
* ANTLR's error recovery mechanism is based upon original ideas:
*
* "Algorithms + Data Structures = Programs" by Niklaus Wirth
*
* and
*
* "A note on error recovery in recursive descent parsers":
* http://portal.acm.org/citation.cfm?id=947902.947905
*
* Later, Josef Grosch had some good ideas:
*
* "Efficient and Comfortable Error Recovery in Recursive Descent
* Parsers":
* ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
*
* Like Grosch I implement context-sensitive FOLLOW sets that are combined
* at run-time upon error to avoid overhead during parsing.
*/
@NotNull
protected IntervalSet getErrorRecoverySet(@NotNull Parser recognizer) {
ATN atn = recognizer.getInterpreter().atn;
RuleContext ctx = recognizer._ctx;
IntervalSet recoverSet = new IntervalSet();
while (ctx != null && ctx.invokingState >= 0) {
// compute what follows who invoked us
ATNState invokingState = atn.states.get(ctx.invokingState);
RuleTransition rt = (RuleTransition) invokingState.transition(0);
IntervalSet follow = atn.nextTokens(rt.followState);
recoverSet.addAll(follow);
ctx = ctx.parent;
}
recoverSet.remove(Token.EPSILON);
// System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames()));
return recoverSet;
}
use of org.antlr.v4.tool.Rule in project antlr4 by tunnelvisionlabs.
the class TestTokenPositionOptions method testLeftRecursionRewrite.
@Test
public void testLeftRecursionRewrite() throws Exception {
Grammar g = new Grammar("grammar T;\n" + "s : e ';' ;\n" + "e : e '*' e\n" + " | e '+' e\n" + " | e '.' ID\n" + " | '-' e\n" + " | ID\n" + " ;\n" + "ID : [a-z]+ ;\n");
String expectedTree = "(COMBINED_GRAMMAR T (RULES (RULE s (BLOCK (ALT e ';'))) (RULE e (BLOCK (ALT (BLOCK (ALT {} ('-' (ELEMENT_OPTIONS (= tokenIndex 43))) (e (ELEMENT_OPTIONS (= tokenIndex 45) (= p 2)))) (ALT (ID (ELEMENT_OPTIONS (= tokenIndex 49))))) (* (BLOCK (ALT ({precpred(_ctx, 5)}? (ELEMENT_OPTIONS (= p 5))) ('*' (ELEMENT_OPTIONS (= tokenIndex 21))) (e (ELEMENT_OPTIONS (= tokenIndex 23) (= p 6)))) (ALT ({precpred(_ctx, 4)}? (ELEMENT_OPTIONS (= p 4))) ('+' (ELEMENT_OPTIONS (= tokenIndex 29))) (e (ELEMENT_OPTIONS (= tokenIndex 31) (= p 5)))) (ALT ({precpred(_ctx, 3)}? (ELEMENT_OPTIONS (= p 3))) ('.' (ELEMENT_OPTIONS (= tokenIndex 37))) (ID (ELEMENT_OPTIONS (= tokenIndex 39)))))))))))";
assertEquals(expectedTree, g.ast.toStringTree());
String expectedElementTokens = "[@5,11:11='s',<57>,2:0]\n" + "[@9,15:15='e',<57>,2:4]\n" + "[@11,17:19='';'',<62>,2:6]\n" + "[@15,23:23='e',<57>,3:0]\n" + "[@43,64:66=''-'',<62>,6:4]\n" + "[@45,68:68='e',<57>,6:8]\n" + "[@49,74:75='ID',<66>,7:4]\n" + "[@21,29:31=''*'',<62>,3:6]\n" + "[@23,33:33='e',<57>,3:10]\n" + "[@29,41:43=''+'',<62>,4:6]\n" + "[@31,45:45='e',<57>,4:10]\n" + "[@37,53:55=''.'',<62>,5:6]\n" + "[@39,57:58='ID',<66>,5:10]";
IntervalSet types = new IntervalSet(ANTLRParser.TOKEN_REF, ANTLRParser.STRING_LITERAL, ANTLRParser.RULE_REF);
List<GrammarAST> nodes = g.ast.getNodesWithTypePreorderDFS(types);
List<Token> tokens = new ArrayList<Token>();
for (GrammarAST node : nodes) {
tokens.add(node.getToken());
}
assertEquals(expectedElementTokens, Utils.join(tokens.toArray(), "\n"));
}
use of org.antlr.v4.tool.Rule in project antlr4 by tunnelvisionlabs.
the class TestTokenPositionOptions method testLeftRecursionWithLabels.
@Test
public void testLeftRecursionWithLabels() throws Exception {
Grammar g = new Grammar("grammar T;\n" + "s : e ';' ;\n" + "e : e '*' x=e\n" + " | e '+' e\n" + " | e '.' y=ID\n" + " | '-' e\n" + " | ID\n" + " ;\n" + "ID : [a-z]+ ;\n");
String expectedTree = "(COMBINED_GRAMMAR T (RULES (RULE s (BLOCK (ALT e ';'))) (RULE e (BLOCK (ALT (BLOCK (ALT {} ('-' (ELEMENT_OPTIONS (= tokenIndex 47))) (e (ELEMENT_OPTIONS (= tokenIndex 49) (= p 2)))) (ALT (ID (ELEMENT_OPTIONS (= tokenIndex 53))))) (* (BLOCK (ALT ({precpred(_ctx, 5)}? (ELEMENT_OPTIONS (= p 5))) ('*' (ELEMENT_OPTIONS (= tokenIndex 21))) (= x (e (ELEMENT_OPTIONS (= tokenIndex 25) (= p 6))))) (ALT ({precpred(_ctx, 4)}? (ELEMENT_OPTIONS (= p 4))) ('+' (ELEMENT_OPTIONS (= tokenIndex 31))) (e (ELEMENT_OPTIONS (= tokenIndex 33) (= p 5)))) (ALT ({precpred(_ctx, 3)}? (ELEMENT_OPTIONS (= p 3))) ('.' (ELEMENT_OPTIONS (= tokenIndex 39))) (= y (ID (ELEMENT_OPTIONS (= tokenIndex 43))))))))))))";
assertEquals(expectedTree, g.ast.toStringTree());
String expectedElementTokens = "[@5,11:11='s',<57>,2:0]\n" + "[@9,15:15='e',<57>,2:4]\n" + "[@11,17:19='';'',<62>,2:6]\n" + "[@15,23:23='e',<57>,3:0]\n" + "[@47,68:70=''-'',<62>,6:4]\n" + "[@49,72:72='e',<57>,6:8]\n" + "[@53,78:79='ID',<66>,7:4]\n" + "[@21,29:31=''*'',<62>,3:6]\n" + "[@25,35:35='e',<57>,3:12]\n" + "[@31,43:45=''+'',<62>,4:6]\n" + "[@33,47:47='e',<57>,4:10]\n" + "[@39,55:57=''.'',<62>,5:6]\n" + "[@43,61:62='ID',<66>,5:12]";
IntervalSet types = new IntervalSet(ANTLRParser.TOKEN_REF, ANTLRParser.STRING_LITERAL, ANTLRParser.RULE_REF);
List<GrammarAST> nodes = g.ast.getNodesWithTypePreorderDFS(types);
List<Token> tokens = new ArrayList<Token>();
for (GrammarAST node : nodes) {
tokens.add(node.getToken());
}
assertEquals(expectedElementTokens, Utils.join(tokens.toArray(), "\n"));
}
use of org.antlr.v4.tool.Rule in project antlr4 by antlr.
the class ATNSerializer method serialize.
/**
* Serialize state descriptors, edge descriptors, and decision→state map
* into list of ints:
*
* grammar-type, (ANTLRParser.LEXER, ...)
* max token type,
* num states,
* state-0-type ruleIndex, state-1-type ruleIndex, ... state-i-type ruleIndex optional-arg ...
* num rules,
* rule-1-start-state rule-1-args, rule-2-start-state rule-2-args, ...
* (args are token type,actionIndex in lexer else 0,0)
* num modes,
* mode-0-start-state, mode-1-start-state, ... (parser has 0 modes)
* num unicode-bmp-sets
* bmp-set-0-interval-count intervals, bmp-set-1-interval-count intervals, ...
* num unicode-smp-sets
* smp-set-0-interval-count intervals, smp-set-1-interval-count intervals, ...
* num total edges,
* src, trg, edge-type, edge arg1, optional edge arg2 (present always), ...
* num decisions,
* decision-0-start-state, decision-1-start-state, ...
*
* Convenient to pack into unsigned shorts to make as Java string.
*/
public IntegerList serialize() {
IntegerList data = new IntegerList();
data.add(ATNDeserializer.SERIALIZED_VERSION);
serializeUUID(data, ATNDeserializer.SERIALIZED_UUID);
// convert grammar type to ATN const to avoid dependence on ANTLRParser
data.add(atn.grammarType.ordinal());
data.add(atn.maxTokenType);
int nedges = 0;
// Note that we use a LinkedHashMap as a set to
// maintain insertion order while deduplicating
// entries with the same key.
Map<IntervalSet, Boolean> sets = new LinkedHashMap<>();
// dump states, count edges and collect sets while doing so
IntegerList nonGreedyStates = new IntegerList();
IntegerList precedenceStates = new IntegerList();
data.add(atn.states.size());
for (ATNState s : atn.states) {
if (s == null) {
// might be optimized away
data.add(ATNState.INVALID_TYPE);
continue;
}
int stateType = s.getStateType();
if (s instanceof DecisionState && ((DecisionState) s).nonGreedy) {
nonGreedyStates.add(s.stateNumber);
}
if (s instanceof RuleStartState && ((RuleStartState) s).isLeftRecursiveRule) {
precedenceStates.add(s.stateNumber);
}
data.add(stateType);
if (s.ruleIndex == -1) {
data.add(Character.MAX_VALUE);
} else {
data.add(s.ruleIndex);
}
if (s.getStateType() == ATNState.LOOP_END) {
data.add(((LoopEndState) s).loopBackState.stateNumber);
} else if (s instanceof BlockStartState) {
data.add(((BlockStartState) s).endState.stateNumber);
}
if (s.getStateType() != ATNState.RULE_STOP) {
// the deserializer can trivially derive these edges, so there's no need to serialize them
nedges += s.getNumberOfTransitions();
}
for (int i = 0; i < s.getNumberOfTransitions(); i++) {
Transition t = s.transition(i);
int edgeType = Transition.serializationTypes.get(t.getClass());
if (edgeType == Transition.SET || edgeType == Transition.NOT_SET) {
SetTransition st = (SetTransition) t;
sets.put(st.set, true);
}
}
}
// non-greedy states
data.add(nonGreedyStates.size());
for (int i = 0; i < nonGreedyStates.size(); i++) {
data.add(nonGreedyStates.get(i));
}
// precedence states
data.add(precedenceStates.size());
for (int i = 0; i < precedenceStates.size(); i++) {
data.add(precedenceStates.get(i));
}
int nrules = atn.ruleToStartState.length;
data.add(nrules);
for (int r = 0; r < nrules; r++) {
ATNState ruleStartState = atn.ruleToStartState[r];
data.add(ruleStartState.stateNumber);
if (atn.grammarType == ATNType.LEXER) {
if (atn.ruleToTokenType[r] == Token.EOF) {
data.add(Character.MAX_VALUE);
} else {
data.add(atn.ruleToTokenType[r]);
}
}
}
int nmodes = atn.modeToStartState.size();
data.add(nmodes);
if (nmodes > 0) {
for (ATNState modeStartState : atn.modeToStartState) {
data.add(modeStartState.stateNumber);
}
}
List<IntervalSet> bmpSets = new ArrayList<>();
List<IntervalSet> smpSets = new ArrayList<>();
for (IntervalSet set : sets.keySet()) {
if (!set.isNil() && set.getMaxElement() <= Character.MAX_VALUE) {
bmpSets.add(set);
} else {
smpSets.add(set);
}
}
serializeSets(data, bmpSets, new CodePointSerializer() {
@Override
public void serializeCodePoint(IntegerList data, int cp) {
data.add(cp);
}
});
serializeSets(data, smpSets, new CodePointSerializer() {
@Override
public void serializeCodePoint(IntegerList data, int cp) {
serializeInt(data, cp);
}
});
Map<IntervalSet, Integer> setIndices = new HashMap<>();
int setIndex = 0;
for (IntervalSet bmpSet : bmpSets) {
setIndices.put(bmpSet, setIndex++);
}
for (IntervalSet smpSet : smpSets) {
setIndices.put(smpSet, setIndex++);
}
data.add(nedges);
for (ATNState s : atn.states) {
if (s == null) {
// might be optimized away
continue;
}
if (s.getStateType() == ATNState.RULE_STOP) {
continue;
}
for (int i = 0; i < s.getNumberOfTransitions(); i++) {
Transition t = s.transition(i);
if (atn.states.get(t.target.stateNumber) == null) {
throw new IllegalStateException("Cannot serialize a transition to a removed state.");
}
int src = s.stateNumber;
int trg = t.target.stateNumber;
int edgeType = Transition.serializationTypes.get(t.getClass());
int arg1 = 0;
int arg2 = 0;
int arg3 = 0;
switch(edgeType) {
case Transition.RULE:
trg = ((RuleTransition) t).followState.stateNumber;
arg1 = ((RuleTransition) t).target.stateNumber;
arg2 = ((RuleTransition) t).ruleIndex;
arg3 = ((RuleTransition) t).precedence;
break;
case Transition.PRECEDENCE:
PrecedencePredicateTransition ppt = (PrecedencePredicateTransition) t;
arg1 = ppt.precedence;
break;
case Transition.PREDICATE:
PredicateTransition pt = (PredicateTransition) t;
arg1 = pt.ruleIndex;
arg2 = pt.predIndex;
arg3 = pt.isCtxDependent ? 1 : 0;
break;
case Transition.RANGE:
arg1 = ((RangeTransition) t).from;
arg2 = ((RangeTransition) t).to;
if (arg1 == Token.EOF) {
arg1 = 0;
arg3 = 1;
}
break;
case Transition.ATOM:
arg1 = ((AtomTransition) t).label;
if (arg1 == Token.EOF) {
arg1 = 0;
arg3 = 1;
}
break;
case Transition.ACTION:
ActionTransition at = (ActionTransition) t;
arg1 = at.ruleIndex;
arg2 = at.actionIndex;
if (arg2 == -1) {
arg2 = 0xFFFF;
}
arg3 = at.isCtxDependent ? 1 : 0;
break;
case Transition.SET:
arg1 = setIndices.get(((SetTransition) t).set);
break;
case Transition.NOT_SET:
arg1 = setIndices.get(((SetTransition) t).set);
break;
case Transition.WILDCARD:
break;
}
data.add(src);
data.add(trg);
data.add(edgeType);
data.add(arg1);
data.add(arg2);
data.add(arg3);
}
}
int ndecisions = atn.decisionToState.size();
data.add(ndecisions);
for (DecisionState decStartState : atn.decisionToState) {
data.add(decStartState.stateNumber);
}
//
if (atn.grammarType == ATNType.LEXER) {
data.add(atn.lexerActions.length);
for (LexerAction action : atn.lexerActions) {
data.add(action.getActionType().ordinal());
switch(action.getActionType()) {
case CHANNEL:
int channel = ((LexerChannelAction) action).getChannel();
data.add(channel != -1 ? channel : 0xFFFF);
data.add(0);
break;
case CUSTOM:
int ruleIndex = ((LexerCustomAction) action).getRuleIndex();
int actionIndex = ((LexerCustomAction) action).getActionIndex();
data.add(ruleIndex != -1 ? ruleIndex : 0xFFFF);
data.add(actionIndex != -1 ? actionIndex : 0xFFFF);
break;
case MODE:
int mode = ((LexerModeAction) action).getMode();
data.add(mode != -1 ? mode : 0xFFFF);
data.add(0);
break;
case MORE:
data.add(0);
data.add(0);
break;
case POP_MODE:
data.add(0);
data.add(0);
break;
case PUSH_MODE:
mode = ((LexerPushModeAction) action).getMode();
data.add(mode != -1 ? mode : 0xFFFF);
data.add(0);
break;
case SKIP:
data.add(0);
data.add(0);
break;
case TYPE:
int type = ((LexerTypeAction) action).getType();
data.add(type != -1 ? type : 0xFFFF);
data.add(0);
break;
default:
String message = String.format(Locale.getDefault(), "The specified lexer action type %s is not valid.", action.getActionType());
throw new IllegalArgumentException(message);
}
}
}
// don't adjust the first value since that's the version number
for (int i = 1; i < data.size(); i++) {
if (data.get(i) < Character.MIN_VALUE || data.get(i) > Character.MAX_VALUE) {
throw new UnsupportedOperationException("Serialized ATN data element " + data.get(i) + " element " + i + " out of range " + (int) Character.MIN_VALUE + ".." + (int) Character.MAX_VALUE);
}
int value = (data.get(i) + 2) & 0xFFFF;
data.set(i, value);
}
return data;
}
use of org.antlr.v4.tool.Rule in project antlr4 by antlr.
the class ParserATNSimulator method execATN.
/**
* Performs ATN simulation to compute a predicted alternative based
* upon the remaining input, but also updates the DFA cache to avoid
* having to traverse the ATN again for the same input sequence.
*
* There are some key conditions we're looking for after computing a new
* set of ATN configs (proposed DFA state):
* if the set is empty, there is no viable alternative for current symbol
* does the state uniquely predict an alternative?
* does the state have a conflict that would prevent us from
* putting it on the work list?
*
* We also have some key operations to do:
* add an edge from previous DFA state to potentially new DFA state, D,
* upon current symbol but only if adding to work list, which means in all
* cases except no viable alternative (and possibly non-greedy decisions?)
* collecting predicates and adding semantic context to DFA accept states
* adding rule context to context-sensitive DFA accept states
* consuming an input symbol
* reporting a conflict
* reporting an ambiguity
* reporting a context sensitivity
* reporting insufficient predicates
*
* cover these cases:
* dead end
* single alt
* single alt + preds
* conflict
* conflict + preds
*/
protected int execATN(DFA dfa, DFAState s0, TokenStream input, int startIndex, ParserRuleContext outerContext) {
if (debug || debug_list_atn_decisions) {
System.out.println("execATN decision " + dfa.decision + " exec LA(1)==" + getLookaheadName(input) + " line " + input.LT(1).getLine() + ":" + input.LT(1).getCharPositionInLine());
}
DFAState previousD = s0;
if (debug)
System.out.println("s0 = " + s0);
int t = input.LA(1);
while (true) {
// while more work
DFAState D = getExistingTargetState(previousD, t);
if (D == null) {
D = computeTargetState(dfa, previousD, t);
}
if (D == ERROR) {
// if any configs in previous dipped into outer context, that
// means that input up to t actually finished entry rule
// at least for SLL decision. Full LL doesn't dip into outer
// so don't need special case.
// We will get an error no matter what so delay until after
// decision; better error message. Also, no reachable target
// ATN states in SLL implies LL will also get nowhere.
// If conflict in states that dip out, choose min since we
// will get error no matter what.
NoViableAltException e = noViableAlt(input, outerContext, previousD.configs, startIndex);
input.seek(startIndex);
int alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD.configs, outerContext);
if (alt != ATN.INVALID_ALT_NUMBER) {
return alt;
}
throw e;
}
if (D.requiresFullContext && mode != PredictionMode.SLL) {
// IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error)
BitSet conflictingAlts = D.configs.conflictingAlts;
if (D.predicates != null) {
if (debug)
System.out.println("DFA state has preds in DFA sim LL failover");
int conflictIndex = input.index();
if (conflictIndex != startIndex) {
input.seek(startIndex);
}
conflictingAlts = evalSemanticContext(D.predicates, outerContext, true);
if (conflictingAlts.cardinality() == 1) {
if (debug)
System.out.println("Full LL avoided");
return conflictingAlts.nextSetBit(0);
}
if (conflictIndex != startIndex) {
// restore the index so reporting the fallback to full
// context occurs with the index at the correct spot
input.seek(conflictIndex);
}
}
if (dfa_debug)
System.out.println("ctx sensitive state " + outerContext + " in " + D);
boolean fullCtx = true;
ATNConfigSet s0_closure = computeStartState(dfa.atnStartState, outerContext, fullCtx);
reportAttemptingFullContext(dfa, conflictingAlts, D.configs, startIndex, input.index());
int alt = execATNWithFullContext(dfa, D, s0_closure, input, startIndex, outerContext);
return alt;
}
if (D.isAcceptState) {
if (D.predicates == null) {
return D.prediction;
}
int stopIndex = input.index();
input.seek(startIndex);
BitSet alts = evalSemanticContext(D.predicates, outerContext, true);
switch(alts.cardinality()) {
case 0:
throw noViableAlt(input, outerContext, D.configs, startIndex);
case 1:
return alts.nextSetBit(0);
default:
// report ambiguity after predicate evaluation to make sure the correct
// set of ambig alts is reported.
reportAmbiguity(dfa, D, startIndex, stopIndex, false, alts, D.configs);
return alts.nextSetBit(0);
}
}
previousD = D;
if (t != IntStream.EOF) {
input.consume();
t = input.LA(1);
}
}
}
Aggregations