Use of org.antlr.v4.runtime.RecognitionException in project antlr4 by antlr.
Class ParseTreePatternMatcher, method compile.
/**
* For repeated use of a tree pattern, compile it to a
* {@link ParseTreePattern} using this method.
*/
public ParseTreePattern compile(String pattern, int patternRuleIndex) {
    List<? extends Token> tokenList = tokenize(pattern);
    ListTokenSource tokenSrc = new ListTokenSource(tokenList);
    CommonTokenStream tokens = new CommonTokenStream(tokenSrc);
    ParserInterpreter parserInterp = new ParserInterpreter(parser.getGrammarFileName(),
                                                           parser.getVocabulary(),
                                                           Arrays.asList(parser.getRuleNames()),
                                                           parser.getATNWithBypassAlts(),
                                                           tokens);
    ParseTree tree = null;
    try {
        parserInterp.setErrorHandler(new BailErrorStrategy());
        tree = parserInterp.parse(patternRuleIndex);
        // System.out.println("pattern tree = "+tree.toStringTree(parserInterp));
    }
    catch (ParseCancellationException e) {
        throw (RecognitionException) e.getCause();
    }
    catch (RecognitionException re) {
        throw re;
    }
    catch (Exception e) {
        throw new CannotInvokeStartRule(e);
    }
    // Make sure tree pattern compilation checks for a complete parse
    if (tokens.LA(1) != Token.EOF) {
        throw new StartRuleDoesNotConsumeFullPattern();
    }
    return new ParseTreePattern(this, pattern, patternRuleIndex, tree);
}
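In practice this method is usually reached through Parser.compileParseTreePattern(pattern, ruleIndex), which delegates to the matcher. Below is a minimal usage sketch; MyLexer, MyParser, the statement rule, and the <ID>/<expr> tags are hypothetical stand-ins for your own generated parser and grammar (ParseTreePattern and ParseTreeMatch live in org.antlr.v4.runtime.tree.pattern).

// Hypothetical generated lexer/parser; substitute your own.
MyLexer lexer = new MyLexer(new ANTLRInputStream("x = 1 + 2;"));
MyParser parser = new MyParser(new CommonTokenStream(lexer));
ParseTree tree = parser.statement();

// Compile the pattern once, then reuse it; this call ends up in
// ParseTreePatternMatcher.compile shown above.
ParseTreePattern pattern =
    parser.compileParseTreePattern("<ID> = <expr>;", MyParser.RULE_statement);

ParseTreeMatch match = pattern.match(tree);
if (match.succeeded()) {
    System.out.println("assigned variable: " + match.get("ID").getText());
}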
Use of org.antlr.v4.runtime.RecognitionException in project antlr4 by antlr.
Class DefaultErrorStrategy, method recover.
/**
* {@inheritDoc}
*
* <p>The default implementation resynchronizes the parser by consuming tokens
* until we find one in the resynchronization set--loosely the set of tokens
* that can follow the current rule.</p>
*/
@Override
public void recover(Parser recognizer, RecognitionException e) {
    if (lastErrorIndex == recognizer.getInputStream().index() &&
        lastErrorStates != null &&
        lastErrorStates.contains(recognizer.getState())) {
        // uh oh, another error at same token index and previously-visited
        // state in ATN; must be a case where LT(1) is in the recovery
        // token set so nothing got consumed. Consume a single token
        // at least to prevent an infinite loop; this is a failsafe.
        // System.err.println("seen error condition before index="+
        //                    lastErrorIndex+", states="+lastErrorStates);
        // System.err.println("FAILSAFE consumes "+recognizer.getTokenNames()[recognizer.getInputStream().LA(1)]);
        recognizer.consume();
    }
    lastErrorIndex = recognizer.getInputStream().index();
    if (lastErrorStates == null)
        lastErrorStates = new IntervalSet();
    lastErrorStates.add(recognizer.getState());
    IntervalSet followSet = getErrorRecoverySet(recognizer);
    consumeUntil(recognizer, followSet);
}
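Because recover is public, the default behavior is easy to observe or adjust by subclassing. A minimal sketch, assuming you already have a parser instance to install the strategy on; the TracingErrorStrategy name is made up for illustration:

public class TracingErrorStrategy extends DefaultErrorStrategy {
    @Override
    public void recover(Parser recognizer, RecognitionException e) {
        // log where recovery kicks in, then fall back to the default
        // follow-set resynchronization shown above
        System.err.println("recovering at token index " +
                           recognizer.getInputStream().index());
        super.recover(recognizer, e);
    }
}

// installation on a generated or interpreted parser
parser.setErrorHandler(new TracingErrorStrategy());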
Use of org.antlr.v4.runtime.RecognitionException in project antlr4 by antlr.
Class DefaultErrorStrategy, method sync.
/**
* The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
* that the current lookahead symbol is consistent with what we were expecting
* at this point in the ATN. You can call this at any time, but ANTLR only
* generates code to check before subrules/loops and each iteration.
*
* <p>Implements Jim Idle's magic sync mechanism in closures and optional
* subrules. E.g.,</p>
*
* <pre>
* a : sync ( stuff sync )* ;
* sync : {consume to what can follow sync} ;
* </pre>
*
* At the start of a sub rule upon error, {@link #sync} performs single
* token deletion, if possible. If it can't do that, it bails on the current
* rule and uses the default error recovery, which consumes until the
* resynchronization set of the current rule.
*
* <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
* with an empty alternative), then the expected set includes what follows
* the subrule.</p>
*
* <p>During loop iteration, it consumes until it sees a token that can start a
* sub rule or what follows the loop. Yes, that is pretty aggressive. We opt to
* stay in the loop as long as possible.</p>
*
* <p><strong>ORIGINS</strong></p>
*
* <p>Previous versions of ANTLR did a poor job of recovery within loops.
* A single mismatched or missing token would force the parser to bail
* out of all the rules surrounding the loop. So, for rule</p>
*
* <pre>
* classDef : 'class' ID '{' member* '}'
* </pre>
*
* input with an extra token between members would force the parser to
* consume until it found the next class definition rather than the next
* member definition of the current class.
*
* <p>This functionality costs a little bit of effort because the parser has to
* compare the token set at the start of the loop and at each iteration. If for
* some reason speed is suffering for you, you can turn off this
* functionality by simply overriding this method with an empty body ({ }).</p>
*/
@Override
public void sync(Parser recognizer) throws RecognitionException {
    ATNState s = recognizer.getInterpreter().atn.states.get(recognizer.getState());
    // If already recovering, don't try to sync
    if (inErrorRecoveryMode(recognizer)) {
        return;
    }
    TokenStream tokens = recognizer.getInputStream();
    int la = tokens.LA(1);
    // try cheaper subset first; might get lucky. seems to shave a wee bit off
    IntervalSet nextTokens = recognizer.getATN().nextTokens(s);
    if (nextTokens.contains(Token.EPSILON) || nextTokens.contains(la)) {
        return;
    }
    switch (s.getStateType()) {
        case ATNState.BLOCK_START:
        case ATNState.STAR_BLOCK_START:
        case ATNState.PLUS_BLOCK_START:
        case ATNState.STAR_LOOP_ENTRY:
            // report error and recover if possible
            if (singleTokenDeletion(recognizer) != null) {
                return;
            }
            throw new InputMismatchException(recognizer);
        case ATNState.PLUS_LOOP_BACK:
        case ATNState.STAR_LOOP_BACK:
            // System.err.println("at loop back: "+s.getClass().getSimpleName());
            reportUnwantedToken(recognizer);
            IntervalSet expecting = recognizer.getExpectedTokens();
            IntervalSet whatFollowsLoopIterationOrRule = expecting.or(getErrorRecoverySet(recognizer));
            consumeUntil(recognizer, whatFollowsLoopIterationOrRule);
            break;
        default:
            // do nothing if we can't identify the exact kind of ATN state
            break;
    }
}
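As the Javadoc above notes, the sync check can be switched off by overriding the method with an empty body. A minimal sketch of that override and its installation; the NoSyncErrorStrategy name is made up for illustration:

public class NoSyncErrorStrategy extends DefaultErrorStrategy {
    @Override
    public void sync(Parser recognizer) {
        // intentionally empty: skip the pre-subrule/loop consistency check
    }
}

// installation
parser.setErrorHandler(new NoSyncErrorStrategy());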
Use of org.antlr.v4.runtime.RecognitionException in project antlr4 by antlr.
Class GrammarParserInterpreter, method getAllPossibleParseTrees.
/** Given ambiguous parse information, return the list of ambiguous parse trees.
* An ambiguity occurs when a specific token sequence can be recognized
* in more than one way by the grammar. These ambiguities are detected only
* at decision points.
*
* The list of trees includes the actual interpretation (that for
* the minimum alternative number) and all ambiguous alternatives.
* The actual interpretation is always first.
*
* This method reuses the same physical input token stream used to
* detect the ambiguity by the original parser in the first place.
* This method resets/seeks within but does not alter originalParser.
*
* The trees are rooted at the node whose start..stop token indices
* include the start and stop indices of this ambiguity event. That is,
* the trees returned will always include the complete ambiguous subphrase
* identified by the ambiguity event. The subtrees returned will
* also always contain the node associated with the overridden decision.
*
* Be aware that this method does NOT notify error or parse listeners as
* it would trigger duplicate or otherwise unwanted events.
*
* This uses a temporary ParserATNSimulator and a ParserInterpreter
* so we don't mess up any statistics, event lists, etc...
* The parse tree constructed while identifying/making ambiguityInfo is
* not affected by this method as it creates a new parser interp to
* get the ambiguous interpretations.
*
* Nodes in the returned ambig trees are independent of the original parse
* tree (constructed while identifying/creating ambiguityInfo).
*
* @since 4.5.1
*
* @param g The grammar from which we should derive alternative
* numbers and alternative labels.
*
* @param originalParser The parser used to create ambiguityInfo; it
* is not modified by this routine and can be either
* a generated or interpreted parser. Its token
* stream *is* reset/seek()'d.
* @param tokens A stream of tokens to use with the temporary parser.
* This will often be just the token stream within the
* original parser but here it is for flexibility.
*
* @param decision Which decision to try different alternatives for.
*
* @param alts The set of alternatives to try while re-parsing.
*
* @param startIndex The index of the first token of the ambiguous
* input or other input of interest.
*
* @param stopIndex The index of the last token of the ambiguous input.
* The start and stop indexes are used primarily to
* identify how much of the resulting parse tree
* to return.
*
* @param startRuleIndex The start rule for the entire grammar, not
* the ambiguous decision. We re-parse the entire input
* and so we need the original start rule.
*
* @return The list of all possible interpretations of
* the input for the decision in ambiguityInfo.
* The actual interpretation chosen by the parser
* is always given first because this method
* retests the input in alternative order and
* ANTLR always resolves ambiguities by choosing
* the first alternative that matches the input.
* Each subtree returned encloses the complete
* ambiguous subphrase, as described above.
*
* @throws RecognitionException Thrown upon a syntax error while matching
* the ambiguous input.
*/
public static List<ParserRuleContext> getAllPossibleParseTrees(Grammar g,
                                                               Parser originalParser,
                                                               TokenStream tokens,
                                                               int decision,
                                                               BitSet alts,
                                                               int startIndex,
                                                               int stopIndex,
                                                               int startRuleIndex)
    throws RecognitionException
{
    List<ParserRuleContext> trees = new ArrayList<ParserRuleContext>();
    // Create a new parser interpreter to parse the ambiguous subphrase
    ParserInterpreter parser = deriveTempParserInterpreter(g, originalParser, tokens);
    if (stopIndex >= (tokens.size() - 1)) {
        // if we are pointing at EOF token
        // EOF is not in tree, so must be 1 less than last non-EOF token
        stopIndex = tokens.size() - 2;
    }
    // get ambig trees
    int alt = alts.nextSetBit(0);
    while (alt >= 0) {
        // re-parse entire input for all ambiguous alternatives
        // (don't have to do first as it's been parsed, but do again for simplicity
        //  using this temp parser.)
        parser.reset();
        parser.addDecisionOverride(decision, startIndex, alt);
        ParserRuleContext t = parser.parse(startRuleIndex);
        GrammarInterpreterRuleContext ambigSubTree =
            (GrammarInterpreterRuleContext) Trees.getRootOfSubtreeEnclosingRegion(t, startIndex, stopIndex);
        // Use higher of overridden decision tree or tree enclosing all tokens
        if (Trees.isAncestorOf(parser.getOverrideDecisionRoot(), ambigSubTree)) {
            ambigSubTree = (GrammarInterpreterRuleContext) parser.getOverrideDecisionRoot();
        }
        trees.add(ambigSubTree);
        alt = alts.nextSetBit(alt + 1);
    }
    return trees;
}
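A usage sketch of this method, under a few assumptions: the Grammar g was loaded through the ANTLR tool API, the parse was run with profiling enabled (parser.setProfile(true)) so that ambiguity events were recorded in the parse info, and startRuleIndex is the grammar's start rule. The dumpAmbiguities helper is illustrative, not part of ANTLR.

static void dumpAmbiguities(Grammar g, Parser parser, int startRuleIndex)
        throws RecognitionException {
    for (DecisionInfo di : parser.getParseInfo().getDecisionInfo()) {
        for (AmbiguityInfo ambig : di.ambiguities) {
            List<ParserRuleContext> trees =
                GrammarParserInterpreter.getAllPossibleParseTrees(
                    g, parser, parser.getTokenStream(),
                    ambig.decision, ambig.ambigAlts,
                    ambig.startIndex, ambig.stopIndex, startRuleIndex);
            // the first tree is the interpretation the parser actually chose
            for (ParserRuleContext t : trees) {
                System.out.println(t.toStringTree(parser));
            }
        }
    }
}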
Use of org.antlr.v4.runtime.RecognitionException in project antlr4 by antlr.
Class TestCodeGeneration, method getEvalInfoForString.
public List<String> getEvalInfoForString(String grammarString, String pattern) throws RecognitionException {
    ErrorQueue equeue = new ErrorQueue();
    Grammar g = new Grammar(grammarString);
    List<String> evals = new ArrayList<String>();
    if (g.ast != null && !g.ast.hasErrors) {
        SemanticPipeline sem = new SemanticPipeline(g);
        sem.process();
        ATNFactory factory = new ParserATNFactory(g);
        if (g.isLexer())
            factory = new LexerATNFactory((LexerGrammar) g);
        g.atn = factory.createATN();
        CodeGenerator gen = new CodeGenerator(g);
        ST outputFileST = gen.generateParser();
        // STViz viz = outputFileST.inspect();
        // try {
        //     viz.waitForClose();
        // }
        // catch (Exception e) {
        //     e.printStackTrace();
        // }
        boolean debug = false;
        DebugInterpreter interp = new DebugInterpreter(outputFileST.groupThatCreatedThisInstance,
                                                       outputFileST.impl.nativeGroup.errMgr,
                                                       debug);
        InstanceScope scope = new InstanceScope(null, outputFileST);
        StringWriter sw = new StringWriter();
        AutoIndentWriter out = new AutoIndentWriter(sw);
        interp.exec(out, scope);
        for (String e : interp.evals) {
            if (e.contains(pattern)) {
                evals.add(e);
            }
        }
    }
    if (equeue.size() > 0) {
        System.err.println(equeue.toString());
    }
    return evals;
}
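A hypothetical test sketch showing how this helper is typically used from within the test class: build a small grammar from a string and inspect the StringTemplate evals that mention a given pattern. The grammar text, the search pattern, and the test name are illustrative only.

@Test
public void printEvalsMentioningID() throws Exception {
    String grammar =
        "grammar T;\n" +
        "s : ID ';' ;\n" +
        "ID : [a-z]+ ;\n" +
        "WS : [ \\t\\r\\n]+ -> skip ;\n";
    // every returned eval contains the search pattern; here we simply print them
    List<String> evals = getEvalInfoForString(grammar, "ID");
    for (String eval : evals) {
        System.out.println(eval);
    }
}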