Use of org.antlr.v4.runtime.atn.ParserATNSimulator in project robozonky by RoboZonky.
The class SideEffectFreeParser, method modifyInterpreter.
private static void modifyInterpreter(final NaturalLanguageStrategyParser p) {
    final int originalSize = p.getInterpreter().decisionToDFA.length;
    // give the parser its own array so the shared static one isn't used
    final DFA[] emptyDFA = new DFA[originalSize];
    final ParserATNSimulator newInterpreter =
            new ParserATNSimulator(p, p.getATN(), emptyDFA, new PredictionContextCache());
    // initialize our array so that the parser functions properly
    newInterpreter.clearDFA();
    // replace the interpreter to bypass all static caches
    p.setInterpreter(newInterpreter);
}
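By default, every instance of a generated ANTLR parser shares one static decisionToDFA array, so the DFA cache persists for the lifetime of the class; the method above sidesteps that by giving each parser instance a private array. A minimal usage sketch follows; MyLexer, MyParser, and the start rule strategy() are hypothetical names for illustration, not taken from the project:

ParseTree parseIsolated(CharStream input) {
    MyLexer lexer = new MyLexer(input);                           // hypothetical generated lexer
    MyParser parser = new MyParser(new CommonTokenStream(lexer)); // hypothetical generated parser
    // same trick as modifyInterpreter above, applied to this parser type
    DFA[] privateDFA = new DFA[parser.getInterpreter().decisionToDFA.length];
    ParserATNSimulator isolated =
            new ParserATNSimulator(parser, parser.getATN(), privateDFA, new PredictionContextCache());
    isolated.clearDFA(); // seed the private array with fresh per-decision DFA roots
    parser.setInterpreter(isolated);
    return parser.strategy(); // hypothetical start rule
}

Once the parser becomes unreachable, its private DFA array and PredictionContextCache can be garbage-collected, which is the point of the exercise: no unbounded growth in a static cache.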
Use of org.antlr.v4.runtime.atn.ParserATNSimulator in project antlr4 by tunnelvisionlabs.
The class TestPerformance, method parseSources.
@SuppressWarnings("unused")
protected void parseSources(final int currentPass, final ParserFactory factory,
        Collection<InputDescriptor> sources, boolean shuffleSources) throws InterruptedException {
    if (shuffleSources) {
        List<InputDescriptor> sourcesList = new ArrayList<InputDescriptor>(sources);
        synchronized (RANDOM) {
            Collections.shuffle(sourcesList, RANDOM);
        }
        sources = sourcesList;
    }

    long startTime = System.nanoTime();
    tokenCount.set(currentPass, 0);
    int inputSize = 0;
    int inputCount = 0;

    Collection<Future<FileParseResult>> results = new ArrayList<Future<FileParseResult>>();
    ExecutorService executorService;
    if (FILE_GRANULARITY) {
        executorService = Executors.newFixedThreadPool(FILE_GRANULARITY ? NUMBER_OF_THREADS : 1, new NumberedThreadFactory());
    } else {
        executorService = Executors.newSingleThreadExecutor(new FixedThreadNumberFactory(((NumberedThread) Thread.currentThread()).getThreadNumber()));
    }

    for (InputDescriptor inputDescriptor : sources) {
        if (inputCount >= MAX_FILES_PER_PARSE_ITERATION) {
            break;
        }
        final CharStream input = inputDescriptor.getInputStream();
        input.seek(0);
        inputSize += input.size();
        inputCount++;
        Future<FileParseResult> futureChecksum = executorService.submit(new Callable<FileParseResult>() {
            @Override
            public FileParseResult call() {
                // System.out.format("Parsing file %s\n", input.getSourceName());
                try {
                    return factory.parseFile(input, currentPass, ((NumberedThread) Thread.currentThread()).getThreadNumber());
                } catch (IllegalStateException ex) {
                    ex.printStackTrace(System.err);
                } catch (Throwable t) {
                    t.printStackTrace(System.err);
                }
                return null;
            }
        });
        results.add(futureChecksum);
    }

    MurmurHashChecksum checksum = new MurmurHashChecksum();
    int currentIndex = -1;
    for (Future<FileParseResult> future : results) {
        currentIndex++;
        int fileChecksum = 0;
        try {
            FileParseResult fileResult = future.get();
            if (COMPUTE_TRANSITION_STATS) {
                totalTransitionsPerFile[currentPass][currentIndex] = sum(fileResult.parserTotalTransitions);
                computedTransitionsPerFile[currentPass][currentIndex] = sum(fileResult.parserComputedTransitions);
                if (DETAILED_DFA_STATE_STATS) {
                    decisionInvocationsPerFile[currentPass][currentIndex] = fileResult.decisionInvocations;
                    fullContextFallbackPerFile[currentPass][currentIndex] = fileResult.fullContextFallback;
                    nonSllPerFile[currentPass][currentIndex] = fileResult.nonSll;
                    totalTransitionsPerDecisionPerFile[currentPass][currentIndex] = fileResult.parserTotalTransitions;
                    computedTransitionsPerDecisionPerFile[currentPass][currentIndex] = fileResult.parserComputedTransitions;
                    fullContextTransitionsPerDecisionPerFile[currentPass][currentIndex] = fileResult.parserFullContextTransitions;
                }
            }
            if (COMPUTE_TIMING_STATS) {
                timePerFile[currentPass][currentIndex] = fileResult.endTime - fileResult.startTime;
                tokensPerFile[currentPass][currentIndex] = fileResult.tokenCount;
            }
            fileChecksum = fileResult.checksum;
        } catch (ExecutionException ex) {
            Logger.getLogger(TestPerformance.class.getName()).log(Level.SEVERE, null, ex);
        }
        if (COMPUTE_CHECKSUM) {
            updateChecksum(checksum, fileChecksum);
        }
    }

    executorService.shutdown();
    executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);

    System.out.format("%d. Total parse time for %d files (%d KB, %d tokens%s): %.0fms%n",
            currentPass + 1, inputCount, inputSize / 1024, tokenCount.get(currentPass),
            COMPUTE_CHECKSUM ? String.format(", checksum 0x%8X", checksum.getValue()) : "",
            (double) (System.nanoTime() - startTime) / 1000000.0);

    if (sharedLexers.length > 0) {
        int index = FILE_GRANULARITY ? 0 : ((NumberedThread) Thread.currentThread()).getThreadNumber();
        Lexer lexer = sharedLexers[index];
        final LexerATNSimulator lexerInterpreter = lexer.getInterpreter();
        final DFA[] modeToDFA = lexerInterpreter.atn.modeToDFA;
        if (SHOW_DFA_STATE_STATS) {
            int states = 0;
            int configs = 0;
            Set<ATNConfig> uniqueConfigs = new HashSet<ATNConfig>();
            for (int i = 0; i < modeToDFA.length; i++) {
                DFA dfa = modeToDFA[i];
                if (dfa == null) {
                    continue;
                }
                states += dfa.states.size();
                for (DFAState state : dfa.states.values()) {
                    configs += state.configs.size();
                    uniqueConfigs.addAll(state.configs);
                }
            }
            System.out.format("There are %d lexer DFAState instances, %d configs (%d unique), %d prediction contexts.%n",
                    states, configs, uniqueConfigs.size(), lexerInterpreter.atn.getContextCacheSize());
            if (DETAILED_DFA_STATE_STATS) {
                // columns: mode number, DFA state count, config count, mode name
                System.out.format("\tMode\tStates\tConfigs\tName%n");
                for (int i = 0; i < modeToDFA.length; i++) {
                    DFA dfa = modeToDFA[i];
                    if (dfa == null || dfa.states.isEmpty()) {
                        continue;
                    }
                    int modeConfigs = 0;
                    for (DFAState state : dfa.states.values()) {
                        modeConfigs += state.configs.size();
                    }
                    String modeName = lexer.getModeNames()[i];
                    System.out.format("\t%d\t%d\t%d\t%s%n", dfa.decision, dfa.states.size(), modeConfigs, modeName);
                }
            }
        }
    }

    if (RUN_PARSER && sharedParsers.length > 0) {
        int index = FILE_GRANULARITY ? 0 : ((NumberedThread) Thread.currentThread()).getThreadNumber();
        Parser parser = sharedParsers[index];
        // make sure the individual DFAState objects actually have unique ATNConfig arrays
        final ParserATNSimulator interpreter = parser.getInterpreter();
        final DFA[] decisionToDFA = interpreter.atn.decisionToDFA;
        if (SHOW_DFA_STATE_STATS) {
            int states = 0;
            int configs = 0;
            Set<ATNConfig> uniqueConfigs = new HashSet<ATNConfig>();
            for (int i = 0; i < decisionToDFA.length; i++) {
                DFA dfa = decisionToDFA[i];
                if (dfa == null) {
                    continue;
                }
                states += dfa.states.size();
                for (DFAState state : dfa.states.values()) {
                    configs += state.configs.size();
                    uniqueConfigs.addAll(state.configs);
                }
            }
            System.out.format("There are %d parser DFAState instances, %d configs (%d unique), %d prediction contexts.%n",
                    states, configs, uniqueConfigs.size(), interpreter.atn.getContextCacheSize());
            if (DETAILED_DFA_STATE_STATS) {
                if (COMPUTE_TRANSITION_STATS) {
                    System.out.format("\tDecision\tStates\tConfigs\tPredict (ALL)\tPredict (LL)\tNon-SLL\tTransitions\tTransitions (ATN)\tTransitions (LL)\tLA (SLL)\tLA (LL)\tRule%n");
                } else {
                    System.out.format("\tDecision\tStates\tConfigs\tRule%n");
                }
                for (int i = 0; i < decisionToDFA.length; i++) {
                    DFA dfa = decisionToDFA[i];
                    if (dfa == null || dfa.states.isEmpty()) {
                        continue;
                    }
                    int decisionConfigs = 0;
                    for (DFAState state : dfa.states.values()) {
                        decisionConfigs += state.configs.size();
                    }
                    String ruleName = parser.getRuleNames()[parser.getATN().decisionToState.get(dfa.decision).ruleIndex];
                    long calls = 0;
                    long fullContextCalls = 0;
                    long nonSllCalls = 0;
                    long transitions = 0;
                    long computedTransitions = 0;
                    long fullContextTransitions = 0;
                    double lookahead = 0;
                    double fullContextLookahead = 0;
                    String formatString;
                    if (COMPUTE_TRANSITION_STATS) {
                        for (long[] data : decisionInvocationsPerFile[currentPass]) {
                            calls += data[i];
                        }
                        for (long[] data : fullContextFallbackPerFile[currentPass]) {
                            fullContextCalls += data[i];
                        }
                        for (long[] data : nonSllPerFile[currentPass]) {
                            nonSllCalls += data[i];
                        }
                        for (long[] data : totalTransitionsPerDecisionPerFile[currentPass]) {
                            transitions += data[i];
                        }
                        for (long[] data : computedTransitionsPerDecisionPerFile[currentPass]) {
                            computedTransitions += data[i];
                        }
                        for (long[] data : fullContextTransitionsPerDecisionPerFile[currentPass]) {
                            fullContextTransitions += data[i];
                        }
                        if (calls > 0) {
                            lookahead = (double) (transitions - fullContextTransitions) / (double) calls;
                        }
                        if (fullContextCalls > 0) {
                            fullContextLookahead = (double) fullContextTransitions / (double) fullContextCalls;
                        }
                        formatString = "\t%1$d\t%2$d\t%3$d\t%4$d\t%5$d\t%6$d\t%7$d\t%8$d\t%9$d\t%10$f\t%11$f\t%12$s%n";
                    } else {
                        calls = 0;
                        formatString = "\t%1$d\t%2$d\t%3$d\t%12$s%n";
                    }
                    System.out.format(formatString, dfa.decision, dfa.states.size(), decisionConfigs, calls,
                            fullContextCalls, nonSllCalls, transitions, computedTransitions,
                            fullContextTransitions, lookahead, fullContextLookahead, ruleName);
                }
            }
        }

        int localDfaCount = 0;
        int globalDfaCount = 0;
        int localConfigCount = 0;
        int globalConfigCount = 0;
        int[] contextsInDFAState = new int[0];
        for (int i = 0; i < decisionToDFA.length; i++) {
            DFA dfa = decisionToDFA[i];
            if (dfa == null) {
                continue;
            }
            if (SHOW_CONFIG_STATS) {
                for (DFAState state : dfa.states.keySet()) {
                    if (state.configs.size() >= contextsInDFAState.length) {
                        contextsInDFAState = Arrays.copyOf(contextsInDFAState, state.configs.size() + 1);
                    }
                    if (state.isAcceptState()) {
                        boolean hasGlobal = false;
                        for (ATNConfig config : state.configs) {
                            if (config.getReachesIntoOuterContext()) {
                                globalConfigCount++;
                                hasGlobal = true;
                            } else {
                                localConfigCount++;
                            }
                        }
                        if (hasGlobal) {
                            globalDfaCount++;
                        } else {
                            localDfaCount++;
                        }
                    }
                    contextsInDFAState[state.configs.size()]++;
                }
            }
            if (EXPORT_LARGEST_CONFIG_CONTEXTS) {
                for (DFAState state : dfa.states.keySet()) {
                    for (ATNConfig config : state.configs) {
                        String configOutput = config.toDotString();
                        if (configOutput.length() <= configOutputSize) {
                            continue;
                        }
                        configOutputSize = configOutput.length();
                        writeFile(tmpdir, "d" + dfa.decision + ".s" + state.stateNumber + ".a" + config.getAlt() + ".config.dot", configOutput);
                    }
                }
            }
        }

        if (SHOW_CONFIG_STATS && currentPass == 0) {
            System.out.format(" DFA accept states: %d total, %d with only local context, %d with a global context%n",
                    localDfaCount + globalDfaCount, localDfaCount, globalDfaCount);
            System.out.format(" Config stats: %d total, %d local, %d global%n",
                    localConfigCount + globalConfigCount, localConfigCount, globalConfigCount);
            if (SHOW_DFA_STATE_STATS) {
                for (int i = 0; i < contextsInDFAState.length; i++) {
                    if (contextsInDFAState[i] != 0) {
                        System.out.format(" %d configs = %d%n", i, contextsInDFAState[i]);
                    }
                }
            }
        }
    }

    if (COMPUTE_TIMING_STATS) {
        System.out.format("File\tTokens\tTime%n");
        for (int i = 0; i < timePerFile[currentPass].length; i++) {
            System.out.format("%d\t%d\t%d%n", i + 1, tokensPerFile[currentPass][i], timePerFile[currentPass][i]);
        }
    }
}
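The harness above leans on every worker being a NumberedThread: a stable per-thread index is what makes sharedLexers[index] and sharedParsers[index] safe plain-array lookups with no synchronization. A sketch of what such a factory might look like; this is an assumption for illustration, not the harness's actual implementation:

import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicInteger;

static class NumberedThread extends Thread {
    private final int threadNumber;

    NumberedThread(Runnable target, int threadNumber) {
        super(target);
        this.threadNumber = threadNumber;
    }

    public final int getThreadNumber() {
        return threadNumber;
    }
}

static class NumberedThreadFactory implements ThreadFactory {
    private final AtomicInteger nextNumber = new AtomicInteger();

    @Override
    public Thread newThread(Runnable runnable) {
        // hand out 0, 1, 2, ... so each worker can index per-thread state arrays
        return new NumberedThread(runnable, nextNumber.getAndIncrement());
    }
}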
Use of org.antlr.v4.runtime.atn.ParserATNSimulator in project checkstyle by checkstyle.
The class JavaAstVisitorTest, method testNoStackOverflowOnDeepStringConcat.
/**
 * This test exists to kill a surviving pitest mutation that removes the expression
 * AST building optimization in {@link JavaAstVisitor#visitBinOp(JavaLanguageParser.BinOpContext)}.
 * We do not use {@link JavaParser#parse(FileContents)} here due to the DFA-clearing hack.
 *
 * <p>
 * Reason: we build the expression AST iteratively to avoid a StackOverflowError
 * in {@link JavaAstVisitor#visitBinOp(JavaLanguageParser.BinOpContext)}. In the actual
 * generated parser, we avoid the overflow thanks to the left-recursive expression
 * rule (eliminating unnecessary recursive calls to hierarchical expression production rules).
 * However, ANTLR's ParserATNSimulator has no such optimization, so the depth of recursive
 * calls to ParserATNSimulator#closure when ParserATNSimulator#clearDFA is invoked causes a
 * StackOverflowError. We avoid this by using the single-argument constructor (which does
 * not force DFA clearing) in this test.
 * </p>
 *
 * @throws Exception if the input file does not exist
 */
@Test
public void testNoStackOverflowOnDeepStringConcat() throws Exception {
    final File file = new File(getPath("InputJavaAstVisitorNoStackOverflowOnDeepStringConcat.java"));
    final FileText fileText = new FileText(file, StandardCharsets.UTF_8.name());
    final FileContents contents = new FileContents(fileText);
    final String fullText = contents.getText().getFullText().toString();
    final CharStream codePointCharStream = CharStreams.fromString(fullText);
    final JavaLanguageLexer lexer = new JavaLanguageLexer(codePointCharStream, true);
    lexer.setCommentListener(contents);
    final CommonTokenStream tokenStream = new CommonTokenStream(lexer);
    final JavaLanguageParser parser = new JavaLanguageParser(tokenStream);
    final JavaLanguageParser.CompilationUnitContext compilationUnit = parser.compilationUnit();
    // We restrict execution to use limited resources here, so that we can
    // kill surviving pitest mutation from removal of nested binary operation
    // optimization in JavaAstVisitor#visitBinOp. Limited resources (small stack size)
    // ensure that we throw a StackOverflowError if optimization is removed.
    final DetailAST root = TestUtil.getResultWithLimitedResources(
            () -> new JavaAstVisitor(tokenStream).visit(compilationUnit));
    assertWithMessage("File parsing and AST building should complete successfully.")
            .that(root)
            .isNotNull();
}
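TestUtil.getResultWithLimitedResources is checkstyle's own helper, but the underlying trick is plain Java: run the work on a thread whose stack is deliberately small, so a regression to deep recursion fails fast with a StackOverflowError. A hedged sketch of that idea; the helper name and the 64 KB figure are illustrative, not checkstyle's actual values:

import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicReference;

static <T> T runWithLimitedStack(Callable<T> task) throws Exception {
    AtomicReference<T> result = new AtomicReference<>();
    AtomicReference<Throwable> failure = new AtomicReference<>();
    // Thread(group, target, name, stackSize): stackSize is a hint the JVM may round or ignore
    Thread worker = new Thread(null, () -> {
        try {
            result.set(task.call());
        } catch (Throwable ex) {
            // also catches StackOverflowError, so a regression surfaces as a test failure
            failure.set(ex);
        }
    }, "limited-stack", 64 * 1024);
    worker.start();
    worker.join();
    if (failure.get() != null) {
        throw new IllegalStateException("task failed on small-stack thread", failure.get());
    }
    return result.get();
}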
Use of org.antlr.v4.runtime.atn.ParserATNSimulator in project antlr4 by antlr.
The class TestATNParserPrediction, method checkPredictedAlt.
/**
 * First check that the ATN predicts the right alt.
 * Then check adaptive prediction.
 */
public void checkPredictedAlt(LexerGrammar lg, Grammar g, int decision, String inputString, int expectedAlt) {
    Tool.internalOption_ShowATNConfigsInDFA = true;
    ATN lexatn = createATN(lg, true);
    LexerATNSimulator lexInterp = new LexerATNSimulator(lexatn,
            new DFA[] { new DFA(lexatn.modeToStartState.get(Lexer.DEFAULT_MODE)) },
            new PredictionContextCache());
    IntegerList types = getTokenTypesViaATN(inputString, lexInterp);
    // System.out.println(types);
    semanticProcess(lg);
    g.importVocab(lg);
    semanticProcess(g);
    ParserATNFactory f = new ParserATNFactory(g);
    ATN atn = f.createATN();
    DOTGenerator dot = new DOTGenerator(g);
    Rule r = g.getRule("a");
    // if ( r!=null ) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
    r = g.getRule("b");
    // if ( r!=null ) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
    r = g.getRule("e");
    // if ( r!=null ) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
    r = g.getRule("ifstat");
    // if ( r!=null ) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
    r = g.getRule("block");
    // if ( r!=null ) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
    // Check ATN prediction
    // ParserATNSimulator interp = new ParserATNSimulator(atn);
    TokenStream input = new MockIntTokenStream(types);
    ParserInterpreterForTesting interp = new ParserInterpreterForTesting(g, input);
    int alt = interp.adaptivePredict(input, decision, ParserRuleContext.EMPTY);
    assertEquals(expectedAlt, alt);
    // Check adaptive prediction
    input.seek(0);
    alt = interp.adaptivePredict(input, decision, null);
    assertEquals(expectedAlt, alt);
    // run 2x; first time creates DFA in atn
    input.seek(0);
    alt = interp.adaptivePredict(input, decision, null);
    assertEquals(expectedAlt, alt);
}
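The "run 2x" comment is the heart of the test: the first adaptivePredict call computes ATN configurations and records them as DFA states, and the repeat must be answered from that cache with the same alternative. With the plain runtime API the warm-up effect can be observed directly; this sketch assumes a generated parser with a hypothetical start rule someRule() that exercises the decision:

ParserATNSimulator sim = parser.getInterpreter();
int statesBefore = sim.decisionToDFA[decision].states.size();
parser.someRule(); // hypothetical start rule
int statesAfter = sim.decisionToDFA[decision].states.size();
// statesAfter >= statesBefore: DFA states accumulate on first use and are reused by later parses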
Use of org.antlr.v4.runtime.atn.ParserATNSimulator in project antlr4 by antlr.
The class GrammarParserInterpreter, method getAllPossibleParseTrees.
/**
 * Given ambiguous parse information, return the list of ambiguous parse trees.
 * An ambiguity occurs when a specific token sequence can be recognized
 * in more than one way by the grammar. These ambiguities are detected only
 * at decision points.
 *
 * The list of trees includes the actual interpretation (that for
 * the minimum alternative number) and all ambiguous alternatives.
 * The actual interpretation is always first.
 *
 * This method reuses the same physical input token stream used to
 * detect the ambiguity by the original parser in the first place.
 * This method resets/seeks within but does not alter originalParser.
 *
 * The trees are rooted at the node whose start..stop token indices
 * include the start and stop indices of this ambiguity event. That is,
 * the trees returned will always include the complete ambiguous subphrase
 * identified by the ambiguity event. The subtrees returned will
 * also always contain the node associated with the overridden decision.
 *
 * Be aware that this method does NOT notify error or parse listeners as
 * it would trigger duplicate or otherwise unwanted events.
 *
 * This uses a temporary ParserATNSimulator and a ParserInterpreter
 * so we don't mess up any statistics, event lists, etc...
 * The parse tree constructed while identifying/making ambiguityInfo is
 * not affected by this method as it creates a new parser interpreter to
 * get the ambiguous interpretations.
 *
 * Nodes in the returned ambig trees are independent of the original parse
 * tree (constructed while identifying/creating ambiguityInfo).
 *
 * @since 4.5.1
 *
 * @param g              From which grammar should we derive alternative
 *                       numbers and alternative labels.
 *
 * @param originalParser The parser used to create ambiguityInfo; it
 *                       is not modified by this routine and can be either
 *                       a generated or interpreted parser. Its token
 *                       stream *is* reset/seek()'d.
 * @param tokens         A stream of tokens to use with the temporary parser.
 *                       This will often be just the token stream within the
 *                       original parser but here it is for flexibility.
 *
 * @param decision       Which decision to try different alternatives for.
 *
 * @param alts           The set of alternatives to try while re-parsing.
 *
 * @param startIndex     The index of the first token of the ambiguous
 *                       input or other input of interest.
 *
 * @param stopIndex      The index of the last token of the ambiguous input.
 *                       The start and stop indexes are used primarily to
 *                       identify how much of the resulting parse tree
 *                       to return.
 *
 * @param startRuleIndex The start rule for the entire grammar, not
 *                       the ambiguous decision. We re-parse the entire input
 *                       and so we need the original start rule.
 *
 * @return The list of all possible interpretations of
 *         the input for the decision in ambiguityInfo.
 *         The actual interpretation chosen by the parser
 *         is always given first because this method
 *         retests the input in alternative order and
 *         ANTLR always resolves ambiguities by choosing
 *         the first alternative that matches the input.
 *         The subtrees returned are rooted as described above.
 *
 * @throws RecognitionException Thrown upon syntax error while matching
 *                              ambig input.
 */
public static List<ParserRuleContext> getAllPossibleParseTrees(Grammar g, Parser originalParser, TokenStream tokens,
        int decision, BitSet alts, int startIndex, int stopIndex, int startRuleIndex) throws RecognitionException {
    List<ParserRuleContext> trees = new ArrayList<ParserRuleContext>();
    // Create a new parser interpreter to parse the ambiguous subphrase
    ParserInterpreter parser = deriveTempParserInterpreter(g, originalParser, tokens);
    if (stopIndex >= (tokens.size() - 1)) {
        // if we are pointing at EOF token
        // EOF is not in tree, so must be 1 less than last non-EOF token
        stopIndex = tokens.size() - 2;
    }
    // get ambig trees
    int alt = alts.nextSetBit(0);
    while (alt >= 0) {
        // re-parse entire input for all ambiguous alternatives
        // (don't have to do first as it's been parsed, but do again for simplicity
        // using this temp parser.)
        parser.reset();
        parser.addDecisionOverride(decision, startIndex, alt);
        ParserRuleContext t = parser.parse(startRuleIndex);
        GrammarInterpreterRuleContext ambigSubTree =
                (GrammarInterpreterRuleContext) Trees.getRootOfSubtreeEnclosingRegion(t, startIndex, stopIndex);
        // Use higher of overridden decision tree or tree enclosing all tokens
        if (Trees.isAncestorOf(parser.getOverrideDecisionRoot(), ambigSubTree)) {
            ambigSubTree = (GrammarInterpreterRuleContext) parser.getOverrideDecisionRoot();
        }
        trees.add(ambigSubTree);
        alt = alts.nextSetBit(alt + 1);
    }
    return trees;
}
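A hedged usage sketch: this method is typically driven from tooling in response to an ambiguity report. The wiring below assumes a surrounding listener class that holds the Grammar g and the startRuleIndex; those names are illustrative context, not part of the method above:

@Override
public void reportAmbiguity(Parser recognizer, DFA dfa, int startIndex, int stopIndex,
                            boolean exact, BitSet ambigAlts, ATNConfigSet configs) {
    // re-parse once per ambiguous alternative and print each interpretation
    List<ParserRuleContext> interpretations = GrammarParserInterpreter.getAllPossibleParseTrees(
            g, recognizer, recognizer.getTokenStream(), dfa.decision,
            ambigAlts, startIndex, stopIndex, startRuleIndex);
    for (ParserRuleContext tree : interpretations) {
        System.out.println(Trees.toStringTree(tree, recognizer));
    }
}

Because the helper re-parses on a temporary ParserInterpreter, running it from inside the listener does not disturb the original parser's state beyond the documented token-stream reset/seek.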