Use of edu.stanford.nlp.parser.metrics.ParserQueryEval in project CoreNLP by stanfordnlp.
In the class EvaluateTreebank, method testOnTreebank.
/**
 * Test the parser on a treebank. Parses will be written to stdout, and
 * various other information will be written to stderr and stdout,
 * particularly if {@code op.testOptions.verbose} is true.
 * <p>
 * If {@code op.testOptions.quietEvaluation} is set, the per-sentence output
 * is sent to a {@code NullOutputStream}; the end-of-run reports are still
 * written to {@code System.err}.
 * <p>
 * The optional output files (enabled by {@code op.testOptions.writeOutputFiles}
 * and {@code op.testOptions.outputkBestEquivocation}) are closed even if
 * parsing or evaluation throws. If one of them cannot be opened, the stack
 * trace is printed and testing continues without that file.
 *
 * @param testTreebank The treebank to parse
 * @return The labeled precision/recall F<sub>1</sub> (EVALB measure)
 *         of the parser on the treebank, or 0.0 if {@code factLB} is null
 */
public double testOnTreebank(Treebank testTreebank) {
  log.info("Testing on treebank");
  Timing treebankTotalTimer = new Timing();
  TreePrint treePrint = op.testOptions.treePrint(op.tlpParams);
  TreebankLangParserParams tlpParams = op.tlpParams;
  TreebankLanguagePack tlp = op.langpack();
  PrintWriter pwOut, pwErr;
  if (op.testOptions.quietEvaluation) {
    NullOutputStream quiet = new NullOutputStream();
    pwOut = tlpParams.pw(quiet);
    pwErr = tlpParams.pw(quiet);
  } else {
    pwOut = tlpParams.pw();
    pwErr = tlpParams.pw(System.err);
  }
  if (op.testOptions.verbose) {
    pwErr.print("Testing ");
    pwErr.println(testTreebank.textualSummary(tlp));
  }
  if (op.testOptions.evalb) {
    EvalbFormatWriter.initEVALBfiles(tlpParams);
  }

  PrintWriter pwFileOut = null;
  PrintWriter pwStats = null;
  double f1 = 0.0;
  try {
    if (op.testOptions.writeOutputFiles) {
      String fname = op.testOptions.outputFilesPrefix + "." + op.testOptions.outputFilesExtension;
      try {
        pwFileOut = op.tlpParams.pw(new FileOutputStream(fname));
      } catch (IOException ioe) {
        // Best effort: carry on without the output file.
        ioe.printStackTrace();
      }
    }
    if (op.testOptions.outputkBestEquivocation != null) {
      try {
        pwStats = op.tlpParams.pw(new FileOutputStream(op.testOptions.outputkBestEquivocation));
      } catch (IOException ioe) {
        // Best effort: carry on without the statistics file.
        ioe.printStackTrace();
      }
    }

    if (op.testOptions.testingThreads != 1) {
      MulticoreWrapper<List<? extends HasWord>, ParserQuery> wrapper = new MulticoreWrapper<>(op.testOptions.testingThreads, new ParsingThreadsafeProcessor(pqFactory, pwErr));
      // Gold trees are queued in submission order and polled in lockstep with
      // the wrapper's results, so this relies on results coming back in the
      // same order the sentences were put.
      LinkedList<Tree> goldTrees = new LinkedList<>();
      for (Tree goldTree : testTreebank) {
        List<? extends HasWord> sentence = getInputSentence(goldTree);
        goldTrees.add(goldTree);
        pwErr.println("Parsing [len. " + sentence.size() + "]: " + SentenceUtils.listToString(sentence));
        wrapper.put(sentence);
        while (wrapper.peek()) {
          ParserQuery pq = wrapper.poll();
          goldTree = goldTrees.poll();
          processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
        }
      }
      // for tree iterator
      wrapper.join();
      // Drain whatever finished after the last sentence was submitted.
      while (wrapper.peek()) {
        ParserQuery pq = wrapper.poll();
        Tree goldTree = goldTrees.poll();
        processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
      }
    } else {
      ParserQuery pq = pqFactory.parserQuery();
      for (Tree goldTree : testTreebank) {
        final List<CoreLabel> sentence = getInputSentence(goldTree);
        pwErr.println("Parsing [len. " + sentence.size() + "]: " + SentenceUtils.listToString(sentence));
        pq.parseAndReport(sentence, pwErr);
        processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
      }
      // for tree iterator
    }

    //Done parsing...print the results of the evaluations
    treebankTotalTimer.done("Testing on treebank");
    if (op.testOptions.quietEvaluation) {
      // Per-sentence output was suppressed; the final reports go to stderr.
      pwErr = tlpParams.pw(System.err);
    }
    if (saidMemMessage) {
      ParserUtils.printOutOfMemory(pwErr);
    }
    if (op.testOptions.evalb) {
      EvalbFormatWriter.closeEVALBfiles();
    }
    if (numSkippedEvals != 0) {
      pwErr.printf("Unable to evaluate %d parser hypotheses due to yield mismatch\n", numSkippedEvals);
    }
    // only created here so we know what parser types are supported...
    ParserQuery pq = pqFactory.parserQuery();
    if (summary) {
      if (pcfgLB != null) {
        pcfgLB.display(false, pwErr);
      }
      if (pcfgChildSpecific != null) {
        pcfgChildSpecific.display(false, pwErr);
      }
      if (pcfgLA != null) {
        pcfgLA.display(false, pwErr);
      }
      if (pcfgCB != null) {
        pcfgCB.display(false, pwErr);
      }
      if (pcfgDA != null) {
        pcfgDA.display(false, pwErr);
      }
      if (pcfgTA != null) {
        pcfgTA.display(false, pwErr);
      }
      if (pcfgLL != null && pq.getPCFGParser() != null) {
        pcfgLL.display(false, pwErr);
      }
      if (depDA != null) {
        depDA.display(false, pwErr);
      }
      if (depTA != null) {
        depTA.display(false, pwErr);
      }
      if (depLL != null && pq.getDependencyParser() != null) {
        depLL.display(false, pwErr);
      }
      if (factLB != null) {
        factLB.display(false, pwErr);
      }
      if (factChildSpecific != null) {
        factChildSpecific.display(false, pwErr);
      }
      if (factLA != null) {
        factLA.display(false, pwErr);
      }
      if (factCB != null) {
        factCB.display(false, pwErr);
      }
      if (factDA != null) {
        factDA.display(false, pwErr);
      }
      if (factTA != null) {
        factTA.display(false, pwErr);
      }
      if (factLL != null && pq.getFactoredParser() != null) {
        factLL.display(false, pwErr);
      }
      if (pcfgCatE != null) {
        pcfgCatE.display(false, pwErr);
      }
      for (Eval eval : evals) {
        eval.display(false, pwErr);
      }
      for (BestOfTopKEval eval : topKEvals) {
        eval.display(false, pwErr);
      }
    }
    // these ones only have a display mode, so display if turned on!!
    if (pcfgRUO != null) {
      pcfgRUO.display(true, pwErr);
    }
    if (pcfgCUO != null) {
      pcfgCUO.display(true, pwErr);
    }
    if (tsv) {
      // One header row and one value row; a column is left empty when its
      // evaluation is unavailable, but the tab separator is always written.
      NumberFormat nf = new DecimalFormat("0.00");
      pwErr.println("factF1\tfactDA\tfactEx\tpcfgF1\tdepDA\tfactTA\tnum");
      if (factLB != null) {
        pwErr.print(nf.format(factLB.getEvalbF1Percent()));
      }
      pwErr.print("\t");
      if (pq.getDependencyParser() != null && factDA != null) {
        pwErr.print(nf.format(factDA.getEvalbF1Percent()));
      }
      pwErr.print("\t");
      if (factLB != null) {
        pwErr.print(nf.format(factLB.getExactPercent()));
      }
      pwErr.print("\t");
      if (pcfgLB != null) {
        pwErr.print(nf.format(pcfgLB.getEvalbF1Percent()));
      }
      pwErr.print("\t");
      if (pq.getDependencyParser() != null && depDA != null) {
        pwErr.print(nf.format(depDA.getEvalbF1Percent()));
      }
      pwErr.print("\t");
      if (pq.getPCFGParser() != null && factTA != null) {
        pwErr.print(nf.format(factTA.getEvalbF1Percent()));
      }
      pwErr.print("\t");
      if (factLB != null) {
        pwErr.print(factLB.getNum());
      }
      pwErr.println();
    }
    if (factLB != null) {
      f1 = factLB.getEvalbF1();
    }
  } finally {
    //Close files (if necessary), including when parsing or evaluation failed
    if (pwFileOut != null) {
      pwFileOut.close();
    }
    if (pwStats != null) {
      pwStats.close();
    }
  }
  if (parserQueryEvals != null) {
    for (ParserQueryEval parserQueryEval : parserQueryEvals) {
      parserQueryEval.display(false, pwErr);
    }
  }
  return f1;
}
Use of edu.stanford.nlp.parser.metrics.ParserQueryEval in project CoreNLP by stanfordnlp.
In the class EvaluateTreebank, method processResults.
/**
 * Prints the parse held by one finished {@code ParserQuery} and scores it
 * against the gold tree with every evaluation object that is non-null.
 * <p>
 * Sets {@code saidMemMessage} if the query reported a memory message, and
 * increments {@code numSkippedEvals} (returning early) when either tree
 * cannot be transformed for evaluation or the transformed yields differ in
 * length.
 *
 * @param pq        The query whose results are reported; if it has no parse
 *                  ({@code NoSuchParseException}), a null tree is handed to
 *                  {@code treePrint} and no evaluation is performed
 * @param goldTree  The gold-standard tree for the same sentence
 * @param pwErr     Receives warnings and is passed to most evaluations
 * @param pwOut     Receives the printed parse(s)
 * @param pwFileOut If non-null, receives the 1-best tree when none of the
 *                  k-best printing options applies
 * @param pwStats   If non-null, receives one line per sentence with the
 *                  k-best entropy (in bits), the number of k-best trees used
 *                  and the sentence length
 * @param treePrint Used to print the best parse and, in verbose mode, the
 *                  gold tree
 */
public void processResults(ParserQuery pq, Tree goldTree, PrintWriter pwErr, PrintWriter pwOut, PrintWriter pwFileOut, PrintWriter pwStats, TreePrint treePrint) {
  if (pq.saidMemMessage()) {
    saidMemMessage = true;
  }
  Tree tree;
  List<? extends HasWord> sentence = pq.originalSentence();
  try {
    tree = pq.getBestParse();
  } catch (NoSuchParseException e) {
    tree = null;
  }
  List<ScoredObject<Tree>> kbestPCFGTrees = null;
  if (tree != null && kbestPCFG > 0) {
    kbestPCFGTrees = pq.getKBestPCFGParses(kbestPCFG);
  }

  //combo parse goes to pwOut (System.out)
  if (op.testOptions.verbose) {
    pwOut.println("ComboParser best");
    Tree ot = tree;
    // Wrap the parse under the start symbol if it is not already rooted there.
    if (ot != null && !op.tlpParams.treebankLanguagePack().isStartSymbol(ot.value())) {
      ot = ot.treeFactory().newTreeNode(op.tlpParams.treebankLanguagePack().startSymbol(), Collections.singletonList(ot));
    }
    treePrint.printTree(ot, pwOut);
  } else {
    treePrint.printTree(tree, pwOut);
  }

  // print various statistics
  if (tree != null) {
    if (op.testOptions.printAllBestParses) {
      List<ScoredObject<Tree>> parses = pq.getBestPCFGParses();
      int sz = parses.size();
      if (sz > 1) {
        pwOut.println("There were " + sz + " best PCFG parses with score " + parses.get(0).score() + '.');
        Tree transGoldTree = collinizer.transformTree(goldTree);
        int iii = 0;
        for (ScoredObject<Tree> sot : parses) {
          iii++;
          Tree tb = sot.object();
          Tree tbd = debinarizer.transformTree(tb);
          tbd = subcategoryStripper.transformTree(tbd);
          pq.restoreOriginalWords(tbd);
          pwOut.println("PCFG Parse #" + iii + " with score " + tbd.score());
          tbd.pennPrint(pwOut);
          Tree tbtr = collinizer.transformTree(tbd);
          // pwOut.println("Tree size = " + tbtr.size() + "; depth = " + tbtr.depth());
          kGoodLB.evaluate(tbtr, transGoldTree, pwErr);
        }
      }
    } else if (op.testOptions.printPCFGkBest > 0 && op.testOptions.outputkBestEquivocation == null) {
      // Huang and Chiang (2006) Algorithm 3 output from the PCFG parser
      // Clamp to the number of parses actually returned so that a short
      // k-best list does not make subList throw.
      int numToPrint = Math.min(op.testOptions.printPCFGkBest, kbestPCFGTrees.size());
      List<ScoredObject<Tree>> trees = kbestPCFGTrees.subList(0, numToPrint);
      Tree transGoldTree = collinizer.transformTree(goldTree);
      int i = 0;
      for (ScoredObject<Tree> tp : trees) {
        i++;
        pwOut.println("PCFG Parse #" + i + " with score " + tp.score());
        Tree tbd = tp.object();
        tbd.pennPrint(pwOut);
        Tree tbtr = collinizer.transformTree(tbd);
        kGoodLB.evaluate(tbtr, transGoldTree, pwErr);
      }
    } else if (op.testOptions.printFactoredKGood > 0 && pq.hasFactoredParse()) {
      // Chart parser (factored) n-best list
      // DZ: debug n best trees
      List<ScoredObject<Tree>> trees = pq.getKGoodFactoredParses(op.testOptions.printFactoredKGood);
      Tree transGoldTree = collinizer.transformTree(goldTree);
      int ii = 0;
      for (ScoredObject<Tree> tp : trees) {
        ii++;
        pwOut.println("Factored Parse #" + ii + " with score " + tp.score());
        Tree tbd = tp.object();
        tbd.pennPrint(pwOut);
        Tree tbtr = collinizer.transformTree(tbd);
        kGoodLB.evaluate(tbtr, transGoldTree, pwOut);
      }
    } else if (pwFileOut != null) {
      //1-best output
      pwFileOut.println(tree.toString());
    }

    //Print the derivational entropy
    // pwStats is null when the statistics file could not be opened, in which
    // case there is nowhere to write the line.
    if (op.testOptions.outputkBestEquivocation != null && op.testOptions.printPCFGkBest > 0 && pwStats != null) {
      int numForEntropy = Math.min(op.testOptions.printPCFGkBest, kbestPCFGTrees.size());
      List<ScoredObject<Tree>> trees = kbestPCFGTrees.subList(0, numForEntropy);
      double[] logScores = new double[trees.size()];
      int treeId = 0;
      for (ScoredObject<Tree> kBestTree : trees) {
        logScores[treeId++] = kBestTree.score();
      }
      //Re-normalize
      double entropy = 0.0;
      double denom = ArrayMath.logSum(logScores);
      for (double logScore : logScores) {
        double logPr = logScore - denom;
        entropy += Math.exp(logPr) * (logPr / Math.log(2));
      }
      //Convert to bits
      entropy *= -1;
      pwStats.printf("%f\t%d\t%d\n", entropy, trees.size(), sentence.size());
    }
  }

  // Perform various evaluations specified by the user
  if (tree != null) {
    //Strip subcategories and remove punctuation for evaluation
    tree = subcategoryStripper.transformTree(tree);
    Tree treeFact = collinizer.transformTree(tree);

    //Setup the gold tree
    if (op.testOptions.verbose) {
      pwOut.println("Correct parse");
      treePrint.printTree(goldTree, pwOut);
    }
    Tree transGoldTree = collinizer.transformTree(goldTree);
    if (transGoldTree != null) {
      transGoldTree = subcategoryStripper.transformTree(transGoldTree);
    }

    //Can't do evaluation in these cases
    if (transGoldTree == null) {
      pwErr.println("Couldn't transform gold tree for evaluation, skipping eval. Gold tree was:");
      goldTree.pennPrint(pwErr);
      numSkippedEvals++;
      return;
    } else if (treeFact == null) {
      pwErr.println("Couldn't transform hypothesis tree for evaluation, skipping eval. Tree was:");
      tree.pennPrint(pwErr);
      numSkippedEvals++;
      return;
    } else if (treeFact.yield().size() != transGoldTree.yield().size()) {
      List<Label> fYield = treeFact.yield();
      List<Label> gYield = transGoldTree.yield();
      pwErr.println("WARNING: Evaluation could not be performed due to gold/parsed yield mismatch.");
      pwErr.printf(" sizes: gold: %d (transf) %d (orig); parsed: %d (transf) %d (orig).%n", gYield.size(), goldTree.yield().size(), fYield.size(), tree.yield().size());
      pwErr.println(" gold: " + SentenceUtils.listToString(gYield, true));
      pwErr.println(" pars: " + SentenceUtils.listToString(fYield, true));
      numSkippedEvals++;
      return;
    }

    if (topKEvals.size() > 0) {
      List<Tree> transGuesses = new ArrayList<>();
      int kbest = Math.min(op.testOptions.evalPCFGkBest, kbestPCFGTrees.size());
      for (ScoredObject<Tree> guess : kbestPCFGTrees.subList(0, kbest)) {
        transGuesses.add(collinizer.transformTree(guess.object()));
      }
      for (BestOfTopKEval eval : topKEvals) {
        eval.evaluate(transGuesses, transGoldTree, pwErr);
      }
    }

    //PCFG eval
    Tree treePCFG = pq.getBestPCFGParse();
    if (treePCFG != null) {
      Tree treePCFGeval = collinizer.transformTree(treePCFG);
      if (pcfgLB != null) {
        pcfgLB.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgChildSpecific != null) {
        pcfgChildSpecific.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgLA != null) {
        pcfgLA.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgCB != null) {
        pcfgCB.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgDA != null) {
        // Re-index the leaves after Collinization, stripping traces, etc.
        treePCFGeval.indexLeaves(true);
        transGoldTree.indexLeaves(true);
        pcfgDA.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgTA != null) {
        pcfgTA.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgLL != null && pq.getPCFGParser() != null) {
        pcfgLL.recordScore(pq.getPCFGParser(), pwErr);
      }
      if (pcfgRUO != null) {
        pcfgRUO.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgCUO != null) {
        pcfgCUO.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgCatE != null) {
        pcfgCatE.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
    }

    //Dependency eval
    // todo: is treeDep really useful here, or should we really use depDAEval tree (debinarized) throughout? We use it for parse, and it sure seems like we could use it for tag eval, but maybe not factDA?
    Tree treeDep = pq.getBestDependencyParse(false);
    if (treeDep != null) {
      Tree goldTreeB = binarizerOnly.transformTree(goldTree);

      // Work on a copy so indexing and head percolation leave goldTree untouched.
      Tree goldTreeEval = goldTree.deepCopy();
      goldTreeEval.indexLeaves(true);
      goldTreeEval.percolateHeads(op.langpack().headFinder());

      Tree depDAEval = pq.getBestDependencyParse(true);
      depDAEval.indexLeaves(true);
      depDAEval.percolateHeadIndices();
      if (depDA != null) {
        depDA.evaluate(depDAEval, goldTreeEval, pwErr);
      }
      if (depTA != null) {
        Tree undoneTree = debinarizer.transformTree(treeDep);
        undoneTree = subcategoryStripper.transformTree(undoneTree);
        pq.restoreOriginalWords(undoneTree);
        // pwErr.println("subcategoryStripped tree: " + undoneTree.toStructureDebugString());
        depTA.evaluate(undoneTree, goldTree, pwErr);
      }
      if (depLL != null && pq.getDependencyParser() != null) {
        depLL.recordScore(pq.getDependencyParser(), pwErr);
      }
      Tree factTreeB;
      if (pq.hasFactoredParse()) {
        factTreeB = pq.getBestFactoredParse();
      } else {
        factTreeB = treeDep;
      }
      if (factDA != null) {
        factDA.evaluate(factTreeB, goldTreeB, pwErr);
      }
    }

    //Factored parser (1best) eval
    if (factLB != null) {
      factLB.evaluate(treeFact, transGoldTree, pwErr);
    }
    if (factChildSpecific != null) {
      factChildSpecific.evaluate(treeFact, transGoldTree, pwErr);
    }
    if (factLA != null) {
      factLA.evaluate(treeFact, transGoldTree, pwErr);
    }
    if (factTA != null) {
      factTA.evaluate(tree, boundaryRemover.transformTree(goldTree), pwErr);
    }
    if (factLL != null && pq.getFactoredParser() != null) {
      factLL.recordScore(pq.getFactoredParser(), pwErr);
    }
    if (factCB != null) {
      factCB.evaluate(treeFact, transGoldTree, pwErr);
    }
    for (Eval eval : evals) {
      eval.evaluate(treeFact, transGoldTree, pwErr);
    }
    if (parserQueryEvals != null) {
      for (ParserQueryEval eval : parserQueryEvals) {
        eval.evaluate(pq, transGoldTree, pwErr);
      }
    }
    if (op.testOptions.evalb) {
      // empty out scores just in case
      nanScores(tree);
      EvalbFormatWriter.writeEVALBline(treeFact, transGoldTree);
    }
  }
  pwErr.println();
}
Aggregations