Use of edu.stanford.nlp.trees.TreePrint in project CoreNLP by stanfordnlp.
The class LexicalizedParserITest, method testChineseDependenciesSemanticHead.
public void testChineseDependenciesSemanticHead() {
  Tree tree = chineseParser.parse(chineseTest2);
  compareSingleOutput(tree, false, chinesePennPrint, expectedChineseTree2);
  compareSingleOutput(tree, false, chineseTypDepPrint, expectedChineseDeps2sd);
  TreePrint paramsTreePrint = new TreePrint("typedDependencies", "basicDependencies",
      chineseParser.treebankLanguagePack(),
      chineseParser.getTLPParams().headFinder(),
      chineseParser.getTLPParams().typedDependencyHeadFinder());
  compareSingleOutput(tree, false, paramsTreePrint, expectedChineseDeps2sd);
}
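For orientation, here is a minimal standalone sketch of the same TreePrint construction outside the test harness. Assumptions (not from the test): the default English model is loaded via LexicalizedParser.loadModel(), the sentence is a placeholder, and parse(String) is used for convenience; output goes to System.out.

// Minimal sketch (assumptions: default English model, placeholder sentence).
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreePrint;

public class TreePrintSketch {
  public static void main(String[] args) {
    LexicalizedParser parser = LexicalizedParser.loadModel();
    Tree tree = parser.parse("The quick brown fox jumped over the lazy dog.");
    // The same constructor the test exercises: formats, format options,
    // language pack, and both head finders.
    TreePrint depPrint = new TreePrint("typedDependencies", "basicDependencies",
        parser.treebankLanguagePack(),
        parser.getTLPParams().headFinder(),
        parser.getTLPParams().typedDependencyHeadFinder());
    depPrint.printTree(tree);  // prints the basic typed dependencies to System.out
  }
}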
Use of edu.stanford.nlp.trees.TreePrint in project CoreNLP by stanfordnlp.
The class LexicalizedParserITest, method setUp.
// TODO: add more tests
@Override
public void setUp() throws Exception {
  synchronized (LexicalizedParserITest.class) {
    if (englishParser == null) {
      // sharing a bunch of code here with the webapp in
      // parser/webapp/index.jsp... perhaps we could reuse that code
      englishParser = LexicalizedParser.loadModel();
      TreebankLanguagePack tLP = englishParser.getOp().tlpParams.treebankLanguagePack();
      tagPrint = new TreePrint("wordsAndTags", tLP);
      pennPrint = new TreePrint("penn", tLP);
      typDepPrint = new TreePrint("typedDependencies", "basicDependencies", tLP);
      // default is now CCprocessed
      typDepColPrint = new TreePrint("typedDependencies", tLP);
      File englishPath = new File(LexicalizedParser.DEFAULT_PARSER_LOC);
      String chinesePath = englishPath.getParent() + File.separator + "chineseFactored.ser.gz";
      chineseParser = LexicalizedParser.loadModel(chinesePath);
      tLP = chineseParser.getOp().tlpParams.treebankLanguagePack();
      // test was made with Chinese SD, not UD
      chineseParser.getTLPParams().setGenerateOriginalDependencies(true);
      chinesePennPrint = new TreePrint("penn", tLP);
      chineseTypDepPrint = new TreePrint("typedDependencies", "basicDependencies", tLP);
    }
  }
}
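A condensed sketch of the same format setup applied to a single parse. Assumptions (illustration only): the default English model and a placeholder sentence; each format prints to System.out.

// Sketch: the three TreePrint formats configured above, applied to one parse.
LexicalizedParser parser = LexicalizedParser.loadModel();
TreebankLanguagePack tlp = parser.getOp().tlpParams.treebankLanguagePack();
Tree tree = parser.parse("TreePrint supports several output formats.");
new TreePrint("wordsAndTags", tlp).printTree(tree);  // word/TAG pairs
new TreePrint("penn", tlp).printTree(tree);          // Penn Treebank bracketing
new TreePrint("typedDependencies", "basicDependencies", tlp).printTree(tree);  // basic Stanford Dependencies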
Use of edu.stanford.nlp.trees.TreePrint in project CoreNLP by stanfordnlp.
The class JSONOutputter, method print.
/** {@inheritDoc} */
// It's lying; we need the "redundant" casts (as of 2014-09-08)
@SuppressWarnings("RedundantCast")
@Override
public void print(Annotation doc, OutputStream target, Options options) throws IOException {
  PrintWriter writer = new PrintWriter(IOUtils.encodedOutputStreamWriter(target, options.encoding));
  JSONWriter l0 = new JSONWriter(writer, options);
  l0.object(l1 -> {
    // Document-level metadata
    l1.set("docId", doc.get(CoreAnnotations.DocIDAnnotation.class));
    l1.set("docDate", doc.get(CoreAnnotations.DocDateAnnotation.class));
    l1.set("docSourceType", doc.get(CoreAnnotations.DocSourceTypeAnnotation.class));
    l1.set("docType", doc.get(CoreAnnotations.DocTypeAnnotation.class));
    l1.set("author", doc.get(CoreAnnotations.AuthorAnnotation.class));
    l1.set("location", doc.get(CoreAnnotations.LocationAnnotation.class));
    if (options.includeText) {
      l1.set("text", doc.get(CoreAnnotations.TextAnnotation.class));
    }
    // Sentences
    if (doc.get(CoreAnnotations.SentencesAnnotation.class) != null) {
      l1.set("sentences", doc.get(CoreAnnotations.SentencesAnnotation.class).stream().map(sentence -> (Consumer<Writer>) (Writer l2) -> {
        l2.set("id", sentence.get(CoreAnnotations.SentenceIDAnnotation.class));
        l2.set("index", sentence.get(CoreAnnotations.SentenceIndexAnnotation.class));
        l2.set("line", sentence.get(CoreAnnotations.LineNumberAnnotation.class));
        // Constituency parse, rendered to a String via TreePrint
        StringWriter treeStrWriter = new StringWriter();
        TreePrint treePrinter = options.constituentTreePrinter;
        if (treePrinter == AnnotationOutputter.DEFAULT_CONSTITUENT_TREE_PRINTER) {
          treePrinter = new TreePrint("oneline");
        }
        treePrinter.printTree(sentence.get(TreeCoreAnnotations.TreeAnnotation.class), new PrintWriter(treeStrWriter, true));
        String treeStr = treeStrWriter.toString().trim();
        if (!"SENTENCE_SKIPPED_OR_UNPARSABLE".equals(treeStr)) {
          l2.set("parse", treeStr);
        }
        // Dependency parses
        l2.set("basicDependencies", buildDependencyTree(sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class)));
        l2.set("enhancedDependencies", buildDependencyTree(sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class)));
        l2.set("enhancedPlusPlusDependencies", buildDependencyTree(sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class)));
        // Sentiment
        Tree sentimentTree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
        if (sentimentTree != null) {
          int sentiment = RNNCoreAnnotations.getPredictedClass(sentimentTree);
          String sentimentClass = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
          l2.set("sentimentValue", Integer.toString(sentiment));
          l2.set("sentiment", sentimentClass.replaceAll(" ", ""));
        }
        // OpenIE relation triples
        Collection<RelationTriple> openIETriples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
        if (openIETriples != null) {
          l2.set("openie", openIETriples.stream().map(triple -> (Consumer<Writer>) (Writer tripleWriter) -> {
            tripleWriter.set("subject", triple.subjectGloss());
            tripleWriter.set("subjectSpan", Span.fromPair(triple.subjectTokenSpan()));
            tripleWriter.set("relation", triple.relationGloss());
            tripleWriter.set("relationSpan", Span.fromPair(triple.relationTokenSpan()));
            tripleWriter.set("object", triple.objectGloss());
            tripleWriter.set("objectSpan", Span.fromPair(triple.objectTokenSpan()));
          }));
        }
        // KBP relation triples
        Collection<RelationTriple> kbpTriples = sentence.get(CoreAnnotations.KBPTriplesAnnotation.class);
        if (kbpTriples != null) {
          l2.set("kbp", kbpTriples.stream().map(triple -> (Consumer<Writer>) (Writer tripleWriter) -> {
            tripleWriter.set("subject", triple.subjectGloss());
            tripleWriter.set("subjectSpan", Span.fromPair(triple.subjectTokenSpan()));
            tripleWriter.set("relation", triple.relationGloss());
            tripleWriter.set("relationSpan", Span.fromPair(triple.relationTokenSpan()));
            tripleWriter.set("object", triple.objectGloss());
            tripleWriter.set("objectSpan", Span.fromPair(triple.objectTokenSpan()));
          }));
        }
        // Entity mentions
        if (sentence.get(CoreAnnotations.MentionsAnnotation.class) != null) {
          Integer sentTokenBegin = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
          l2.set("entitymentions", sentence.get(CoreAnnotations.MentionsAnnotation.class).stream().map(m -> (Consumer<Writer>) (Writer l3) -> {
            Integer tokenBegin = m.get(CoreAnnotations.TokenBeginAnnotation.class);
            Integer tokenEnd = m.get(CoreAnnotations.TokenEndAnnotation.class);
            l3.set("docTokenBegin", tokenBegin);
            l3.set("docTokenEnd", tokenEnd);
            if (tokenBegin != null && sentTokenBegin != null) {
              l3.set("tokenBegin", tokenBegin - sentTokenBegin);
            }
            if (tokenEnd != null && sentTokenBegin != null) {
              l3.set("tokenEnd", tokenEnd - sentTokenBegin);
            }
            l3.set("text", m.get(CoreAnnotations.TextAnnotation.class));
            l3.set("characterOffsetBegin", m.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
            l3.set("characterOffsetEnd", m.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
            l3.set("ner", m.get(CoreAnnotations.NamedEntityTagAnnotation.class));
            l3.set("normalizedNER", m.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
            l3.set("entitylink", m.get(CoreAnnotations.WikipediaEntityAnnotation.class));
            Timex time = m.get(TimeAnnotations.TimexAnnotation.class);
            if (time != null) {
              Timex.Range range = time.range();
              l3.set("timex", (Consumer<Writer>) l4 -> {
                l4.set("tid", time.tid());
                l4.set("type", time.timexType());
                l4.set("value", time.value());
                l4.set("altValue", time.altVal());
                l4.set("range", (range != null) ? (Consumer<Writer>) l5 -> {
                  l5.set("begin", range.begin);
                  l5.set("end", range.end);
                  l5.set("duration", range.duration);
                } : null);
              });
            }
          }));
        }
        // Tokens
        if (sentence.get(CoreAnnotations.TokensAnnotation.class) != null) {
          l2.set("tokens", sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(token -> (Consumer<Writer>) (Writer l3) -> {
            l3.set("index", token.index());
            l3.set("word", token.word());
            l3.set("originalText", token.originalText());
            l3.set("lemma", token.lemma());
            l3.set("characterOffsetBegin", token.beginPosition());
            l3.set("characterOffsetEnd", token.endPosition());
            l3.set("pos", token.tag());
            l3.set("ner", token.ner());
            l3.set("normalizedNER", token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
            l3.set("speaker", token.get(CoreAnnotations.SpeakerAnnotation.class));
            l3.set("truecase", token.get(CoreAnnotations.TrueCaseAnnotation.class));
            l3.set("truecaseText", token.get(CoreAnnotations.TrueCaseTextAnnotation.class));
            l3.set("before", token.get(CoreAnnotations.BeforeAnnotation.class));
            l3.set("after", token.get(CoreAnnotations.AfterAnnotation.class));
            l3.set("entitylink", token.get(CoreAnnotations.WikipediaEntityAnnotation.class));
            Timex time = token.get(TimeAnnotations.TimexAnnotation.class);
            if (time != null) {
              Timex.Range range = time.range();
              l3.set("timex", (Consumer<Writer>) l4 -> {
                l4.set("tid", time.tid());
                l4.set("type", time.timexType());
                l4.set("value", time.value());
                l4.set("altValue", time.altVal());
                l4.set("range", (range != null) ? (Consumer<Writer>) l5 -> {
                  l5.set("begin", range.begin);
                  l5.set("end", range.end);
                  l5.set("duration", range.duration);
                } : null);
              });
            }
          }));
        }
      }));
    }
    // Coreference chains
    if (doc.get(CorefCoreAnnotations.CorefChainAnnotation.class) != null) {
      Map<Integer, CorefChain> corefChains = doc.get(CorefCoreAnnotations.CorefChainAnnotation.class);
      if (corefChains != null) {
        l1.set("corefs", (Consumer<Writer>) chainWriter -> {
          for (CorefChain chain : corefChains.values()) {
            CorefChain.CorefMention representative = chain.getRepresentativeMention();
            chainWriter.set(Integer.toString(chain.getChainID()), chain.getMentionsInTextualOrder().stream().map(mention -> (Consumer<Writer>) (Writer mentionWriter) -> {
              mentionWriter.set("id", mention.mentionID);
              mentionWriter.set("text", mention.mentionSpan);
              mentionWriter.set("type", mention.mentionType);
              mentionWriter.set("number", mention.number);
              mentionWriter.set("gender", mention.gender);
              mentionWriter.set("animacy", mention.animacy);
              mentionWriter.set("startIndex", mention.startIndex);
              mentionWriter.set("endIndex", mention.endIndex);
              mentionWriter.set("headIndex", mention.headIndex);
              mentionWriter.set("sentNum", mention.sentNum);
              mentionWriter.set("position", Arrays.stream(mention.position.elems()).boxed().collect(Collectors.toList()));
              mentionWriter.set("isRepresentativeMention", mention == representative);
            }));
          }
        });
      }
    }
    // Quotes
    if (doc.get(CoreAnnotations.QuotationsAnnotation.class) != null) {
      List<CoreMap> quotes = QuoteAnnotator.gatherQuotes(doc);
      l1.set("quotes", quotes.stream().map(quote -> (Consumer<Writer>) (Writer l2) -> {
        l2.set("id", quote.get(CoreAnnotations.QuotationIndexAnnotation.class));
        l2.set("text", quote.get(CoreAnnotations.TextAnnotation.class));
        l2.set("beginIndex", quote.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
        l2.set("endIndex", quote.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
        l2.set("beginToken", quote.get(CoreAnnotations.TokenBeginAnnotation.class));
        l2.set("endToken", quote.get(CoreAnnotations.TokenEndAnnotation.class));
        l2.set("beginSentence", quote.get(CoreAnnotations.SentenceBeginAnnotation.class));
        l2.set("endSentence", quote.get(CoreAnnotations.SentenceEndAnnotation.class));
      }));
    }
  });
  l0.writer.flush();  // flush
}
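The tree-to-String idiom in the middle of print() is useful on its own. A minimal sketch, assuming tree is any parse Tree (e.g. retrieved from TreeCoreAnnotations.TreeAnnotation):

// Render a tree on a single line as a String, as print() does above.
StringWriter treeStrWriter = new StringWriter();
TreePrint treePrinter = new TreePrint("oneline");
treePrinter.printTree(tree, new PrintWriter(treeStrWriter, true));
String treeStr = treeStrWriter.toString().trim();

The PrintWriter is created with autoflush enabled, so the StringWriter holds the complete output before toString() is called.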
Use of edu.stanford.nlp.trees.TreePrint in project CoreNLP by stanfordnlp.
The class EvaluateTreebank, method testOnTreebank.
/** Test the parser on a treebank. Parses will be written to stdout, and
 *  various other information will be written to stderr and stdout,
 *  particularly if <code>op.testOptions.verbose</code> is true.
 *
 *  @param testTreebank The treebank to parse
 *  @return The labeled precision/recall F<sub>1</sub> (EVALB measure)
 *          of the parser on the treebank.
 */
public double testOnTreebank(Treebank testTreebank) {
  log.info("Testing on treebank");
  Timing treebankTotalTimer = new Timing();
  TreePrint treePrint = op.testOptions.treePrint(op.tlpParams);
  TreebankLangParserParams tlpParams = op.tlpParams;
  TreebankLanguagePack tlp = op.langpack();
  PrintWriter pwOut, pwErr;
  if (op.testOptions.quietEvaluation) {
    NullOutputStream quiet = new NullOutputStream();
    pwOut = tlpParams.pw(quiet);
    pwErr = tlpParams.pw(quiet);
  } else {
    pwOut = tlpParams.pw();
    pwErr = tlpParams.pw(System.err);
  }
  if (op.testOptions.verbose) {
    pwErr.print("Testing ");
    pwErr.println(testTreebank.textualSummary(tlp));
  }
  if (op.testOptions.evalb) {
    EvalbFormatWriter.initEVALBfiles(tlpParams);
  }
  PrintWriter pwFileOut = null;
  if (op.testOptions.writeOutputFiles) {
    String fname = op.testOptions.outputFilesPrefix + "." + op.testOptions.outputFilesExtension;
    try {
      pwFileOut = op.tlpParams.pw(new FileOutputStream(fname));
    } catch (IOException ioe) {
      ioe.printStackTrace();
    }
  }
  PrintWriter pwStats = null;
  if (op.testOptions.outputkBestEquivocation != null) {
    try {
      pwStats = op.tlpParams.pw(new FileOutputStream(op.testOptions.outputkBestEquivocation));
    } catch (IOException ioe) {
      ioe.printStackTrace();
    }
  }
  if (op.testOptions.testingThreads != 1) {
    // Multithreaded parsing: feed sentences to the wrapper and drain results as they finish
    MulticoreWrapper<List<? extends HasWord>, ParserQuery> wrapper = new MulticoreWrapper<>(op.testOptions.testingThreads, new ParsingThreadsafeProcessor(pqFactory, pwErr));
    LinkedList<Tree> goldTrees = new LinkedList<>();
    for (Tree goldTree : testTreebank) {
      List<? extends HasWord> sentence = getInputSentence(goldTree);
      goldTrees.add(goldTree);
      pwErr.println("Parsing [len. " + sentence.size() + "]: " + SentenceUtils.listToString(sentence));
      wrapper.put(sentence);
      while (wrapper.peek()) {
        ParserQuery pq = wrapper.poll();
        goldTree = goldTrees.poll();
        processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
      }
    } // for tree iterator
    wrapper.join();
    while (wrapper.peek()) {
      ParserQuery pq = wrapper.poll();
      Tree goldTree = goldTrees.poll();
      processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
    }
  } else {
    // Single-threaded parsing
    ParserQuery pq = pqFactory.parserQuery();
    for (Tree goldTree : testTreebank) {
      final List<CoreLabel> sentence = getInputSentence(goldTree);
      pwErr.println("Parsing [len. " + sentence.size() + "]: " + SentenceUtils.listToString(sentence));
      pq.parseAndReport(sentence, pwErr);
      processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
    } // for tree iterator
  }
  // Done parsing... print the results of the evaluations
  treebankTotalTimer.done("Testing on treebank");
  if (op.testOptions.quietEvaluation) {
    pwErr = tlpParams.pw(System.err);
  }
  if (saidMemMessage) {
    ParserUtils.printOutOfMemory(pwErr);
  }
  if (op.testOptions.evalb) {
    EvalbFormatWriter.closeEVALBfiles();
  }
  if (numSkippedEvals != 0) {
    pwErr.printf("Unable to evaluate %d parser hypotheses due to yield mismatch\n", numSkippedEvals);
  }
  // only created here so we know what parser types are supported...
  ParserQuery pq = pqFactory.parserQuery();
  if (summary) {
    if (pcfgLB != null) pcfgLB.display(false, pwErr);
    if (pcfgChildSpecific != null) pcfgChildSpecific.display(false, pwErr);
    if (pcfgLA != null) pcfgLA.display(false, pwErr);
    if (pcfgCB != null) pcfgCB.display(false, pwErr);
    if (pcfgDA != null) pcfgDA.display(false, pwErr);
    if (pcfgTA != null) pcfgTA.display(false, pwErr);
    if (pcfgLL != null && pq.getPCFGParser() != null) pcfgLL.display(false, pwErr);
    if (depDA != null) depDA.display(false, pwErr);
    if (depTA != null) depTA.display(false, pwErr);
    if (depLL != null && pq.getDependencyParser() != null) depLL.display(false, pwErr);
    if (factLB != null) factLB.display(false, pwErr);
    if (factChildSpecific != null) factChildSpecific.display(false, pwErr);
    if (factLA != null) factLA.display(false, pwErr);
    if (factCB != null) factCB.display(false, pwErr);
    if (factDA != null) factDA.display(false, pwErr);
    if (factTA != null) factTA.display(false, pwErr);
    if (factLL != null && pq.getFactoredParser() != null) factLL.display(false, pwErr);
    if (pcfgCatE != null) pcfgCatE.display(false, pwErr);
    for (Eval eval : evals) {
      eval.display(false, pwErr);
    }
    for (BestOfTopKEval eval : topKEvals) {
      eval.display(false, pwErr);
    }
  }
  // these ones only have a display mode, so display if turned on!!
  if (pcfgRUO != null) pcfgRUO.display(true, pwErr);
  if (pcfgCUO != null) pcfgCUO.display(true, pwErr);
  if (tsv) {
    NumberFormat nf = new DecimalFormat("0.00");
    pwErr.println("factF1\tfactDA\tfactEx\tpcfgF1\tdepDA\tfactTA\tnum");
    if (factLB != null) pwErr.print(nf.format(factLB.getEvalbF1Percent()));
    pwErr.print("\t");
    if (pq.getDependencyParser() != null && factDA != null) pwErr.print(nf.format(factDA.getEvalbF1Percent()));
    pwErr.print("\t");
    if (factLB != null) pwErr.print(nf.format(factLB.getExactPercent()));
    pwErr.print("\t");
    if (pcfgLB != null) pwErr.print(nf.format(pcfgLB.getEvalbF1Percent()));
    pwErr.print("\t");
    if (pq.getDependencyParser() != null && depDA != null) pwErr.print(nf.format(depDA.getEvalbF1Percent()));
    pwErr.print("\t");
    if (pq.getPCFGParser() != null && factTA != null) pwErr.print(nf.format(factTA.getEvalbF1Percent()));
    pwErr.print("\t");
    if (factLB != null) pwErr.print(factLB.getNum());
    pwErr.println();
  }
  double f1 = 0.0;
  if (factLB != null) {
    f1 = factLB.getEvalbF1();
  }
  // Close files (if necessary)
  if (pwFileOut != null) pwFileOut.close();
  if (pwStats != null) pwStats.close();
  if (parserQueryEvals != null) {
    for (ParserQueryEval parserQueryEval : parserQueryEvals) {
      parserQueryEval.display(false, pwErr);
    }
  }
  return f1;
}
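A condensed sketch of the single-threaded TreePrint flow in this method, written as if inside EvaluateTreebank and using only calls that appear above; op, testTreebank, and pqFactory are assumed to be initialized as in the class, and the file and stats writers are left null for simplicity.

// Condensed single-threaded flow: one TreePrint instance reused for every sentence.
TreePrint treePrint = op.testOptions.treePrint(op.tlpParams);
PrintWriter pwOut = op.tlpParams.pw();            // stdout
PrintWriter pwErr = op.tlpParams.pw(System.err);  // stderr
ParserQuery pq = pqFactory.parserQuery();
for (Tree goldTree : testTreebank) {
  List<CoreLabel> sentence = getInputSentence(goldTree);
  pq.parseAndReport(sentence, pwErr);
  // processResults prints the best parse through treePrint and runs the evaluations
  processResults(pq, goldTree, pwErr, pwOut, null, null, treePrint);
}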
Use of edu.stanford.nlp.trees.TreePrint in project CoreNLP by stanfordnlp.
The class EvaluateTreebank, method processResults.
public void processResults(ParserQuery pq, Tree goldTree, PrintWriter pwErr, PrintWriter pwOut, PrintWriter pwFileOut, PrintWriter pwStats, TreePrint treePrint) {
  if (pq.saidMemMessage()) {
    saidMemMessage = true;
  }
  Tree tree;
  List<? extends HasWord> sentence = pq.originalSentence();
  try {
    tree = pq.getBestParse();
  } catch (NoSuchParseException e) {
    tree = null;
  }
  List<ScoredObject<Tree>> kbestPCFGTrees = null;
  if (tree != null && kbestPCFG > 0) {
    kbestPCFGTrees = pq.getKBestPCFGParses(kbestPCFG);
  }
  // combo parse goes to pwOut (System.out)
  if (op.testOptions.verbose) {
    pwOut.println("ComboParser best");
    Tree ot = tree;
    if (ot != null && !op.tlpParams.treebankLanguagePack().isStartSymbol(ot.value())) {
      ot = ot.treeFactory().newTreeNode(op.tlpParams.treebankLanguagePack().startSymbol(), Collections.singletonList(ot));
    }
    treePrint.printTree(ot, pwOut);
  } else {
    treePrint.printTree(tree, pwOut);
  }
  // print various statistics
  if (tree != null) {
    if (op.testOptions.printAllBestParses) {
      List<ScoredObject<Tree>> parses = pq.getBestPCFGParses();
      int sz = parses.size();
      if (sz > 1) {
        pwOut.println("There were " + sz + " best PCFG parses with score " + parses.get(0).score() + '.');
        Tree transGoldTree = collinizer.transformTree(goldTree);
        int iii = 0;
        for (ScoredObject<Tree> sot : parses) {
          iii++;
          Tree tb = sot.object();
          Tree tbd = debinarizer.transformTree(tb);
          tbd = subcategoryStripper.transformTree(tbd);
          pq.restoreOriginalWords(tbd);
          pwOut.println("PCFG Parse #" + iii + " with score " + tbd.score());
          tbd.pennPrint(pwOut);
          Tree tbtr = collinizer.transformTree(tbd);
          // pwOut.println("Tree size = " + tbtr.size() + "; depth = " + tbtr.depth());
          kGoodLB.evaluate(tbtr, transGoldTree, pwErr);
        }
      }
    } else if (op.testOptions.printPCFGkBest > 0 && op.testOptions.outputkBestEquivocation == null) {
      // Huang and Chiang (2006) Algorithm 3 output from the PCFG parser
      List<ScoredObject<Tree>> trees = kbestPCFGTrees.subList(0, op.testOptions.printPCFGkBest);
      Tree transGoldTree = collinizer.transformTree(goldTree);
      int i = 0;
      for (ScoredObject<Tree> tp : trees) {
        i++;
        pwOut.println("PCFG Parse #" + i + " with score " + tp.score());
        Tree tbd = tp.object();
        tbd.pennPrint(pwOut);
        Tree tbtr = collinizer.transformTree(tbd);
        kGoodLB.evaluate(tbtr, transGoldTree, pwErr);
      }
    } else if (op.testOptions.printFactoredKGood > 0 && pq.hasFactoredParse()) {
      // Chart parser (factored) n-best list
      // DZ: debug n best trees
      List<ScoredObject<Tree>> trees = pq.getKGoodFactoredParses(op.testOptions.printFactoredKGood);
      Tree transGoldTree = collinizer.transformTree(goldTree);
      int ii = 0;
      for (ScoredObject<Tree> tp : trees) {
        ii++;
        pwOut.println("Factored Parse #" + ii + " with score " + tp.score());
        Tree tbd = tp.object();
        tbd.pennPrint(pwOut);
        Tree tbtr = collinizer.transformTree(tbd);
        kGoodLB.evaluate(tbtr, transGoldTree, pwOut);
      }
    } else if (pwFileOut != null) {
      // 1-best output
      pwFileOut.println(tree.toString());
    }
    // Print the derivational entropy
    if (op.testOptions.outputkBestEquivocation != null && op.testOptions.printPCFGkBest > 0) {
      List<ScoredObject<Tree>> trees = kbestPCFGTrees.subList(0, op.testOptions.printPCFGkBest);
      double[] logScores = new double[trees.size()];
      int treeId = 0;
      for (ScoredObject<Tree> kBestTree : trees) logScores[treeId++] = kBestTree.score();
      // Re-normalize
      double entropy = 0.0;
      double denom = ArrayMath.logSum(logScores);
      for (double logScore : logScores) {
        double logPr = logScore - denom;
        entropy += Math.exp(logPr) * (logPr / Math.log(2));
      }
      // Convert to bits
      entropy *= -1;
      pwStats.printf("%f\t%d\t%d\n", entropy, trees.size(), sentence.size());
    }
  }
  // Perform various evaluations specified by the user
  if (tree != null) {
    // Strip subcategories and remove punctuation for evaluation
    tree = subcategoryStripper.transformTree(tree);
    Tree treeFact = collinizer.transformTree(tree);
    // Set up the gold tree
    if (op.testOptions.verbose) {
      pwOut.println("Correct parse");
      treePrint.printTree(goldTree, pwOut);
    }
    Tree transGoldTree = collinizer.transformTree(goldTree);
    if (transGoldTree != null) {
      transGoldTree = subcategoryStripper.transformTree(transGoldTree);
    }
    // Can't do evaluation in these cases
    if (transGoldTree == null) {
      pwErr.println("Couldn't transform gold tree for evaluation, skipping eval. Gold tree was:");
      goldTree.pennPrint(pwErr);
      numSkippedEvals++;
      return;
    } else if (treeFact == null) {
      pwErr.println("Couldn't transform hypothesis tree for evaluation, skipping eval. Tree was:");
      tree.pennPrint(pwErr);
      numSkippedEvals++;
      return;
    } else if (treeFact.yield().size() != transGoldTree.yield().size()) {
      List<Label> fYield = treeFact.yield();
      List<Label> gYield = transGoldTree.yield();
      pwErr.println("WARNING: Evaluation could not be performed due to gold/parsed yield mismatch.");
      pwErr.printf(" sizes: gold: %d (transf) %d (orig); parsed: %d (transf) %d (orig).%n", gYield.size(), goldTree.yield().size(), fYield.size(), tree.yield().size());
      pwErr.println(" gold: " + SentenceUtils.listToString(gYield, true));
      pwErr.println(" pars: " + SentenceUtils.listToString(fYield, true));
      numSkippedEvals++;
      return;
    }
    if (topKEvals.size() > 0) {
      List<Tree> transGuesses = new ArrayList<>();
      int kbest = Math.min(op.testOptions.evalPCFGkBest, kbestPCFGTrees.size());
      for (ScoredObject<Tree> guess : kbestPCFGTrees.subList(0, kbest)) {
        transGuesses.add(collinizer.transformTree(guess.object()));
      }
      for (BestOfTopKEval eval : topKEvals) {
        eval.evaluate(transGuesses, transGoldTree, pwErr);
      }
    }
    // PCFG eval
    Tree treePCFG = pq.getBestPCFGParse();
    if (treePCFG != null) {
      Tree treePCFGeval = collinizer.transformTree(treePCFG);
      if (pcfgLB != null) {
        pcfgLB.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgChildSpecific != null) {
        pcfgChildSpecific.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgLA != null) {
        pcfgLA.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgCB != null) {
        pcfgCB.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgDA != null) {
        // Re-index the leaves after Collinization, stripping traces, etc.
        treePCFGeval.indexLeaves(true);
        transGoldTree.indexLeaves(true);
        pcfgDA.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgTA != null) {
        pcfgTA.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgLL != null && pq.getPCFGParser() != null) {
        pcfgLL.recordScore(pq.getPCFGParser(), pwErr);
      }
      if (pcfgRUO != null) {
        pcfgRUO.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgCUO != null) {
        pcfgCUO.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
      if (pcfgCatE != null) {
        pcfgCatE.evaluate(treePCFGeval, transGoldTree, pwErr);
      }
    }
    // Dependency eval
    // todo: is treeDep really useful here, or should we really use depDAEval tree (debinarized) throughout? We use it for parse, and it sure seems like we could use it for tag eval, but maybe not factDA?
    Tree treeDep = pq.getBestDependencyParse(false);
    if (treeDep != null) {
      Tree goldTreeB = binarizerOnly.transformTree(goldTree);
      Tree goldTreeEval = goldTree.deepCopy();
      goldTreeEval.indexLeaves(true);
      goldTreeEval.percolateHeads(op.langpack().headFinder());
      Tree depDAEval = pq.getBestDependencyParse(true);
      depDAEval.indexLeaves(true);
      depDAEval.percolateHeadIndices();
      if (depDA != null) {
        depDA.evaluate(depDAEval, goldTreeEval, pwErr);
      }
      if (depTA != null) {
        Tree undoneTree = debinarizer.transformTree(treeDep);
        undoneTree = subcategoryStripper.transformTree(undoneTree);
        pq.restoreOriginalWords(undoneTree);
        // pwErr.println("subcategoryStripped tree: " + undoneTree.toStructureDebugString());
        depTA.evaluate(undoneTree, goldTree, pwErr);
      }
      if (depLL != null && pq.getDependencyParser() != null) {
        depLL.recordScore(pq.getDependencyParser(), pwErr);
      }
      Tree factTreeB;
      if (pq.hasFactoredParse()) {
        factTreeB = pq.getBestFactoredParse();
      } else {
        factTreeB = treeDep;
      }
      if (factDA != null) {
        factDA.evaluate(factTreeB, goldTreeB, pwErr);
      }
    }
    // Factored parser (1-best) eval
    if (factLB != null) {
      factLB.evaluate(treeFact, transGoldTree, pwErr);
    }
    if (factChildSpecific != null) {
      factChildSpecific.evaluate(treeFact, transGoldTree, pwErr);
    }
    if (factLA != null) {
      factLA.evaluate(treeFact, transGoldTree, pwErr);
    }
    if (factTA != null) {
      factTA.evaluate(tree, boundaryRemover.transformTree(goldTree), pwErr);
    }
    if (factLL != null && pq.getFactoredParser() != null) {
      factLL.recordScore(pq.getFactoredParser(), pwErr);
    }
    if (factCB != null) {
      factCB.evaluate(treeFact, transGoldTree, pwErr);
    }
    for (Eval eval : evals) {
      eval.evaluate(treeFact, transGoldTree, pwErr);
    }
    if (parserQueryEvals != null) {
      for (ParserQueryEval eval : parserQueryEvals) {
        eval.evaluate(pq, transGoldTree, pwErr);
      }
    }
    if (op.testOptions.evalb) {
      // empty out scores just in case
      nanScores(tree);
      EvalbFormatWriter.writeEVALBline(treeFact, transGoldTree);
    }
  }
  pwErr.println();
}
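For reference, the derivational entropy computed in the equivocation block above corresponds to

$$H = -\sum_{i=1}^{k} p_i \log_2 p_i, \qquad p_i = \exp\!\Big(s_i - \log \sum_{j=1}^{k} e^{s_j}\Big),$$

where $s_i$ is the log score of the $i$-th k-best PCFG parse and ArrayMath.logSum supplies the log-sum-exp denominator. The loop accumulates exp(logPr) * (logPr / log 2) and then negates the sum, which yields the entropy in bits.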