Use of edu.stanford.nlp.trees.LabeledScoredConstituentFactory in project CoreNLP by stanfordnlp.
Class Evalb, method emitSortedTrees.
/**
 * Polls up to {@code worstKTreesToEmit} entries from {@code queue} (in the queue's own
 * priority order) and writes them to four UTF-8 files derived from {@code filePrefix}:
 * <ul>
 *   <li>{@code .kworst.gold} / {@code .kworst.guess}: the second / third tree of each triple,
 *       one per line;</li>
 *   <li>{@code .kworst.gold.deps} / {@code .kworst.guess.deps}: the constituents found only in
 *       the second tree / only in the third tree, space-separated, one line per triple.</li>
 * </ul>
 * The first element of each triple is printed to standard output under an "F1s" heading.
 * Polled entries are removed from the queue.
 *
 * @param queue triples of (score, gold tree, guess tree); if {@code null}, a message is logged
 *              and nothing is written
 * @param worstKTreesToEmit maximum number of entries to emit
 * @param filePrefix path prefix for the four output files
 */
private static void emitSortedTrees(PriorityQueue<Triple<Double, Tree, Tree>> queue, int worstKTreesToEmit, String filePrefix) {
  if (queue == null) {
    log.info("Queue was not initialized properly");
    // Without a queue there is nothing to emit; bail out before creating any files.
    return;
  }

  final ConstituentFactory cFact = new LabeledScoredConstituentFactory();

  // try-with-resources closes every writer that was opened, even if a later file cannot be
  // created or the loop body throws.
  try (PrintWriter guessPw = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filePrefix + ".kworst.guess"), "UTF-8")));
       PrintWriter goldPw = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filePrefix + ".kworst.gold"), "UTF-8")));
       PrintWriter guessDepPw = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filePrefix + ".kworst.guess.deps"), "UTF-8")));
       PrintWriter goldDepPw = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filePrefix + ".kworst.gold.deps"), "UTF-8")))) {

    System.out.printf("F1s of %d worst trees:\n", worstKTreesToEmit);

    for (int i = 0; queue.peek() != null && i < worstKTreesToEmit; i++) {
      final Triple<Double, Tree, Tree> trees = queue.poll();
      System.out.println(trees.first());

      // Output the trees
      goldPw.println(trees.second().toString());
      guessPw.println(trees.third().toString());

      // Output the set differences: constituents in gold but not in guess ...
      Set<Constituent> goldDeps = Generics.newHashSet();
      goldDeps.addAll(trees.second().constituents(cFact));
      goldDeps.removeAll(trees.third().constituents(cFact));
      for (Constituent c : goldDeps) {
        goldDepPw.print(c.toString() + " ");
      }
      goldDepPw.println();

      // ... and constituents in guess but not in gold.
      Set<Constituent> guessDeps = Generics.newHashSet();
      guessDeps.addAll(trees.third().constituents(cFact));
      guessDeps.removeAll(trees.second().constituents(cFact));
      for (Constituent c : guessDeps) {
        guessDepPw.print(c.toString() + " ");
      }
      guessDepPw.println();
    }
  } catch (UnsupportedEncodingException e) {
    e.printStackTrace();
  } catch (FileNotFoundException e) {
    e.printStackTrace();
  }
}
Use of edu.stanford.nlp.trees.LabeledScoredConstituentFactory in project CoreNLP by stanfordnlp.
Class NodePruner, method prune.
/**
 * Flattens children of a candidate node that are not supported by a reference parse.
 * <p>
 * A test tree is built with {@code label} over {@code treeList}, and a reference tree is
 * obtained from {@code parser.extractBestParse(label.value(), start, end)} followed by
 * {@code debinarizer.transformTree}. A child is replaced by its own children when all of the
 * following hold:
 * <ul>
 *   <li>the label value starts with {@code "NP^NP"};</li>
 *   <li>the reference tree and the test tree have yields of the same size;</li>
 *   <li>the child is neither a leaf nor a preterminal;</li>
 *   <li>the child's labeled span is not among the reference constituents and does not cross
 *       any of them.</li>
 * </ul>
 * Every other child is kept unchanged.
 *
 * @param treeList the children of the candidate node, in order
 * @param label the label of the candidate node
 * @param start start position passed to the parser; also used as an offset in log messages
 * @param end end position passed to the parser
 * @return {@code treeList} itself when it has fewer than two elements, otherwise a new list of
 *         the (possibly flattened) children
 */
List<Tree> prune(List<Tree> treeList, Label label, int start, int end) {
  // With zero or one child there is nothing to flatten; this also avoids calling
  // treeList.get(0) on an empty list.
  if (treeList.size() <= 1) {
    return treeList;
  }
  // get reference tree
  Tree testTree = treeList.get(0).treeFactory().newTreeNode(label, treeList);
  Tree tempTree = parser.extractBestParse(label.value(), start, end);
  // parser.restoreUnaries(tempTree);
  Tree pcfgTree = debinarizer.transformTree(tempTree);
  Set<Constituent> pcfgConstituents = pcfgTree.constituents(new LabeledScoredConstituentFactory());

  // These two conditions do not depend on the child, so evaluate them once rather than
  // recomputing both yields for every child.
  boolean sameYieldSize = pcfgTree.yield().size() == testTree.yield().size();
  boolean isNpUnderNp = label.value().startsWith("NP^NP");
  boolean pruningApplies = sameYieldSize && isNpUnderNp;

  // delete child labels that are not in reference but do not cross reference
  List<Tree> prunedChildren = new ArrayList<>();
  int childStart = 0;
  for (int c = 0, numCh = testTree.numChildren(); c < numCh; c++) {
    Tree child = testTree.getChild(c);
    int childEnd = childStart + child.yield().size();

    boolean isExtra = false;
    if (pruningApplies && !child.isLeaf() && !child.isPreTerminal()) {
      Constituent childConstituent = new LabeledScoredConstituent(childStart, childEnd, child.label(), 0);
      isExtra = !pcfgConstituents.contains(childConstituent)
          && !childConstituent.crosses(pcfgConstituents);
    }

    if (isExtra) {
      log.info("Pruning: " + child.label() + " from " + (childStart + start) + " to " + (childEnd + start));
      log.info("Was: " + testTree + " vs " + pcfgTree);
      // Splice the grandchildren in place of the unsupported child.
      prunedChildren.addAll(child.getChildrenAsList());
    } else {
      prunedChildren.add(child);
    }
    childStart = childEnd;
  }
  return prunedChildren;
}
Aggregations