use of edu.stanford.nlp.trees.Constituent in project CoreNLP by stanfordnlp.
the class Tdiff method markDiff.
/**
 * Compares the labeled bracketings of two trees.
 * Every phrasal node of {@code t2} gets its {@code DoAnnotation} flag set:
 * {@code true} when its bracket is not found among the remaining brackets of
 * {@code t1}, {@code false} when it is. A bracket of {@code t1} that has been
 * matched is removed from the result, so it cannot be matched a second time.
 *
 * @param t1 the tree supplying the reference brackets; {@code null} is treated as having none
 * @param t2 the tree whose phrasal nodes are marked; when {@code null} nothing is marked
 * @return the set of brackets of {@code t1} that no phrasal node of {@code t2} matched
 */
public static Set<Constituent> markDiff(Tree t1, Tree t2) {
  Set<Constituent> unmatched;
  if (t1 == null) {
    unmatched = Generics.<Constituent>newHashSet();
  } else {
    unmatched = t1.constituents(cf);
  }
  if (t2 == null) {
    return unmatched;
  }
  t2.setSpans();
  for (Tree node : t2) {
    if (!node.isPhrasal()) {
      continue;
    }
    IntPair span = node.getSpan();
    Constituent bracket = cf.newConstituent(span.getSource(), span.getTarget(), node.label(), 0.0);
    // remove() reports whether the bracket was present, i.e. whether t1 has it too.
    boolean foundInT1 = unmatched.remove(bracket);
    ((CoreLabel) node.label()).set(CoreAnnotations.DoAnnotation.class, !foundInT1);
  }
  return unmatched;
}
use of edu.stanford.nlp.trees.Constituent in project CoreNLP by stanfordnlp.
the class Evalb method emitSortedTrees.
/**
 * Polls up to {@code worstKTreesToEmit} entries from {@code queue} and writes
 * them to four UTF-8 files whose names start with {@code filePrefix}:
 * <ul>
 * <li>{@code .kworst.gold} / {@code .kworst.guess}: the second / third tree of each triple, one per line;</li>
 * <li>{@code .kworst.gold.deps}: constituents of the second tree that the third tree lacks;</li>
 * <li>{@code .kworst.guess.deps}: constituents of the third tree that the second tree lacks.</li>
 * </ul>
 * The first element of each triple is printed to standard output under an
 * "F1s of k worst trees" header. Polled entries are removed from the queue.
 * If the queue is {@code null} a message is logged and nothing is written.
 *
 * @param queue priority queue of (score, gold tree, guess tree) triples; may be {@code null}
 * @param worstKTreesToEmit maximum number of entries to emit
 * @param filePrefix path prefix shared by the four output files
 */
private static void emitSortedTrees(PriorityQueue<Triple<Double, Tree, Tree>> queue, int worstKTreesToEmit, String filePrefix) {
  if (queue == null) {
    log.info("Queue was not initialized properly");
    // Nothing to emit; continuing would create empty files and then fail on queue.peek().
    return;
  }
  final ConstituentFactory cFact = new LabeledScoredConstituentFactory();
  // try-with-resources closes every writer that was opened, even if a later step throws.
  try (PrintWriter guessPw = newUtf8PrintWriter(filePrefix + ".kworst.guess");
       PrintWriter goldPw = newUtf8PrintWriter(filePrefix + ".kworst.gold");
       PrintWriter guessDepPw = newUtf8PrintWriter(filePrefix + ".kworst.guess.deps");
       PrintWriter goldDepPw = newUtf8PrintWriter(filePrefix + ".kworst.gold.deps")) {
    System.out.printf("F1s of %d worst trees:\n", worstKTreesToEmit);
    for (int i = 0; queue.peek() != null && i < worstKTreesToEmit; i++) {
      final Triple<Double, Tree, Tree> trees = queue.poll();
      final Tree gold = trees.second();
      final Tree guess = trees.third();
      System.out.println(trees.first());

      // Output the trees
      goldPw.println(gold.toString());
      guessPw.println(guess.toString());

      // Compute each constituent set once; both set differences below reuse them.
      final Set<Constituent> goldConstituents = gold.constituents(cFact);
      final Set<Constituent> guessConstituents = guess.constituents(cFact);

      // Output the set differences (copies are made so the computed sets stay intact)
      Set<Constituent> goldDeps = Generics.newHashSet();
      goldDeps.addAll(goldConstituents);
      goldDeps.removeAll(guessConstituents);
      for (Constituent c : goldDeps) {
        goldDepPw.print(c.toString() + " ");
      }
      goldDepPw.println();

      Set<Constituent> guessDeps = Generics.newHashSet();
      guessDeps.addAll(guessConstituents);
      guessDeps.removeAll(goldConstituents);
      for (Constituent c : guessDeps) {
        guessDepPw.print(c.toString() + " ");
      }
      guessDepPw.println();
    }
  } catch (UnsupportedEncodingException | FileNotFoundException e) {
    // Best-effort diagnostic output: report the failure but do not propagate it.
    e.printStackTrace();
  }
}

/** Opens a buffered, UTF-8 encoded {@code PrintWriter} on the file at {@code path}. */
private static PrintWriter newUtf8PrintWriter(String path) throws FileNotFoundException, UnsupportedEncodingException {
  return new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path), "UTF-8")));
}
use of edu.stanford.nlp.trees.Constituent in project CoreNLP by stanfordnlp.
the class EvalbByCat method makeObjectsByCat.
/**
 * Groups the constituents returned by {@code makeObjects(t)} by their label.
 *
 * @param t the tree passed on to {@code makeObjects}
 * @return a new map from each label to the set of constituents carrying that label
 */
private Map<Label, Set<Constituent>> makeObjectsByCat(Tree t) {
  Map<Label, Set<Constituent>> byLabel = Generics.newHashMap();
  Set<Constituent> constituents = makeObjects(t);
  for (Constituent constituent : constituents) {
    Label category = constituent.label();
    Set<Constituent> bucket = byLabel.get(category);
    if (bucket == null) {
      // First constituent seen for this label: start its bucket.
      bucket = Generics.<Constituent>newHashSet();
      byLabel.put(category, bucket);
    }
    bucket.add(constituent);
  }
  return byLabel;
}
use of edu.stanford.nlp.trees.Constituent in project CoreNLP by stanfordnlp.
the class NodePruner method prune.
/**
 * Flattens children of a proposed node that the parser's best parse for the
 * same span neither contains nor crosses.
 * <p>
 * A node labeled {@code label} is built over {@code treeList}, and the parser's
 * best parse for {@code (label.value(), start, end)} is debinarized to serve as
 * the reference. A child is replaced by its own children only when all of the
 * following hold: the label value starts with {@code "NP^NP"}, the reference
 * tree and the built node have yields of the same size, the child is neither a
 * leaf nor a preterminal, and the child's labeled span is neither among the
 * reference constituents nor crossing them. All other children are kept as is.
 *
 * @param treeList the candidate children, in order
 * @param label the label of the node being built over {@code treeList}
 * @param start start position passed to the parser; also added to the positions in log messages
 * @param end end position passed to the parser
 * @return {@code treeList} itself when it has at most one element; otherwise a
 *         new list holding the kept children and the children of the pruned ones
 */
List<Tree> prune(List<Tree> treeList, Label label, int start, int end) {
  // Zero or one child leaves nothing to prune; this also keeps get(0) below safe on an empty list.
  if (treeList.size() <= 1) {
    return treeList;
  }
  Tree testTree = treeList.get(0).treeFactory().newTreeNode(label, treeList);
  // get reference tree
  Tree tempTree = parser.extractBestParse(label.value(), start, end);
  Tree pcfgTree = debinarizer.transformTree(tempTree);
  Set<Constituent> pcfgConstituents = pcfgTree.constituents(new LabeledScoredConstituentFactory());

  // These conditions do not depend on the child, so evaluate them once instead of per child.
  boolean pruningAllowed = label.value().startsWith("NP^NP")
      && pcfgTree.yield().size() == testTree.yield().size();

  // delete child labels that are not in reference but do not cross reference
  List<Tree> prunedChildren = new ArrayList<>();
  int childStart = 0;
  for (int c = 0, numCh = testTree.numChildren(); c < numCh; c++) {
    Tree child = testTree.getChild(c);
    int childEnd = childStart + child.yield().size();
    boolean isExtra = false;
    if (pruningAllowed && !child.isLeaf() && !child.isPreTerminal()) {
      Constituent childConstituent = new LabeledScoredConstituent(childStart, childEnd, child.label(), 0);
      isExtra = !pcfgConstituents.contains(childConstituent)
          && !childConstituent.crosses(pcfgConstituents);
    }
    if (isExtra) {
      log.info("Pruning: " + child.label() + " from " + (childStart + start) + " to " + (childEnd + start));
      log.info("Was: " + testTree + " vs " + pcfgTree);
      // Splice the grandchildren in where the pruned child used to be.
      prunedChildren.addAll(child.getChildrenAsList());
    } else {
      prunedChildren.add(child);
    }
    childStart = childEnd;
  }
  return prunedChildren;
}
Aggregations