Use of edu.stanford.nlp.trees.Constituent in the CoreNLP project by stanfordnlp.
Class TreeSpanScoring, method countSpanErrors.
* Counts how many spans are present in goldTree, including
* preterminals, but not present in guessTree, along with how many
* spans are present in guessTree and not goldTree. Each one counts
* as an error, meaning that something like a mislabeled span or
* preterminal counts as two errors.
* <br>
* Span labels are compared using the basicCategory() function
* from the passed in TreebankLanguagePack.
public static int countSpanErrors(TreebankLanguagePack tlp, Tree goldTree, Tree guessTree) {
Set<Constituent> goldConstituents = goldTree.constituents(LabeledConstituent.factory());
Set<Constituent> guessConstituents = guessTree.constituents(LabeledConstituent.factory());
Set<Constituent> simpleGoldConstituents = simplifyConstituents(tlp, goldConstituents);
Set<Constituent> simpleGuessConstituents = simplifyConstituents(tlp, guessConstituents);
int errors = 0;
for (Constituent gold : simpleGoldConstituents) {
if (!simpleGuessConstituents.contains(gold)) {
for (Constituent guess : simpleGuessConstituents) {
if (!simpleGoldConstituents.contains(guess)) {
// The spans returned by constituents() doesn't include the
// preterminals, so we need to count those ourselves now
List<TaggedWord> goldWords = goldTree.taggedYield();
List<TaggedWord> guessWords = guessTree.taggedYield();
int len = Math.min(goldWords.size(), guessWords.size());
for (int i = 0; i < len; ++i) {
String goldTag = tlp.basicCategory(goldWords.get(i).tag());
String guessTag = tlp.basicCategory(guessWords.get(i).tag());
if (!goldTag.equals(guessTag)) {
// we count one error for each span that is present in the
// gold and not in the guess, and one error for each span that
// is present in the guess and not the gold, so this counts as
// two errors
errors += 2;
return errors;
Use of edu.stanford.nlp.trees.Constituent in the CoreNLP project by stanfordnlp.
Class TreeSpanScoring, method simplifyConstituents.
public static Set<Constituent> simplifyConstituents(TreebankLanguagePack tlp, Set<Constituent> constituents) {
Set<Constituent> newConstituents = new HashSet<>();
for (Constituent con : constituents) {
if (!(con instanceof LabeledConstituent)) {
throw new AssertionError("Unexpected constituent type " + con.getClass());
LabeledConstituent labeled = (LabeledConstituent) con;
newConstituents.add(new LabeledConstituent(labeled.start(), labeled.end(), tlp.basicCategory(labeled.value())));
return newConstituents;
Use of edu.stanford.nlp.trees.Constituent in the CoreNLP project by stanfordnlp.
Class EvalbByCat, method evaluate.
/**
 * Scores a guessed tree against a gold tree separately for each category
 * label, updates the per-category running counters, and optionally prints
 * the running figures to pw.
 *
 * NOTE(review): this excerpt appears to have lost lines during extraction
 * (closing braces at minimum), so the nesting below cannot be read off the
 * text as-is. Confirm against the full source before relying on it.
 *
 * @param guess the tree being evaluated
 * @param gold the reference tree
 * @param pw where running results are written; nothing is printed when it is
 *           null or when runningAverages is false
 */
public void evaluate(Tree guess, Tree gold, PrintWriter pw) {
// Report (on stderr) that a null tree cannot be scored.
// NOTE(review): no return is visible after this message -- presumably one was lost; confirm.
if (gold == null || guess == null) {
System.err.printf("%s: Cannot compare against a null gold or guess tree!%n", this.getClass().getName());
// Group each tree's constituents by category label (helper defined elsewhere).
Map<Label, Set<Constituent>> guessDeps = makeObjectsByCat(guess);
Map<Label, Set<Constituent>> goldDeps = makeObjectsByCat(gold);
// NOTE(review): as shown, cats holds only the labels found in the guess, so a
// label occurring only in the gold tree would never be scored. The
// containsKey check on guessDeps below suggests cats was meant to be the
// union of both key sets -- confirm whether a line adding goldDeps.keySet() is missing.
Set<Label> cats = Generics.newHashSet(guessDeps.keySet());
if (pw != null && runningAverages) {
pw.println("Labeled Bracketed Evaluation by Category");
for (Label cat : cats) {
// A label missing from one side is treated as an empty set on that side.
Set<Constituent> thisGuessDeps = guessDeps.containsKey(cat) ? guessDeps.get(cat) : Generics.<Constituent>newHashSet();
Set<Constituent> thisGoldDeps = goldDeps.containsKey(cat) ? goldDeps.get(cat) : Generics.<Constituent>newHashSet();
// Recall is obtained from the same precision() helper with the arguments swapped.
double currentPrecision = precision(thisGuessDeps, thisGoldDeps);
double currentRecall = precision(thisGoldDeps, thisGuessDeps);
// Harmonic mean of precision and recall; 0.0 when either is not positive, which avoids dividing by zero.
double currentF1 = (currentPrecision > 0.0 && currentRecall > 0.0 ? 2.0 / (1.0 / currentPrecision + 1.0 / currentRecall) : 0.0);
// Per-sentence sums; the printing code below divides these by num ("sent ave").
precisions.incrementCount(cat, currentPrecision);
recalls.incrementCount(cat, currentRecall);
f1s.incrementCount(cat, currentF1);
// Size-weighted numerators and their denominators; the printing code below
// divides one by the other (the figures labelled "evalb").
precisions2.incrementCount(cat, thisGuessDeps.size() * currentPrecision);
pnums2.incrementCount(cat, thisGuessDeps.size());
recalls2.incrementCount(cat, thisGoldDeps.size() * currentRecall);
rnums2.incrementCount(cat, thisGoldDeps.size());
if (pw != null && runningAverages) {
// Values are printed as percentages truncated (not rounded) to two decimals:
// scale by 10000, cast to int, divide by 100.0.
// NOTE(review): num is not declared or updated in this excerpt -- presumably a field
// counting evaluated sentences; confirm where it is incremented.
pw.println(cat + "\tP: " + ((int) (currentPrecision * 10000)) / 100.0 + " (sent ave " + ((int) (precisions.getCount(cat) * 10000 / num)) / 100.0 + ") (evalb " + ((int) (precisions2.getCount(cat) * 10000 / pnums2.getCount(cat))) / 100.0 + ")");
pw.println("\tR: " + ((int) (currentRecall * 10000)) / 100.0 + " (sent ave " + ((int) (recalls.getCount(cat) * 10000 / num)) / 100.0 + ") (evalb " + ((int) (recalls2.getCount(cat) * 10000 / rnums2.getCount(cat))) / 100.0 + ")");
// Harmonic mean of the aggregate precision (precisions2 / pnums2) and aggregate recall (recalls2 / rnums2).
double cF1 = 2.0 / (rnums2.getCount(cat) / recalls2.getCount(cat) + pnums2.getCount(cat) / precisions2.getCount(cat));
// NOTE(review): str is not declared in this excerpt (presumably a field), and no
// statement using emit is visible -- a print of emit was probably lost; confirm.
String emit = str + " F1: " + ((int) (currentF1 * 10000)) / 100.0 + " (sent ave " + ((int) (10000 * f1s.getCount(cat) / num)) / 100.0 + ", evalb " + ((int) (10000 * cF1)) / 100.0 + ")";
// NOTE(review): the body of this final condition is not present in the excerpt.
if (pw != null && runningAverages) {
Use of edu.stanford.nlp.trees.Constituent in the CoreNLP project by stanfordnlp.
Class ScrollableTreeJPanel, method renderRows.
private void renderRows(Graphics2D g2, FontMetrics fM, Color defaultColor2) {
double nodeHeight = fM.getHeight();
double layerMultiplier = (1.0 + belowLineSkip + aboveLineSkip + parentSkip);
double layerHeight = nodeHeight * layerMultiplier;
//Draw the yield
List<HasWord> sentence = tree.yieldHasWord();
for (int i = 0; i < sentence.size(); i++) {
g2.drawString(sentence.get(i).word(), yieldOffsets[i], (float) (yieldHeight + layerHeight));
//Greedily draw the constituents
final float rowOrigin = (float) (yieldHeight + 2.0 * layerHeight);
List<List<IntPair>> rows = new ArrayList<>();
for (Constituent c : diffConstituents) {
for (int rowIdx = 0; rowIdx < diffConstituents.size(); rowIdx++) {
float rowHeight = rowOrigin + (float) (rowIdx * layerHeight);
int ext = (c.end() == (yieldOffsets.length - 1)) ? 0 : 1;
if (rowIdx >= rows.size()) {
rows.add(new ArrayList<>());
rows.get(rowIdx).add(new IntPair(c.start(), c.end()));
double nodeWidth = fM.stringWidth(c.value());
g2.drawString(c.value(), yieldOffsets[c.start()], rowHeight);
try {
g2.drawLine((int) (yieldOffsets[c.start()] + nodeWidth) + 10, (int) rowHeight, (int) (yieldOffsets[c.end() + ext]) - 15, (int) rowHeight);
} catch (ArrayIndexOutOfBoundsException e) {
// This happens if yield of two compared trees do not match. Just ignore it for now
// System.err.printf("yieldOffsets.length is %d, c.start() is %d, c.end() is %d, ext is %d%n", yieldOffsets.length, c.start(), c.end(), ext);
} else {
boolean foundOverlap = false;
for (IntPair span : rows.get(rowIdx)) {
if (doesOverlap(c, span)) {
foundOverlap = true;
if (!foundOverlap) {
rows.get(rowIdx).add(new IntPair(c.start(), c.end()));
double nodeWidth = fM.stringWidth(c.value());
g2.drawString(c.value(), yieldOffsets[c.start()], rowHeight);
g2.drawLine((int) (yieldOffsets[c.start()] + nodeWidth) + 10, (int) rowHeight, (int) (yieldOffsets[c.end() + ext]) - 15, (int) rowHeight);
Use of edu.stanford.nlp.trees.Constituent in the CoreNLP project by stanfordnlp.
Class Tdiff, method main.
* @param args
public static void main(String[] args) {
if (args.length != 2) {
System.out.println("Usage: java Tdiff tree1 tree2");
File tree1Path = new File(args[0]);
File tree2Path = new File(args[1]);
try {
TreeReaderFactory trf = new LabeledScoredTreeReaderFactory();
TreeReader tR1 = trf.newTreeReader(new BufferedReader(new FileReader(tree1Path)));
TreeReader tR2 = trf.newTreeReader(new BufferedReader(new FileReader(tree2Path)));
Tree t1 = tR1.readTree();
Tree t2 = tR2.readTree();
Set<Constituent> t1Diff = markDiff(t1, t2);
for (Constituent c : t1Diff) System.out.println(c);
} catch (FileNotFoundException e) {"File not found!");
} catch (IOException e) {"Unable to read file!");