Search in sources :

Example 1 with Constituent

use of edu.stanford.nlp.trees.Constituent in project CoreNLP by stanfordnlp.

the class TreeSpanScoring method countSpanErrors.

/**
 * Tallies the span disagreements between a gold tree and a guess tree.
 * <br>
 * Every labeled span found in one tree but not the other adds one error,
 * in both directions, so a span that is merely mislabeled costs two
 * errors.  Preterminal tags are compared position by position over the
 * shorter of the two tagged yields, and each differing tag likewise costs
 * two errors.
 * <br>
 * All labels and tags are reduced with {@code tlp.basicCategory()} before
 * being compared.
 *
 * @param tlp language pack supplying {@code basicCategory()}
 * @param goldTree the reference tree
 * @param guessTree the tree being scored
 * @return the total number of span and preterminal errors
 */
public static int countSpanErrors(TreebankLanguagePack tlp, Tree goldTree, Tree guessTree) {
    Set<Constituent> goldSpans = simplifyConstituents(tlp, goldTree.constituents(LabeledConstituent.factory()));
    Set<Constituent> guessSpans = simplifyConstituents(tlp, guessTree.constituents(LabeledConstituent.factory()));

    int total = 0;

    // spans the guess tree failed to produce
    for (Constituent span : goldSpans) {
        total += guessSpans.contains(span) ? 0 : 1;
    }
    // spans the guess tree produced that the gold tree does not have
    for (Constituent span : guessSpans) {
        total += goldSpans.contains(span) ? 0 : 1;
    }

    // constituents() does not report preterminals, so the tags are
    // compared separately here
    List<TaggedWord> goldTagged = goldTree.taggedYield();
    List<TaggedWord> guessTagged = guessTree.taggedYield();
    int shared = Math.min(goldTagged.size(), guessTagged.size());
    int pos = 0;
    while (pos < shared) {
        String expected = tlp.basicCategory(goldTagged.get(pos).tag());
        String actual = tlp.basicCategory(guessTagged.get(pos).tag());
        ++pos;
        if (expected.equals(actual)) {
            continue;
        }
        // a wrong tag is one span missing from the guess plus one extra
        // span in the guess, hence two errors
        total += 2;
    }
    return total;
}
Also used : TaggedWord(edu.stanford.nlp.ling.TaggedWord) Constituent(edu.stanford.nlp.trees.Constituent) LabeledConstituent(edu.stanford.nlp.trees.LabeledConstituent)

Example 2 with Constituent

use of edu.stanford.nlp.trees.Constituent in project CoreNLP by stanfordnlp.

the class TreeSpanScoring method simplifyConstituents.

/**
 * Builds a new set in which every constituent keeps its start and end
 * but has its label replaced by {@code tlp.basicCategory()} of the
 * original label.
 *
 * @param tlp language pack supplying {@code basicCategory()}
 * @param constituents constituents to simplify; each must be a
 *        {@link LabeledConstituent}
 * @return a freshly allocated set of simplified constituents
 * @throws AssertionError if an element is not a {@link LabeledConstituent}
 */
public static Set<Constituent> simplifyConstituents(TreebankLanguagePack tlp, Set<Constituent> constituents) {
    Set<Constituent> simplified = new HashSet<>();
    for (Constituent original : constituents) {
        if (original instanceof LabeledConstituent) {
            LabeledConstituent source = (LabeledConstituent) original;
            int from = source.start();
            int to = source.end();
            String basicLabel = tlp.basicCategory(source.value());
            simplified.add(new LabeledConstituent(from, to, basicLabel));
        } else {
            throw new AssertionError("Unexpected constituent type " + original.getClass());
        }
    }
    return simplified;
}
Also used : LabeledConstituent(edu.stanford.nlp.trees.LabeledConstituent) Constituent(edu.stanford.nlp.trees.Constituent) HashSet(java.util.HashSet)

Example 3 with Constituent

use of edu.stanford.nlp.trees.Constituent in project CoreNLP by stanfordnlp.

the class EvalbByCat method evaluate.

/**
 * Scores one guess tree against one gold tree separately for every
 * category returned by {@code makeObjectsByCat} for either tree, and
 * adds the per-category precision, recall and F1 to the running
 * counters.  When a writer is supplied and {@code runningAverages} is
 * set, a per-category report is printed as well.
 * <br>
 * If either tree is null a message is written to stderr and nothing is
 * counted.
 *
 * @param guess the tree being scored
 * @param gold the reference tree
 * @param pw destination for the optional report; may be null
 */
@Override
public void evaluate(Tree guess, Tree gold, PrintWriter pw) {
    if (gold == null || guess == null) {
        System.err.printf("%s: Cannot compare against a null gold or guess tree!%n", this.getClass().getName());
        return;
    }

    final Map<Label, Set<Constituent>> guessByCat = makeObjectsByCat(guess);
    final Map<Label, Set<Constituent>> goldByCat = makeObjectsByCat(gold);

    // union of the categories seen in either tree
    final Set<Label> allCats = Generics.newHashSet(guessByCat.keySet());
    allCats.addAll(goldByCat.keySet());

    final boolean report = pw != null && runningAverages;
    if (report) {
        pw.println("========================================");
        pw.println("Labeled Bracketed Evaluation by Category");
        pw.println("========================================");
    }

    num++;

    for (Label cat : allCats) {
        // a category missing from one side is scored against an empty set
        Set<Constituent> catGuess;
        if (guessByCat.containsKey(cat)) {
            catGuess = guessByCat.get(cat);
        } else {
            catGuess = Generics.newHashSet();
        }
        Set<Constituent> catGold;
        if (goldByCat.containsKey(cat)) {
            catGold = goldByCat.get(cat);
        } else {
            catGold = Generics.newHashSet();
        }

        // recall is precision with the two sets swapped
        double currentPrecision = precision(catGuess, catGold);
        double currentRecall = precision(catGold, catGuess);
        double currentF1;
        if (currentPrecision > 0.0 && currentRecall > 0.0) {
            currentF1 = 2.0 / (1.0 / currentPrecision + 1.0 / currentRecall);
        } else {
            currentF1 = 0.0;
        }

        // per-sentence sums
        precisions.incrementCount(cat, currentPrecision);
        recalls.incrementCount(cat, currentRecall);
        f1s.incrementCount(cat, currentF1);

        // size-weighted sums and their denominators
        precisions2.incrementCount(cat, catGuess.size() * currentPrecision);
        pnums2.incrementCount(cat, catGuess.size());
        recalls2.incrementCount(cat, catGold.size() * currentRecall);
        rnums2.incrementCount(cat, catGold.size());

        if (!report) {
            continue;
        }

        // every figure below is a percentage truncated to two decimals
        double pNow = ((int) (currentPrecision * 10000)) / 100.0;
        double pSentAve = ((int) (precisions.getCount(cat) * 10000 / num)) / 100.0;
        double pEvalb = ((int) (precisions2.getCount(cat) * 10000 / pnums2.getCount(cat))) / 100.0;
        pw.println(cat + "\tP: " + pNow + " (sent ave " + pSentAve + ") (evalb " + pEvalb + ")");

        double rNow = ((int) (currentRecall * 10000)) / 100.0;
        double rSentAve = ((int) (recalls.getCount(cat) * 10000 / num)) / 100.0;
        double rEvalb = ((int) (recalls2.getCount(cat) * 10000 / rnums2.getCount(cat))) / 100.0;
        pw.println("\tR: " + rNow + " (sent ave " + rSentAve + ") (evalb " + rEvalb + ")");

        double inverseRecall = rnums2.getCount(cat) / recalls2.getCount(cat);
        double inversePrecision = pnums2.getCount(cat) / precisions2.getCount(cat);
        double cF1 = 2.0 / (inverseRecall + inversePrecision);

        double fNow = ((int) (currentF1 * 10000)) / 100.0;
        double fSentAve = ((int) (10000 * f1s.getCount(cat) / num)) / 100.0;
        double fEvalb = ((int) (10000 * cF1)) / 100.0;
        pw.println(str + " F1: " + fNow + " (sent ave " + fSentAve + ", evalb " + fEvalb + ")");
    }

    if (report) {
        pw.println("========================================");
    }
}
Also used : Set(java.util.Set) Label(edu.stanford.nlp.ling.Label) Constituent(edu.stanford.nlp.trees.Constituent)

Example 4 with Constituent

use of edu.stanford.nlp.trees.Constituent in project CoreNLP by stanfordnlp.

the class ScrollableTreeJPanel method renderRows.

/**
 * Draws the words of {@code tree}'s yield and, below them, one labeled
 * horizontal bar per entry of {@code diffConstituents}.
 * <br>
 * Constituents are packed greedily: each one goes into the first row in
 * which it overlaps none of the spans already placed there (as decided
 * by {@code doesOverlap}), and a new row is opened when no existing row
 * fits.
 * <br>
 * The bar is skipped when its right end would index past
 * {@code yieldOffsets}, which happens when the yields of the two compared
 * trees do not match.  This check applies whichever row the constituent
 * lands in.
 *
 * @param g2 graphics context to draw on
 * @param fM metrics of the current font, used for row height and label width
 * @param defaultColor2 not used by this method
 */
private void renderRows(Graphics2D g2, FontMetrics fM, Color defaultColor2) {
    double nodeHeight = fM.getHeight();
    double layerMultiplier = (1.0 + belowLineSkip + aboveLineSkip + parentSkip);
    double layerHeight = nodeHeight * layerMultiplier;

    //Draw the yield
    List<HasWord> sentence = tree.yieldHasWord();
    for (int i = 0; i < sentence.size(); i++) {
        g2.drawString(sentence.get(i).word(), yieldOffsets[i], (float) (yieldHeight + layerHeight));
    }

    //Greedily draw the constituents
    final float rowOrigin = (float) (yieldHeight + 2.0 * layerHeight);
    List<List<IntPair>> rows = new ArrayList<>();
    for (Constituent c : diffConstituents) {
        // The bar runs to the offset of the word after the constituent,
        // unless the constituent ends on the last offset.
        int ext = (c.end() == (yieldOffsets.length - 1)) ? 0 : 1;
        int lineEndIdx = c.end() + ext;

        for (int rowIdx = 0; rowIdx < diffConstituents.size(); rowIdx++) {
            if (rowIdx >= rows.size()) {
                // Ran out of existing rows: open a fresh (empty) one, which
                // trivially has no overlap.
                rows.add(new ArrayList<>());
            }
            List<IntPair> row = rows.get(rowIdx);

            boolean foundOverlap = false;
            for (IntPair span : row) {
                if (doesOverlap(c, span)) {
                    foundOverlap = true;
                    break;
                }
            }
            if (foundOverlap) {
                continue;
            }

            row.add(new IntPair(c.start(), c.end()));
            float rowHeight = rowOrigin + (float) (rowIdx * layerHeight);
            double nodeWidth = fM.stringWidth(c.value());
            g2.drawString(c.value(), yieldOffsets[c.start()], rowHeight);
            // The end offset is out of range if the yields of the two compared
            // trees do not match.  Just leave the bar out in that case.
            if (lineEndIdx >= 0 && lineEndIdx < yieldOffsets.length) {
                g2.drawLine((int) (yieldOffsets[c.start()] + nodeWidth) + 10, (int) rowHeight, (int) (yieldOffsets[lineEndIdx]) - 15, (int) rowHeight);
            }
            break;
        }
    }
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) IntPair(edu.stanford.nlp.util.IntPair) List(java.util.List) Constituent(edu.stanford.nlp.trees.Constituent)

Example 5 with Constituent

use of edu.stanford.nlp.trees.Constituent in project CoreNLP by stanfordnlp.

the class Tdiff method main.

/**
 * Reads the first tree from each of two files, computes their difference
 * with {@code markDiff}, and prints the second tree in Penn format
 * followed by a blank line and one line per constituent returned by
 * {@code markDiff}.
 * <br>
 * Prints a usage message and returns if exactly two arguments are not
 * given.  Failures to open or read either file are logged rather than
 * propagated.
 *
 * @param args two file paths: the file holding the first tree and the
 *        file holding the second tree
 */
public static void main(String[] args) {
    if (args.length != 2) {
        System.out.println("Usage: java Tdiff tree1 tree2");
        return;
    }
    File tree1Path = new File(args[0]);
    File tree2Path = new File(args[1]);
    // try-with-resources closes both files on every exit path, including
    // when opening the second file fails after the first was opened
    try (BufferedReader in1 = new BufferedReader(new FileReader(tree1Path));
         BufferedReader in2 = new BufferedReader(new FileReader(tree2Path))) {
        TreeReaderFactory trf = new LabeledScoredTreeReaderFactory();
        TreeReader tR1 = trf.newTreeReader(in1);
        TreeReader tR2 = trf.newTreeReader(in2);
        Tree t1 = tR1.readTree();
        Tree t2 = tR2.readTree();
        Set<Constituent> t1Diff = markDiff(t1, t2);
        System.out.println(t2.pennString());
        System.out.println();
        for (Constituent c : t1Diff) {
            System.out.println(c);
        }
    } catch (FileNotFoundException e) {
        log.info("File not found!");
    } catch (IOException e) {
        log.info("Unable to read file!");
    }
}
Also used : TreeReader(edu.stanford.nlp.trees.TreeReader) Tree(edu.stanford.nlp.trees.Tree) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory) LabeledScoredTreeReaderFactory(edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory) Constituent(edu.stanford.nlp.trees.Constituent) LabeledConstituent(edu.stanford.nlp.trees.LabeledConstituent)

Aggregations

Constituent (edu.stanford.nlp.trees.Constituent)9 LabeledConstituent (edu.stanford.nlp.trees.LabeledConstituent)4 Tree (edu.stanford.nlp.trees.Tree)4 Label (edu.stanford.nlp.ling.Label)2 LabeledScoredConstituentFactory (edu.stanford.nlp.trees.LabeledScoredConstituentFactory)2 IntPair (edu.stanford.nlp.util.IntPair)2 Set (java.util.Set)2 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)1 CoreLabel (edu.stanford.nlp.ling.CoreLabel)1 HasWord (edu.stanford.nlp.ling.HasWord)1 TaggedWord (edu.stanford.nlp.ling.TaggedWord)1 ConstituentFactory (edu.stanford.nlp.trees.ConstituentFactory)1 LabeledScoredConstituent (edu.stanford.nlp.trees.LabeledScoredConstituent)1 LabeledScoredTreeReaderFactory (edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory)1 TreeReader (edu.stanford.nlp.trees.TreeReader)1 TreeReaderFactory (edu.stanford.nlp.trees.TreeReaderFactory)1 BufferedWriter (java.io.BufferedWriter)1 FileNotFoundException (java.io.FileNotFoundException)1 FileOutputStream (java.io.FileOutputStream)1 OutputStreamWriter (java.io.OutputStreamWriter)1