Search in sources :

Example 1 with Eval

use of edu.stanford.nlp.parser.metrics.Eval in project CoreNLP by stanfordnlp.

the class EvaluateTreebank method testOnTreebank.

/**
 * Test the parser on a treebank. Parses will be written to stdout, and
 * various other information will be written to stderr and stdout,
 * particularly if <code>op.testOptions.verbose</code> is true.
 * <p>
 * When <code>op.testOptions.quietEvaluation</code> is set, all per-sentence
 * output is sent to a {@code NullOutputStream}; the final summaries are
 * still written to {@code System.err}.
 * <p>
 * Two optional output files may be opened: the 1-best output file
 * (<code>op.testOptions.writeOutputFiles</code>) and the k-best
 * equivocation statistics file
 * (<code>op.testOptions.outputkBestEquivocation</code>). If either cannot
 * be opened, the stack trace is printed and the corresponding writer is
 * left {@code null}. Both files are closed before this method exits, even
 * if parsing or evaluation throws.
 *
 * @param testTreebank The treebank to parse
 * @return The labeled precision/recall F<sub>1</sub> (EVALB measure)
 *         of the parser on the treebank, or 0.0 if <code>factLB</code>
 *         is null.
 */
public double testOnTreebank(Treebank testTreebank) {
    log.info("Testing on treebank");
    Timing treebankTotalTimer = new Timing();
    TreePrint treePrint = op.testOptions.treePrint(op.tlpParams);
    TreebankLangParserParams tlpParams = op.tlpParams;
    TreebankLanguagePack tlp = op.langpack();
    PrintWriter pwOut, pwErr;
    if (op.testOptions.quietEvaluation) {
        NullOutputStream quiet = new NullOutputStream();
        pwOut = tlpParams.pw(quiet);
        pwErr = tlpParams.pw(quiet);
    } else {
        pwOut = tlpParams.pw();
        pwErr = tlpParams.pw(System.err);
    }
    if (op.testOptions.verbose) {
        pwErr.print("Testing ");
        pwErr.println(testTreebank.textualSummary(tlp));
    }
    if (op.testOptions.evalb) {
        EvalbFormatWriter.initEVALBfiles(tlpParams);
    }
    PrintWriter pwFileOut = null;
    PrintWriter pwStats = null;
    // Everything that can touch the two file writers lives inside this try so
    // that the finally block releases them on both normal and exceptional exit.
    try {
        if (op.testOptions.writeOutputFiles) {
            String fname = op.testOptions.outputFilesPrefix + "." + op.testOptions.outputFilesExtension;
            try {
                pwFileOut = op.tlpParams.pw(new FileOutputStream(fname));
            } catch (IOException ioe) {
                // Best effort: keep going without the 1-best output file.
                ioe.printStackTrace();
            }
        }
        if (op.testOptions.outputkBestEquivocation != null) {
            try {
                pwStats = op.tlpParams.pw(new FileOutputStream(op.testOptions.outputkBestEquivocation));
            } catch (IOException ioe) {
                // Best effort: keep going without the statistics file.
                ioe.printStackTrace();
            }
        }
        if (op.testOptions.testingThreads != 1) {
            MulticoreWrapper<List<? extends HasWord>, ParserQuery> wrapper = new MulticoreWrapper<>(op.testOptions.testingThreads, new ParsingThreadsafeProcessor(pqFactory, pwErr));
            // Gold trees are queued in submission order and paired FIFO with finished
            // queries. NOTE(review): this presumes the wrapper hands results back in
            // the order they were put - confirm against MulticoreWrapper.
            LinkedList<Tree> goldTrees = new LinkedList<>();
            for (Tree goldTree : testTreebank) {
                List<? extends HasWord> sentence = getInputSentence(goldTree);
                goldTrees.add(goldTree);
                pwErr.println("Parsing [len. " + sentence.size() + "]: " + SentenceUtils.listToString(sentence));
                wrapper.put(sentence);
                // Drain whatever has finished so far.
                while (wrapper.peek()) {
                    ParserQuery finishedQuery = wrapper.poll();
                    Tree finishedGold = goldTrees.poll();
                    processResults(finishedQuery, finishedGold, pwErr, pwOut, pwFileOut, pwStats, treePrint);
                }
            }
            // All sentences submitted: wait for the stragglers, then drain them.
            wrapper.join();
            while (wrapper.peek()) {
                ParserQuery finishedQuery = wrapper.poll();
                Tree finishedGold = goldTrees.poll();
                processResults(finishedQuery, finishedGold, pwErr, pwOut, pwFileOut, pwStats, treePrint);
            }
        } else {
            // Single-threaded: one query object is reused for every sentence.
            ParserQuery pq = pqFactory.parserQuery();
            for (Tree goldTree : testTreebank) {
                final List<CoreLabel> sentence = getInputSentence(goldTree);
                pwErr.println("Parsing [len. " + sentence.size() + "]: " + SentenceUtils.listToString(sentence));
                pq.parseAndReport(sentence, pwErr);
                processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
            }
        }
        // Done parsing...print the results of the evaluations
        treebankTotalTimer.done("Testing on treebank");
        if (op.testOptions.quietEvaluation) {
            // Per-sentence chatter was suppressed, but the summaries below are still shown.
            pwErr = tlpParams.pw(System.err);
        }
        if (saidMemMessage) {
            ParserUtils.printOutOfMemory(pwErr);
        }
        if (op.testOptions.evalb) {
            EvalbFormatWriter.closeEVALBfiles();
        }
        if (numSkippedEvals != 0) {
            pwErr.printf("Unable to evaluate %d parser hypotheses due to yield mismatch\n", numSkippedEvals);
        }
        // only created here so we know what parser types are supported...
        ParserQuery pq = pqFactory.parserQuery();
        if (summary) {
            if (pcfgLB != null) {
                pcfgLB.display(false, pwErr);
            }
            if (pcfgChildSpecific != null) {
                pcfgChildSpecific.display(false, pwErr);
            }
            if (pcfgLA != null) {
                pcfgLA.display(false, pwErr);
            }
            if (pcfgCB != null) {
                pcfgCB.display(false, pwErr);
            }
            if (pcfgDA != null) {
                pcfgDA.display(false, pwErr);
            }
            if (pcfgTA != null) {
                pcfgTA.display(false, pwErr);
            }
            if (pcfgLL != null && pq.getPCFGParser() != null) {
                pcfgLL.display(false, pwErr);
            }
            if (depDA != null) {
                depDA.display(false, pwErr);
            }
            if (depTA != null) {
                depTA.display(false, pwErr);
            }
            if (depLL != null && pq.getDependencyParser() != null) {
                depLL.display(false, pwErr);
            }
            if (factLB != null) {
                factLB.display(false, pwErr);
            }
            if (factChildSpecific != null) {
                factChildSpecific.display(false, pwErr);
            }
            if (factLA != null) {
                factLA.display(false, pwErr);
            }
            if (factCB != null) {
                factCB.display(false, pwErr);
            }
            if (factDA != null) {
                factDA.display(false, pwErr);
            }
            if (factTA != null) {
                factTA.display(false, pwErr);
            }
            if (factLL != null && pq.getFactoredParser() != null) {
                factLL.display(false, pwErr);
            }
            if (pcfgCatE != null) {
                pcfgCatE.display(false, pwErr);
            }
            for (Eval eval : evals) {
                eval.display(false, pwErr);
            }
            for (BestOfTopKEval eval : topKEvals) {
                eval.display(false, pwErr);
            }
        }
        // these ones only have a display mode, so display if turned on!!
        if (pcfgRUO != null) {
            pcfgRUO.display(true, pwErr);
        }
        if (pcfgCUO != null) {
            pcfgCUO.display(true, pwErr);
        }
        if (tsv) {
            // One header row and one value row; a missing metric leaves its column
            // empty but the tab separator is always written so columns stay aligned.
            NumberFormat nf = new DecimalFormat("0.00");
            pwErr.println("factF1\tfactDA\tfactEx\tpcfgF1\tdepDA\tfactTA\tnum");
            if (factLB != null) {
                pwErr.print(nf.format(factLB.getEvalbF1Percent()));
            }
            pwErr.print("\t");
            if (pq.getDependencyParser() != null && factDA != null) {
                pwErr.print(nf.format(factDA.getEvalbF1Percent()));
            }
            pwErr.print("\t");
            if (factLB != null) {
                pwErr.print(nf.format(factLB.getExactPercent()));
            }
            pwErr.print("\t");
            if (pcfgLB != null) {
                pwErr.print(nf.format(pcfgLB.getEvalbF1Percent()));
            }
            pwErr.print("\t");
            if (pq.getDependencyParser() != null && depDA != null) {
                pwErr.print(nf.format(depDA.getEvalbF1Percent()));
            }
            pwErr.print("\t");
            if (pq.getPCFGParser() != null && factTA != null) {
                pwErr.print(nf.format(factTA.getEvalbF1Percent()));
            }
            pwErr.print("\t");
            if (factLB != null) {
                pwErr.print(factLB.getNum());
            }
            pwErr.println();
        }
        double f1 = 0.0;
        if (factLB != null) {
            f1 = factLB.getEvalbF1();
        }
        if (parserQueryEvals != null) {
            for (ParserQueryEval parserQueryEval : parserQueryEvals) {
                parserQueryEval.display(false, pwErr);
            }
        }
        return f1;
    } finally {
        // Close files (if necessary), whether or not evaluation completed.
        if (pwFileOut != null) {
            pwFileOut.close();
        }
        if (pwStats != null) {
            pwStats.close();
        }
    }
}
Also used : DecimalFormat(java.text.DecimalFormat) TreePrint(edu.stanford.nlp.trees.TreePrint) Tree(edu.stanford.nlp.trees.Tree) TreebankLanguagePack(edu.stanford.nlp.trees.TreebankLanguagePack) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) LeafAncestorEval(edu.stanford.nlp.parser.metrics.LeafAncestorEval) AbstractEval(edu.stanford.nlp.parser.metrics.AbstractEval) TaggingEval(edu.stanford.nlp.parser.metrics.TaggingEval) TopMatchEval(edu.stanford.nlp.parser.metrics.TopMatchEval) FilteredEval(edu.stanford.nlp.parser.metrics.FilteredEval) Eval(edu.stanford.nlp.parser.metrics.Eval) UnlabeledAttachmentEval(edu.stanford.nlp.parser.metrics.UnlabeledAttachmentEval) BestOfTopKEval(edu.stanford.nlp.parser.metrics.BestOfTopKEval) ParserQueryEval(edu.stanford.nlp.parser.metrics.ParserQueryEval) PrintWriter(java.io.PrintWriter) MulticoreWrapper(edu.stanford.nlp.util.concurrent.MulticoreWrapper) IOException(java.io.IOException) ParserQueryEval(edu.stanford.nlp.parser.metrics.ParserQueryEval) LinkedList(java.util.LinkedList) ParsingThreadsafeProcessor(edu.stanford.nlp.parser.common.ParsingThreadsafeProcessor) FileOutputStream(java.io.FileOutputStream) Timing(edu.stanford.nlp.util.Timing) BestOfTopKEval(edu.stanford.nlp.parser.metrics.BestOfTopKEval) NullOutputStream(edu.stanford.nlp.io.NullOutputStream) ParserQuery(edu.stanford.nlp.parser.common.ParserQuery) NumberFormat(java.text.NumberFormat)

Example 2 with Eval

use of edu.stanford.nlp.parser.metrics.Eval in project CoreNLP by stanfordnlp.

the class EvaluateTreebank method processResults.

/**
 * Prints the parse produced by one finished query and scores it against the
 * gold tree with every evaluator that is configured on this object.
 * <p>
 * The best parse is printed to {@code pwOut}. Depending on the test options,
 * the method then prints all best PCFG parses, the k-best PCFG parses, the
 * k-good factored parses, or writes the 1-best tree to {@code pwFileOut}.
 * If k-best equivocation output is enabled, one line of
 * {@code entropy<TAB>numTrees<TAB>sentenceLength} is written to
 * {@code pwStats}. Finally the PCFG, dependency and factored evaluations are
 * run. When the gold or hypothesis tree cannot be transformed, or their
 * yields differ in length, a message is written to {@code pwErr},
 * {@code numSkippedEvals} is incremented and the method returns without
 * evaluating.
 * <p>
 * If the query has no parse, {@code treePrint} is still invoked with a
 * {@code null} tree and nothing is evaluated.
 *
 * @param pq        the query holding the parse results for one sentence
 * @param goldTree  the gold-standard tree for that sentence
 * @param pwErr     destination for diagnostics and per-sentence evaluation output
 * @param pwOut     destination for the printed parses
 * @param pwFileOut 1-best output file, or {@code null} if none is being written
 * @param pwStats   k-best equivocation statistics file, or {@code null} if none
 *                  is being written
 * @param treePrint the printer used to render trees
 */
public void processResults(ParserQuery pq, Tree goldTree, PrintWriter pwErr, PrintWriter pwOut, PrintWriter pwFileOut, PrintWriter pwStats, TreePrint treePrint) {
    if (pq.saidMemMessage()) {
        saidMemMessage = true;
    }
    Tree tree;
    List<? extends HasWord> sentence = pq.originalSentence();
    try {
        tree = pq.getBestParse();
    } catch (NoSuchParseException e) {
        // No parse was found; the rest of the method handles a null tree.
        tree = null;
    }
    // NOTE(review): the k-best branches below dereference this list without a null
    // check, so they presumably rely on kbestPCFG being positive whenever k-best
    // printing or top-k evaluation is enabled - confirm where kbestPCFG is set.
    List<ScoredObject<Tree>> kbestPCFGTrees = null;
    if (tree != null && kbestPCFG > 0) {
        kbestPCFGTrees = pq.getKBestPCFGParses(kbestPCFG);
    }
    //combo parse goes to pwOut (System.out)
    if (op.testOptions.verbose) {
        pwOut.println("ComboParser best");
        Tree ot = tree;
        // Wrap the parse under the start symbol if it is not already rooted there.
        if (ot != null && !op.tlpParams.treebankLanguagePack().isStartSymbol(ot.value())) {
            ot = ot.treeFactory().newTreeNode(op.tlpParams.treebankLanguagePack().startSymbol(), Collections.singletonList(ot));
        }
        treePrint.printTree(ot, pwOut);
    } else {
        treePrint.printTree(tree, pwOut);
    }
    // print various statistics
    if (tree != null) {
        if (op.testOptions.printAllBestParses) {
            List<ScoredObject<Tree>> parses = pq.getBestPCFGParses();
            int sz = parses.size();
            if (sz > 1) {
                pwOut.println("There were " + sz + " best PCFG parses with score " + parses.get(0).score() + '.');
                Tree transGoldTree = collinizer.transformTree(goldTree);
                int iii = 0;
                for (ScoredObject<Tree> sot : parses) {
                    iii++;
                    Tree tb = sot.object();
                    Tree tbd = debinarizer.transformTree(tb);
                    tbd = subcategoryStripper.transformTree(tbd);
                    pq.restoreOriginalWords(tbd);
                    // NOTE(review): this prints the transformed tree's own score, whereas
                    // the other k-best branches print the ScoredObject's score - confirm
                    // that this is intended.
                    pwOut.println("PCFG Parse #" + iii + " with score " + tbd.score());
                    tbd.pennPrint(pwOut);
                    Tree tbtr = collinizer.transformTree(tbd);
                    // pwOut.println("Tree size = " + tbtr.size() + "; depth = " + tbtr.depth());
                    kGoodLB.evaluate(tbtr, transGoldTree, pwErr);
                }
            }
        } else if (op.testOptions.printPCFGkBest > 0 && op.testOptions.outputkBestEquivocation == null) {
            // Huang and Chiang (2006) Algorithm 3 output from the PCFG parser.
            // The parser may return fewer parses than requested, so clamp the count
            // instead of letting subList throw.
            int numToPrint = Math.min(op.testOptions.printPCFGkBest, kbestPCFGTrees.size());
            List<ScoredObject<Tree>> trees = kbestPCFGTrees.subList(0, numToPrint);
            Tree transGoldTree = collinizer.transformTree(goldTree);
            int i = 0;
            for (ScoredObject<Tree> tp : trees) {
                i++;
                pwOut.println("PCFG Parse #" + i + " with score " + tp.score());
                Tree tbd = tp.object();
                tbd.pennPrint(pwOut);
                Tree tbtr = collinizer.transformTree(tbd);
                kGoodLB.evaluate(tbtr, transGoldTree, pwErr);
            }
        } else if (op.testOptions.printFactoredKGood > 0 && pq.hasFactoredParse()) {
            // Chart parser (factored) n-best list
            // DZ: debug n best trees
            List<ScoredObject<Tree>> trees = pq.getKGoodFactoredParses(op.testOptions.printFactoredKGood);
            Tree transGoldTree = collinizer.transformTree(goldTree);
            int ii = 0;
            for (ScoredObject<Tree> tp : trees) {
                ii++;
                pwOut.println("Factored Parse #" + ii + " with score " + tp.score());
                Tree tbd = tp.object();
                tbd.pennPrint(pwOut);
                Tree tbtr = collinizer.transformTree(tbd);
                // NOTE(review): unlike the PCFG branches this evaluates to pwOut rather
                // than pwErr - confirm whether that is deliberate.
                kGoodLB.evaluate(tbtr, transGoldTree, pwOut);
            }
        } else if (pwFileOut != null) {
            //1-best output
            pwFileOut.println(tree.toString());
        }
        //Print the derivational entropy
        // pwStats is null when the statistics file could not be opened; skip quietly
        // in that case, just as the 1-best branch above does for pwFileOut.
        if (pwStats != null && op.testOptions.outputkBestEquivocation != null && op.testOptions.printPCFGkBest > 0) {
            int numTrees = Math.min(op.testOptions.printPCFGkBest, kbestPCFGTrees.size());
            List<ScoredObject<Tree>> trees = kbestPCFGTrees.subList(0, numTrees);
            double[] logScores = new double[trees.size()];
            int treeId = 0;
            for (ScoredObject<Tree> kBestTree : trees) {
                logScores[treeId++] = kBestTree.score();
            }
            double entropy = 0.0;
            // With no parses there is nothing to normalize; report an entropy of 0.
            if (logScores.length > 0) {
                //Re-normalize
                double denom = ArrayMath.logSum(logScores);
                for (double logScore : logScores) {
                    double logPr = logScore - denom;
                    entropy += Math.exp(logPr) * (logPr / Math.log(2));
                }
                //Convert to bits
                entropy *= -1;
            }
            pwStats.printf("%f\t%d\t%d\n", entropy, trees.size(), sentence.size());
        }
    }
    // Perform various evaluations specified by the user
    if (tree != null) {
        //Strip subcategories and remove punctuation for evaluation
        tree = subcategoryStripper.transformTree(tree);
        Tree treeFact = collinizer.transformTree(tree);
        //Setup the gold tree
        if (op.testOptions.verbose) {
            pwOut.println("Correct parse");
            treePrint.printTree(goldTree, pwOut);
        }
        Tree transGoldTree = collinizer.transformTree(goldTree);
        if (transGoldTree != null) {
            transGoldTree = subcategoryStripper.transformTree(transGoldTree);
        }
        //Can't do evaluation in these cases
        if (transGoldTree == null) {
            pwErr.println("Couldn't transform gold tree for evaluation, skipping eval. Gold tree was:");
            goldTree.pennPrint(pwErr);
            numSkippedEvals++;
            return;
        } else if (treeFact == null) {
            pwErr.println("Couldn't transform hypothesis tree for evaluation, skipping eval. Tree was:");
            tree.pennPrint(pwErr);
            numSkippedEvals++;
            return;
        } else if (treeFact.yield().size() != transGoldTree.yield().size()) {
            List<Label> fYield = treeFact.yield();
            List<Label> gYield = transGoldTree.yield();
            pwErr.println("WARNING: Evaluation could not be performed due to gold/parsed yield mismatch.");
            pwErr.printf("  sizes: gold: %d (transf) %d (orig); parsed: %d (transf) %d (orig).%n", gYield.size(), goldTree.yield().size(), fYield.size(), tree.yield().size());
            pwErr.println("  gold: " + SentenceUtils.listToString(gYield, true));
            pwErr.println("  pars: " + SentenceUtils.listToString(fYield, true));
            numSkippedEvals++;
            return;
        }
        if (topKEvals.size() > 0) {
            List<Tree> transGuesses = new ArrayList<>();
            int kbest = Math.min(op.testOptions.evalPCFGkBest, kbestPCFGTrees.size());
            for (ScoredObject<Tree> guess : kbestPCFGTrees.subList(0, kbest)) {
                transGuesses.add(collinizer.transformTree(guess.object()));
            }
            for (BestOfTopKEval eval : topKEvals) {
                eval.evaluate(transGuesses, transGoldTree, pwErr);
            }
        }
        //PCFG eval
        Tree treePCFG = pq.getBestPCFGParse();
        if (treePCFG != null) {
            Tree treePCFGeval = collinizer.transformTree(treePCFG);
            if (pcfgLB != null) {
                pcfgLB.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgChildSpecific != null) {
                pcfgChildSpecific.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgLA != null) {
                pcfgLA.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgCB != null) {
                pcfgCB.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgDA != null) {
                // Re-index the leaves after Collinization, stripping traces, etc.
                treePCFGeval.indexLeaves(true);
                transGoldTree.indexLeaves(true);
                pcfgDA.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgTA != null) {
                pcfgTA.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgLL != null && pq.getPCFGParser() != null) {
                pcfgLL.recordScore(pq.getPCFGParser(), pwErr);
            }
            if (pcfgRUO != null) {
                pcfgRUO.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgCUO != null) {
                pcfgCUO.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgCatE != null) {
                pcfgCatE.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
        }
        //Dependency eval
        // todo: is treeDep really useful here, or should we really use depDAEval tree (debinarized) throughout? We use it for parse, and it sure seems like we could use it for tag eval, but maybe not factDA?
        Tree treeDep = pq.getBestDependencyParse(false);
        if (treeDep != null) {
            Tree goldTreeB = binarizerOnly.transformTree(goldTree);
            // Work on a copy so that indexing and head percolation leave goldTree untouched.
            Tree goldTreeEval = goldTree.deepCopy();
            goldTreeEval.indexLeaves(true);
            goldTreeEval.percolateHeads(op.langpack().headFinder());
            Tree depDAEval = pq.getBestDependencyParse(true);
            depDAEval.indexLeaves(true);
            depDAEval.percolateHeadIndices();
            if (depDA != null) {
                depDA.evaluate(depDAEval, goldTreeEval, pwErr);
            }
            if (depTA != null) {
                Tree undoneTree = debinarizer.transformTree(treeDep);
                undoneTree = subcategoryStripper.transformTree(undoneTree);
                pq.restoreOriginalWords(undoneTree);
                // pwErr.println("subcategoryStripped tree: " + undoneTree.toStructureDebugString());
                depTA.evaluate(undoneTree, goldTree, pwErr);
            }
            if (depLL != null && pq.getDependencyParser() != null) {
                depLL.recordScore(pq.getDependencyParser(), pwErr);
            }
            // Fall back to the dependency parse when there is no factored parse.
            Tree factTreeB;
            if (pq.hasFactoredParse()) {
                factTreeB = pq.getBestFactoredParse();
            } else {
                factTreeB = treeDep;
            }
            if (factDA != null) {
                factDA.evaluate(factTreeB, goldTreeB, pwErr);
            }
        }
        //Factored parser (1best) eval
        if (factLB != null) {
            factLB.evaluate(treeFact, transGoldTree, pwErr);
        }
        if (factChildSpecific != null) {
            factChildSpecific.evaluate(treeFact, transGoldTree, pwErr);
        }
        if (factLA != null) {
            factLA.evaluate(treeFact, transGoldTree, pwErr);
        }
        if (factTA != null) {
            factTA.evaluate(tree, boundaryRemover.transformTree(goldTree), pwErr);
        }
        if (factLL != null && pq.getFactoredParser() != null) {
            factLL.recordScore(pq.getFactoredParser(), pwErr);
        }
        if (factCB != null) {
            factCB.evaluate(treeFact, transGoldTree, pwErr);
        }
        for (Eval eval : evals) {
            eval.evaluate(treeFact, transGoldTree, pwErr);
        }
        if (parserQueryEvals != null) {
            for (ParserQueryEval eval : parserQueryEvals) {
                eval.evaluate(pq, transGoldTree, pwErr);
            }
        }
        if (op.testOptions.evalb) {
            // empty out scores just in case
            nanScores(tree);
            EvalbFormatWriter.writeEVALBline(treeFact, transGoldTree);
        }
    }
    pwErr.println();
}
Also used : ArrayList(java.util.ArrayList) ParserQueryEval(edu.stanford.nlp.parser.metrics.ParserQueryEval) TreePrint(edu.stanford.nlp.trees.TreePrint) NoSuchParseException(edu.stanford.nlp.parser.common.NoSuchParseException) ScoredObject(edu.stanford.nlp.util.ScoredObject) Tree(edu.stanford.nlp.trees.Tree) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) LeafAncestorEval(edu.stanford.nlp.parser.metrics.LeafAncestorEval) AbstractEval(edu.stanford.nlp.parser.metrics.AbstractEval) TaggingEval(edu.stanford.nlp.parser.metrics.TaggingEval) TopMatchEval(edu.stanford.nlp.parser.metrics.TopMatchEval) FilteredEval(edu.stanford.nlp.parser.metrics.FilteredEval) Eval(edu.stanford.nlp.parser.metrics.Eval) UnlabeledAttachmentEval(edu.stanford.nlp.parser.metrics.UnlabeledAttachmentEval) BestOfTopKEval(edu.stanford.nlp.parser.metrics.BestOfTopKEval) ParserQueryEval(edu.stanford.nlp.parser.metrics.ParserQueryEval) BestOfTopKEval(edu.stanford.nlp.parser.metrics.BestOfTopKEval)

Aggregations

AbstractEval (edu.stanford.nlp.parser.metrics.AbstractEval)2 BestOfTopKEval (edu.stanford.nlp.parser.metrics.BestOfTopKEval)2 Eval (edu.stanford.nlp.parser.metrics.Eval)2 FilteredEval (edu.stanford.nlp.parser.metrics.FilteredEval)2 LeafAncestorEval (edu.stanford.nlp.parser.metrics.LeafAncestorEval)2 ParserQueryEval (edu.stanford.nlp.parser.metrics.ParserQueryEval)2 TaggingEval (edu.stanford.nlp.parser.metrics.TaggingEval)2 TopMatchEval (edu.stanford.nlp.parser.metrics.TopMatchEval)2 UnlabeledAttachmentEval (edu.stanford.nlp.parser.metrics.UnlabeledAttachmentEval)2 Tree (edu.stanford.nlp.trees.Tree)2 TreePrint (edu.stanford.nlp.trees.TreePrint)2 ArrayList (java.util.ArrayList)2 LinkedList (java.util.LinkedList)2 List (java.util.List)2 NullOutputStream (edu.stanford.nlp.io.NullOutputStream)1 NoSuchParseException (edu.stanford.nlp.parser.common.NoSuchParseException)1 ParserQuery (edu.stanford.nlp.parser.common.ParserQuery)1 ParsingThreadsafeProcessor (edu.stanford.nlp.parser.common.ParsingThreadsafeProcessor)1 TreebankLanguagePack (edu.stanford.nlp.trees.TreebankLanguagePack)1 ScoredObject (edu.stanford.nlp.util.ScoredObject)1