Search in sources :

Example 1 with TreePrint

Use of edu.stanford.nlp.trees.TreePrint in the CoreNLP project by stanfordnlp.

From the class LexicalizedParserITest, method testChineseDependenciesSemanticHead.

public void testChineseDependenciesSemanticHead() {
    // Parse the second Chinese test sentence once, then verify several output formats on it.
    Tree parsed = chineseParser.parse(chineseTest2);
    // Constituency (Penn bracketing) output must match the expected tree.
    compareSingleOutput(parsed, false, chinesePennPrint, expectedChineseTree2);
    // Basic Stanford Dependencies via the pre-built printer from setUp.
    compareSingleOutput(parsed, false, chineseTypDepPrint, expectedChineseDeps2sd);
    // A printer constructed with explicit head finders from the parser's language params
    // should produce the same basic dependencies as the pre-built one.
    TreePrint explicitHeadFinderPrint = new TreePrint("typedDependencies", "basicDependencies", chineseParser.treebankLanguagePack(), chineseParser.getTLPParams().headFinder(), chineseParser.getTLPParams().typedDependencyHeadFinder());
    compareSingleOutput(parsed, false, explicitHeadFinderPrint, expectedChineseDeps2sd);
}
Also used : Tree(edu.stanford.nlp.trees.Tree) TreePrint(edu.stanford.nlp.trees.TreePrint)

Example 2 with TreePrint

Use of edu.stanford.nlp.trees.TreePrint in the CoreNLP project by stanfordnlp.

From the class LexicalizedParserITest, method setUp.

// TODO: add more tests
@Override
public void setUp() throws Exception {
    synchronized (LexicalizedParserITest.class) {
        // Parser models are expensive to load, so build everything only once per JVM.
        if (englishParser != null) {
            return;
        }
        // sharing a bunch of code here with the webapp in
        // parser/webapp/index.jsp...  perhaps we could reuse that code
        englishParser = LexicalizedParser.loadModel();
        TreebankLanguagePack langPack = englishParser.getOp().tlpParams.treebankLanguagePack();
        tagPrint = new TreePrint("wordsAndTags", langPack);
        pennPrint = new TreePrint("penn", langPack);
        typDepPrint = new TreePrint("typedDependencies", "basicDependencies", langPack);
        // default is now CCprocessed
        typDepColPrint = new TreePrint("typedDependencies", langPack);
        // The Chinese model is expected to sit next to the default English model on disk.
        File englishPath = new File(LexicalizedParser.DEFAULT_PARSER_LOC);
        String chinesePath = (englishPath.getParent() + File.separator + "chineseFactored.ser.gz");
        chineseParser = LexicalizedParser.loadModel(chinesePath);
        langPack = chineseParser.getOp().tlpParams.treebankLanguagePack();
        // test was made with Chinese SD not UD
        chineseParser.getTLPParams().setGenerateOriginalDependencies(true);
        chinesePennPrint = new TreePrint("penn", langPack);
        chineseTypDepPrint = new TreePrint("typedDependencies", "basicDependencies", langPack);
    }
}
Also used : TreePrint(edu.stanford.nlp.trees.TreePrint) TreebankLanguagePack(edu.stanford.nlp.trees.TreebankLanguagePack) File(java.io.File)

Example 3 with TreePrint

Use of edu.stanford.nlp.trees.TreePrint in the CoreNLP project by stanfordnlp.

From the class JSONOutputter, method print.

/** {@inheritDoc}
 *
 *  Serializes the annotated document to {@code target} as a single JSON object:
 *  document-level metadata first, then (only when the corresponding annotation
 *  is present) per-sentence parse trees, dependency graphs, sentiment,
 *  OpenIE/KBP relation triples, entity mentions and tokens, followed by
 *  coreference chains and quotes.
 */
// It's lying; we need the "redundant" casts (as of 2014-09-08)
@SuppressWarnings("RedundantCast")
@Override
public void print(Annotation doc, OutputStream target, Options options) throws IOException {
    PrintWriter writer = new PrintWriter(IOUtils.encodedOutputStreamWriter(target, options.encoding));
    JSONWriter l0 = new JSONWriter(writer, options);
    l0.object(l1 -> {
        // Document-level metadata (presumably JSONWriter omits null values -- TODO confirm).
        l1.set("docId", doc.get(CoreAnnotations.DocIDAnnotation.class));
        l1.set("docDate", doc.get(CoreAnnotations.DocDateAnnotation.class));
        l1.set("docSourceType", doc.get(CoreAnnotations.DocSourceTypeAnnotation.class));
        l1.set("docType", doc.get(CoreAnnotations.DocTypeAnnotation.class));
        l1.set("author", doc.get(CoreAnnotations.AuthorAnnotation.class));
        l1.set("location", doc.get(CoreAnnotations.LocationAnnotation.class));
        if (options.includeText) {
            l1.set("text", doc.get(CoreAnnotations.TextAnnotation.class));
        }
        // One JSON object per sentence; each field below is emitted only if its annotation exists.
        if (doc.get(CoreAnnotations.SentencesAnnotation.class) != null) {
            l1.set("sentences", doc.get(CoreAnnotations.SentencesAnnotation.class).stream().map(sentence -> (Consumer<Writer>) (Writer l2) -> {
                l2.set("id", sentence.get(CoreAnnotations.SentenceIDAnnotation.class));
                l2.set("index", sentence.get(CoreAnnotations.SentenceIndexAnnotation.class));
                l2.set("line", sentence.get(CoreAnnotations.LineNumberAnnotation.class));
                // Render the constituency parse to a string; fall back to one-line
                // output when the caller left the default constituent tree printer.
                StringWriter treeStrWriter = new StringWriter();
                TreePrint treePrinter = options.constituentTreePrinter;
                if (treePrinter == AnnotationOutputter.DEFAULT_CONSTITUENT_TREE_PRINTER) {
                    treePrinter = new TreePrint("oneline");
                }
                treePrinter.printTree(sentence.get(TreeCoreAnnotations.TreeAnnotation.class), new PrintWriter(treeStrWriter, true));
                String treeStr = treeStrWriter.toString().trim();
                // Omit the parse entirely when the parser skipped or failed on this sentence.
                if (!"SENTENCE_SKIPPED_OR_UNPARSABLE".equals(treeStr)) {
                    l2.set("parse", treeStr);
                }
                // Three flavors of dependency graphs.
                l2.set("basicDependencies", buildDependencyTree(sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class)));
                l2.set("enhancedDependencies", buildDependencyTree(sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class)));
                l2.set("enhancedPlusPlusDependencies", buildDependencyTree(sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class)));
                // Sentiment: numeric class from the annotated tree plus the class label (spaces stripped).
                Tree sentimentTree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
                if (sentimentTree != null) {
                    int sentiment = RNNCoreAnnotations.getPredictedClass(sentimentTree);
                    String sentimentClass = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
                    l2.set("sentimentValue", Integer.toString(sentiment));
                    l2.set("sentiment", sentimentClass.replaceAll(" ", ""));
                }
                // OpenIE relation triples (subject / relation / object with token spans).
                Collection<RelationTriple> openIETriples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
                if (openIETriples != null) {
                    l2.set("openie", openIETriples.stream().map(triple -> (Consumer<Writer>) (Writer tripleWriter) -> {
                        tripleWriter.set("subject", triple.subjectGloss());
                        tripleWriter.set("subjectSpan", Span.fromPair(triple.subjectTokenSpan()));
                        tripleWriter.set("relation", triple.relationGloss());
                        tripleWriter.set("relationSpan", Span.fromPair(triple.relationTokenSpan()));
                        tripleWriter.set("object", triple.objectGloss());
                        tripleWriter.set("objectSpan", Span.fromPair(triple.objectTokenSpan()));
                    }));
                }
                // KBP relation triples, emitted in the same shape as the OpenIE ones.
                Collection<RelationTriple> kbpTriples = sentence.get(CoreAnnotations.KBPTriplesAnnotation.class);
                if (kbpTriples != null) {
                    l2.set("kbp", kbpTriples.stream().map(triple -> (Consumer<Writer>) (Writer tripleWriter) -> {
                        tripleWriter.set("subject", triple.subjectGloss());
                        tripleWriter.set("subjectSpan", Span.fromPair(triple.subjectTokenSpan()));
                        tripleWriter.set("relation", triple.relationGloss());
                        tripleWriter.set("relationSpan", Span.fromPair(triple.relationTokenSpan()));
                        tripleWriter.set("object", triple.objectGloss());
                        tripleWriter.set("objectSpan", Span.fromPair(triple.objectTokenSpan()));
                    }));
                }
                // Entity mentions: document-level token offsets are converted to
                // sentence-relative ones by subtracting the sentence's token begin.
                if (sentence.get(CoreAnnotations.MentionsAnnotation.class) != null) {
                    Integer sentTokenBegin = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
                    l2.set("entitymentions", sentence.get(CoreAnnotations.MentionsAnnotation.class).stream().map(m -> (Consumer<Writer>) (Writer l3) -> {
                        Integer tokenBegin = m.get(CoreAnnotations.TokenBeginAnnotation.class);
                        Integer tokenEnd = m.get(CoreAnnotations.TokenEndAnnotation.class);
                        l3.set("docTokenBegin", tokenBegin);
                        l3.set("docTokenEnd", tokenEnd);
                        if (tokenBegin != null && sentTokenBegin != null) {
                            l3.set("tokenBegin", tokenBegin - sentTokenBegin);
                        }
                        if (tokenEnd != null && sentTokenBegin != null) {
                            l3.set("tokenEnd", tokenEnd - sentTokenBegin);
                        }
                        l3.set("text", m.get(CoreAnnotations.TextAnnotation.class));
                        l3.set("characterOffsetBegin", m.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
                        l3.set("characterOffsetEnd", m.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
                        l3.set("ner", m.get(CoreAnnotations.NamedEntityTagAnnotation.class));
                        l3.set("normalizedNER", m.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
                        l3.set("entitylink", m.get(CoreAnnotations.WikipediaEntityAnnotation.class));
                        // Temporal expression attached to the mention, with an optional nested range.
                        Timex time = m.get(TimeAnnotations.TimexAnnotation.class);
                        if (time != null) {
                            Timex.Range range = time.range();
                            l3.set("timex", (Consumer<Writer>) l4 -> {
                                l4.set("tid", time.tid());
                                l4.set("type", time.timexType());
                                l4.set("value", time.value());
                                l4.set("altValue", time.altVal());
                                l4.set("range", (range != null) ? (Consumer<Writer>) l5 -> {
                                    l5.set("begin", range.begin);
                                    l5.set("end", range.end);
                                    l5.set("duration", range.duration);
                                } : null);
                            });
                        }
                    }));
                }
                // Per-token output: surface form, offsets, POS/NER, truecasing, whitespace context.
                if (sentence.get(CoreAnnotations.TokensAnnotation.class) != null) {
                    l2.set("tokens", sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(token -> (Consumer<Writer>) (Writer l3) -> {
                        l3.set("index", token.index());
                        l3.set("word", token.word());
                        l3.set("originalText", token.originalText());
                        l3.set("lemma", token.lemma());
                        l3.set("characterOffsetBegin", token.beginPosition());
                        l3.set("characterOffsetEnd", token.endPosition());
                        l3.set("pos", token.tag());
                        l3.set("ner", token.ner());
                        l3.set("normalizedNER", token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
                        l3.set("speaker", token.get(CoreAnnotations.SpeakerAnnotation.class));
                        l3.set("truecase", token.get(CoreAnnotations.TrueCaseAnnotation.class));
                        l3.set("truecaseText", token.get(CoreAnnotations.TrueCaseTextAnnotation.class));
                        l3.set("before", token.get(CoreAnnotations.BeforeAnnotation.class));
                        l3.set("after", token.get(CoreAnnotations.AfterAnnotation.class));
                        l3.set("entitylink", token.get(CoreAnnotations.WikipediaEntityAnnotation.class));
                        // Token-level temporal expression, same shape as the mention-level one above.
                        Timex time = token.get(TimeAnnotations.TimexAnnotation.class);
                        if (time != null) {
                            Timex.Range range = time.range();
                            l3.set("timex", (Consumer<Writer>) l4 -> {
                                l4.set("tid", time.tid());
                                l4.set("type", time.timexType());
                                l4.set("value", time.value());
                                l4.set("altValue", time.altVal());
                                l4.set("range", (range != null) ? (Consumer<Writer>) l5 -> {
                                    l5.set("begin", range.begin);
                                    l5.set("end", range.end);
                                    l5.set("duration", range.duration);
                                } : null);
                            });
                        }
                    }));
                }
            }));
        }
        // Coreference chains, keyed by chain id; each mention notes whether it is
        // the chain's representative mention (checked by object identity).
        if (doc.get(CorefCoreAnnotations.CorefChainAnnotation.class) != null) {
            Map<Integer, CorefChain> corefChains = doc.get(CorefCoreAnnotations.CorefChainAnnotation.class);
            if (corefChains != null) {
                l1.set("corefs", (Consumer<Writer>) chainWriter -> {
                    for (CorefChain chain : corefChains.values()) {
                        CorefChain.CorefMention representative = chain.getRepresentativeMention();
                        chainWriter.set(Integer.toString(chain.getChainID()), chain.getMentionsInTextualOrder().stream().map(mention -> (Consumer<Writer>) (Writer mentionWriter) -> {
                            mentionWriter.set("id", mention.mentionID);
                            mentionWriter.set("text", mention.mentionSpan);
                            mentionWriter.set("type", mention.mentionType);
                            mentionWriter.set("number", mention.number);
                            mentionWriter.set("gender", mention.gender);
                            mentionWriter.set("animacy", mention.animacy);
                            mentionWriter.set("startIndex", mention.startIndex);
                            mentionWriter.set("endIndex", mention.endIndex);
                            mentionWriter.set("headIndex", mention.headIndex);
                            mentionWriter.set("sentNum", mention.sentNum);
                            mentionWriter.set("position", Arrays.stream(mention.position.elems()).boxed().collect(Collectors.toList()));
                            mentionWriter.set("isRepresentativeMention", mention == representative);
                        }));
                    }
                });
            }
        }
        // Quotations gathered from the document, with character/token/sentence extents.
        if (doc.get(CoreAnnotations.QuotationsAnnotation.class) != null) {
            List<CoreMap> quotes = QuoteAnnotator.gatherQuotes(doc);
            l1.set("quotes", quotes.stream().map(quote -> (Consumer<Writer>) (Writer l2) -> {
                l2.set("id", quote.get(CoreAnnotations.QuotationIndexAnnotation.class));
                l2.set("text", quote.get(CoreAnnotations.TextAnnotation.class));
                l2.set("beginIndex", quote.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
                l2.set("endIndex", quote.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
                l2.set("beginToken", quote.get(CoreAnnotations.TokenBeginAnnotation.class));
                l2.set("endToken", quote.get(CoreAnnotations.TokenEndAnnotation.class));
                l2.set("beginSentence", quote.get(CoreAnnotations.SentenceBeginAnnotation.class));
                l2.set("endSentence", quote.get(CoreAnnotations.SentenceEndAnnotation.class));
            }));
        }
    });
    // flush
    l0.writer.flush();
}
Also used : java.util(java.util) CorefChain(edu.stanford.nlp.coref.data.CorefChain) SentenceUtils(edu.stanford.nlp.ling.SentenceUtils) Tree(edu.stanford.nlp.trees.Tree) NaturalLogicAnnotations(edu.stanford.nlp.naturalli.NaturalLogicAnnotations) TimeAnnotations(edu.stanford.nlp.time.TimeAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) RNNCoreAnnotations(edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) Pair(edu.stanford.nlp.util.Pair) CoreMap(edu.stanford.nlp.util.CoreMap) Timex(edu.stanford.nlp.time.Timex) IndexedWord(edu.stanford.nlp.ling.IndexedWord) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) TreePrint(edu.stanford.nlp.trees.TreePrint) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) IOUtils(edu.stanford.nlp.io.IOUtils) Pointer(edu.stanford.nlp.util.Pointer) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) DecimalFormat(java.text.DecimalFormat) StringOutputStream(edu.stanford.nlp.io.StringOutputStream) Collectors(java.util.stream.Collectors) Span(edu.stanford.nlp.ie.machinereading.structure.Span) Consumer(java.util.function.Consumer) Stream(java.util.stream.Stream) java.io(java.io) Generics(edu.stanford.nlp.util.Generics) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) TreePrint(edu.stanford.nlp.trees.TreePrint) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) Consumer(java.util.function.Consumer) CorefChain(edu.stanford.nlp.coref.data.CorefChain) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) Tree(edu.stanford.nlp.trees.Tree) TimeAnnotations(edu.stanford.nlp.time.TimeAnnotations) NaturalLogicAnnotations(edu.stanford.nlp.naturalli.NaturalLogicAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) 
CoreMap(edu.stanford.nlp.util.CoreMap) TreePrint(edu.stanford.nlp.trees.TreePrint) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) RNNCoreAnnotations(edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) Timex(edu.stanford.nlp.time.Timex)

Example 4 with TreePrint

Use of edu.stanford.nlp.trees.TreePrint in the CoreNLP project by stanfordnlp.

From the class EvaluateTreebank, method testOnTreebank.

/** Test the parser on a treebank. Parses will be written to stdout, and
   *  various other information will be written to stderr and stdout,
   *  particularly if <code>op.testOptions.verbose</code> is true.
   *
   *  @param testTreebank The treebank to parse
   *  @return The labeled precision/recall F<sub>1</sub> (EVALB measure)
   *          of the parser on the treebank.
   */
public double testOnTreebank(Treebank testTreebank) {
    log.info("Testing on treebank");
    Timing treebankTotalTimer = new Timing();
    TreePrint treePrint = op.testOptions.treePrint(op.tlpParams);
    TreebankLangParserParams tlpParams = op.tlpParams;
    TreebankLanguagePack tlp = op.langpack();
    // Quiet evaluation routes both output streams to a null sink; otherwise
    // parses go to stdout and progress/diagnostics to stderr.
    PrintWriter pwOut, pwErr;
    if (op.testOptions.quietEvaluation) {
        NullOutputStream quiet = new NullOutputStream();
        pwOut = tlpParams.pw(quiet);
        pwErr = tlpParams.pw(quiet);
    } else {
        pwOut = tlpParams.pw();
        pwErr = tlpParams.pw(System.err);
    }
    if (op.testOptions.verbose) {
        pwErr.print("Testing ");
        pwErr.println(testTreebank.textualSummary(tlp));
    }
    if (op.testOptions.evalb) {
        EvalbFormatWriter.initEVALBfiles(tlpParams);
    }
    // Optional extra output file for the parses (best-effort: failures are logged, not fatal).
    PrintWriter pwFileOut = null;
    if (op.testOptions.writeOutputFiles) {
        String fname = op.testOptions.outputFilesPrefix + "." + op.testOptions.outputFilesExtension;
        try {
            pwFileOut = op.tlpParams.pw(new FileOutputStream(fname));
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }
    // Optional k-best equivocation statistics file (also best-effort).
    PrintWriter pwStats = null;
    if (op.testOptions.outputkBestEquivocation != null) {
        try {
            pwStats = op.tlpParams.pw(new FileOutputStream(op.testOptions.outputkBestEquivocation));
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }
    if (op.testOptions.testingThreads != 1) {
        // Multithreaded path: sentences are fed to a MulticoreWrapper and results
        // drained as they complete.  goldTrees queues the gold parses so each
        // polled ParserQuery can be matched FIFO with its gold tree.
        MulticoreWrapper<List<? extends HasWord>, ParserQuery> wrapper = new MulticoreWrapper<>(op.testOptions.testingThreads, new ParsingThreadsafeProcessor(pqFactory, pwErr));
        LinkedList<Tree> goldTrees = new LinkedList<>();
        for (Tree goldTree : testTreebank) {
            List<? extends HasWord> sentence = getInputSentence(goldTree);
            goldTrees.add(goldTree);
            pwErr.println("Parsing [len. " + sentence.size() + "]: " + SentenceUtils.listToString(sentence));
            wrapper.put(sentence);
            while (wrapper.peek()) {
                ParserQuery pq = wrapper.poll();
                goldTree = goldTrees.poll();
                processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
            }
        }
        // for tree iterator
        // Wait for the remaining workers, then drain any results still queued.
        wrapper.join();
        while (wrapper.peek()) {
            ParserQuery pq = wrapper.poll();
            Tree goldTree = goldTrees.poll();
            processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
        }
    } else {
        // Single-threaded path: parse and score each tree in turn with one ParserQuery.
        ParserQuery pq = pqFactory.parserQuery();
        for (Tree goldTree : testTreebank) {
            final List<CoreLabel> sentence = getInputSentence(goldTree);
            pwErr.println("Parsing [len. " + sentence.size() + "]: " + SentenceUtils.listToString(sentence));
            pq.parseAndReport(sentence, pwErr);
            processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
        }
    // for tree iterator
    }
    //Done parsing...print the results of the evaluations
    treebankTotalTimer.done("Testing on treebank");
    // Restore a real stderr writer so the final summary is visible even in quiet mode.
    if (op.testOptions.quietEvaluation) {
        pwErr = tlpParams.pw(System.err);
    }
    if (saidMemMessage) {
        ParserUtils.printOutOfMemory(pwErr);
    }
    if (op.testOptions.evalb) {
        EvalbFormatWriter.closeEVALBfiles();
    }
    if (numSkippedEvals != 0) {
        pwErr.printf("Unable to evaluate %d parser hypotheses due to yield mismatch\n", numSkippedEvals);
    }
    // only created here so we know what parser types are supported...
    ParserQuery pq = pqFactory.parserQuery();
    // Display every accumulated evaluation metric that was enabled (each field is
    // null when its corresponding evaluation was not requested).
    if (summary) {
        if (pcfgLB != null)
            pcfgLB.display(false, pwErr);
        if (pcfgChildSpecific != null)
            pcfgChildSpecific.display(false, pwErr);
        if (pcfgLA != null)
            pcfgLA.display(false, pwErr);
        if (pcfgCB != null)
            pcfgCB.display(false, pwErr);
        if (pcfgDA != null)
            pcfgDA.display(false, pwErr);
        if (pcfgTA != null)
            pcfgTA.display(false, pwErr);
        if (pcfgLL != null && pq.getPCFGParser() != null)
            pcfgLL.display(false, pwErr);
        if (depDA != null)
            depDA.display(false, pwErr);
        if (depTA != null)
            depTA.display(false, pwErr);
        if (depLL != null && pq.getDependencyParser() != null)
            depLL.display(false, pwErr);
        if (factLB != null)
            factLB.display(false, pwErr);
        if (factChildSpecific != null)
            factChildSpecific.display(false, pwErr);
        if (factLA != null)
            factLA.display(false, pwErr);
        if (factCB != null)
            factCB.display(false, pwErr);
        if (factDA != null)
            factDA.display(false, pwErr);
        if (factTA != null)
            factTA.display(false, pwErr);
        if (factLL != null && pq.getFactoredParser() != null)
            factLL.display(false, pwErr);
        if (pcfgCatE != null)
            pcfgCatE.display(false, pwErr);
        for (Eval eval : evals) {
            eval.display(false, pwErr);
        }
        for (BestOfTopKEval eval : topKEvals) {
            eval.display(false, pwErr);
        }
    }
    // these ones only have a display mode, so display if turned on!!
    if (pcfgRUO != null)
        pcfgRUO.display(true, pwErr);
    if (pcfgCUO != null)
        pcfgCUO.display(true, pwErr);
    // Tab-separated one-line summary of the headline metrics.
    if (tsv) {
        NumberFormat nf = new DecimalFormat("0.00");
        pwErr.println("factF1\tfactDA\tfactEx\tpcfgF1\tdepDA\tfactTA\tnum");
        if (factLB != null)
            pwErr.print(nf.format(factLB.getEvalbF1Percent()));
        pwErr.print("\t");
        if (pq.getDependencyParser() != null && factDA != null)
            pwErr.print(nf.format(factDA.getEvalbF1Percent()));
        pwErr.print("\t");
        if (factLB != null)
            pwErr.print(nf.format(factLB.getExactPercent()));
        pwErr.print("\t");
        if (pcfgLB != null)
            pwErr.print(nf.format(pcfgLB.getEvalbF1Percent()));
        pwErr.print("\t");
        if (pq.getDependencyParser() != null && depDA != null)
            pwErr.print(nf.format(depDA.getEvalbF1Percent()));
        pwErr.print("\t");
        if (pq.getPCFGParser() != null && factTA != null)
            pwErr.print(nf.format(factTA.getEvalbF1Percent()));
        pwErr.print("\t");
        if (factLB != null)
            pwErr.print(factLB.getNum());
        pwErr.println();
    }
    // The returned F1 is the factored parser's labeled bracketing score (0.0 if unavailable).
    double f1 = 0.0;
    if (factLB != null) {
        f1 = factLB.getEvalbF1();
    }
    //Close files (if necessary)
    if (pwFileOut != null)
        pwFileOut.close();
    if (pwStats != null)
        pwStats.close();
    if (parserQueryEvals != null) {
        for (ParserQueryEval parserQueryEval : parserQueryEvals) {
            parserQueryEval.display(false, pwErr);
        }
    }
    return f1;
}
Also used : DecimalFormat(java.text.DecimalFormat) TreePrint(edu.stanford.nlp.trees.TreePrint) Tree(edu.stanford.nlp.trees.Tree) TreebankLanguagePack(edu.stanford.nlp.trees.TreebankLanguagePack) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) LeafAncestorEval(edu.stanford.nlp.parser.metrics.LeafAncestorEval) AbstractEval(edu.stanford.nlp.parser.metrics.AbstractEval) TaggingEval(edu.stanford.nlp.parser.metrics.TaggingEval) TopMatchEval(edu.stanford.nlp.parser.metrics.TopMatchEval) FilteredEval(edu.stanford.nlp.parser.metrics.FilteredEval) Eval(edu.stanford.nlp.parser.metrics.Eval) UnlabeledAttachmentEval(edu.stanford.nlp.parser.metrics.UnlabeledAttachmentEval) BestOfTopKEval(edu.stanford.nlp.parser.metrics.BestOfTopKEval) ParserQueryEval(edu.stanford.nlp.parser.metrics.ParserQueryEval) PrintWriter(java.io.PrintWriter) MulticoreWrapper(edu.stanford.nlp.util.concurrent.MulticoreWrapper) IOException(java.io.IOException) ParserQueryEval(edu.stanford.nlp.parser.metrics.ParserQueryEval) LinkedList(java.util.LinkedList) ParsingThreadsafeProcessor(edu.stanford.nlp.parser.common.ParsingThreadsafeProcessor) FileOutputStream(java.io.FileOutputStream) Timing(edu.stanford.nlp.util.Timing) BestOfTopKEval(edu.stanford.nlp.parser.metrics.BestOfTopKEval) NullOutputStream(edu.stanford.nlp.io.NullOutputStream) ParserQuery(edu.stanford.nlp.parser.common.ParserQuery) NumberFormat(java.text.NumberFormat)

Example 5 with TreePrint

Use of edu.stanford.nlp.trees.TreePrint in the CoreNLP project by stanfordnlp.

From the class EvaluateTreebank, method processResults.

/**
 * Processes the parse(s) held by {@code pq} for one sentence: prints the best
 * (combo) parse and any requested k-best lists, optionally writes 1-best trees
 * and k-best entropy statistics, then scores the hypothesis against
 * {@code goldTree} with every evaluation metric that was enabled (each metric
 * field is non-null only when its option was turned on).
 *
 * @param pq        parser query holding the parse result(s) for one sentence
 * @param goldTree  gold-standard tree for the same sentence
 * @param pwErr     writer for diagnostics and per-metric running scores
 * @param pwOut     writer for the primary parse output (typically System.out)
 * @param pwFileOut optional writer for 1-best trees to a file; may be null
 * @param pwStats   writer for k-best entropy statistics; used only when the
 *                  outputkBestEquivocation option is set
 * @param treePrint formatter used to render trees on pwOut
 */
public void processResults(ParserQuery pq, Tree goldTree, PrintWriter pwErr, PrintWriter pwOut, PrintWriter pwFileOut, PrintWriter pwStats, TreePrint treePrint) {
    // Remember that the parser already issued its memory warning so it is not repeated later.
    if (pq.saidMemMessage()) {
        saidMemMessage = true;
    }
    Tree tree;
    List<? extends HasWord> sentence = pq.originalSentence();
    try {
        tree = pq.getBestParse();
    } catch (NoSuchParseException e) {
        // Parse failure: leave tree null; all printing/eval below guards on tree != null.
        tree = null;
    }
    // Fetch the PCFG k-best list once; it is reused by printing, entropy, and top-k eval below.
    List<ScoredObject<Tree>> kbestPCFGTrees = null;
    if (tree != null && kbestPCFG > 0) {
        kbestPCFGTrees = pq.getKBestPCFGParses(kbestPCFG);
    }
    //combo parse goes to pwOut (System.out)
    if (op.testOptions.verbose) {
        pwOut.println("ComboParser best");
        Tree ot = tree;
        // Wrap the parse in the language pack's start symbol if it is not already rooted there.
        if (ot != null && !op.tlpParams.treebankLanguagePack().isStartSymbol(ot.value())) {
            ot = ot.treeFactory().newTreeNode(op.tlpParams.treebankLanguagePack().startSymbol(), Collections.singletonList(ot));
        }
        treePrint.printTree(ot, pwOut);
    } else {
        treePrint.printTree(tree, pwOut);
    }
    // print various statistics
    if (tree != null) {
        if (op.testOptions.printAllBestParses) {
            // Print every PCFG parse tied for the best score, evaluating each against gold.
            List<ScoredObject<Tree>> parses = pq.getBestPCFGParses();
            int sz = parses.size();
            if (sz > 1) {
                pwOut.println("There were " + sz + " best PCFG parses with score " + parses.get(0).score() + '.');
                Tree transGoldTree = collinizer.transformTree(goldTree);
                int iii = 0;
                for (ScoredObject<Tree> sot : parses) {
                    iii++;
                    Tree tb = sot.object();
                    // Undo binarization and strip subcategory annotation before display/eval.
                    Tree tbd = debinarizer.transformTree(tb);
                    tbd = subcategoryStripper.transformTree(tbd);
                    pq.restoreOriginalWords(tbd);
                    pwOut.println("PCFG Parse #" + iii + " with score " + tbd.score());
                    tbd.pennPrint(pwOut);
                    Tree tbtr = collinizer.transformTree(tbd);
                    // pwOut.println("Tree size = " + tbtr.size() + "; depth = " + tbtr.depth());
                    kGoodLB.evaluate(tbtr, transGoldTree, pwErr);
                }
            }
        } else // Huang and Chiang (2006) Algorithm 3 output from the PCFG parser
        if (op.testOptions.printPCFGkBest > 0 && op.testOptions.outputkBestEquivocation == null) {
            List<ScoredObject<Tree>> trees = kbestPCFGTrees.subList(0, op.testOptions.printPCFGkBest);
            Tree transGoldTree = collinizer.transformTree(goldTree);
            int i = 0;
            for (ScoredObject<Tree> tp : trees) {
                i++;
                pwOut.println("PCFG Parse #" + i + " with score " + tp.score());
                Tree tbd = tp.object();
                tbd.pennPrint(pwOut);
                Tree tbtr = collinizer.transformTree(tbd);
                kGoodLB.evaluate(tbtr, transGoldTree, pwErr);
            }
        } else // Chart parser (factored) n-best list
        if (op.testOptions.printFactoredKGood > 0 && pq.hasFactoredParse()) {
            // DZ: debug n best trees
            List<ScoredObject<Tree>> trees = pq.getKGoodFactoredParses(op.testOptions.printFactoredKGood);
            Tree transGoldTree = collinizer.transformTree(goldTree);
            int ii = 0;
            for (ScoredObject<Tree> tp : trees) {
                ii++;
                pwOut.println("Factored Parse #" + ii + " with score " + tp.score());
                Tree tbd = tp.object();
                tbd.pennPrint(pwOut);
                Tree tbtr = collinizer.transformTree(tbd);
                // NOTE(review): this branch reports the evaluation to pwOut while the sibling
                // branches above use pwErr -- confirm that is intentional (looks like debug output).
                kGoodLB.evaluate(tbtr, transGoldTree, pwOut);
            }
        } else //1-best output
        if (pwFileOut != null) {
            pwFileOut.println(tree.toString());
        }
        //Print the derivational entropy
        if (op.testOptions.outputkBestEquivocation != null && op.testOptions.printPCFGkBest > 0) {
            List<ScoredObject<Tree>> trees = kbestPCFGTrees.subList(0, op.testOptions.printPCFGkBest);
            double[] logScores = new double[trees.size()];
            int treeId = 0;
            for (ScoredObject<Tree> kBestTree : trees) logScores[treeId++] = kBestTree.score();
            //Re-normalize
            // Shannon entropy of the renormalized k-best distribution:
            // H = -sum_i p_i * log2(p_i), where p_i = exp(logScore_i - logSum(logScores)).
            double entropy = 0.0;
            double denom = ArrayMath.logSum(logScores);
            for (double logScore : logScores) {
                double logPr = logScore - denom;
                entropy += Math.exp(logPr) * (logPr / Math.log(2));
            }
            //Convert to bits
            entropy *= -1;
            pwStats.printf("%f\t%d\t%d\n", entropy, trees.size(), sentence.size());
        }
    }
    // Perform various evaluations specified by the user
    if (tree != null) {
        //Strip subcategories and remove punctuation for evaluation
        tree = subcategoryStripper.transformTree(tree);
        Tree treeFact = collinizer.transformTree(tree);
        //Setup the gold tree
        if (op.testOptions.verbose) {
            pwOut.println("Correct parse");
            treePrint.printTree(goldTree, pwOut);
        }
        Tree transGoldTree = collinizer.transformTree(goldTree);
        if (transGoldTree != null)
            transGoldTree = subcategoryStripper.transformTree(transGoldTree);
        //Can't do evaluation in these two cases
        // (plus the yield-mismatch case below); each bails out after bumping numSkippedEvals.
        if (transGoldTree == null) {
            pwErr.println("Couldn't transform gold tree for evaluation, skipping eval. Gold tree was:");
            goldTree.pennPrint(pwErr);
            numSkippedEvals++;
            return;
        } else if (treeFact == null) {
            pwErr.println("Couldn't transform hypothesis tree for evaluation, skipping eval. Tree was:");
            tree.pennPrint(pwErr);
            numSkippedEvals++;
            return;
        } else if (treeFact.yield().size() != transGoldTree.yield().size()) {
            // Bracket scoring is undefined when the terminal yields differ in length.
            List<Label> fYield = treeFact.yield();
            List<Label> gYield = transGoldTree.yield();
            pwErr.println("WARNING: Evaluation could not be performed due to gold/parsed yield mismatch.");
            pwErr.printf("  sizes: gold: %d (transf) %d (orig); parsed: %d (transf) %d (orig).%n", gYield.size(), goldTree.yield().size(), fYield.size(), tree.yield().size());
            pwErr.println("  gold: " + SentenceUtils.listToString(gYield, true));
            pwErr.println("  pars: " + SentenceUtils.listToString(fYield, true));
            numSkippedEvals++;
            return;
        }
        // Oracle ("best of top k") evaluation over the PCFG k-best list.
        if (topKEvals.size() > 0) {
            List<Tree> transGuesses = new ArrayList<>();
            int kbest = Math.min(op.testOptions.evalPCFGkBest, kbestPCFGTrees.size());
            for (ScoredObject<Tree> guess : kbestPCFGTrees.subList(0, kbest)) {
                transGuesses.add(collinizer.transformTree(guess.object()));
            }
            for (BestOfTopKEval eval : topKEvals) {
                eval.evaluate(transGuesses, transGoldTree, pwErr);
            }
        }
        //PCFG eval
        // Each pcfg* metric below is non-null only if its corresponding option was enabled.
        Tree treePCFG = pq.getBestPCFGParse();
        if (treePCFG != null) {
            Tree treePCFGeval = collinizer.transformTree(treePCFG);
            if (pcfgLB != null) {
                pcfgLB.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgChildSpecific != null) {
                pcfgChildSpecific.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgLA != null) {
                pcfgLA.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgCB != null) {
                pcfgCB.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgDA != null) {
                // Re-index the leaves after Collinization, stripping traces, etc.
                treePCFGeval.indexLeaves(true);
                transGoldTree.indexLeaves(true);
                pcfgDA.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgTA != null) {
                pcfgTA.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgLL != null && pq.getPCFGParser() != null) {
                pcfgLL.recordScore(pq.getPCFGParser(), pwErr);
            }
            if (pcfgRUO != null) {
                pcfgRUO.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgCUO != null) {
                pcfgCUO.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgCatE != null) {
                pcfgCatE.evaluate(treePCFGeval, transGoldTree, pwErr);
            }
        }
        //Dependency eval
        // todo: is treeDep really useful here, or should we really use depDAEval tree (debinarized) throughout? We use it for parse, and it sure seems like we could use it for tag eval, but maybe not factDA?
        Tree treeDep = pq.getBestDependencyParse(false);
        if (treeDep != null) {
            Tree goldTreeB = binarizerOnly.transformTree(goldTree);
            // Deep-copy the gold tree so leaf indexing / head percolation don't mutate the original.
            Tree goldTreeEval = goldTree.deepCopy();
            goldTreeEval.indexLeaves(true);
            goldTreeEval.percolateHeads(op.langpack().headFinder());
            Tree depDAEval = pq.getBestDependencyParse(true);
            depDAEval.indexLeaves(true);
            depDAEval.percolateHeadIndices();
            if (depDA != null) {
                depDA.evaluate(depDAEval, goldTreeEval, pwErr);
            }
            if (depTA != null) {
                // Tagging accuracy is measured on the debinarized, subcategory-stripped tree
                // with the original words restored.
                Tree undoneTree = debinarizer.transformTree(treeDep);
                undoneTree = subcategoryStripper.transformTree(undoneTree);
                pq.restoreOriginalWords(undoneTree);
                // pwErr.println("subcategoryStripped tree: " + undoneTree.toStructureDebugString());
                depTA.evaluate(undoneTree, goldTree, pwErr);
            }
            if (depLL != null && pq.getDependencyParser() != null) {
                depLL.recordScore(pq.getDependencyParser(), pwErr);
            }
            // Prefer the factored parse for factored dependency accuracy; fall back to the
            // dependency parse when no factored parse is available.
            Tree factTreeB;
            if (pq.hasFactoredParse()) {
                factTreeB = pq.getBestFactoredParse();
            } else {
                factTreeB = treeDep;
            }
            if (factDA != null) {
                factDA.evaluate(factTreeB, goldTreeB, pwErr);
            }
        }
        //Factored parser (1best) eval
        if (factLB != null) {
            factLB.evaluate(treeFact, transGoldTree, pwErr);
        }
        if (factChildSpecific != null) {
            factChildSpecific.evaluate(treeFact, transGoldTree, pwErr);
        }
        if (factLA != null) {
            factLA.evaluate(treeFact, transGoldTree, pwErr);
        }
        if (factTA != null) {
            factTA.evaluate(tree, boundaryRemover.transformTree(goldTree), pwErr);
        }
        if (factLL != null && pq.getFactoredParser() != null) {
            factLL.recordScore(pq.getFactoredParser(), pwErr);
        }
        if (factCB != null) {
            factCB.evaluate(treeFact, transGoldTree, pwErr);
        }
        // User-supplied tree-level and query-level evaluators.
        for (Eval eval : evals) {
            eval.evaluate(treeFact, transGoldTree, pwErr);
        }
        if (parserQueryEvals != null) {
            for (ParserQueryEval eval : parserQueryEvals) {
                eval.evaluate(pq, transGoldTree, pwErr);
            }
        }
        if (op.testOptions.evalb) {
            // empty out scores just in case
            nanScores(tree);
            EvalbFormatWriter.writeEVALBline(treeFact, transGoldTree);
        }
    }
    pwErr.println();
}
Also used : ArrayList(java.util.ArrayList) ParserQueryEval(edu.stanford.nlp.parser.metrics.ParserQueryEval) TreePrint(edu.stanford.nlp.trees.TreePrint) NoSuchParseException(edu.stanford.nlp.parser.common.NoSuchParseException) ScoredObject(edu.stanford.nlp.util.ScoredObject) Tree(edu.stanford.nlp.trees.Tree) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) LeafAncestorEval(edu.stanford.nlp.parser.metrics.LeafAncestorEval) AbstractEval(edu.stanford.nlp.parser.metrics.AbstractEval) TaggingEval(edu.stanford.nlp.parser.metrics.TaggingEval) TopMatchEval(edu.stanford.nlp.parser.metrics.TopMatchEval) FilteredEval(edu.stanford.nlp.parser.metrics.FilteredEval) Eval(edu.stanford.nlp.parser.metrics.Eval) UnlabeledAttachmentEval(edu.stanford.nlp.parser.metrics.UnlabeledAttachmentEval) BestOfTopKEval(edu.stanford.nlp.parser.metrics.BestOfTopKEval) ParserQueryEval(edu.stanford.nlp.parser.metrics.ParserQueryEval) BestOfTopKEval(edu.stanford.nlp.parser.metrics.BestOfTopKEval)

Aggregations

TreePrint (edu.stanford.nlp.trees.TreePrint)8 Tree (edu.stanford.nlp.trees.Tree)4 PrintWriter (java.io.PrintWriter)3 AbstractEval (edu.stanford.nlp.parser.metrics.AbstractEval)2 BestOfTopKEval (edu.stanford.nlp.parser.metrics.BestOfTopKEval)2 Eval (edu.stanford.nlp.parser.metrics.Eval)2 FilteredEval (edu.stanford.nlp.parser.metrics.FilteredEval)2 LeafAncestorEval (edu.stanford.nlp.parser.metrics.LeafAncestorEval)2 ParserQueryEval (edu.stanford.nlp.parser.metrics.ParserQueryEval)2 TaggingEval (edu.stanford.nlp.parser.metrics.TaggingEval)2 TopMatchEval (edu.stanford.nlp.parser.metrics.TopMatchEval)2 UnlabeledAttachmentEval (edu.stanford.nlp.parser.metrics.UnlabeledAttachmentEval)2 TreebankLanguagePack (edu.stanford.nlp.trees.TreebankLanguagePack)2 DecimalFormat (java.text.DecimalFormat)2 ArrayList (java.util.ArrayList)2 LinkedList (java.util.LinkedList)2 List (java.util.List)2 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)1 CorefChain (edu.stanford.nlp.coref.data.CorefChain)1 Span (edu.stanford.nlp.ie.machinereading.structure.Span)1