Search in sources :

Example 11 with TreebankLanguagePack

use of edu.stanford.nlp.trees.TreebankLanguagePack in project CoreNLP by stanfordnlp.

the class EvaluateTreebank method testOnTreebank.

/**
 * Test the parser on a treebank. Parses will be written to stdout, and
 * various other information will be written to stderr and stdout,
 * particularly if <code>op.testOptions.verbose</code> is true.
 * <p>
 * When <code>op.testOptions.quietEvaluation</code> is set, both writers are
 * backed by a <code>NullOutputStream</code> while parsing; the error writer
 * is re-pointed at <code>System.err</code> afterwards so the evaluation
 * summaries are still shown.
 * <p>
 * When <code>op.testOptions.testingThreads</code> is anything other than 1,
 * sentences are submitted to a <code>MulticoreWrapper</code> and results are
 * processed as they become available; otherwise one <code>ParserQuery</code>
 * is reused for every sentence.
 * <p>
 * The optional output files (<code>writeOutputFiles</code> and
 * <code>outputkBestEquivocation</code>) are closed in a <code>finally</code>
 * block, so they are flushed and released even if parsing or evaluation
 * throws.
 *
 * @param testTreebank The treebank to parse
 * @return The labeled precision/recall F<sub>1</sub> (EVALB measure)
 *         of the parser on the treebank, or 0.0 when <code>factLB</code>
 *         is null.
 */
public double testOnTreebank(Treebank testTreebank) {
    log.info("Testing on treebank");
    Timing treebankTotalTimer = new Timing();
    TreePrint treePrint = op.testOptions.treePrint(op.tlpParams);
    TreebankLangParserParams tlpParams = op.tlpParams;
    TreebankLanguagePack tlp = op.langpack();
    PrintWriter pwOut, pwErr;
    if (op.testOptions.quietEvaluation) {
        NullOutputStream quiet = new NullOutputStream();
        pwOut = tlpParams.pw(quiet);
        pwErr = tlpParams.pw(quiet);
    } else {
        pwOut = tlpParams.pw();
        pwErr = tlpParams.pw(System.err);
    }
    if (op.testOptions.verbose) {
        pwErr.print("Testing ");
        pwErr.println(testTreebank.textualSummary(tlp));
    }
    if (op.testOptions.evalb) {
        EvalbFormatWriter.initEVALBfiles(tlpParams);
    }
    // Declared outside the try so the finally block can close whichever
    // of them was actually opened.
    PrintWriter pwFileOut = null;
    PrintWriter pwStats = null;
    try {
        if (op.testOptions.writeOutputFiles) {
            String fname = op.testOptions.outputFilesPrefix + "." + op.testOptions.outputFilesExtension;
            try {
                pwFileOut = op.tlpParams.pw(new FileOutputStream(fname));
            } catch (IOException ioe) {
                // Best effort: report the problem and carry on without the file.
                ioe.printStackTrace();
            }
        }
        if (op.testOptions.outputkBestEquivocation != null) {
            try {
                pwStats = op.tlpParams.pw(new FileOutputStream(op.testOptions.outputkBestEquivocation));
            } catch (IOException ioe) {
                // Best effort: report the problem and carry on without the file.
                ioe.printStackTrace();
            }
        }
        if (op.testOptions.testingThreads != 1) {
            MulticoreWrapper<List<? extends HasWord>, ParserQuery> wrapper = new MulticoreWrapper<>(op.testOptions.testingThreads, new ParsingThreadsafeProcessor(pqFactory, pwErr));
            // Gold trees are queued in submission order and dequeued once per
            // finished parse; this presumably relies on the wrapper returning
            // results in the order they were submitted -- TODO confirm.
            LinkedList<Tree> goldTrees = new LinkedList<>();
            for (Tree goldTree : testTreebank) {
                List<? extends HasWord> sentence = getInputSentence(goldTree);
                goldTrees.add(goldTree);
                pwErr.println("Parsing [len. " + sentence.size() + "]: " + SentenceUtils.listToString(sentence));
                wrapper.put(sentence);
                while (wrapper.peek()) {
                    ParserQuery pq = wrapper.poll();
                    Tree finishedGoldTree = goldTrees.poll();
                    processResults(pq, finishedGoldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
                }
            }
            // for tree iterator
            wrapper.join();
            // Drain whatever was still in flight when the input ran out.
            while (wrapper.peek()) {
                ParserQuery pq = wrapper.poll();
                Tree finishedGoldTree = goldTrees.poll();
                processResults(pq, finishedGoldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
            }
        } else {
            ParserQuery pq = pqFactory.parserQuery();
            for (Tree goldTree : testTreebank) {
                final List<CoreLabel> sentence = getInputSentence(goldTree);
                pwErr.println("Parsing [len. " + sentence.size() + "]: " + SentenceUtils.listToString(sentence));
                pq.parseAndReport(sentence, pwErr);
                processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
            }
            // for tree iterator
        }
        //Done parsing...print the results of the evaluations
        treebankTotalTimer.done("Testing on treebank");
        if (op.testOptions.quietEvaluation) {
            // Per-sentence chatter was suppressed; the summaries below are not.
            pwErr = tlpParams.pw(System.err);
        }
        if (saidMemMessage) {
            ParserUtils.printOutOfMemory(pwErr);
        }
        if (op.testOptions.evalb) {
            EvalbFormatWriter.closeEVALBfiles();
        }
        if (numSkippedEvals != 0) {
            pwErr.printf("Unable to evaluate %d parser hypotheses due to yield mismatch\n", numSkippedEvals);
        }
        // only created here so we know what parser types are supported...
        ParserQuery pq = pqFactory.parserQuery();
        if (summary) {
            if (pcfgLB != null)
                pcfgLB.display(false, pwErr);
            if (pcfgChildSpecific != null)
                pcfgChildSpecific.display(false, pwErr);
            if (pcfgLA != null)
                pcfgLA.display(false, pwErr);
            if (pcfgCB != null)
                pcfgCB.display(false, pwErr);
            if (pcfgDA != null)
                pcfgDA.display(false, pwErr);
            if (pcfgTA != null)
                pcfgTA.display(false, pwErr);
            if (pcfgLL != null && pq.getPCFGParser() != null)
                pcfgLL.display(false, pwErr);
            if (depDA != null)
                depDA.display(false, pwErr);
            if (depTA != null)
                depTA.display(false, pwErr);
            if (depLL != null && pq.getDependencyParser() != null)
                depLL.display(false, pwErr);
            if (factLB != null)
                factLB.display(false, pwErr);
            if (factChildSpecific != null)
                factChildSpecific.display(false, pwErr);
            if (factLA != null)
                factLA.display(false, pwErr);
            if (factCB != null)
                factCB.display(false, pwErr);
            if (factDA != null)
                factDA.display(false, pwErr);
            if (factTA != null)
                factTA.display(false, pwErr);
            if (factLL != null && pq.getFactoredParser() != null)
                factLL.display(false, pwErr);
            if (pcfgCatE != null)
                pcfgCatE.display(false, pwErr);
            for (Eval eval : evals) {
                eval.display(false, pwErr);
            }
            for (BestOfTopKEval eval : topKEvals) {
                eval.display(false, pwErr);
            }
        }
        // these ones only have a display mode, so display if turned on!!
        if (pcfgRUO != null)
            pcfgRUO.display(true, pwErr);
        if (pcfgCUO != null)
            pcfgCUO.display(true, pwErr);
        if (tsv) {
            // One header row and one value row; a column whose metric is
            // unavailable is left empty but its tab separator is still emitted.
            NumberFormat nf = new DecimalFormat("0.00");
            pwErr.println("factF1\tfactDA\tfactEx\tpcfgF1\tdepDA\tfactTA\tnum");
            if (factLB != null)
                pwErr.print(nf.format(factLB.getEvalbF1Percent()));
            pwErr.print("\t");
            if (pq.getDependencyParser() != null && factDA != null)
                pwErr.print(nf.format(factDA.getEvalbF1Percent()));
            pwErr.print("\t");
            if (factLB != null)
                pwErr.print(nf.format(factLB.getExactPercent()));
            pwErr.print("\t");
            if (pcfgLB != null)
                pwErr.print(nf.format(pcfgLB.getEvalbF1Percent()));
            pwErr.print("\t");
            if (pq.getDependencyParser() != null && depDA != null)
                pwErr.print(nf.format(depDA.getEvalbF1Percent()));
            pwErr.print("\t");
            if (pq.getPCFGParser() != null && factTA != null)
                pwErr.print(nf.format(factTA.getEvalbF1Percent()));
            pwErr.print("\t");
            if (factLB != null)
                pwErr.print(factLB.getNum());
            pwErr.println();
        }
        double f1 = 0.0;
        if (factLB != null) {
            f1 = factLB.getEvalbF1();
        }
        if (parserQueryEvals != null) {
            for (ParserQueryEval parserQueryEval : parserQueryEvals) {
                parserQueryEval.display(false, pwErr);
            }
        }
        return f1;
    } finally {
        //Close files (if necessary), including when an exception is propagating
        if (pwFileOut != null)
            pwFileOut.close();
        if (pwStats != null)
            pwStats.close();
    }
}
Also used : DecimalFormat(java.text.DecimalFormat) TreePrint(edu.stanford.nlp.trees.TreePrint) Tree(edu.stanford.nlp.trees.Tree) TreebankLanguagePack(edu.stanford.nlp.trees.TreebankLanguagePack) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) LeafAncestorEval(edu.stanford.nlp.parser.metrics.LeafAncestorEval) AbstractEval(edu.stanford.nlp.parser.metrics.AbstractEval) TaggingEval(edu.stanford.nlp.parser.metrics.TaggingEval) TopMatchEval(edu.stanford.nlp.parser.metrics.TopMatchEval) FilteredEval(edu.stanford.nlp.parser.metrics.FilteredEval) Eval(edu.stanford.nlp.parser.metrics.Eval) UnlabeledAttachmentEval(edu.stanford.nlp.parser.metrics.UnlabeledAttachmentEval) BestOfTopKEval(edu.stanford.nlp.parser.metrics.BestOfTopKEval) ParserQueryEval(edu.stanford.nlp.parser.metrics.ParserQueryEval) PrintWriter(java.io.PrintWriter) MulticoreWrapper(edu.stanford.nlp.util.concurrent.MulticoreWrapper) IOException(java.io.IOException) ParserQueryEval(edu.stanford.nlp.parser.metrics.ParserQueryEval) LinkedList(java.util.LinkedList) ParsingThreadsafeProcessor(edu.stanford.nlp.parser.common.ParsingThreadsafeProcessor) FileOutputStream(java.io.FileOutputStream) Timing(edu.stanford.nlp.util.Timing) BestOfTopKEval(edu.stanford.nlp.parser.metrics.BestOfTopKEval) NullOutputStream(edu.stanford.nlp.io.NullOutputStream) ParserQuery(edu.stanford.nlp.parser.common.ParserQuery) NumberFormat(java.text.NumberFormat)

Example 12 with TreebankLanguagePack

use of edu.stanford.nlp.trees.TreebankLanguagePack in project CoreNLP by stanfordnlp.

the class HebrewTreeReaderFactory method main.

/**
 * Reads every tree from the given file with a
 * <code>HebrewTreeReaderFactory</code> reader and prints each one to stdout;
 * the number of trees read is then reported on stderr.
 * <p>
 * The file is decoded with the encoding reported by
 * <code>HebrewTreebankLanguagePack</code>. I/O failures are reported with a
 * stack trace rather than propagated.
 *
 * @param args exactly one element, the path of the tree file to read; any
 *             other argument count prints a usage message and exits with
 *             status -1
 */
public static void main(String[] args) {
    if (args.length != 1) {
        System.err.printf("Usage: java %s tree_file > trees%n", HebrewTreeReaderFactory.class.getName());
        System.exit(-1);
    }
    TreebankLanguagePack tlp = new HebrewTreebankLanguagePack();
    File treeFile = new File(args[0]);
    // try-with-resources releases the file handle even when decoding or
    // reading fails part-way through; the stream is declared separately so it
    // is closed even if the InputStreamReader rejects the encoding name.
    try (FileInputStream fis = new FileInputStream(treeFile);
         BufferedReader br = new BufferedReader(new InputStreamReader(fis, tlp.getEncoding()))) {
        TreeReaderFactory trf = new HebrewTreeReaderFactory();
        TreeReader tr = trf.newTreeReader(br);
        int numTrees = 0;
        for (Tree t; (t = tr.readTree()) != null; numTrees++) {
            System.out.println(t.toString());
        }
        tr.close();
        System.err.printf("Processed %d trees.%n", numTrees);
    } catch (IOException e) {
        // Also covers UnsupportedEncodingException and FileNotFoundException,
        // which are both IOException subclasses.
        e.printStackTrace();
    }
}
Also used : TreeReader(edu.stanford.nlp.trees.TreeReader) PennTreeReader(edu.stanford.nlp.trees.PennTreeReader) Tree(edu.stanford.nlp.trees.Tree) TreebankLanguagePack(edu.stanford.nlp.trees.TreebankLanguagePack) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory)

Example 13 with TreebankLanguagePack

use of edu.stanford.nlp.trees.TreebankLanguagePack in project CoreNLP by stanfordnlp.

the class EvaluateTreebank method testOnTreebank.

/**
 * Evaluates the parser on an evaluation dataset.
 * <p>
 * The (parser query, gold tree) pairs are obtained from
 * <code>testTreebank.dataset(...)</code> and each pair is passed to
 * <code>processResults</code>; afterwards the configured evaluation
 * summaries are written to the error writer.
 * <p>
 * When <code>op.testOptions.quietEvaluation</code> is set, both writers are
 * backed by a <code>NullOutputStream</code> while results are processed; the
 * error writer is re-pointed at <code>System.err</code> afterwards so the
 * summaries are still shown.
 * <p>
 * The optional output files (<code>writeOutputFiles</code> and
 * <code>outputkBestEquivocation</code>) are closed in a <code>finally</code>
 * block, so they are flushed and released even if evaluation throws.
 *
 * @param testTreebank the dataset that supplies the parses and gold trees
 * @return the value of <code>factLB.getEvalbF1()</code>, or 0.0 when
 *         <code>factLB</code> is null
 */
public double testOnTreebank(EvaluationDataset testTreebank) {
    log.info("Testing on treebank");
    Timing treebankTotalTimer = new Timing();
    TreePrint treePrint = op.testOptions.treePrint(op.tlpParams);
    TreebankLangParserParams tlpParams = op.tlpParams;
    TreebankLanguagePack tlp = op.langpack();
    PrintWriter pwOut, pwErr;
    if (op.testOptions.quietEvaluation) {
        NullOutputStream quiet = new NullOutputStream();
        pwOut = tlpParams.pw(quiet);
        pwErr = tlpParams.pw(quiet);
    } else {
        pwOut = tlpParams.pw();
        pwErr = tlpParams.pw(System.err);
    }
    if (op.testOptions.verbose) {
        testTreebank.summarize(pwErr, tlp);
    }
    if (op.testOptions.evalb) {
        EvalbFormatWriter.initEVALBfiles(tlpParams);
    }
    // Declared outside the try so the finally block can close whichever
    // of them was actually opened.
    PrintWriter pwFileOut = null;
    PrintWriter pwStats = null;
    try {
        if (op.testOptions.writeOutputFiles) {
            String fname = op.testOptions.outputFilesPrefix + "." + op.testOptions.outputFilesExtension;
            try {
                pwFileOut = op.tlpParams.pw(new FileOutputStream(fname));
            } catch (IOException ioe) {
                // Best effort: report the problem and carry on without the file.
                ioe.printStackTrace();
            }
        }
        if (op.testOptions.outputkBestEquivocation != null) {
            try {
                pwStats = op.tlpParams.pw(new FileOutputStream(op.testOptions.outputkBestEquivocation));
            } catch (IOException ioe) {
                // Best effort: report the problem and carry on without the file.
                ioe.printStackTrace();
            }
        }
        List<Pair<ParserQuery, Tree>> results = testTreebank.dataset(pwErr, pwOut, pwFileOut, pwStats, treePrint);
        for (Pair<ParserQuery, Tree> result : results) {
            ParserQuery pq = result.first;
            Tree goldTree = result.second;
            processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
        }
        // Done parsing...print the results of the evaluations
        treebankTotalTimer.done("Testing on treebank");
        if (op.testOptions.quietEvaluation) {
            // Per-sentence chatter was suppressed; the summaries below are not.
            pwErr = tlpParams.pw(System.err);
        }
        if (saidMemMessage) {
            ParserUtils.printOutOfMemory(pwErr);
        }
        if (op.testOptions.evalb) {
            EvalbFormatWriter.closeEVALBfiles();
        }
        if (numSkippedEvals != 0) {
            pwErr.printf("Unable to evaluate %d parser hypotheses due to yield mismatch\n", numSkippedEvals);
        }
        // only created here so we know what parser types are supported...
        // TODO: pass in the various pcfgparser, dependencyparser, etc?
        // pq stays null when there is no factory; every use below is guarded.
        ParserQuery pq = pqFactory != null ? pqFactory.parserQuery() : null;
        if (summary) {
            if (pcfgLB != null)
                pcfgLB.display(false, pwErr);
            if (pcfgChildSpecific != null)
                pcfgChildSpecific.display(false, pwErr);
            if (pcfgLA != null)
                pcfgLA.display(false, pwErr);
            if (pcfgCB != null)
                pcfgCB.display(false, pwErr);
            if (pcfgDA != null)
                pcfgDA.display(false, pwErr);
            if (pcfgTA != null)
                pcfgTA.display(false, pwErr);
            if (pcfgLL != null && pq != null && pq.getPCFGParser() != null)
                pcfgLL.display(false, pwErr);
            if (depDA != null)
                depDA.display(false, pwErr);
            if (depTA != null)
                depTA.display(false, pwErr);
            if (depLL != null && pq != null && pq.getDependencyParser() != null)
                depLL.display(false, pwErr);
            if (factLB != null)
                factLB.display(false, pwErr);
            if (factChildSpecific != null)
                factChildSpecific.display(false, pwErr);
            if (factLA != null)
                factLA.display(false, pwErr);
            if (factCB != null)
                factCB.display(false, pwErr);
            if (factDA != null)
                factDA.display(false, pwErr);
            if (factTA != null)
                factTA.display(false, pwErr);
            if (factLL != null && pq != null && pq.getFactoredParser() != null)
                factLL.display(false, pwErr);
            if (pcfgCatE != null)
                pcfgCatE.display(false, pwErr);
            for (Eval eval : evals) {
                eval.display(false, pwErr);
            }
            for (BestOfTopKEval eval : topKEvals) {
                eval.display(false, pwErr);
            }
        }
        // these ones only have a display mode, so display if turned on!!
        if (pcfgRUO != null)
            pcfgRUO.display(true, pwErr);
        if (pcfgCUO != null)
            pcfgCUO.display(true, pwErr);
        if (tsv) {
            // One header row and one value row; a column whose metric is
            // unavailable is left empty but its tab separator is still emitted.
            NumberFormat nf = new DecimalFormat("0.00");
            pwErr.println("factF1\tfactDA\tfactEx\tpcfgF1\tdepDA\tfactTA\tnum");
            if (factLB != null)
                pwErr.print(nf.format(factLB.getEvalbF1Percent()));
            pwErr.print("\t");
            if (pq != null && pq.getDependencyParser() != null && factDA != null)
                pwErr.print(nf.format(factDA.getEvalbF1Percent()));
            pwErr.print("\t");
            if (factLB != null)
                pwErr.print(nf.format(factLB.getExactPercent()));
            pwErr.print("\t");
            if (pcfgLB != null)
                pwErr.print(nf.format(pcfgLB.getEvalbF1Percent()));
            pwErr.print("\t");
            if (pq != null && pq.getDependencyParser() != null && depDA != null)
                pwErr.print(nf.format(depDA.getEvalbF1Percent()));
            pwErr.print("\t");
            if (pq != null && pq.getPCFGParser() != null && factTA != null)
                pwErr.print(nf.format(factTA.getEvalbF1Percent()));
            pwErr.print("\t");
            if (factLB != null)
                pwErr.print(factLB.getNum());
            pwErr.println();
        }
        double f1 = 0.0;
        if (factLB != null) {
            f1 = factLB.getEvalbF1();
        }
        for (ParserQueryEval parserQueryEval : parserQueryEvals) {
            parserQueryEval.display(false, pwErr);
        }
        return f1;
    } finally {
        // Close files (if necessary), including when an exception is propagating
        if (pwFileOut != null)
            pwFileOut.close();
        if (pwStats != null)
            pwStats.close();
    }
}
Also used : DecimalFormat(java.text.DecimalFormat) TreePrint(edu.stanford.nlp.trees.TreePrint) TreebankLangParserParams(edu.stanford.nlp.parser.lexparser.TreebankLangParserParams) IOException(java.io.IOException) FileOutputStream(java.io.FileOutputStream) Tree(edu.stanford.nlp.trees.Tree) TreebankLanguagePack(edu.stanford.nlp.trees.TreebankLanguagePack) Timing(edu.stanford.nlp.util.Timing) PrintWriter(java.io.PrintWriter) NullOutputStream(edu.stanford.nlp.io.NullOutputStream) Pair(edu.stanford.nlp.util.Pair) ParserQuery(edu.stanford.nlp.parser.common.ParserQuery) NumberFormat(java.text.NumberFormat)

Example 14 with TreebankLanguagePack

use of edu.stanford.nlp.trees.TreebankLanguagePack in project CoreNLP by stanfordnlp.

the class LexicalizedParserQuery method addSentenceFinalPunctIfNeeded.

/**
 * Adds a sentence final punctuation mark to sentences that lack one.
 *  This method adds the first sentence final punctuation word of the
 *  parser language pack to sentences that don't have one within
 *  the last 3 words (to allow for close parentheses, etc.).  It checks
 *  tags for punctuation, if available, otherwise words.
 *  <p>
 *  Note the asymmetry between the two checks: a non-empty tag must be a
 *  sentence final punctuation tag, whereas in the word fallback any
 *  punctuation word (<code>isPunctuationWord</code>) counts.
 *
 *  @param sentence The sentence to check; the punctuation word is appended
 *                  to this list in place
 *  @param length The length of the sentence (just to avoid recomputation)
 *  @return true if a punctuation word was appended to the sentence; false
 *          if punctuation was found in the inspected window, or if the
 *          language pack supplies no sentence final punctuation words so
 *          nothing could be added
 */
private boolean addSentenceFinalPunctIfNeeded(List<HasWord> sentence, int length) {
    int start = Math.max(length - 3, 0);
    TreebankLanguagePack tlp = op.tlpParams.treebankLanguagePack();
    for (int i = length - 1; i >= start; i--) {
        HasWord item = sentence.get(i);
        // An object (e.g., CoreLabel) can implement HasTag but not actually store
        // a tag so we need to check that there is something there for this case.
        // If there is, use only it, since word tokens can be ambiguous.
        String tag = null;
        if (item instanceof HasTag) {
            tag = ((HasTag) item).tag();
        }
        if (tag != null && !tag.isEmpty()) {
            if (tlp.isSentenceFinalPunctuationTag(tag)) {
                return false;
            }
        } else {
            String str = item.word();
            if (tlp.isPunctuationWord(str)) {
                return false;
            }
        }
    }
    // none found so add one, provided the language pack has one to offer.
    String[] sfpWords = tlp.sentenceFinalPunctuationWords();
    if (sfpWords == null || sfpWords.length == 0) {
        // Nothing was appended, so do not report that punctuation was added:
        // the caller would otherwise believe the sentence grew by one token.
        return false;
    }
    if (op.testOptions.verbose) {
        log.info("Adding missing final punctuation to sentence.");
    }
    sentence.add(new Word(sfpWords[0]));
    return true;
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) HasWord(edu.stanford.nlp.ling.HasWord) TaggedWord(edu.stanford.nlp.ling.TaggedWord) Word(edu.stanford.nlp.ling.Word) HasTag(edu.stanford.nlp.ling.HasTag) TreebankLanguagePack(edu.stanford.nlp.trees.TreebankLanguagePack) TreePrint(edu.stanford.nlp.trees.TreePrint) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint)

Example 15 with TreebankLanguagePack

use of edu.stanford.nlp.trees.TreebankLanguagePack in project CoreNLP by stanfordnlp.

the class NegraPennTreeReaderFactory method main.

/**
 * Reads every tree from the given input with a
 * <code>NegraPennTreeReaderFactory</code> reader and pretty-prints each one
 * via <code>Tree.pennPrint()</code>.
 * <p>
 * The input is decoded with the encoding reported by
 * <code>NegraPennLanguagePack</code>. I/O failures are reported with a stack
 * trace rather than propagated.
 *
 * @param args File to run on; only <code>args[0]</code> is used. With no
 *             arguments a usage message is printed and the method returns.
 */
public static void main(String[] args) {
    if (args.length < 1) {
        System.out.printf("Usage: java %s tree_file%n", NegraPennTreeReaderFactory.class.getName());
        return;
    }
    TreebankLanguagePack tlp = new NegraPennLanguagePack();
    TreeReaderFactory trf = new NegraPennTreeReaderFactory(2, false, false, tlp);
    try {
        TreeReader tr = trf.newTreeReader(IOUtils.readerFromString(args[0], tlp.getEncoding()));
        try {
            for (Tree t; (t = tr.readTree()) != null; ) {
                t.pennPrint();
            }
        } finally {
            // Release the reader even when reading or printing a tree fails.
            tr.close();
        }
    } catch (IOException e) {
        // Also covers UnsupportedEncodingException and FileNotFoundException,
        // which are both IOException subclasses.
        e.printStackTrace();
    }
}
Also used : TreeReader(edu.stanford.nlp.trees.TreeReader) PennTreeReader(edu.stanford.nlp.trees.PennTreeReader) Tree(edu.stanford.nlp.trees.Tree) TreebankLanguagePack(edu.stanford.nlp.trees.TreebankLanguagePack) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory)

Aggregations

TreebankLanguagePack (edu.stanford.nlp.trees.TreebankLanguagePack)15 Tree (edu.stanford.nlp.trees.Tree)10 TreePrint (edu.stanford.nlp.trees.TreePrint)5 TreeReader (edu.stanford.nlp.trees.TreeReader)3 TreeReaderFactory (edu.stanford.nlp.trees.TreeReaderFactory)3 PrintWriter (java.io.PrintWriter)3 NullOutputStream (edu.stanford.nlp.io.NullOutputStream)2 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)2 CoreLabel (edu.stanford.nlp.ling.CoreLabel)2 HasWord (edu.stanford.nlp.ling.HasWord)2 TaggedWord (edu.stanford.nlp.ling.TaggedWord)2 ParserQuery (edu.stanford.nlp.parser.common.ParserQuery)2 AbstractEval (edu.stanford.nlp.parser.metrics.AbstractEval)2 TaggingEval (edu.stanford.nlp.parser.metrics.TaggingEval)2 UnlabeledAttachmentEval (edu.stanford.nlp.parser.metrics.UnlabeledAttachmentEval)2 GrammaticalStructureFactory (edu.stanford.nlp.trees.GrammaticalStructureFactory)2 PennTreeReader (edu.stanford.nlp.trees.PennTreeReader)2 PennTreebankLanguagePack (edu.stanford.nlp.trees.PennTreebankLanguagePack)2 Treebank (edu.stanford.nlp.trees.Treebank)2 Pair (edu.stanford.nlp.util.Pair)2