Use of edu.stanford.nlp.trees.TreebankLanguagePack in the CoreNLP project by stanfordnlp.
From the class EvaluateTreebank, method testOnTreebank.
/** Test the parser on a treebank. Parses will be written to stdout, and
* various other information will be written to stderr and stdout,
* particularly if <code>op.testOptions.verbose</code> is true.
*
* @param testTreebank The treebank to parse
* @return The labeled precision/recall F<sub>1</sub> (EVALB measure)
* of the parser on the treebank.
*/
public double testOnTreebank(Treebank testTreebank) {
log.info("Testing on treebank");
// Wall-clock timer for the whole run; reported via done() after parsing.
Timing treebankTotalTimer = new Timing();
TreePrint treePrint = op.testOptions.treePrint(op.tlpParams);
TreebankLangParserParams tlpParams = op.tlpParams;
TreebankLanguagePack tlp = op.langpack();
PrintWriter pwOut, pwErr;
// Quiet mode: route both streams to a NullOutputStream so per-sentence
// chatter is discarded; pwErr is restored to System.err after parsing below.
if (op.testOptions.quietEvaluation) {
NullOutputStream quiet = new NullOutputStream();
pwOut = tlpParams.pw(quiet);
pwErr = tlpParams.pw(quiet);
} else {
pwOut = tlpParams.pw();
pwErr = tlpParams.pw(System.err);
}
if (op.testOptions.verbose) {
pwErr.print("Testing ");
pwErr.println(testTreebank.textualSummary(tlp));
}
// Optionally emit gold/guess files in EVALB's external format.
if (op.testOptions.evalb) {
EvalbFormatWriter.initEVALBfiles(tlpParams);
}
// Optional output file for parses (prefix + "." + extension).
PrintWriter pwFileOut = null;
if (op.testOptions.writeOutputFiles) {
String fname = op.testOptions.outputFilesPrefix + "." + op.testOptions.outputFilesExtension;
try {
pwFileOut = op.tlpParams.pw(new FileOutputStream(fname));
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
// Optional stats file for k-best equivocation output.
PrintWriter pwStats = null;
if (op.testOptions.outputkBestEquivocation != null) {
try {
pwStats = op.tlpParams.pw(new FileOutputStream(op.testOptions.outputkBestEquivocation));
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
if (op.testOptions.testingThreads != 1) {
// Multithreaded path: sentences are fed to a MulticoreWrapper and results
// are drained in submission order. goldTrees is a FIFO queue kept in sync
// with the wrapper so each ParserQuery is matched to its gold tree.
MulticoreWrapper<List<? extends HasWord>, ParserQuery> wrapper = new MulticoreWrapper<>(op.testOptions.testingThreads, new ParsingThreadsafeProcessor(pqFactory, pwErr));
LinkedList<Tree> goldTrees = new LinkedList<>();
for (Tree goldTree : testTreebank) {
List<? extends HasWord> sentence = getInputSentence(goldTree);
goldTrees.add(goldTree);
pwErr.println("Parsing [len. " + sentence.size() + "]: " + SentenceUtils.listToString(sentence));
wrapper.put(sentence);
// Drain any results that are already complete so memory stays bounded.
while (wrapper.peek()) {
ParserQuery pq = wrapper.poll();
goldTree = goldTrees.poll();
processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
}
}
// for tree iterator
// Wait for remaining work, then drain the last results.
wrapper.join();
while (wrapper.peek()) {
ParserQuery pq = wrapper.poll();
Tree goldTree = goldTrees.poll();
processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
}
} else {
// Single-threaded path: one ParserQuery reused for every sentence.
ParserQuery pq = pqFactory.parserQuery();
for (Tree goldTree : testTreebank) {
final List<CoreLabel> sentence = getInputSentence(goldTree);
pwErr.println("Parsing [len. " + sentence.size() + "]: " + SentenceUtils.listToString(sentence));
pq.parseAndReport(sentence, pwErr);
processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
}
// for tree iterator
}
//Done parsing...print the results of the evaluations
treebankTotalTimer.done("Testing on treebank");
// Restore a real stderr writer so the evaluation summary below is visible
// even when per-sentence output was silenced.
if (op.testOptions.quietEvaluation) {
pwErr = tlpParams.pw(System.err);
}
if (saidMemMessage) {
ParserUtils.printOutOfMemory(pwErr);
}
if (op.testOptions.evalb) {
EvalbFormatWriter.closeEVALBfiles();
}
if (numSkippedEvals != 0) {
pwErr.printf("Unable to evaluate %d parser hypotheses due to yield mismatch\n", numSkippedEvals);
}
// only created here so we know what parser types are supported...
ParserQuery pq = pqFactory.parserQuery();
// Display every evaluation metric that was enabled (null means disabled).
if (summary) {
if (pcfgLB != null)
pcfgLB.display(false, pwErr);
if (pcfgChildSpecific != null)
pcfgChildSpecific.display(false, pwErr);
if (pcfgLA != null)
pcfgLA.display(false, pwErr);
if (pcfgCB != null)
pcfgCB.display(false, pwErr);
if (pcfgDA != null)
pcfgDA.display(false, pwErr);
if (pcfgTA != null)
pcfgTA.display(false, pwErr);
if (pcfgLL != null && pq.getPCFGParser() != null)
pcfgLL.display(false, pwErr);
if (depDA != null)
depDA.display(false, pwErr);
if (depTA != null)
depTA.display(false, pwErr);
if (depLL != null && pq.getDependencyParser() != null)
depLL.display(false, pwErr);
if (factLB != null)
factLB.display(false, pwErr);
if (factChildSpecific != null)
factChildSpecific.display(false, pwErr);
if (factLA != null)
factLA.display(false, pwErr);
if (factCB != null)
factCB.display(false, pwErr);
if (factDA != null)
factDA.display(false, pwErr);
if (factTA != null)
factTA.display(false, pwErr);
if (factLL != null && pq.getFactoredParser() != null)
factLL.display(false, pwErr);
if (pcfgCatE != null)
pcfgCatE.display(false, pwErr);
for (Eval eval : evals) {
eval.display(false, pwErr);
}
for (BestOfTopKEval eval : topKEvals) {
eval.display(false, pwErr);
}
}
// these ones only have a display mode, so display if turned on!!
if (pcfgRUO != null)
pcfgRUO.display(true, pwErr);
if (pcfgCUO != null)
pcfgCUO.display(true, pwErr);
// Optional one-line tab-separated summary of the headline numbers.
if (tsv) {
NumberFormat nf = new DecimalFormat("0.00");
pwErr.println("factF1\tfactDA\tfactEx\tpcfgF1\tdepDA\tfactTA\tnum");
if (factLB != null)
pwErr.print(nf.format(factLB.getEvalbF1Percent()));
pwErr.print("\t");
if (pq.getDependencyParser() != null && factDA != null)
pwErr.print(nf.format(factDA.getEvalbF1Percent()));
pwErr.print("\t");
if (factLB != null)
pwErr.print(nf.format(factLB.getExactPercent()));
pwErr.print("\t");
if (pcfgLB != null)
pwErr.print(nf.format(pcfgLB.getEvalbF1Percent()));
pwErr.print("\t");
if (pq.getDependencyParser() != null && depDA != null)
pwErr.print(nf.format(depDA.getEvalbF1Percent()));
pwErr.print("\t");
if (pq.getPCFGParser() != null && factTA != null)
pwErr.print(nf.format(factTA.getEvalbF1Percent()));
pwErr.print("\t");
if (factLB != null)
pwErr.print(factLB.getNum());
pwErr.println();
}
// The return value is the factored parser's EVALB F1 (0.0 if unavailable).
double f1 = 0.0;
if (factLB != null) {
f1 = factLB.getEvalbF1();
}
//Close files (if necessary)
if (pwFileOut != null)
pwFileOut.close();
if (pwStats != null)
pwStats.close();
if (parserQueryEvals != null) {
for (ParserQueryEval parserQueryEval : parserQueryEvals) {
parserQueryEval.display(false, pwErr);
}
}
return f1;
}
Use of edu.stanford.nlp.trees.TreebankLanguagePack in the CoreNLP project by stanfordnlp.
From the class HebrewTreeReaderFactory, method main.
/**
 * Reads a Hebrew treebank file and prints each tree to stdout, one per line,
 * reporting the tree count to stderr when done.
 *
 * Usage: java HebrewTreeReaderFactory tree_file &gt; trees
 *
 * @param args Exactly one argument: the path of the tree file to read
 */
public static void main(String[] args) {
    if (args.length != 1) {
        System.err.printf("Usage: java %s tree_file > trees%n", HebrewTreeReaderFactory.class.getName());
        System.exit(-1);
    }
    TreebankLanguagePack tlp = new HebrewTreebankLanguagePack();
    File treeFile = new File(args[0]);
    TreeReaderFactory trf = new HebrewTreeReaderFactory();
    // try-with-resources: previously the readers leaked if readTree() threw,
    // since close() was only reached on the success path.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), tlp.getEncoding()));
         TreeReader tr = trf.newTreeReader(br)) {
        int numTrees = 0;
        for (Tree t; (t = tr.readTree()) != null; numTrees++) {
            System.out.println(t.toString());
        }
        System.err.printf("Processed %d trees.%n", numTrees);
    } catch (IOException e) {
        // IOException subsumes the former UnsupportedEncodingException and
        // FileNotFoundException catch clauses (both are subclasses).
        e.printStackTrace();
    }
}
Use of edu.stanford.nlp.trees.TreebankLanguagePack in the CoreNLP project by stanfordnlp.
From the class EvaluateTreebank, method testOnTreebank (EvaluationDataset overload).
/**
 * Test the parser on a pre-parsed evaluation dataset. Unlike the Treebank
 * overload, parsing is delegated to {@code testTreebank.dataset(...)}, which
 * returns (ParserQuery, gold tree) pairs that are then scored here.
 *
 * @param testTreebank The dataset supplying parser results and gold trees
 * @return The labeled precision/recall F1 (EVALB measure) of the parser,
 *         taken from factLB (0.0 if that evaluation is disabled)
 */
public double testOnTreebank(EvaluationDataset testTreebank) {
log.info("Testing on treebank");
// Wall-clock timer for the whole run; reported via done() after parsing.
Timing treebankTotalTimer = new Timing();
TreePrint treePrint = op.testOptions.treePrint(op.tlpParams);
TreebankLangParserParams tlpParams = op.tlpParams;
TreebankLanguagePack tlp = op.langpack();
PrintWriter pwOut, pwErr;
// Quiet mode: route both streams to a NullOutputStream so per-sentence
// chatter is discarded; pwErr is restored to System.err after parsing below.
if (op.testOptions.quietEvaluation) {
NullOutputStream quiet = new NullOutputStream();
pwOut = tlpParams.pw(quiet);
pwErr = tlpParams.pw(quiet);
} else {
pwOut = tlpParams.pw();
pwErr = tlpParams.pw(System.err);
}
if (op.testOptions.verbose) {
testTreebank.summarize(pwErr, tlp);
}
// Optionally emit gold/guess files in EVALB's external format.
if (op.testOptions.evalb) {
EvalbFormatWriter.initEVALBfiles(tlpParams);
}
// Optional output file for parses (prefix + "." + extension).
PrintWriter pwFileOut = null;
if (op.testOptions.writeOutputFiles) {
String fname = op.testOptions.outputFilesPrefix + "." + op.testOptions.outputFilesExtension;
try {
pwFileOut = op.tlpParams.pw(new FileOutputStream(fname));
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
// Optional stats file for k-best equivocation output.
PrintWriter pwStats = null;
if (op.testOptions.outputkBestEquivocation != null) {
try {
pwStats = op.tlpParams.pw(new FileOutputStream(op.testOptions.outputkBestEquivocation));
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
// The dataset produces all (parse result, gold tree) pairs; score each one.
List<Pair<ParserQuery, Tree>> results = testTreebank.dataset(pwErr, pwOut, pwFileOut, pwStats, treePrint);
for (Pair<ParserQuery, Tree> result : results) {
ParserQuery pq = result.first;
Tree goldTree = result.second;
processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
}
// Done parsing...print the results of the evaluations
treebankTotalTimer.done("Testing on treebank");
// Restore a real stderr writer so the evaluation summary below is visible
// even when per-sentence output was silenced.
if (op.testOptions.quietEvaluation) {
pwErr = tlpParams.pw(System.err);
}
if (saidMemMessage) {
ParserUtils.printOutOfMemory(pwErr);
}
if (op.testOptions.evalb) {
EvalbFormatWriter.closeEVALBfiles();
}
if (numSkippedEvals != 0) {
pwErr.printf("Unable to evaluate %d parser hypotheses due to yield mismatch\n", numSkippedEvals);
}
// only created here so we know what parser types are supported...
// TODO: pass in the various pcfgparser, dependencyparser, etc?
// pqFactory may be null in this overload, hence the null guards below.
ParserQuery pq = pqFactory != null ? pqFactory.parserQuery() : null;
// Display every evaluation metric that was enabled (null means disabled).
if (summary) {
if (pcfgLB != null)
pcfgLB.display(false, pwErr);
if (pcfgChildSpecific != null)
pcfgChildSpecific.display(false, pwErr);
if (pcfgLA != null)
pcfgLA.display(false, pwErr);
if (pcfgCB != null)
pcfgCB.display(false, pwErr);
if (pcfgDA != null)
pcfgDA.display(false, pwErr);
if (pcfgTA != null)
pcfgTA.display(false, pwErr);
if (pcfgLL != null && pq != null && pq.getPCFGParser() != null)
pcfgLL.display(false, pwErr);
if (depDA != null)
depDA.display(false, pwErr);
if (depTA != null)
depTA.display(false, pwErr);
if (depLL != null && pq != null && pq.getDependencyParser() != null)
depLL.display(false, pwErr);
if (factLB != null)
factLB.display(false, pwErr);
if (factChildSpecific != null)
factChildSpecific.display(false, pwErr);
if (factLA != null)
factLA.display(false, pwErr);
if (factCB != null)
factCB.display(false, pwErr);
if (factDA != null)
factDA.display(false, pwErr);
if (factTA != null)
factTA.display(false, pwErr);
if (factLL != null && pq != null && pq.getFactoredParser() != null)
factLL.display(false, pwErr);
if (pcfgCatE != null)
pcfgCatE.display(false, pwErr);
for (Eval eval : evals) {
eval.display(false, pwErr);
}
for (BestOfTopKEval eval : topKEvals) {
eval.display(false, pwErr);
}
}
// these ones only have a display mode, so display if turned on!!
if (pcfgRUO != null)
pcfgRUO.display(true, pwErr);
if (pcfgCUO != null)
pcfgCUO.display(true, pwErr);
// Optional one-line tab-separated summary of the headline numbers.
if (tsv) {
NumberFormat nf = new DecimalFormat("0.00");
pwErr.println("factF1\tfactDA\tfactEx\tpcfgF1\tdepDA\tfactTA\tnum");
if (factLB != null)
pwErr.print(nf.format(factLB.getEvalbF1Percent()));
pwErr.print("\t");
if (pq != null && pq.getDependencyParser() != null && factDA != null)
pwErr.print(nf.format(factDA.getEvalbF1Percent()));
pwErr.print("\t");
if (factLB != null)
pwErr.print(nf.format(factLB.getExactPercent()));
pwErr.print("\t");
if (pcfgLB != null)
pwErr.print(nf.format(pcfgLB.getEvalbF1Percent()));
pwErr.print("\t");
if (pq != null && pq.getDependencyParser() != null && depDA != null)
pwErr.print(nf.format(depDA.getEvalbF1Percent()));
pwErr.print("\t");
if (pq != null && pq.getPCFGParser() != null && factTA != null)
pwErr.print(nf.format(factTA.getEvalbF1Percent()));
pwErr.print("\t");
if (factLB != null)
pwErr.print(factLB.getNum());
pwErr.println();
}
// The return value is the factored parser's EVALB F1 (0.0 if unavailable).
double f1 = 0.0;
if (factLB != null) {
f1 = factLB.getEvalbF1();
}
// Close files (if necessary)
if (pwFileOut != null)
pwFileOut.close();
if (pwStats != null)
pwStats.close();
// NOTE(review): unlike the Treebank overload, parserQueryEvals is not
// null-checked here — presumably guaranteed non-null by the constructor;
// verify against the field's initialization.
for (ParserQueryEval parserQueryEval : parserQueryEvals) {
parserQueryEval.display(false, pwErr);
}
return f1;
}
Use of edu.stanford.nlp.trees.TreebankLanguagePack in the CoreNLP project by stanfordnlp.
From the class LexicalizedParserQuery, method addSentenceFinalPunctIfNeeded.
/**
 * Adds a sentence final punctuation mark to sentences that lack one.
 * This method adds a period (the first sentence final punctuation word
 * in a parser language pack) to sentences that don't have one within
 * the last 3 words (to allow for close parentheses, etc.). It checks
 * tags for punctuation, if available, otherwise words.
 *
 * @param sentence The sentence to check
 * @param length The length of the sentence (just to avoid recomputation)
 * @return true iff a punctuation word was appended to the sentence
 */
private boolean addSentenceFinalPunctIfNeeded(List<HasWord> sentence, int length) {
    TreebankLanguagePack langPack = op.tlpParams.treebankLanguagePack();
    // Only inspect the last three tokens (allows for close parens, quotes, etc.).
    int windowStart = Math.max(0, length - 3);
    for (int idx = length - 1; idx >= windowStart; idx--) {
        HasWord token = sentence.get(idx);
        // An object (e.g., CoreLabel) can implement HasTag but not actually
        // store a tag, so check for a non-empty value before trusting it.
        String posTag = (token instanceof HasTag) ? ((HasTag) token).tag() : null;
        if (posTag == null || posTag.isEmpty()) {
            // No usable tag: fall back to inspecting the word form itself.
            if (langPack.isPunctuationWord(token.word())) {
                return false;
            }
        } else {
            // A tag is present; prefer it, since word tokens can be ambiguous.
            if (langPack.isSentenceFinalPunctuationTag(posTag)) {
                return false;
            }
        }
    }
    // No final punctuation found, so append one.
    if (op.testOptions.verbose) {
        log.info("Adding missing final punctuation to sentence.");
    }
    String[] finalPunctWords = langPack.sentenceFinalPunctuationWords();
    if (finalPunctWords.length > 0) {
        sentence.add(new Word(finalPunctWords[0]));
    }
    return true;
}
Use of edu.stanford.nlp.trees.TreebankLanguagePack in the CoreNLP project by stanfordnlp.
From the class NegraPennTreeReaderFactory, method main.
/**
 * Reads a NEGRA/Penn-format treebank file and pretty-prints each tree to
 * stdout in Penn Treebank bracket notation.
 *
 * @param args File to run on (first argument; extras are ignored)
 */
public static void main(String[] args) {
    if (args.length < 1) {
        System.out.printf("Usage: java %s tree_file%n", NegraPennTreeReaderFactory.class.getName());
        return;
    }
    TreebankLanguagePack tlp = new NegraPennLanguagePack();
    TreeReaderFactory trf = new NegraPennTreeReaderFactory(2, false, false, tlp);
    // try-with-resources: previously the reader leaked if readTree() threw,
    // since close() was only reached on the success path.
    try (TreeReader tr = trf.newTreeReader(IOUtils.readerFromString(args[0], tlp.getEncoding()))) {
        for (Tree t; (t = tr.readTree()) != null; ) {
            t.pennPrint();
        }
    } catch (IOException e) {
        // IOException subsumes the former UnsupportedEncodingException and
        // FileNotFoundException catch clauses (both are subclasses).
        e.printStackTrace();
    }
}
Aggregations