use of java.io.PrintWriter in project CoreNLP by stanfordnlp.
the class HybridCorefSystem method runCoref.
/**
 * Runs the hybrid coreference system over every document produced by the system's
 * document maker, optionally writing CoNLL output files and scoring them.
 *
 * <p>Behavior is driven by {@code props}: the thread count, an optional log file
 * ({@code HybridCorefProperties.LOG_PROP}), memory/time reporting, and whether scoring
 * is enabled. When scoring is enabled, three timestamped output files (gold, predicted
 * before coref, predicted after coref) are created under the CoNLL output path and are
 * passed to the scorer once all documents have been processed.
 *
 * <p>The output writers are always closed, even when processing fails part-way, so file
 * handles are not leaked on error.
 *
 * @param props configuration for the coref run
 * @throws Exception if system construction, document reading, coref or scoring fails
 */
public static void runCoref(Properties props) throws Exception {
  /*
   * property, environment setting
   */
  Redwood.hideChannelsEverywhere("debug-cluster", "debug-mention", "debug-preprocessor", "debug-docreader", "debug-mergethres", "debug-featureselection", "debug-md");
  int nThreads = HybridCorefProperties.getThreadCounts(props);
  // Timestamp with whitespace and colons replaced by '-' so it can be embedded in file names.
  String timeStamp = Calendar.getInstance().getTime().toString().replaceAll("\\s", "-").replaceAll(":", "-");
  Logger logger = Logger.getLogger(HybridCorefSystem.class.getName());

  // set log file path
  if (props.containsKey(HybridCorefProperties.LOG_PROP)) {
    File logFile = new File(props.getProperty(HybridCorefProperties.LOG_PROP));
    RedwoodConfiguration.current().handlers(RedwoodConfiguration.Handlers.file(logFile)).apply();
    Redwood.log("Starting coref log");
  }

  log.info(props.toString());

  if (HybridCorefProperties.checkMemory(props)) {
    checkMemoryUsage();
  }

  HybridCorefSystem cs = new HybridCorefSystem(props);

  /*
   * output setting
   */
  // prepare conll output
  String goldOutput = null;
  String beforeCorefOutput = null;
  String afterCorefOutput = null;
  PrintWriter writerGold = null;
  PrintWriter writerBeforeCoref = null;
  PrintWriter writerAfterCoref = null;

  Date startTime = null;

  try {
    if (HybridCorefProperties.doScore(props)) {
      String pathOutput = CorefProperties.conllOutputPath(props);
      // mkdirs (not mkdir) so that a nested, not-yet-existing output path is created too.
      (new File(pathOutput)).mkdirs();
      goldOutput = pathOutput + "output-" + timeStamp + ".gold.txt";
      beforeCorefOutput = pathOutput + "output-" + timeStamp + ".predicted.txt";
      afterCorefOutput = pathOutput + "output-" + timeStamp + ".coref.predicted.txt";
      // Opened one at a time inside the try: if a later open fails, the finally block
      // still closes the writers that were already created.
      writerGold = new PrintWriter(new FileOutputStream(goldOutput));
      writerBeforeCoref = new PrintWriter(new FileOutputStream(beforeCorefOutput));
      writerAfterCoref = new PrintWriter(new FileOutputStream(afterCorefOutput));
    }

    // run coref
    MulticoreWrapper<Pair<Document, HybridCorefSystem>, StringBuilder[]> wrapper = new MulticoreWrapper<>(nThreads, new ThreadsafeProcessor<Pair<Document, HybridCorefSystem>, StringBuilder[]>() {
      @Override
      public StringBuilder[] process(Pair<Document, HybridCorefSystem> input) {
        try {
          Document document = input.first;
          HybridCorefSystem cs = input.second;
          // conll output and logs
          StringBuilder[] outputs = new StringBuilder[4];
          cs.coref(document, outputs);
          return outputs;
        } catch (Exception e) {
          // process() cannot throw checked exceptions; keep the cause attached.
          throw new RuntimeException(e);
        }
      }

      @Override
      public ThreadsafeProcessor<Pair<Document, HybridCorefSystem>, StringBuilder[]> newInstance() {
        // This processor keeps no state of its own, so the same instance is handed out.
        return this;
      }
    });

    if (HybridCorefProperties.checkTime(props)) {
      startTime = new Date();
      System.err.printf("END-TO-END COREF Start time: %s\n", startTime);
    }

    // run processes
    int docCnt = 0;
    while (true) {
      Document document = cs.docMaker.nextDoc();
      if (document == null) {
        break;
      }
      wrapper.put(Pair.makePair(document, cs));
      // NOTE(review): logOutput presumably writes out whatever results are already
      // available and returns the updated document count - confirm against logOutput.
      docCnt = logOutput(wrapper, writerGold, writerBeforeCoref, writerAfterCoref, docCnt);
    }

    // Finished reading the input. Wait for jobs to finish
    wrapper.join();
    logOutput(wrapper, writerGold, writerBeforeCoref, writerAfterCoref, docCnt);
  } finally {
    // Close before scoring (the scorer reads these files) and on every failure path.
    // The writers are null when scoring is disabled, exactly as in the non-failing path.
    IOUtils.closeIgnoringExceptions(writerGold);
    IOUtils.closeIgnoringExceptions(writerBeforeCoref);
    IOUtils.closeIgnoringExceptions(writerAfterCoref);
  }

  if (HybridCorefProperties.checkTime(props)) {
    System.err.printf("END-TO-END COREF Elapsed time: %.3f seconds\n", (((new Date()).getTime() - startTime.getTime()) / 1000F));
    // System.err.printf("CORENLP PROCESS TIME TOTAL: %.3f seconds\n", cs.mentionExtractor.corenlpProcessTime);
  }
  if (HybridCorefProperties.checkMemory(props)) {
    checkMemoryUsage();
  }

  // scoring
  if (HybridCorefProperties.doScore(props)) {
    String summary = CorefScorer.getEvalSummary(CorefProperties.getScorerPath(props), goldOutput, beforeCorefOutput);
    CorefScorer.printScoreSummary(summary, logger, false);

    summary = CorefScorer.getEvalSummary(CorefProperties.getScorerPath(props), goldOutput, afterCorefOutput);
    CorefScorer.printScoreSummary(summary, logger, true);
    CorefScorer.printFinalConllScore(summary);
  }
}
use of java.io.PrintWriter in project CoreNLP by stanfordnlp.
the class RVFDataset method toSummaryString.
/**
 * Builds a multi-line, human-readable summary of this dataset: the number of data
 * points, the number of labels followed by a bracketed comma-separated list of the
 * labels, the number of feature types in the feature index, and the counts reported
 * by {@code numFeatureTypes()} and {@code numFeatureTokens()}.
 *
 * @return the summary text, one statistic per line
 */
public String toSummaryString() {
  StringWriter buffer = new StringWriter();
  PrintWriter out = new PrintWriter(buffer);

  out.println("Number of data points: " + size());

  // Labels are rendered as "[a, b, c]": first label bare, every later one prefixed by ", ".
  out.print("Number of labels: " + labelIndex.size() + " [");
  Iterator<L> labels = labelIndex.iterator();
  if (labels.hasNext()) {
    out.print(labels.next());
    while (labels.hasNext()) {
      out.print(", ");
      out.print(labels.next());
    }
  }
  out.println("]");

  out.println("Number of features (Phi(X) types): " + featureIndex.size());
  out.println("Number of active feature types: " + numFeatureTypes());
  out.println("Number of active feature tokens: " + numFeatureTokens());

  return buffer.toString();
}
use of java.io.PrintWriter in project CoreNLP by stanfordnlp.
the class RVFDataset method writeSVMLightFormat.
/**
 * Write the dataset in SVM-light format to the file.
 *
 * A strict SVM-light format will be written, where labels and features are
 * both encoded as integers, using the label and feature indexes of this
 * dataset. Datasets written by this method can be read by
 * {@link #readSVMLightFormat(File)}.
 *
 * The underlying writer is closed when this method returns, including when
 * writing fails with an exception.
 *
 * @param file The location where the dataset should be written.
 * @throws FileNotFoundException If the file cannot be opened or created for writing.
 */
public void writeSVMLightFormat(File file) throws FileNotFoundException {
  // try-with-resources: release the file handle even if the delegate throws.
  try (PrintWriter writer = new PrintWriter(file)) {
    writeSVMLightFormat(writer);
  }
}
use of java.io.PrintWriter in project CoreNLP by stanfordnlp.
the class CorefScorer method getEvalSummary.
/**
 * Runs the external evaluation script on a gold file and a prediction file and returns
 * its output as a summary string.
 *
 * <p>The script is invoked as {@code evalScript all goldFile predictFile none}. Its
 * standard output becomes the summary; if anything was written to standard error it is
 * appended after a {@code "\nERROR: "} marker. Every decimal number in the summary with
 * three or more fractional digits is then reformatted with the pattern {@code #.##}
 * (at most two fractional digits).
 *
 * @param evalScript path of the scorer executable/script
 * @param goldFile path of the gold-standard output file
 * @param predictFile path of the predicted output file
 * @return the scorer output with long decimals shortened
 * @throws IOException if running the scorer process fails
 */
public static String getEvalSummary(String evalScript, String goldFile, String predictFile) throws IOException {
  ProcessBuilder process = new ProcessBuilder(evalScript, "all", goldFile, predictFile, "none");
  StringOutputStream errSos = new StringOutputStream();
  StringOutputStream outSos = new StringOutputStream();
  // Closing the writers flushes them, so the captured text is complete before it is read.
  try (PrintWriter out = new PrintWriter(outSos);
       PrintWriter err = new PrintWriter(errSos)) {
    SystemUtils.run(process, out, err);
  }

  String summary = outSos.toString();
  String errStr = errSos.toString();
  if (!errStr.isEmpty()) {
    summary += "\nERROR: " + errStr;
  }

  Pattern pattern = Pattern.compile("\\d+\\.\\d\\d\\d+");
  DecimalFormat df = new DecimalFormat("#.##");
  Matcher matcher = pattern.matcher(summary);
  // Rewrite each match in place in a single pass. Using String.replaceFirst(number, ...)
  // would interpret the matched text as a regex (its '.' matches any character) and
  // search again from the start, so it could replace a different span than the match.
  StringBuffer rounded = new StringBuffer(summary.length());
  while (matcher.find()) {
    String shortened = df.format(Double.parseDouble(matcher.group()));
    matcher.appendReplacement(rounded, Matcher.quoteReplacement(shortened));
  }
  matcher.appendTail(rounded);
  return rounded.toString();
}
use of java.io.PrintWriter in project CoreNLP by stanfordnlp.
the class CorefSystem method runOnConll.
/**
 * Runs coreference over the documents from {@code docMaker}, writes gold, pre-coref and
 * post-coref CoNLL output files, and then scores the predictions with the external scorer.
 *
 * <p>The three output files share a base name made of the configured CoNLL output path
 * plus a timestamp. The writers are closed before the scorer is invoked, so everything
 * buffered has been written to the files the scorer reads, and they are also closed if
 * processing fails.
 *
 * @param props configuration supplying the CoNLL output path and the scorer path
 * @throws Exception if the output files cannot be opened, or processing or scoring fails
 */
public void runOnConll(Properties props) throws Exception {
  // Timestamp with whitespace and colons replaced by '-' so it is usable in a file name.
  String baseName = CorefProperties.conllOutputPath(props) + Calendar.getInstance().getTime().toString().replaceAll("\\s", "-").replaceAll(":", "-");
  String goldOutput = baseName + ".gold.txt";
  String beforeCorefOutput = baseName + ".predicted.txt";
  String afterCorefOutput = baseName + ".coref.predicted.txt";

  // The writers live only for the document-processing phase; leaving this block closes
  // (and therefore flushes) them before the scorer opens the same files.
  try (PrintWriter writerGold = new PrintWriter(new FileOutputStream(goldOutput));
       PrintWriter writerBeforeCoref = new PrintWriter(new FileOutputStream(beforeCorefOutput));
       PrintWriter writerAfterCoref = new PrintWriter(new FileOutputStream(afterCorefOutput))) {
    (new CorefDocumentProcessor() {
      @Override
      public void process(int id, Document document) {
        // Gold and pre-coref output are printed before the coref algorithm runs on the document.
        writerGold.print(CorefPrinter.printConllOutput(document, true));
        writerBeforeCoref.print(CorefPrinter.printConllOutput(document, false));
        long time = System.currentTimeMillis();
        corefAlgorithm.runCoref(document);
        if (verbose) {
          Redwood.log(getName(), "Coref took " + (System.currentTimeMillis() - time) / 1000.0 + "s");
        }
        CorefUtils.removeSingletonClusters(document);
        writerAfterCoref.print(CorefPrinter.printConllOutput(document, false, true));
      }

      @Override
      public void finish() throws Exception {
      }

      @Override
      public String getName() {
        return corefAlgorithm.getClass().getName();
      }
    }).run(docMaker);
  }

  Logger logger = Logger.getLogger(CorefSystem.class.getName());
  String summary = CorefScorer.getEvalSummary(CorefProperties.getScorerPath(props), goldOutput, beforeCorefOutput);
  CorefScorer.printScoreSummary(summary, logger, false);
  summary = CorefScorer.getEvalSummary(CorefProperties.getScorerPath(props), goldOutput, afterCorefOutput);
  CorefScorer.printScoreSummary(summary, logger, true);
  CorefScorer.printFinalConllScore(summary);
}
Aggregations