Search in sources :

Example 61 with PrintWriter

use of java.io.PrintWriter in project CoreNLP by stanfordnlp.

the class HybridCorefSystem method runCoref.

/**
 * Runs the hybrid coreference system over every document produced by the
 * system's document maker, optionally writing CoNLL-style output files and
 * scoring them.
 *
 * <p>Documents are processed through a {@code MulticoreWrapper} using the
 * thread count taken from {@code props}. When scoring is enabled, three output
 * files (gold, predicted-before-coref, predicted-after-coref) are written under
 * the configured CoNLL output path and then passed to the scorer.
 *
 * <p>The output writers are always closed, including when processing fails
 * part-way through, so file handles are not leaked on error.
 *
 * @param props configuration properties controlling logging, threading,
 *              timing/memory checks and scoring
 * @throws Exception if constructing the system, reading documents, running
 *                   coreference or scoring fails
 */
public static void runCoref(Properties props) throws Exception {
    /*
    * property, environment setting
    */
    Redwood.hideChannelsEverywhere("debug-cluster", "debug-mention", "debug-preprocessor", "debug-docreader", "debug-mergethres", "debug-featureselection", "debug-md");
    int nThreads = HybridCorefProperties.getThreadCounts(props);
    // The timestamp is embedded in output file names, so whitespace and ':' are replaced by '-'.
    String timeStamp = Calendar.getInstance().getTime().toString().replaceAll("\\s", "-").replaceAll(":", "-");
    Logger logger = Logger.getLogger(HybridCorefSystem.class.getName());
    // set log file path
    if (props.containsKey(HybridCorefProperties.LOG_PROP)) {
        File logFile = new File(props.getProperty(HybridCorefProperties.LOG_PROP));
        RedwoodConfiguration.current().handlers(RedwoodConfiguration.Handlers.file(logFile)).apply();
        Redwood.log("Starting coref log");
    }
    log.info(props.toString());
    if (HybridCorefProperties.checkMemory(props)) {
        checkMemoryUsage();
    }
    HybridCorefSystem cs = new HybridCorefSystem(props);
    /*
       output setting
    */
    // prepare conll output
    String goldOutput = null;
    String beforeCorefOutput = null;
    String afterCorefOutput = null;
    PrintWriter writerGold = null;
    PrintWriter writerBeforeCoref = null;
    PrintWriter writerAfterCoref = null;
    Date startTime = null;
    try {
        if (HybridCorefProperties.doScore(props)) {
            String pathOutput = CorefProperties.conllOutputPath(props);
            // NOTE(review): mkdir() creates only the last path element, and file names are
            // appended without a separator -- presumably pathOutput ends with one; confirm.
            (new File(pathOutput)).mkdir();
            goldOutput = pathOutput + "output-" + timeStamp + ".gold.txt";
            beforeCorefOutput = pathOutput + "output-" + timeStamp + ".predicted.txt";
            afterCorefOutput = pathOutput + "output-" + timeStamp + ".coref.predicted.txt";
            writerGold = new PrintWriter(new FileOutputStream(goldOutput));
            writerBeforeCoref = new PrintWriter(new FileOutputStream(beforeCorefOutput));
            writerAfterCoref = new PrintWriter(new FileOutputStream(afterCorefOutput));
        }
        // run coref
        MulticoreWrapper<Pair<Document, HybridCorefSystem>, StringBuilder[]> wrapper = new MulticoreWrapper<>(nThreads, new ThreadsafeProcessor<Pair<Document, HybridCorefSystem>, StringBuilder[]>() {

            @Override
            public StringBuilder[] process(Pair<Document, HybridCorefSystem> input) {
                try {
                    Document document = input.first;
                    HybridCorefSystem system = input.second;
                    // conll output and logs
                    StringBuilder[] outputs = new StringBuilder[4];
                    system.coref(document, outputs);
                    return outputs;
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }

            @Override
            public ThreadsafeProcessor<Pair<Document, HybridCorefSystem>, StringBuilder[]> newInstance() {
                // The same processor instance is shared by all worker threads.
                return this;
            }
        });
        if (HybridCorefProperties.checkTime(props)) {
            startTime = new Date();
            System.err.printf("END-TO-END COREF Start time: %s\n", startTime);
        }
        // run processes
        int docCnt = 0;
        while (true) {
            Document document = cs.docMaker.nextDoc();
            if (document == null) {
                break;
            }
            wrapper.put(Pair.makePair(document, cs));
            docCnt = logOutput(wrapper, writerGold, writerBeforeCoref, writerAfterCoref, docCnt);
        }
        // Finished reading the input. Wait for jobs to finish
        wrapper.join();
        logOutput(wrapper, writerGold, writerBeforeCoref, writerAfterCoref, docCnt);
    } finally {
        // Close (and thereby flush) the writers before scoring, and also on any failure above.
        // The writers are null when scoring is disabled; the original passed null here as well.
        IOUtils.closeIgnoringExceptions(writerGold);
        IOUtils.closeIgnoringExceptions(writerBeforeCoref);
        IOUtils.closeIgnoringExceptions(writerAfterCoref);
    }
    if (HybridCorefProperties.checkTime(props)) {
        System.err.printf("END-TO-END COREF Elapsed time: %.3f seconds\n", (((new Date()).getTime() - startTime.getTime()) / 1000F));
    }
    if (HybridCorefProperties.checkMemory(props)) {
        checkMemoryUsage();
    }
    // scoring
    if (HybridCorefProperties.doScore(props)) {
        String summary = CorefScorer.getEvalSummary(CorefProperties.getScorerPath(props), goldOutput, beforeCorefOutput);
        CorefScorer.printScoreSummary(summary, logger, false);
        summary = CorefScorer.getEvalSummary(CorefProperties.getScorerPath(props), goldOutput, afterCorefOutput);
        CorefScorer.printScoreSummary(summary, logger, true);
        CorefScorer.printFinalConllScore(summary);
    }
}
Also used : ThreadsafeProcessor(edu.stanford.nlp.util.concurrent.ThreadsafeProcessor) MulticoreWrapper(edu.stanford.nlp.util.concurrent.MulticoreWrapper) Logger(java.util.logging.Logger) Document(edu.stanford.nlp.coref.data.Document) Date(java.util.Date) FileOutputStream(java.io.FileOutputStream) File(java.io.File) PrintWriter(java.io.PrintWriter) Pair(edu.stanford.nlp.util.Pair)

Example 62 with PrintWriter

use of java.io.PrintWriter in project CoreNLP by stanfordnlp.

the class RVFDataset method toSummaryString.

/**
 * Builds a multi-line, human-readable summary of this dataset: the number of
 * data points, the labels (count and comma-separated listing), the size of the
 * feature index, and the active feature type and token counts.
 *
 * @return the summary text, one statistic per line
 */
public String toSummaryString() {
    StringWriter buffer = new StringWriter();
    PrintWriter out = new PrintWriter(buffer);
    out.println("Number of data points: " + size());
    out.print("Number of labels: " + labelIndex.size() + " [");
    // Emit the separator before every label except the first.
    String separator = "";
    for (Iterator<L> labels = labelIndex.iterator(); labels.hasNext(); ) {
        out.print(separator);
        out.print(labels.next());
        separator = ", ";
    }
    out.println("]");
    out.println("Number of features (Phi(X) types): " + featureIndex.size());
    out.println("Number of active feature types: " + numFeatureTypes());
    out.println("Number of active feature tokens: " + numFeatureTokens());
    return buffer.toString();
}
Also used : StringWriter(java.io.StringWriter) PrintWriter(java.io.PrintWriter)

Example 63 with PrintWriter

use of java.io.PrintWriter in project CoreNLP by stanfordnlp.

the class RVFDataset method writeSVMLightFormat.

/**
 * Write the dataset in SVM-light format to the file.
 *
 * A strict SVM-light format will be written, where labels and features are
 * both encoded as integers, using the label and feature indexes of this
 * dataset. Datasets written by this method can be read by
 * {@link #readSVMLightFormat(File)}.
 *
 * The underlying writer is closed when this method returns, including when
 * writing the dataset fails with an exception.
 *
 * @param file The location where the dataset should be written.
 * @throws FileNotFoundException If the file cannot be opened for writing.
 */
public void writeSVMLightFormat(File file) throws FileNotFoundException {
    // try-with-resources releases the file handle even if the delegate throws.
    try (PrintWriter writer = new PrintWriter(file)) {
        writeSVMLightFormat(writer);
    }
}
Also used : PrintWriter(java.io.PrintWriter)

Example 64 with PrintWriter

use of java.io.PrintWriter in project CoreNLP by stanfordnlp.

the class CorefScorer method getEvalSummary.

/**
 * Runs the external evaluation script on a gold file and a prediction file and
 * returns its output as a summary string.
 *
 * <p>The script is invoked as {@code evalScript all goldFile predictFile none}.
 * Anything the process writes to its error stream is appended to the summary
 * after an {@code "\nERROR: "} marker. Every decimal number in the summary with
 * three or more fractional digits is then reformatted with the pattern
 * {@code #.##}.
 *
 * @param evalScript  path of the scorer executable or script
 * @param goldFile    path of the gold-standard file
 * @param predictFile path of the predictions file
 * @return the script output (plus any error output) with long decimals shortened
 * @throws IOException if running the scorer process fails
 */
public static String getEvalSummary(String evalScript, String goldFile, String predictFile) throws IOException {
    ProcessBuilder process = new ProcessBuilder(evalScript, "all", goldFile, predictFile, "none");
    StringOutputStream errSos = new StringOutputStream();
    StringOutputStream outSos = new StringOutputStream();
    // Closing the writers flushes buffered process output into the backing streams;
    // try-with-resources does that even when the run fails.
    try (PrintWriter out = new PrintWriter(outSos);
         PrintWriter err = new PrintWriter(errSos)) {
        SystemUtils.run(process, out, err);
    }
    String summary = outSos.toString();
    String errStr = errSos.toString();
    if (!errStr.isEmpty()) {
        summary += "\nERROR: " + errStr;
    }
    Pattern pattern = Pattern.compile("\\d+\\.\\d\\d\\d+");
    DecimalFormat df = new DecimalFormat("#.##");
    Matcher matcher = pattern.matcher(summary);
    // Replace each match at its own position. The previous replaceFirst(number, ...) call
    // re-interpreted the matched number as a regex (so '.' matched any character) and
    // searched the already-modified text from the start, which could rewrite the wrong span.
    StringBuffer rounded = new StringBuffer(summary.length());
    while (matcher.find()) {
        String formatted = df.format(Double.parseDouble(matcher.group()));
        matcher.appendReplacement(rounded, Matcher.quoteReplacement(formatted));
    }
    matcher.appendTail(rounded);
    return rounded.toString();
}
Also used : StringOutputStream(edu.stanford.nlp.io.StringOutputStream) Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) DecimalFormat(java.text.DecimalFormat) PrintWriter(java.io.PrintWriter)

Example 65 with PrintWriter

use of java.io.PrintWriter in project CoreNLP by stanfordnlp.

the class CorefSystem method runOnConll.

/**
 * Runs coreference over every document from this system's document maker,
 * writes gold, pre-coref and post-coref CoNLL-style output files, and then
 * scores the two prediction files against the gold file.
 *
 * <p>Output file names are built from the configured CoNLL output path plus a
 * timestamp. The writers are closed (and therefore flushed) before the scorer
 * is invoked, so the scorer reads complete files; they are also closed if
 * processing fails.
 *
 * @param props configuration properties supplying the output path and scorer path
 * @throws Exception if opening the output files, processing documents or scoring fails
 */
public void runOnConll(Properties props) throws Exception {
    // The timestamp becomes part of the file names, so whitespace and ':' are replaced by '-'.
    String baseName = CorefProperties.conllOutputPath(props) + Calendar.getInstance().getTime().toString().replaceAll("\\s", "-").replaceAll(":", "-");
    String goldOutput = baseName + ".gold.txt";
    String beforeCorefOutput = baseName + ".predicted.txt";
    String afterCorefOutput = baseName + ".coref.predicted.txt";
    try (PrintWriter writerGold = new PrintWriter(new FileOutputStream(goldOutput));
         PrintWriter writerBeforeCoref = new PrintWriter(new FileOutputStream(beforeCorefOutput));
         PrintWriter writerAfterCoref = new PrintWriter(new FileOutputStream(afterCorefOutput))) {
        (new CorefDocumentProcessor() {

            @Override
            public void process(int id, Document document) {
                // Gold and pre-coref output must be written before the document is modified by coref.
                writerGold.print(CorefPrinter.printConllOutput(document, true));
                writerBeforeCoref.print(CorefPrinter.printConllOutput(document, false));
                long time = System.currentTimeMillis();
                corefAlgorithm.runCoref(document);
                if (verbose) {
                    Redwood.log(getName(), "Coref took " + (System.currentTimeMillis() - time) / 1000.0 + "s");
                }
                CorefUtils.removeSingletonClusters(document);
                writerAfterCoref.print(CorefPrinter.printConllOutput(document, false, true));
            }

            @Override
            public void finish() throws Exception {
            }

            @Override
            public String getName() {
                return corefAlgorithm.getClass().getName();
            }
        }).run(docMaker);
    }
    // The writers are closed at this point, so the scorer sees fully flushed files.
    Logger logger = Logger.getLogger(CorefSystem.class.getName());
    String summary = CorefScorer.getEvalSummary(CorefProperties.getScorerPath(props), goldOutput, beforeCorefOutput);
    CorefScorer.printScoreSummary(summary, logger, false);
    summary = CorefScorer.getEvalSummary(CorefProperties.getScorerPath(props), goldOutput, afterCorefOutput);
    CorefScorer.printScoreSummary(summary, logger, true);
    CorefScorer.printFinalConllScore(summary);
}
Also used : FileOutputStream(java.io.FileOutputStream) Document(edu.stanford.nlp.coref.data.Document) Logger(java.util.logging.Logger) PrintWriter(java.io.PrintWriter)

Aggregations

PrintWriter (java.io.PrintWriter): 4039; StringWriter (java.io.StringWriter): 1201; IOException (java.io.IOException): 788; File (java.io.File): 643; Test (org.junit.Test): 512; FileWriter (java.io.FileWriter): 318; FileOutputStream (java.io.FileOutputStream): 313; OutputStreamWriter (java.io.OutputStreamWriter): 278; BufferedReader (java.io.BufferedReader): 202; ArrayList (java.util.ArrayList): 196; ByteArrayOutputStream (java.io.ByteArrayOutputStream): 162; HttpServletResponse (javax.servlet.http.HttpServletResponse): 145; InputStreamReader (java.io.InputStreamReader): 140; Date (java.util.Date): 131; HashMap (java.util.HashMap): 130; ServletException (javax.servlet.ServletException): 126; BufferedWriter (java.io.BufferedWriter): 125; HttpServletRequest (javax.servlet.http.HttpServletRequest): 125; FastPrintWriter (com.android.internal.util.FastPrintWriter): 124; Map (java.util.Map): 118