Search in sources :

Example 1 with ThreadsafeProcessor

Use of edu.stanford.nlp.util.concurrent.ThreadsafeProcessor in project CoreNLP by stanfordnlp.

The class HybridCorefSystem, method runCoref.

/**
 * Runs the hybrid coreference system over every document supplied by the system's
 * document maker, using a {@link MulticoreWrapper} with the configured thread count.
 *
 * <p>When scoring is enabled in {@code props}, three timestamped output files (gold,
 * before-coref predictions, after-coref predictions) are written under the configured
 * CoNLL output path and then passed to {@code CorefScorer}. The output directory is
 * created together with any missing parent directories, and the output writers are
 * closed even if processing fails part-way through.
 *
 * @param props configuration for logging, threading, timing/memory checks and scoring
 * @throws Exception if building the system, reading documents, writing output or
 *                   scoring fails
 */
public static void runCoref(Properties props) throws Exception {
    /*
    * property, environment setting
    */
    Redwood.hideChannelsEverywhere("debug-cluster", "debug-mention", "debug-preprocessor", "debug-docreader", "debug-mergethres", "debug-featureselection", "debug-md");
    int nThreads = HybridCorefProperties.getThreadCounts(props);
    // Timestamp with whitespace and colons replaced so it is usable inside a file name.
    String timeStamp = Calendar.getInstance().getTime().toString().replaceAll("\\s", "-").replaceAll(":", "-");
    Logger logger = Logger.getLogger(HybridCorefSystem.class.getName());
    // set log file path
    if (props.containsKey(HybridCorefProperties.LOG_PROP)) {
        File logFile = new File(props.getProperty(HybridCorefProperties.LOG_PROP));
        RedwoodConfiguration.current().handlers(RedwoodConfiguration.Handlers.file(logFile)).apply();
        Redwood.log("Starting coref log");
    }
    log.info(props.toString());
    if (HybridCorefProperties.checkMemory(props)) {
        checkMemoryUsage();
    }
    HybridCorefSystem cs = new HybridCorefSystem(props);
    /*
       output setting
    */
    // prepare conll output
    String goldOutput = null;
    String beforeCorefOutput = null;
    String afterCorefOutput = null;
    PrintWriter writerGold = null;
    PrintWriter writerBeforeCoref = null;
    PrintWriter writerAfterCoref = null;
    Date startTime = null;
    try {
        if (HybridCorefProperties.doScore(props)) {
            String pathOutput = CorefProperties.conllOutputPath(props);
            // mkdirs (not mkdir) so that a nested, not-yet-existing output path is created too.
            (new File(pathOutput)).mkdirs();
            goldOutput = pathOutput + "output-" + timeStamp + ".gold.txt";
            beforeCorefOutput = pathOutput + "output-" + timeStamp + ".predicted.txt";
            afterCorefOutput = pathOutput + "output-" + timeStamp + ".coref.predicted.txt";
            // Opened inside the try so that a failure on a later file still closes the earlier ones.
            writerGold = new PrintWriter(new FileOutputStream(goldOutput));
            writerBeforeCoref = new PrintWriter(new FileOutputStream(beforeCorefOutput));
            writerAfterCoref = new PrintWriter(new FileOutputStream(afterCorefOutput));
        }
        // run coref
        MulticoreWrapper<Pair<Document, HybridCorefSystem>, StringBuilder[]> wrapper = new MulticoreWrapper<>(nThreads, new ThreadsafeProcessor<Pair<Document, HybridCorefSystem>, StringBuilder[]>() {

            @Override
            public StringBuilder[] process(Pair<Document, HybridCorefSystem> input) {
                try {
                    Document document = input.first;
                    HybridCorefSystem cs = input.second;
                    // conll output and logs
                    StringBuilder[] outputs = new StringBuilder[4];
                    cs.coref(document, outputs);
                    return outputs;
                } catch (Exception e) {
                    // process() cannot declare checked exceptions; keep the cause attached.
                    throw new RuntimeException(e);
                }
            }

            @Override
            public ThreadsafeProcessor<Pair<Document, HybridCorefSystem>, StringBuilder[]> newInstance() {
                // The same processor instance is handed to every worker.
                return this;
            }
        });
        if (HybridCorefProperties.checkTime(props)) {
            startTime = new Date();
            System.err.printf("END-TO-END COREF Start time: %s\n", startTime);
        }
        // run processes
        int docCnt = 0;
        while (true) {
            Document document = cs.docMaker.nextDoc();
            if (document == null) {
                break;
            }
            wrapper.put(Pair.makePair(document, cs));
            // NOTE(review): logOutput is defined elsewhere; presumably it drains finished
            // results to the writers and returns the updated document count - confirm.
            docCnt = logOutput(wrapper, writerGold, writerBeforeCoref, writerAfterCoref, docCnt);
        }
        // Finished reading the input. Wait for jobs to finish
        wrapper.join();
        docCnt = logOutput(wrapper, writerGold, writerBeforeCoref, writerAfterCoref, docCnt);
    } finally {
        // Close the output files on both the success and the failure path.
        IOUtils.closeIgnoringExceptions(writerGold);
        IOUtils.closeIgnoringExceptions(writerBeforeCoref);
        IOUtils.closeIgnoringExceptions(writerAfterCoref);
    }
    if (HybridCorefProperties.checkTime(props)) {
        System.err.printf("END-TO-END COREF Elapsed time: %.3f seconds\n", (((new Date()).getTime() - startTime.getTime()) / 1000F));
    // System.err.printf("CORENLP PROCESS TIME TOTAL: %.3f seconds\n", cs.mentionExtractor.corenlpProcessTime);
    }
    if (HybridCorefProperties.checkMemory(props)) {
        checkMemoryUsage();
    }
    // scoring
    if (HybridCorefProperties.doScore(props)) {
        // Score the predictions before coref, then after coref, against the same gold file.
        String summary = CorefScorer.getEvalSummary(CorefProperties.getScorerPath(props), goldOutput, beforeCorefOutput);
        CorefScorer.printScoreSummary(summary, logger, false);
        summary = CorefScorer.getEvalSummary(CorefProperties.getScorerPath(props), goldOutput, afterCorefOutput);
        CorefScorer.printScoreSummary(summary, logger, true);
        CorefScorer.printFinalConllScore(summary, logger);
    }
}
Also used : ThreadsafeProcessor(edu.stanford.nlp.util.concurrent.ThreadsafeProcessor) MulticoreWrapper(edu.stanford.nlp.util.concurrent.MulticoreWrapper) Logger(java.util.logging.Logger) Document(edu.stanford.nlp.coref.data.Document) Date(java.util.Date) FileOutputStream(java.io.FileOutputStream) File(java.io.File) PrintWriter(java.io.PrintWriter) Pair(edu.stanford.nlp.util.Pair)

Example 2 with ThreadsafeProcessor

Use of edu.stanford.nlp.util.concurrent.ThreadsafeProcessor in project CoreNLP by stanfordnlp.

The class MulticoreWrapperDemo, method main.

/**
 * Tags each line read from stdin with a {@code MaxentTagger} using four worker threads
 * and prints the tagged lines to stdout.
 *
 * <p>Prints a usage message and exits with status -1 unless exactly one argument is
 * given. If reading stdin fails, the wrapper is still joined and any results that are
 * already available are printed before the stack trace is reported.
 *
 * @param args Command-line arguments: modelFile (runs as a filter from stdin to stdout)
 */
public static void main(String[] args) {
    if (args.length != 1) {
        System.err.printf("Usage: java %s model_file < input_file%n", MulticoreWrapperDemo.class.getName());
        System.exit(-1);
    }
    try {
        // Load MaxentTagger, which is threadsafe
        String modelFile = args[0];
        final MaxentTagger tagger = new MaxentTagger(modelFile);
        // Configure to run with 4 worker threads
        int nThreads = 4;
        MulticoreWrapper<String, String> wrapper = new MulticoreWrapper<>(nThreads, new ThreadsafeProcessor<String, String>() {

            @Override
            public String process(String input) {
                return tagger.tagString(input);
            }

            @Override
            public ThreadsafeProcessor<String, String> newInstance() {
                // MaxentTagger is threadsafe
                return this;
            }
        });
        try {
            // Submit jobs, which come from stdin
            BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
            for (String line; (line = br.readLine()) != null; ) {
                wrapper.put(line);
                // Print whatever results are ready so far.
                while (wrapper.peek()) {
                    System.out.println(wrapper.poll());
                }
            }
        } finally {
            // Finished reading the input (or reading failed). Wait for jobs to finish
            // NOTE(review): join() is expected to also release the worker threads, so it
            // must run on the failure path too - confirm against MulticoreWrapper.
            wrapper.join();
            while (wrapper.peek()) {
                System.out.println(wrapper.poll());
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Also used : ThreadsafeProcessor(edu.stanford.nlp.util.concurrent.ThreadsafeProcessor) MaxentTagger(edu.stanford.nlp.tagger.maxent.MaxentTagger) MulticoreWrapper(edu.stanford.nlp.util.concurrent.MulticoreWrapper) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException)

Aggregations

MulticoreWrapper (edu.stanford.nlp.util.concurrent.MulticoreWrapper)2 ThreadsafeProcessor (edu.stanford.nlp.util.concurrent.ThreadsafeProcessor)2 Document (edu.stanford.nlp.coref.data.Document)1 MaxentTagger (edu.stanford.nlp.tagger.maxent.MaxentTagger)1 Pair (edu.stanford.nlp.util.Pair)1 BufferedReader (java.io.BufferedReader)1 File (java.io.File)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 InputStreamReader (java.io.InputStreamReader)1 PrintWriter (java.io.PrintWriter)1 Date (java.util.Date)1 Logger (java.util.logging.Logger)1