Usage example of edu.stanford.nlp.util.concurrent.MulticoreWrapper from the CoreNLP project (stanfordnlp): the main method of the MulticoreWrapperDemo class.
/**
 * Runs as a filter from stdin to stdout: each input line is POS-tagged by a
 * {@link MaxentTagger} shared across a pool of worker threads, and the tagged
 * text is printed to stdout in submission order.
 *
 * @param args Command-line arguments: modelFile (runs as a filter from stdin to stdout)
 */
public static void main(String[] args) {
  if (args.length != 1) {
    System.err.printf("Usage: java %s model_file < input_file%n", MulticoreWrapperDemo.class.getName());
    System.exit(-1);
  }
  try {
    // Load MaxentTagger, which is threadsafe
    String modelFile = args[0];
    final MaxentTagger tagger = new MaxentTagger(modelFile);
    // Configure to run with 4 worker threads
    int nThreads = 4;
    MulticoreWrapper<String, String> wrapper = new MulticoreWrapper<>(nThreads, new ThreadsafeProcessor<String, String>() {
      @Override
      public String process(String input) {
        return tagger.tagString(input);
      }
      @Override
      public ThreadsafeProcessor<String, String> newInstance() {
        // MaxentTagger is threadsafe, so every worker can share this instance
        return this;
      }
    });
    // Submit jobs, which come from stdin.
    // try-with-resources ensures the reader is closed even if tagging throws.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(System.in))) {
      for (String line; (line = br.readLine()) != null; ) {
        wrapper.put(line);
        // Drain any results that are already available so output interleaves with input
        while (wrapper.peek()) {
          System.out.println(wrapper.poll());
        }
      }
    }
    // Finished reading the input. Wait for outstanding jobs to finish, then drain the rest.
    wrapper.join();
    while (wrapper.peek()) {
      System.out.println(wrapper.poll());
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}
Usage example of edu.stanford.nlp.util.concurrent.MulticoreWrapper from the CoreNLP project (stanfordnlp): the runTagger method of the MaxentTagger class.
/**
 * Tags every sentence of {@code document} and writes the tagged output to
 * {@code writer} in the requested style, wrapping it in a {@code <pos>} XML
 * envelope for the XML output styles. When the configuration asks for more
 * than one thread, sentences are tagged concurrently via a
 * {@link MulticoreWrapper}; results come back in submission order.
 *
 * @param document    Sentences to tag
 * @param writer      Destination for the tagged output (flushed before return)
 * @param outputStyle Output format (plain, XML, inline XML, ...)
 * @throws IOException If writing to {@code writer} fails
 */
public <X extends HasWord> void runTagger(Iterable<List<X>> document, BufferedWriter writer, OutputStyle outputStyle) throws IOException {
  Timing timer = new Timing();
  // Running totals, reported as words/sec at the end
  int wordCount = 0;
  int sentenceCount = 0;
  boolean verbosity = config.getOutputVerbosity();
  boolean lemmas = config.getOutputLemmas();
  boolean xmlOutput = (outputStyle == OutputStyle.XML || outputStyle == OutputStyle.INLINE_XML);
  if (xmlOutput) {
    writer.write("<?xml version=\"1.0\" encoding=\"" + config.getEncoding() + "\"?>\n");
    writer.write("<pos>\n");
  }
  if (config.getNThreads() == 1) {
    // Single-threaded path: tag and emit each sentence in turn
    Morphology morpha = lemmas ? new Morphology() : null;
    for (List<X> sentence : document) {
      wordCount += sentence.size();
      tagAndOutputSentence(sentence, lemmas, morpha, outputStyle, verbosity, sentenceCount, "\n", writer);
      sentenceCount++;
    }
  } else {
    // Multithreaded path: submit sentences, draining completed results as we go
    MulticoreWrapper<List<? extends HasWord>, List<? extends HasWord>> pool = new MulticoreWrapper<>(config.getNThreads(), new SentenceTaggingProcessor(this, lemmas));
    for (List<X> sentence : document) {
      pool.put(sentence);
      while (pool.peek()) {
        List<? extends HasWord> tagged = pool.poll();
        wordCount += tagged.size();
        outputTaggedSentence(tagged, lemmas, outputStyle, verbosity, sentenceCount, "\n", writer);
        sentenceCount++;
      }
    }
    // No more input: wait for outstanding work, then drain the remainder
    pool.join();
    while (pool.peek()) {
      List<? extends HasWord> tagged = pool.poll();
      wordCount += tagged.size();
      outputTaggedSentence(tagged, lemmas, outputStyle, verbosity, sentenceCount, "\n", writer);
      sentenceCount++;
    }
  }
  if (xmlOutput) {
    writer.write("</pos>\n");
  }
  writer.flush();
  long elapsedMillis = timer.stop();
  printErrWordsPerSec(elapsedMillis, wordCount);
}
Usage example of edu.stanford.nlp.util.concurrent.MulticoreWrapper from the CoreNLP project (stanfordnlp): the testOnTreebank method of the EvaluateTreebank class.
/** Test the parser on a treebank. Parses will be written to stdout, and
 * various other information will be written to stderr and stdout,
 * particularly if <code>op.testOptions.verbose</code> is true.
 *
 * @param testTreebank The treebank to parse
 * @return The labeled precision/recall F<sub>1</sub> (EVALB measure)
 * of the parser on the treebank.
 */
public double testOnTreebank(Treebank testTreebank) {
log.info("Testing on treebank");
Timing treebankTotalTimer = new Timing();
TreePrint treePrint = op.testOptions.treePrint(op.tlpParams);
TreebankLangParserParams tlpParams = op.tlpParams;
TreebankLanguagePack tlp = op.langpack();
// In quiet mode both output streams are discarded; pwErr is restored to
// System.err after parsing so the evaluation summaries below still print.
PrintWriter pwOut, pwErr;
if (op.testOptions.quietEvaluation) {
NullOutputStream quiet = new NullOutputStream();
pwOut = tlpParams.pw(quiet);
pwErr = tlpParams.pw(quiet);
} else {
pwOut = tlpParams.pw();
pwErr = tlpParams.pw(System.err);
}
if (op.testOptions.verbose) {
pwErr.print("Testing ");
pwErr.println(testTreebank.textualSummary(tlp));
}
if (op.testOptions.evalb) {
EvalbFormatWriter.initEVALBfiles(tlpParams);
}
// Optional file sink for parser output
PrintWriter pwFileOut = null;
if (op.testOptions.writeOutputFiles) {
String fname = op.testOptions.outputFilesPrefix + "." + op.testOptions.outputFilesExtension;
try {
pwFileOut = op.tlpParams.pw(new FileOutputStream(fname));
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
// Optional file sink for k-best equivocation statistics
PrintWriter pwStats = null;
if (op.testOptions.outputkBestEquivocation != null) {
try {
pwStats = op.tlpParams.pw(new FileOutputStream(op.testOptions.outputkBestEquivocation));
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
if (op.testOptions.testingThreads != 1) {
// Multithreaded parsing. MulticoreWrapper returns results in submission
// order, so the goldTrees FIFO queue stays aligned with the parse results:
// each polled ParserQuery is matched with the oldest queued gold tree.
MulticoreWrapper<List<? extends HasWord>, ParserQuery> wrapper = new MulticoreWrapper<>(op.testOptions.testingThreads, new ParsingThreadsafeProcessor(pqFactory, pwErr));
LinkedList<Tree> goldTrees = new LinkedList<>();
for (Tree goldTree : testTreebank) {
List<? extends HasWord> sentence = getInputSentence(goldTree);
goldTrees.add(goldTree);
pwErr.println("Parsing [len. " + sentence.size() + "]: " + SentenceUtils.listToString(sentence));
wrapper.put(sentence);
while (wrapper.peek()) {
ParserQuery pq = wrapper.poll();
goldTree = goldTrees.poll();
processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
}
}
// for tree iterator
// All sentences submitted; wait for the workers and drain remaining results
wrapper.join();
while (wrapper.peek()) {
ParserQuery pq = wrapper.poll();
Tree goldTree = goldTrees.poll();
processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
}
} else {
// Single-threaded parsing: one ParserQuery reused for every sentence
ParserQuery pq = pqFactory.parserQuery();
for (Tree goldTree : testTreebank) {
final List<CoreLabel> sentence = getInputSentence(goldTree);
pwErr.println("Parsing [len. " + sentence.size() + "]: " + SentenceUtils.listToString(sentence));
pq.parseAndReport(sentence, pwErr);
processResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
}
// for tree iterator
}
//Done parsing...print the results of the evaluations
treebankTotalTimer.done("Testing on treebank");
if (op.testOptions.quietEvaluation) {
pwErr = tlpParams.pw(System.err);
}
if (saidMemMessage) {
ParserUtils.printOutOfMemory(pwErr);
}
if (op.testOptions.evalb) {
EvalbFormatWriter.closeEVALBfiles();
}
if (numSkippedEvals != 0) {
pwErr.printf("Unable to evaluate %d parser hypotheses due to yield mismatch\n", numSkippedEvals);
}
// only created here so we know what parser types are supported...
ParserQuery pq = pqFactory.parserQuery();
// Display every evaluation metric that was collected (fields are null when
// the corresponding metric was not requested/applicable).
if (summary) {
if (pcfgLB != null)
pcfgLB.display(false, pwErr);
if (pcfgChildSpecific != null)
pcfgChildSpecific.display(false, pwErr);
if (pcfgLA != null)
pcfgLA.display(false, pwErr);
if (pcfgCB != null)
pcfgCB.display(false, pwErr);
if (pcfgDA != null)
pcfgDA.display(false, pwErr);
if (pcfgTA != null)
pcfgTA.display(false, pwErr);
if (pcfgLL != null && pq.getPCFGParser() != null)
pcfgLL.display(false, pwErr);
if (depDA != null)
depDA.display(false, pwErr);
if (depTA != null)
depTA.display(false, pwErr);
if (depLL != null && pq.getDependencyParser() != null)
depLL.display(false, pwErr);
if (factLB != null)
factLB.display(false, pwErr);
if (factChildSpecific != null)
factChildSpecific.display(false, pwErr);
if (factLA != null)
factLA.display(false, pwErr);
if (factCB != null)
factCB.display(false, pwErr);
if (factDA != null)
factDA.display(false, pwErr);
if (factTA != null)
factTA.display(false, pwErr);
if (factLL != null && pq.getFactoredParser() != null)
factLL.display(false, pwErr);
if (pcfgCatE != null)
pcfgCatE.display(false, pwErr);
for (Eval eval : evals) {
eval.display(false, pwErr);
}
for (BestOfTopKEval eval : topKEvals) {
eval.display(false, pwErr);
}
}
// these ones only have a display mode, so display if turned on!!
if (pcfgRUO != null)
pcfgRUO.display(true, pwErr);
if (pcfgCUO != null)
pcfgCUO.display(true, pwErr);
// Tab-separated one-line summary of the headline numbers
if (tsv) {
NumberFormat nf = new DecimalFormat("0.00");
pwErr.println("factF1\tfactDA\tfactEx\tpcfgF1\tdepDA\tfactTA\tnum");
if (factLB != null)
pwErr.print(nf.format(factLB.getEvalbF1Percent()));
pwErr.print("\t");
if (pq.getDependencyParser() != null && factDA != null)
pwErr.print(nf.format(factDA.getEvalbF1Percent()));
pwErr.print("\t");
if (factLB != null)
pwErr.print(nf.format(factLB.getExactPercent()));
pwErr.print("\t");
if (pcfgLB != null)
pwErr.print(nf.format(pcfgLB.getEvalbF1Percent()));
pwErr.print("\t");
if (pq.getDependencyParser() != null && depDA != null)
pwErr.print(nf.format(depDA.getEvalbF1Percent()));
pwErr.print("\t");
if (pq.getPCFGParser() != null && factTA != null)
pwErr.print(nf.format(factTA.getEvalbF1Percent()));
pwErr.print("\t");
if (factLB != null)
pwErr.print(factLB.getNum());
pwErr.println();
}
// The return value is the factored-parser F1 (0.0 if not collected)
double f1 = 0.0;
if (factLB != null) {
f1 = factLB.getEvalbF1();
}
//Close files (if necessary)
if (pwFileOut != null)
pwFileOut.close();
if (pwStats != null)
pwStats.close();
if (parserQueryEvals != null) {
for (ParserQueryEval parserQueryEval : parserQueryEvals) {
parserQueryEval.display(false, pwErr);
}
}
return f1;
}
Usage example of edu.stanford.nlp.util.concurrent.MulticoreWrapper from the CoreNLP project (stanfordnlp): the main method of the CacheParseHypotheses class.
/**
 * Parses every tree in the given treebank(s), caching the top-k parse
 * hypotheses for each tree (compressed to bytes) and serializing the
 * resulting list of (tree, hypotheses) pairs to the output file.
 * <br>
 * An example of a command line is
 * <br>
 * java -mx1g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model /scr/horatio/dvparser/wsjPCFG.nocompact.simple.ser.gz -output cached9.simple.ser.gz -treebank /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-202
 * <br>
 * java -mx4g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model ~/scr/dvparser/wsjPCFG.nocompact.simple.ser.gz -output cached.train.simple.ser.gz -treebank /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-2199 -numThreads 6
 * <br>
 * java -mx4g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model ~/scr/dvparser/chinese/xinhuaPCFG.ser.gz -output cached.xinhua.train.ser.gz -treebank /afs/ir/data/linguistic-data/Chinese-Treebank/6/data/utf8/bracketed 026-270,301-499,600-999
 */
public static void main(String[] args) throws IOException {
  String parserModel = null;
  String output = null;
  List<Pair<String, FileFilter>> treebanks = Generics.newArrayList();
  int dvKBest = 200;
  int numThreads = 1;
  // Parse command-line flags; each branch consumes its own arguments
  for (int argIndex = 0; argIndex < args.length; ) {
    if (args[argIndex].equalsIgnoreCase("-dvKBest")) {
      // parseInt avoids the needless boxing of Integer.valueOf
      dvKBest = Integer.parseInt(args[argIndex + 1]);
      argIndex += 2;
      continue;
    }
    if (args[argIndex].equalsIgnoreCase("-parser") || args[argIndex].equals("-model")) {
      parserModel = args[argIndex + 1];
      argIndex += 2;
      continue;
    }
    if (args[argIndex].equalsIgnoreCase("-output")) {
      output = args[argIndex + 1];
      argIndex += 2;
      continue;
    }
    if (args[argIndex].equalsIgnoreCase("-treebank")) {
      // -treebank takes a variable number of sub-arguments (path plus ranges)
      Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-treebank");
      argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
      treebanks.add(treebankDescription);
      continue;
    }
    if (args[argIndex].equalsIgnoreCase("-numThreads")) {
      numThreads = Integer.parseInt(args[argIndex + 1]);
      argIndex += 2;
      continue;
    }
    throw new IllegalArgumentException("Unknown argument " + args[argIndex]);
  }
  // Validate required arguments
  if (parserModel == null) {
    throw new IllegalArgumentException("Need to supply a parser model with -model");
  }
  if (output == null) {
    throw new IllegalArgumentException("Need to supply an output filename with -output");
  }
  if (treebanks.isEmpty()) {
    throw new IllegalArgumentException("Need to supply a treebank with -treebank");
  }
  log.info("Writing output to " + output);
  log.info("Loading parser model " + parserModel);
  log.info("Writing " + dvKBest + " hypothesis trees for each tree");
  LexicalizedParser parser = LexicalizedParser.loadModel(parserModel, "-dvKBest", Integer.toString(dvKBest));
  CacheParseHypotheses cacher = new CacheParseHypotheses(parser);
  TreeTransformer transformer = DVParser.buildTrainTransformer(parser.getOp());
  // Collect (and transform) all trees from every requested treebank
  List<Tree> sentences = new ArrayList<>();
  for (Pair<String, FileFilter> description : treebanks) {
    log.info("Reading trees from " + description.first);
    Treebank treebank = parser.getOp().tlpParams.memoryTreebank();
    treebank.loadPath(description.first, description.second);
    treebank = treebank.transform(transformer);
    sentences.addAll(treebank);
  }
  log.info("Processing " + sentences.size() + " trees");
  List<Pair<Tree, byte[]>> cache = Generics.newArrayList();
  transformer = new SynchronizedTreeTransformer(transformer);
  MulticoreWrapper<Tree, Pair<Tree, byte[]>> wrapper = new MulticoreWrapper<>(numThreads, new CacheProcessor(cacher, parser, dvKBest, transformer));
  for (Tree tree : sentences) {
    wrapper.put(tree);
    drainResults(wrapper, cache);
  }
  // All trees submitted; wait for the workers and drain the remainder
  wrapper.join();
  drainResults(wrapper, cache);
  System.out.println("Finished processing " + cache.size() + " trees");
  IOUtils.writeObjectToFile(cache, output);
}

/**
 * Drains all currently available results from the wrapper into the cache,
 * printing a progress message every 10 trees.
 */
private static void drainResults(MulticoreWrapper<Tree, Pair<Tree, byte[]>> wrapper, List<Pair<Tree, byte[]>> cache) {
  while (wrapper.peek()) {
    cache.add(wrapper.poll());
    if (cache.size() % 10 == 0) {
      System.out.println("Processed " + cache.size() + " trees");
    }
  }
}
Usage example of edu.stanford.nlp.util.concurrent.MulticoreWrapper from the CoreNLP project (stanfordnlp): the redoTags method of the ShiftReduceParser class.
/**
 * Re-tags every tree in {@code trees} in place using the supplied tagger,
 * either sequentially or with {@code nThreads} worker threads.
 *
 * @param trees    Trees to retag (modified in place)
 * @param tagger   Tagger used to produce the new tags
 * @param nThreads Number of worker threads; 1 means run sequentially
 */
public static void redoTags(List<Tree> trees, Tagger tagger, int nThreads) {
  if (nThreads != 1) {
    // Parallel path: results are discarded because the workers mutate the trees in place
    MulticoreWrapper<Tree, Tree> retagger = new MulticoreWrapper<>(nThreads, new RetagProcessor(tagger));
    for (Tree tree : trees) {
      retagger.put(tree);
    }
    retagger.join();
    // trees are changed in place
    return;
  }
  // Sequential path
  for (Tree tree : trees) {
    redoTags(tree, tagger);
  }
}
Aggregations