Use of edu.stanford.nlp.tagger.maxent.MaxentTagger in project CoreNLP by stanfordnlp:
the class DependencyParser, method parseTextFile.
private void parseTextFile(BufferedReader input, PrintWriter output) {
  // Split and tokenize the raw text using the settings from the parser's config
  DocumentPreprocessor preprocessor = new DocumentPreprocessor(input);
  preprocessor.setSentenceFinalPuncWords(config.tlp.sentenceFinalPunctuationWords());
  preprocessor.setEscaper(config.escaper);
  preprocessor.setSentenceDelimiter(config.sentenceDelimiter);
  preprocessor.setTokenizerFactory(config.tlp.getTokenizerFactory());

  // Tag every sentence up front with the configured MaxentTagger
  Timing timer = new Timing();
  MaxentTagger tagger = new MaxentTagger(config.tagger);
  List<List<TaggedWord>> tagged = new ArrayList<>();
  for (List<HasWord> sentence : preprocessor) {
    tagged.add(tagger.tagSentence(sentence));
  }
  System.err.printf("Tagging completed in %.2f sec.%n", timer.stop() / 1000.0);

  // Parse each tagged sentence and write one typed dependency per line,
  // with a blank line between sentences
  timer.start();
  int numSentences = 0;
  for (List<TaggedWord> taggedSentence : tagged) {
    GrammaticalStructure parse = predict(taggedSentence);
    Collection<TypedDependency> deps = parse.typedDependencies();
    for (TypedDependency dep : deps) {
      output.println(dep);
    }
    output.println();
    numSentences++;
  }

  long millis = timer.stop();
  double seconds = millis / 1000.0;
  System.err.printf("Parsed %d sentences in %.2f seconds (%.2f sents/sec).%n",
      numSentences, seconds, numSentences / seconds);
}
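Note that parseTextFile is private: it is reached through DependencyParser's own command-line handling (roughly, java edu.stanford.nlp.parser.nndep.DependencyParser -model <modelFile> -textFile <input> -outFile <output>) rather than being called from user code, so the reader and writer are opened by that caller. A minimal sketch of the expected I/O setup, with hypothetical file names and plain java.io classes, would be:

BufferedReader input = new BufferedReader(new FileReader("input.txt"));   // hypothetical input file
PrintWriter output = new PrintWriter(new FileWriter("parsed.txt"));       // hypothetical output file
// ... tag and parse as above; each TypedDependency prints as relation(governor-index, dependent-index) ...
output.close();
input.close();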
Another use of edu.stanford.nlp.tagger.maxent.MaxentTagger in CoreNLP:
the class DependencyParserDemo, method main.
public static void main(String[] args) {
  String modelPath = DependencyParser.DEFAULT_MODEL;
  String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";

  // Simple argument handling: -tagger and -model each take a path
  for (int argIndex = 0; argIndex < args.length; ) {
    switch (args[argIndex]) {
      case "-tagger":
        taggerPath = args[argIndex + 1];
        argIndex += 2;
        break;
      case "-model":
        modelPath = args[argIndex + 1];
        argIndex += 2;
        break;
      default:
        throw new RuntimeException("Unknown argument " + args[argIndex]);
    }
  }

  String text = "I can almost always tell when movies use fake dinosaurs.";

  MaxentTagger tagger = new MaxentTagger(taggerPath);
  DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);

  // Tokenize, tag, and parse each sentence of the sample text
  DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
  for (List<HasWord> sentence : tokenizer) {
    List<TaggedWord> tagged = tagger.tagSentence(sentence);
    GrammaticalStructure gs = parser.predict(tagged);
    // Print typed dependencies
    log.info(gs);
  }
}
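The demo simply logs the whole GrammaticalStructure. If the individual dependencies are wanted instead, the same object can be iterated, exactly as the parseTextFile example above does; a short sketch inside the loop (not part of the demo itself) would be:

for (TypedDependency dep : gs.typedDependencies()) {
  System.out.println(dep);  // each dependency prints as relation(governor-index, dependent-index)
}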
Another use of edu.stanford.nlp.tagger.maxent.MaxentTagger in CoreNLP:
the class TaggerParserPosTagCompatibilityITest, method testTagSet4.
private static void testTagSet4(String[] lexParsers, String[] maxentTaggers, String[] srParsers, String[] nnDepParsers) {
  // The first lexicalized parser's tag set is the reference everything else is compared against
  LexicalizedParser lp = LexicalizedParser.loadModel(lexParsers[0]);
  Set<String> tagSet = lp.getLexicon().tagSet(lp.treebankLanguagePack().getBasicCategoryFunction());
  // Every MaxentTagger must use exactly the same tag set ...
  for (String name : maxentTaggers) {
    MaxentTagger tagger = new MaxentTagger(name);
    assertEquals(lexParsers[0] + " vs. " + name + " tag set mismatch:\n" +
                 "left - right: " + Sets.diff(tagSet, tagger.tagSet()) +
                 "; right - left: " + Sets.diff(tagger.tagSet(), tagSet) + "\n",
                 tagSet, tagger.tagSet());
  }
  // ... as must every other lexicalized parser ...
  for (String name : lexParsers) {
    LexicalizedParser lp2 = LexicalizedParser.loadModel(name);
    Set<String> tagSet2 = lp2.getLexicon().tagSet(lp.treebankLanguagePack().getBasicCategoryFunction());
    assertEquals(lexParsers[0] + " vs. " + name + " tag set mismatch:\n" +
                 "left - right: " + Sets.diff(tagSet, tagSet2) +
                 "; right - left: " + Sets.diff(tagSet2, tagSet) + "\n",
                 tagSet, tagSet2);
  }
  // ... every shift-reduce parser ...
  for (String name : srParsers) {
    ShiftReduceParser srp = ShiftReduceParser.loadModel(name);
    assertEquals(lexParsers[0] + " vs. " + name + " tag set mismatch:\n" +
                 "left - right: " + Sets.diff(tagSet, srp.tagSet()) +
                 "; right - left: " + Sets.diff(srp.tagSet(), tagSet) + "\n",
                 tagSet, srp.tagSet());
  }
  // ... and every neural-network dependency parser
  for (String name : nnDepParsers) {
    DependencyParser dp = DependencyParser.loadFromModelFile(name);
    assertEquals(lexParsers[0] + " vs. " + name + " tag set mismatch:\n" +
                 "left - right: " + Sets.diff(tagSet, dp.getPosSet()) +
                 "; right - left: " + Sets.diff(dp.getPosSet(), tagSet) + "\n",
                 tagSet, dp.getPosSet());
  }
}
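The test is driven entirely by the arrays of model names it receives. A call along the following lines (the model paths are illustrative, not copied from the test class itself) would check that the standard English models all agree on a single tag set:

testTagSet4(new String[] { "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz" },
    new String[] { "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger" },
    new String[] { "edu/stanford/nlp/models/srparser/englishSR.ser.gz" },
    new String[] { DependencyParser.DEFAULT_MODEL });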
Another use of edu.stanford.nlp.tagger.maxent.MaxentTagger in CoreNLP:
the class ShiftReduceParserITest, method setUp.
@Override
public void setUp() {
  // Load the English shift-reduce parser and POS tagger once and share them across tests
  synchronized (ShiftReduceParserITest.class) {
    if (englishParser == null) {
      englishParser = ShiftReduceParser.loadModel("edu/stanford/nlp/models/srparser/englishSR.ser.gz");
      englishTagger = new MaxentTagger("edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
    }
  }
}
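With the parser and tagger cached by setUp, a test body typically tags a tokenized sentence and hands it to the shift-reduce parser. A minimal sketch of that pattern (not one of the actual test methods in this class):

DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader("The dog barked."));
for (List<HasWord> sentence : tokenizer) {
  List<TaggedWord> tagged = englishTagger.tagSentence(sentence);
  Tree tree = englishParser.apply(tagged);  // constituency parse of the tagged sentence
}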
Another use of edu.stanford.nlp.tagger.maxent.MaxentTagger in CoreNLP:
the class MulticoreWrapperDemo, method main.
/**
 * @param args Command-line arguments: modelFile (runs as a filter from stdin to stdout)
 */
public static void main(String[] args) {
  if (args.length != 1) {
    System.err.printf("Usage: java %s model_file < input_file%n", MulticoreWrapperDemo.class.getName());
    System.exit(-1);
  }
  try {
    // Load the MaxentTagger, which is threadsafe
    String modelFile = args[0];
    final MaxentTagger tagger = new MaxentTagger(modelFile);
    // Configure the wrapper to run with 4 worker threads
    int nThreads = 4;
    MulticoreWrapper<String, String> wrapper = new MulticoreWrapper<>(nThreads,
        new ThreadsafeProcessor<String, String>() {
          @Override
          public String process(String input) {
            return tagger.tagString(input);
          }

          @Override
          public ThreadsafeProcessor<String, String> newInstance() {
            // MaxentTagger is threadsafe, so the same processor can be shared by all threads
            return this;
          }
        });
    // Submit jobs, which come from stdin, and print results as they become available
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    for (String line; (line = br.readLine()) != null; ) {
      wrapper.put(line);
      while (wrapper.peek()) {
        System.out.println(wrapper.poll());
      }
    }
    // Finished reading the input; wait for the remaining jobs to finish
    wrapper.join();
    while (wrapper.peek()) {
      System.out.println(wrapper.poll());
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}
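By default, MulticoreWrapper hands results back in the order the inputs were submitted, which is why interleaving put() with the peek()/poll() drain loop still prints the tagged lines in input order. As the usage message indicates, the demo runs as a filter: it takes the tagger model path as its single argument and pipes stdin to stdout, roughly java MulticoreWrapperDemo <model_file> < input.txt > tagged.txt (class name shown unqualified here; the fully qualified name depends on its package).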