Search in sources:

Example 1 with MaxentTagger

use of edu.stanford.nlp.tagger.maxent.MaxentTagger in project CoreNLP by stanfordnlp.

In the class DependencyParser, method parseTextFile.

/**
 * Tags every sentence read from {@code input}, then parses each tagged
 * sentence with {@code predict} and writes its typed dependencies to
 * {@code output}, one dependency per line with a blank line after each
 * sentence. Timing information for both phases is reported on stderr.
 *
 * @param input  reader supplying the raw text to split into sentences
 * @param output writer receiving the typed dependencies
 */
private void parseTextFile(BufferedReader input, PrintWriter output) {
    // Configure sentence splitting / tokenization from the parser configuration.
    DocumentPreprocessor sentenceReader = new DocumentPreprocessor(input);
    sentenceReader.setSentenceFinalPuncWords(config.tlp.sentenceFinalPunctuationWords());
    sentenceReader.setEscaper(config.escaper);
    sentenceReader.setSentenceDelimiter(config.sentenceDelimiter);
    sentenceReader.setTokenizerFactory(config.tlp.getTokenizerFactory());

    // The stopwatch is created before the tagger, so the reported tagging
    // time also covers constructing the tagger.
    Timing stopwatch = new Timing();
    MaxentTagger posTagger = new MaxentTagger(config.tagger);

    // Phase 1: tag everything up front and keep the results in memory.
    List<List<TaggedWord>> tagged = new ArrayList<>();
    for (List<HasWord> rawSentence : sentenceReader) {
        List<TaggedWord> taggedSentence = posTagger.tagSentence(rawSentence);
        tagged.add(taggedSentence);
    }
    System.err.printf("Tagging completed in %.2f sec.%n", stopwatch.stop() / 1000.0);

    // Phase 2: parse each tagged sentence and emit its dependencies.
    stopwatch.start();
    for (List<TaggedWord> taggedSentence : tagged) {
        for (TypedDependency dependency : predict(taggedSentence).typedDependencies()) {
            output.println(dependency);
        }
        output.println();
    }
    double seconds = stopwatch.stop() / 1000.0;

    // Every tagged sentence was parsed exactly once if we reach this point.
    int numSentences = tagged.size();
    System.err.printf("Parsed %d sentences in %.2f seconds (%.2f sents/sec).%n", numSentences, seconds, numSentences / seconds);
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) TypedDependency(edu.stanford.nlp.trees.TypedDependency) TaggedWord(edu.stanford.nlp.ling.TaggedWord) MaxentTagger(edu.stanford.nlp.tagger.maxent.MaxentTagger) GrammaticalStructure(edu.stanford.nlp.trees.GrammaticalStructure) ChineseGrammaticalStructure(edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalStructure) EnglishGrammaticalStructure(edu.stanford.nlp.trees.EnglishGrammaticalStructure) UniversalEnglishGrammaticalStructure(edu.stanford.nlp.trees.UniversalEnglishGrammaticalStructure) Collectors.toList(java.util.stream.Collectors.toList) Timing(edu.stanford.nlp.util.Timing) DocumentPreprocessor(edu.stanford.nlp.process.DocumentPreprocessor)

Example 2 with MaxentTagger

use of edu.stanford.nlp.tagger.maxent.MaxentTagger in project CoreNLP by stanfordnlp.

In the class DependencyParserDemo, method main.

/**
 * Demo entry point: tags and dependency-parses a fixed example sentence and
 * logs the resulting grammatical structure.
 *
 * <p>Recognized arguments, each of which must be followed by a value:
 * <ul>
 *   <li>{@code -tagger <path>} overrides the POS tagger model path</li>
 *   <li>{@code -model <path>} overrides the dependency parser model path</li>
 * </ul>
 *
 * @param args command-line arguments as described above
 * @throws IllegalArgumentException if a recognized flag is the last argument
 *         and therefore has no value
 * @throws RuntimeException if an unrecognized argument is encountered
 */
public static void main(String[] args) {
    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    for (int argIndex = 0; argIndex < args.length; ) {
        // True when the current flag is followed by a value; checked before
        // reading args[argIndex + 1] so a trailing flag gives a clear error
        // instead of an ArrayIndexOutOfBoundsException.
        boolean hasValue = argIndex + 1 < args.length;
        switch(args[argIndex]) {
            case "-tagger":
                if (!hasValue) {
                    throw new IllegalArgumentException("Missing value for argument " + args[argIndex]);
                }
                taggerPath = args[argIndex + 1];
                argIndex += 2;
                break;
            case "-model":
                if (!hasValue) {
                    throw new IllegalArgumentException("Missing value for argument " + args[argIndex]);
                }
                modelPath = args[argIndex + 1];
                argIndex += 2;
                break;
            default:
                throw new RuntimeException("Unknown argument " + args[argIndex]);
        }
    }
    String text = "I can almost always tell when movies use fake dinosaurs.";
    MaxentTagger tagger = new MaxentTagger(taggerPath);
    DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);
    // Split the text into sentences, then tag and parse each one.
    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    for (List<HasWord> sentence : tokenizer) {
        List<TaggedWord> tagged = tagger.tagSentence(sentence);
        GrammaticalStructure gs = parser.predict(tagged);
        // Print typed dependencies
        log.info(gs);
    }
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) TaggedWord(edu.stanford.nlp.ling.TaggedWord) MaxentTagger(edu.stanford.nlp.tagger.maxent.MaxentTagger) DependencyParser(edu.stanford.nlp.parser.nndep.DependencyParser) StringReader(java.io.StringReader) GrammaticalStructure(edu.stanford.nlp.trees.GrammaticalStructure) DocumentPreprocessor(edu.stanford.nlp.process.DocumentPreprocessor)

Example 3 with MaxentTagger

use of edu.stanford.nlp.tagger.maxent.MaxentTagger in project CoreNLP by stanfordnlp.

In the class TaggerParserPosTagCompatibilityITest, method testTagSet4.

/**
 * Asserts that every listed model exposes the same tag set as the first
 * lexicalized parser in {@code lexParsers}, which serves as the reference.
 * On mismatch the assertion message lists both set differences.
 *
 * @param lexParsers    lexicalized parser model paths; element 0 is the reference
 * @param maxentTaggers POS tagger model paths to compare
 * @param srParsers     shift-reduce parser model paths to compare
 * @param nnDepParsers  neural dependency parser model paths to compare
 */
private static void testTagSet4(String[] lexParsers, String[] maxentTaggers, String[] srParsers, String[] nnDepParsers) {
    LexicalizedParser lp = LexicalizedParser.loadModel(lexParsers[0]);
    Set<String> tagSet = lp.getLexicon().tagSet(lp.treebankLanguagePack().getBasicCategoryFunction());

    // POS taggers
    for (String name : maxentTaggers) {
        MaxentTagger tagger = new MaxentTagger(name);
        Set<String> candidate = tagger.tagSet();
        String message = lexParsers[0] + " vs. " + name + " tag set mismatch:\n" + "left - right: " + Sets.diff(tagSet, candidate) + "; right - left: " + Sets.diff(candidate, tagSet) + "\n";
        assertEquals(message, tagSet, candidate);
    }

    // Lexicalized parsers; tags are mapped through the reference parser's
    // basic-category function, exactly as for the reference tag set.
    for (String name : lexParsers) {
        LexicalizedParser lp2 = LexicalizedParser.loadModel(name);
        Set<String> candidate = lp2.getLexicon().tagSet(lp.treebankLanguagePack().getBasicCategoryFunction());
        String message = lexParsers[0] + " vs. " + name + " tag set mismatch:\n" + "left - right: " + Sets.diff(tagSet, candidate) + "; right - left: " + Sets.diff(candidate, tagSet) + "\n";
        assertEquals(message, tagSet, candidate);
    }

    // Shift-reduce parsers
    for (String name : srParsers) {
        ShiftReduceParser srp = ShiftReduceParser.loadModel(name);
        Set<String> candidate = srp.tagSet();
        String message = lexParsers[0] + " vs. " + name + " tag set mismatch:\n" + "left - right: " + Sets.diff(tagSet, candidate) + "; right - left: " + Sets.diff(candidate, tagSet) + "\n";
        assertEquals(message, tagSet, candidate);
    }

    // Neural dependency parsers
    for (String name : nnDepParsers) {
        DependencyParser dp = DependencyParser.loadFromModelFile(name);
        Set<String> candidate = dp.getPosSet();
        String message = lexParsers[0] + " vs. " + name + " tag set mismatch:\n" + "left - right: " + Sets.diff(tagSet, candidate) + "; right - left: " + Sets.diff(candidate, tagSet) + "\n";
        assertEquals(message, tagSet, candidate);
    }
}
Also used : MaxentTagger(edu.stanford.nlp.tagger.maxent.MaxentTagger) ShiftReduceParser(edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser) DependencyParser(edu.stanford.nlp.parser.nndep.DependencyParser) LexicalizedParser(edu.stanford.nlp.parser.lexparser.LexicalizedParser)

Example 4 with MaxentTagger

use of edu.stanford.nlp.tagger.maxent.MaxentTagger in project CoreNLP by stanfordnlp.

In the class ShiftReduceParserITest, method setUp.

/**
 * Loads the English shift-reduce parser and POS tagger models into the
 * {@code englishParser} and {@code englishTagger} fields the first time it
 * is called; later calls are no-ops once both fields are set.
 *
 * <p>Both models are loaded into locals and only published after both loads
 * succeed. If either load throws, neither field is left half-initialized,
 * so the next call retries and reports the real loading error rather than
 * leaving {@code englishTagger} null behind a non-null {@code englishParser}.
 */
@Override
public void setUp() {
    // Class-level lock: NOTE(review) the fields are presumably static and
    // shared across test instances; confirm against the field declarations.
    synchronized (ShiftReduceParserITest.class) {
        if (englishParser == null || englishTagger == null) {
            ShiftReduceParser parser = ShiftReduceParser.loadModel("edu/stanford/nlp/models/srparser/englishSR.ser.gz");
            MaxentTagger tagger = new MaxentTagger("edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
            englishParser = parser;
            englishTagger = tagger;
        }
    }
}
Also used : MaxentTagger(edu.stanford.nlp.tagger.maxent.MaxentTagger)

Example 5 with MaxentTagger

use of edu.stanford.nlp.tagger.maxent.MaxentTagger in project CoreNLP by stanfordnlp.

In the class MulticoreWrapperDemo, method main.

/**
 * Runs a POS tagger as a line-oriented filter: each line read from stdin is
 * submitted to a {@code MulticoreWrapper} with four worker threads, and
 * tagged results are printed to stdout as they become available.
 *
 * @param args Command-line arguments: modelFile (runs as a filter from stdin to stdout)
 */
public static void main(String[] args) {
    if (args.length != 1) {
        System.err.printf("Usage: java %s model_file < input_file%n", MulticoreWrapperDemo.class.getName());
        System.exit(-1);
    }
    try {
        // A single tagger instance is shared by all workers (the original
        // code notes that MaxentTagger is threadsafe).
        final MaxentTagger tagger = new MaxentTagger(args[0]);

        ThreadsafeProcessor<String, String> taggingProcessor = new ThreadsafeProcessor<String, String>() {

            @Override
            public String process(String input) {
                return tagger.tagString(input);
            }

            @Override
            public ThreadsafeProcessor<String, String> newInstance() {
                // The same processor is handed to every worker; see note above.
                return this;
            }
        };

        // Run with 4 worker threads.
        final int workerCount = 4;
        MulticoreWrapper<String, String> pool = new MulticoreWrapper<>(workerCount, taggingProcessor);

        // Feed jobs from stdin, printing whatever results are ready after each submission.
        BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
        String line = stdin.readLine();
        while (line != null) {
            pool.put(line);
            while (pool.peek()) {
                System.out.println(pool.poll());
            }
            line = stdin.readLine();
        }

        // Input exhausted: wait for outstanding jobs, then print the remaining results.
        pool.join();
        while (pool.peek()) {
            System.out.println(pool.poll());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Also used : ThreadsafeProcessor(edu.stanford.nlp.util.concurrent.ThreadsafeProcessor) MaxentTagger(edu.stanford.nlp.tagger.maxent.MaxentTagger) MulticoreWrapper(edu.stanford.nlp.util.concurrent.MulticoreWrapper) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException)

Aggregations

MaxentTagger (edu.stanford.nlp.tagger.maxent.MaxentTagger)10 HasWord (edu.stanford.nlp.ling.HasWord)5 TaggedWord (edu.stanford.nlp.ling.TaggedWord)5 DocumentPreprocessor (edu.stanford.nlp.process.DocumentPreprocessor)4 BufferedReader (java.io.BufferedReader)3 DependencyParser (edu.stanford.nlp.parser.nndep.DependencyParser)2 ShiftReduceParser (edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser)2 GrammaticalStructure (edu.stanford.nlp.trees.GrammaticalStructure)2 InputStreamReader (java.io.InputStreamReader)2 StringReader (java.io.StringReader)2 CoreLabel (edu.stanford.nlp.ling.CoreLabel)1 LexicalizedParser (edu.stanford.nlp.parser.lexparser.LexicalizedParser)1 CoreLabelTokenFactory (edu.stanford.nlp.process.CoreLabelTokenFactory)1 EnglishGrammaticalStructure (edu.stanford.nlp.trees.EnglishGrammaticalStructure)1 Tree (edu.stanford.nlp.trees.Tree)1 TypedDependency (edu.stanford.nlp.trees.TypedDependency)1 UniversalEnglishGrammaticalStructure (edu.stanford.nlp.trees.UniversalEnglishGrammaticalStructure)1 ChineseGrammaticalStructure (edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalStructure)1 Timing (edu.stanford.nlp.util.Timing)1 MulticoreWrapper (edu.stanford.nlp.util.concurrent.MulticoreWrapper)1