Search in sources :

Example 1 with GrammaticalStructure

use of edu.stanford.nlp.trees.GrammaticalStructure in project CoreNLP by stanfordnlp.

the class DependencyParser method parseTextFile.

private void parseTextFile(BufferedReader input, PrintWriter output) {
    DocumentPreprocessor preprocessor = new DocumentPreprocessor(input);
    preprocessor.setSentenceFinalPuncWords(config.tlp.sentenceFinalPunctuationWords());
    preprocessor.setEscaper(config.escaper);
    preprocessor.setSentenceDelimiter(config.sentenceDelimiter);
    preprocessor.setTokenizerFactory(config.tlp.getTokenizerFactory());
    Timing timer = new Timing();
    MaxentTagger tagger = new MaxentTagger(config.tagger);
    List<List<TaggedWord>> tagged = new ArrayList<>();
    for (List<HasWord> sentence : preprocessor) {
        tagged.add(tagger.tagSentence(sentence));
    }
    System.err.printf("Tagging completed in %.2f sec.%n", timer.stop() / 1000.0);
    timer.start();
    int numSentences = 0;
    for (List<TaggedWord> taggedSentence : tagged) {
        GrammaticalStructure parse = predict(taggedSentence);
        Collection<TypedDependency> deps = parse.typedDependencies();
        for (TypedDependency dep : deps) output.println(dep);
        output.println();
        numSentences++;
    }
    long millis = timer.stop();
    double seconds = millis / 1000.0;
    System.err.printf("Parsed %d sentences in %.2f seconds (%.2f sents/sec).%n", numSentences, seconds, numSentences / seconds);
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) TypedDependency(edu.stanford.nlp.trees.TypedDependency) TaggedWord(edu.stanford.nlp.ling.TaggedWord) MaxentTagger(edu.stanford.nlp.tagger.maxent.MaxentTagger) GrammaticalStructure(edu.stanford.nlp.trees.GrammaticalStructure) ChineseGrammaticalStructure(edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalStructure) EnglishGrammaticalStructure(edu.stanford.nlp.trees.EnglishGrammaticalStructure) UniversalEnglishGrammaticalStructure(edu.stanford.nlp.trees.UniversalEnglishGrammaticalStructure) Collectors.toList(java.util.stream.Collectors.toList) Timing(edu.stanford.nlp.util.Timing) DocumentPreprocessor(edu.stanford.nlp.process.DocumentPreprocessor)

Example 2 with GrammaticalStructure

use of edu.stanford.nlp.trees.GrammaticalStructure in project CoreNLP by stanfordnlp.

the class DependencyParserDemo method main.

public static void main(String[] args) {
    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    for (int argIndex = 0; argIndex < args.length; ) {
        switch(args[argIndex]) {
            case "-tagger":
                taggerPath = args[argIndex + 1];
                argIndex += 2;
                break;
            case "-model":
                modelPath = args[argIndex + 1];
                argIndex += 2;
                break;
            default:
                throw new RuntimeException("Unknown argument " + args[argIndex]);
        }
    }
    String text = "I can almost always tell when movies use fake dinosaurs.";
    MaxentTagger tagger = new MaxentTagger(taggerPath);
    DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);
    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    for (List<HasWord> sentence : tokenizer) {
        List<TaggedWord> tagged = tagger.tagSentence(sentence);
        GrammaticalStructure gs = parser.predict(tagged);
        // Print typed dependencies
        log.info(gs);
    }
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) TaggedWord(edu.stanford.nlp.ling.TaggedWord) MaxentTagger(edu.stanford.nlp.tagger.maxent.MaxentTagger) DependencyParser(edu.stanford.nlp.parser.nndep.DependencyParser) StringReader(java.io.StringReader) GrammaticalStructure(edu.stanford.nlp.trees.GrammaticalStructure) DocumentPreprocessor(edu.stanford.nlp.process.DocumentPreprocessor)

Example 3 with GrammaticalStructure

use of edu.stanford.nlp.trees.GrammaticalStructure in project CoreNLP by stanfordnlp.

the class DependencyIndexITest method checkTree.

private static void checkTree(Tree tree) {
    List<Tree> leaves = tree.getLeaves();
    for (Tree leaf : leaves) {
        CoreLabel l = null;
        if (leaf.label() instanceof CoreLabel)
            l = (CoreLabel) leaf.label();
        if (l != null) {
            // System.err.println(l + " " + l.get(CoreAnnotations.IndexAnnotation.class));
            int index = l.get(CoreAnnotations.IndexAnnotation.class);
            String text = l.get(CoreAnnotations.TextAnnotation.class);
            if (text.equals("Mary"))
                assertEquals(1, index);
            else if (text.equals("had"))
                assertEquals(2, index);
            else if (text.equals("a"))
                assertEquals(3, index);
            else if (text.equals("little"))
                assertEquals(4, index);
            else if (text.equals("lamb"))
                assertEquals(5, index);
            else if (text.equals("."))
                assertEquals(6, index);
        } else {
        // System.err.println(leaf + " is not a CoreLabel.");
        }
    }
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
    Collection<TypedDependency> deps = gs.typedDependenciesCCprocessed(GrammaticalStructure.Extras.MAXIMAL);
    // System.out.println(deps);
    // collect all nodes in deps
    Set<IndexedWord> nodes = Generics.newHashSet();
    for (TypedDependency dep : deps) {
        nodes.add(dep.gov());
        nodes.add(dep.dep());
    }
    // check the indices for all nodes
    for (IndexedWord n : nodes) {
        String text = n.value();
        int index = n.get(CoreAnnotations.IndexAnnotation.class);
        if (text.equals("Mary"))
            assertEquals(1, index);
        else if (text.equals("had"))
            assertEquals(2, index);
        else if (text.equals("a"))
            assertEquals(3, index);
        else if (text.equals("little"))
            assertEquals(4, index);
        else if (text.equals("lamb"))
            assertEquals(5, index);
        else if (text.equals("."))
            assertEquals(6, index);
    }
}
Also used : TypedDependency(edu.stanford.nlp.trees.TypedDependency) PennTreebankLanguagePack(edu.stanford.nlp.trees.PennTreebankLanguagePack) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) GrammaticalStructureFactory(edu.stanford.nlp.trees.GrammaticalStructureFactory) GrammaticalStructure(edu.stanford.nlp.trees.GrammaticalStructure) Tree(edu.stanford.nlp.trees.Tree) TreebankLanguagePack(edu.stanford.nlp.trees.TreebankLanguagePack) PennTreebankLanguagePack(edu.stanford.nlp.trees.PennTreebankLanguagePack) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 4 with GrammaticalStructure

use of edu.stanford.nlp.trees.GrammaticalStructure in project CoreNLP by stanfordnlp.

the class DependencyParseAnnotator method doOneSentence.

@Override
protected void doOneSentence(Annotation annotation, CoreMap sentence) {
    GrammaticalStructure gs = parser.predict(sentence);
    SemanticGraph deps = SemanticGraphFactory.makeFromTree(gs, Mode.COLLAPSED, extraDependencies, null), uncollapsedDeps = SemanticGraphFactory.makeFromTree(gs, Mode.BASIC, extraDependencies, null), ccDeps = SemanticGraphFactory.makeFromTree(gs, Mode.CCPROCESSED, extraDependencies, null), enhancedDeps = SemanticGraphFactory.makeFromTree(gs, Mode.ENHANCED, extraDependencies, null), enhancedPlusPlusDeps = SemanticGraphFactory.makeFromTree(gs, Mode.ENHANCED_PLUS_PLUS, extraDependencies, null);
    sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, deps);
    sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, uncollapsedDeps);
    sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, ccDeps);
    sentence.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, enhancedDeps);
    sentence.set(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class, enhancedPlusPlusDeps);
}
Also used : GrammaticalStructure(edu.stanford.nlp.trees.GrammaticalStructure) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph)

Example 5 with GrammaticalStructure

use of edu.stanford.nlp.trees.GrammaticalStructure in project CoreNLP by stanfordnlp.

the class LexicalizedParserServer method handleDependencies.

// TODO: when this method throws an exception (for whatever reason)
// a waiting client might hang.  There should be some graceful
// handling of that.
public void handleDependencies(String arg, OutputStream outStream, String commandArgs) throws IOException {
    Tree tree = parse(arg, false);
    if (tree == null) {
        return;
    }
    // TODO: this might throw an exception if the parser doesn't support dependencies.  Handle that cleaner?
    GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(tree, parser.treebankLanguagePack().punctuationWordRejectFilter(), parser.getTLPParams().typedDependencyHeadFinder());
    Collection<TypedDependency> deps = null;
    switch(commandArgs.toUpperCase()) {
        case "COLLAPSED_TREE":
            deps = gs.typedDependenciesCollapsedTree();
            break;
        default:
            throw new UnsupportedOperationException("Dependencies type not implemented: " + commandArgs);
    }
    OutputStreamWriter osw = new OutputStreamWriter(outStream, "utf-8");
    for (TypedDependency dep : deps) {
        osw.write(dep.toString());
        osw.write("\n");
    }
    osw.flush();
}
Also used : TypedDependency(edu.stanford.nlp.trees.TypedDependency) GrammaticalStructure(edu.stanford.nlp.trees.GrammaticalStructure) Tree(edu.stanford.nlp.trees.Tree) OutputStreamWriter(java.io.OutputStreamWriter)

Aggregations

GrammaticalStructure (edu.stanford.nlp.trees.GrammaticalStructure)6 Tree (edu.stanford.nlp.trees.Tree)3 TypedDependency (edu.stanford.nlp.trees.TypedDependency)3 HasWord (edu.stanford.nlp.ling.HasWord)2 TaggedWord (edu.stanford.nlp.ling.TaggedWord)2 DocumentPreprocessor (edu.stanford.nlp.process.DocumentPreprocessor)2 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)2 MaxentTagger (edu.stanford.nlp.tagger.maxent.MaxentTagger)2 GrammaticalStructureFactory (edu.stanford.nlp.trees.GrammaticalStructureFactory)2 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)1 CoreLabel (edu.stanford.nlp.ling.CoreLabel)1 IndexedWord (edu.stanford.nlp.ling.IndexedWord)1 EnglishTreebankParserParams (edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams)1 TreebankLangParserParams (edu.stanford.nlp.parser.lexparser.TreebankLangParserParams)1 DependencyParser (edu.stanford.nlp.parser.nndep.DependencyParser)1 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)1 SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher)1 SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern)1 EnglishGrammaticalStructure (edu.stanford.nlp.trees.EnglishGrammaticalStructure)1 PennTreebankLanguagePack (edu.stanford.nlp.trees.PennTreebankLanguagePack)1