Example 6 with HasWord

Use of edu.stanford.nlp.ling.HasWord in project CoreNLP by stanfordnlp.

From class DependencyParser, method parseTextFile:

private void parseTextFile(BufferedReader input, PrintWriter output) {
    DocumentPreprocessor preprocessor = new DocumentPreprocessor(input);
    preprocessor.setSentenceFinalPuncWords(config.tlp.sentenceFinalPunctuationWords());
    preprocessor.setEscaper(config.escaper);
    preprocessor.setSentenceDelimiter(config.sentenceDelimiter);
    preprocessor.setTokenizerFactory(config.tlp.getTokenizerFactory());
    Timing timer = new Timing();
    MaxentTagger tagger = new MaxentTagger(config.tagger);
    List<List<TaggedWord>> tagged = new ArrayList<>();
    // Pre-tag every sentence produced by the DocumentPreprocessor before parsing
    for (List<HasWord> sentence : preprocessor) {
        tagged.add(tagger.tagSentence(sentence));
    }
    System.err.printf("Tagging completed in %.2f sec.%n", timer.stop() / 1000.0);
    timer.start();
    int numSentences = 0;
    for (List<TaggedWord> taggedSentence : tagged) {
        // Parse each tagged sentence and print one TypedDependency per line
        GrammaticalStructure parse = predict(taggedSentence);
        Collection<TypedDependency> deps = parse.typedDependencies();
        for (TypedDependency dep : deps) output.println(dep);
        output.println();
        numSentences++;
    }
    long millis = timer.stop();
    double seconds = millis / 1000.0;
    System.err.printf("Parsed %d sentences in %.2f seconds (%.2f sents/sec).%n", numSentences, seconds, numSentences / seconds);
}
Also used: HasWord (edu.stanford.nlp.ling.HasWord), TypedDependency (edu.stanford.nlp.trees.TypedDependency), TaggedWord (edu.stanford.nlp.ling.TaggedWord), MaxentTagger (edu.stanford.nlp.tagger.maxent.MaxentTagger), GrammaticalStructure (edu.stanford.nlp.trees.GrammaticalStructure), ChineseGrammaticalStructure (edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalStructure), EnglishGrammaticalStructure (edu.stanford.nlp.trees.EnglishGrammaticalStructure), UniversalEnglishGrammaticalStructure (edu.stanford.nlp.trees.UniversalEnglishGrammaticalStructure), Collectors.toList (java.util.stream.Collectors.toList), Timing (edu.stanford.nlp.util.Timing), DocumentPreprocessor (edu.stanford.nlp.process.DocumentPreprocessor)
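
A minimal sketch of the same two-stage pipeline (tag every sentence, then parse it and print its typed dependencies) driven through the parser's public API rather than the private parseTextFile method. It assumes the default nndep model and the english-left3words tagger path used in the next example are on the classpath; the sample text and class name are illustrative only.

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.nndep.DependencyParser;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.TypedDependency;

import java.io.StringReader;
import java.util.List;

public class ParseTextSketch {
    public static void main(String[] args) {
        // Assumed model locations: the default nndep model plus the tagger model
        // path shown in Example 7 below.
        MaxentTagger tagger = new MaxentTagger(
                "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
        DependencyParser parser = DependencyParser.loadFromModelFile(DependencyParser.DEFAULT_MODEL);

        String text = "The parser reads raw text. It prints one dependency per line.";
        DocumentPreprocessor preprocessor = new DocumentPreprocessor(new StringReader(text));

        // Same pattern as parseTextFile: tag each sentence, parse it, then print
        // its typed dependencies with a blank line between sentences.
        for (List<HasWord> sentence : preprocessor) {
            List<TaggedWord> tagged = tagger.tagSentence(sentence);
            GrammaticalStructure gs = parser.predict(tagged);
            for (TypedDependency dep : gs.typedDependencies()) {
                System.out.println(dep);
            }
            System.out.println();
        }
    }
}

Each TypedDependency renders itself in a reln(governor-index, dependent-index) form, which is what parseTextFile writes to its output writer.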

Example 7 with HasWord

Use of edu.stanford.nlp.ling.HasWord in project CoreNLP by stanfordnlp.

From class DependencyParserDemo, method main:

public static void main(String[] args) {
    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    for (int argIndex = 0; argIndex < args.length; ) {
        switch(args[argIndex]) {
            case "-tagger":
                taggerPath = args[argIndex + 1];
                argIndex += 2;
                break;
            case "-model":
                modelPath = args[argIndex + 1];
                argIndex += 2;
                break;
            default:
                throw new RuntimeException("Unknown argument " + args[argIndex]);
        }
    }
    String text = "I can almost always tell when movies use fake dinosaurs.";
    MaxentTagger tagger = new MaxentTagger(taggerPath);
    DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);
    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    for (List<HasWord> sentence : tokenizer) {
        List<TaggedWord> tagged = tagger.tagSentence(sentence);
        GrammaticalStructure gs = parser.predict(tagged);
        // Print typed dependencies
        log.info(gs);
    }
}
Also used: HasWord (edu.stanford.nlp.ling.HasWord), TaggedWord (edu.stanford.nlp.ling.TaggedWord), MaxentTagger (edu.stanford.nlp.tagger.maxent.MaxentTagger), DependencyParser (edu.stanford.nlp.parser.nndep.DependencyParser), StringReader (java.io.StringReader), GrammaticalStructure (edu.stanford.nlp.trees.GrammaticalStructure), DocumentPreprocessor (edu.stanford.nlp.process.DocumentPreprocessor)
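
Rather than logging the whole GrammaticalStructure, its typed dependencies can be walked one at a time. A small helper sketch, assuming a gs obtained exactly as above; reln(), gov() and dep() are TypedDependency's accessors, and the printed format here is an illustration, not CoreNLP's own rendering.

import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.TypedDependency;

public class TypedDependencySketch {
    // Prints each dependency as relation(governorWord-index, dependentWord-index).
    static void printDependencies(GrammaticalStructure gs) {
        for (TypedDependency dep : gs.typedDependencies()) {
            System.out.printf("%s(%s-%d, %s-%d)%n",
                    dep.reln().getShortName(),
                    dep.gov().word(), dep.gov().index(),
                    dep.dep().word(), dep.dep().index());
        }
    }
}

Calling printDependencies(gs) inside the loop above prints one line per dependency instead of the structure's default toString output.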

Example 8 with HasWord

Use of edu.stanford.nlp.ling.HasWord in project CoreNLP by stanfordnlp.

From class ScrollableTreeJPanel, method renderRows:

private void renderRows(Graphics2D g2, FontMetrics fM, Color defaultColor2) {
    double nodeHeight = fM.getHeight();
    double layerMultiplier = (1.0 + belowLineSkip + aboveLineSkip + parentSkip);
    double layerHeight = nodeHeight * layerMultiplier;
    //Draw the yield
    List<HasWord> sentence = tree.yieldHasWord();
    for (int i = 0; i < sentence.size(); i++) {
        g2.drawString(sentence.get(i).word(), yieldOffsets[i], (float) (yieldHeight + layerHeight));
    }
    //Greedily draw the constituents
    final float rowOrigin = (float) (yieldHeight + 2.0 * layerHeight);
    List<List<IntPair>> rows = new ArrayList<>();
    for (Constituent c : diffConstituents) {
        for (int rowIdx = 0; rowIdx < diffConstituents.size(); rowIdx++) {
            float rowHeight = rowOrigin + (float) (rowIdx * layerHeight);
            int ext = (c.end() == (yieldOffsets.length - 1)) ? 0 : 1;
            if (rowIdx >= rows.size()) {
                rows.add(new ArrayList<>());
                rows.get(rowIdx).add(new IntPair(c.start(), c.end()));
                double nodeWidth = fM.stringWidth(c.value());
                g2.drawString(c.value(), yieldOffsets[c.start()], rowHeight);
                try {
                    g2.drawLine((int) (yieldOffsets[c.start()] + nodeWidth) + 10, (int) rowHeight, (int) (yieldOffsets[c.end() + ext]) - 15, (int) rowHeight);
                } catch (ArrayIndexOutOfBoundsException e) {
                // This happens if the yields of the two compared trees do not match. Just ignore it for now.
                // System.err.printf("yieldOffsets.length is %d, c.start() is %d, c.end() is %d, ext is %d%n", yieldOffsets.length, c.start(), c.end(), ext);
                }
                break;
            } else {
                boolean foundOverlap = false;
                for (IntPair span : rows.get(rowIdx)) {
                    if (doesOverlap(c, span)) {
                        foundOverlap = true;
                        break;
                    }
                }
                if (!foundOverlap) {
                    rows.get(rowIdx).add(new IntPair(c.start(), c.end()));
                    double nodeWidth = fM.stringWidth(c.value());
                    g2.drawString(c.value(), yieldOffsets[c.start()], rowHeight);
                    g2.drawLine((int) (yieldOffsets[c.start()] + nodeWidth) + 10, (int) rowHeight, (int) (yieldOffsets[c.end() + ext]) - 15, (int) rowHeight);
                    break;
                }
            }
        }
    }
}
Also used: HasWord (edu.stanford.nlp.ling.HasWord), IntPair (edu.stanford.nlp.util.IntPair), List (java.util.List), Constituent (edu.stanford.nlp.trees.Constituent)
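
The doesOverlap helper that guards row placement is not shown in this snippet. A plausible reconstruction, assuming it only checks whether the constituent's token span intersects a span already placed on the row; the actual ScrollableTreeJPanel implementation may differ.

import edu.stanford.nlp.trees.Constituent;
import edu.stanford.nlp.util.IntPair;

public class OverlapSketch {
    // Assumed reconstruction, not the actual CoreNLP source: treat both spans as
    // closed intervals over token indices and test whether they intersect.
    static boolean doesOverlap(Constituent c, IntPair span) {
        return c.start() <= span.getTarget() && span.getSource() <= c.end();
    }
}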

Example 9 with HasWord

Use of edu.stanford.nlp.ling.HasWord in project CoreNLP by stanfordnlp.

From class UniversalSemanticHeadFinder, method isVerbalAuxiliary:

private boolean isVerbalAuxiliary(Tree preterminal, Set<String> verbalSet, boolean allowJustTagMatch) {
    if (preterminal.isPreTerminal()) {
        Label kidLabel = preterminal.label();
        String tag = null;
        if (kidLabel instanceof HasTag) {
            tag = ((HasTag) kidLabel).tag();
        }
        if (tag == null) {
            tag = preterminal.value();
        }
        Label wordLabel = preterminal.firstChild().label();
        String word = null;
        if (wordLabel instanceof HasWord) {
            word = ((HasWord) wordLabel).word();
        }
        if (word == null) {
            word = wordLabel.value();
        }
        if (DEBUG) {
            log.info("Checking " + preterminal.value() + " head is " + word + '/' + tag);
        }
        String lcWord = word.toLowerCase();
        if (allowJustTagMatch && unambiguousAuxiliaryTags.contains(tag) || verbalTags.contains(tag) && verbalSet.contains(lcWord)) {
            if (DEBUG) {
                log.info("isAuxiliary found desired type of aux");
            }
            return true;
        }
    }
    return false;
}
Also used: HasWord (edu.stanford.nlp.ling.HasWord), Label (edu.stanford.nlp.ling.Label), HasTag (edu.stanford.nlp.ling.HasTag)
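
The word and tag extraction above is a general pattern for Label implementations. A minimal, self-contained sketch of the same fallback logic using CoreLabel, which implements both HasWord and HasTag; the token values are made up.

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Label;

public class AuxCheckSketch {
    public static void main(String[] args) {
        CoreLabel token = new CoreLabel();
        token.setWord("has");
        token.setTag("VBZ");

        Label label = token;
        // Prefer the HasTag/HasWord views and fall back to the label's value,
        // mirroring the checks in isVerbalAuxiliary.
        String tag = (label instanceof HasTag) ? ((HasTag) label).tag() : label.value();
        String word = (label instanceof HasWord) ? ((HasWord) label).word() : label.value();

        System.out.println(word.toLowerCase() + "/" + tag); // prints has/VBZ
    }
}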

Example 10 with HasWord

Use of edu.stanford.nlp.ling.HasWord in project CoreNLP by stanfordnlp.

From class SpanishXMLTreeReader, method buildWordNode:

/**
   * Build a parse tree node corresponding to the word in the given XML node.
   */
private Tree buildWordNode(Node root) {
    Element eRoot = (Element) root;
    String posStr = getPOS(eRoot);
    posStr = treeNormalizer.normalizeNonterminal(posStr);
    String lemma = eRoot.getAttribute(ATTR_LEMMA);
    String word = getWord(eRoot);
    String leafStr = treeNormalizer.normalizeTerminal(word);
    Tree leafNode = treeFactory.newLeaf(leafStr);
    if (leafNode.label() instanceof HasWord)
        ((HasWord) leafNode.label()).setWord(leafStr);
    if (leafNode.label() instanceof HasLemma && lemma != null)
        ((HasLemma) leafNode.label()).setLemma(lemma);
    List<Tree> kids = new ArrayList<>();
    kids.add(leafNode);
    Tree t = treeFactory.newTreeNode(posStr, kids);
    if (t.label() instanceof HasTag)
        ((HasTag) t.label()).setTag(posStr);
    return t;
}
Also used: HasWord (edu.stanford.nlp.ling.HasWord), HasLemma (edu.stanford.nlp.ling.HasLemma), Element (org.w3c.dom.Element), Tree (edu.stanford.nlp.trees.Tree), HasTag (edu.stanford.nlp.ling.HasTag)
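
The same leaf-plus-preterminal construction can be sketched without the XML machinery. A minimal example, assuming a LabeledScoredTreeFactory built over CoreLabel.factory() so the resulting labels implement HasWord, HasTag and HasLemma; the Spanish word, lemma and AnCora-style POS tag are made up for illustration.

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasLemma;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;

import java.util.Collections;

public class WordNodeSketch {
    public static void main(String[] args) {
        // CoreLabel implements HasWord, HasTag and HasLemma, so every instanceof
        // check in buildWordNode succeeds for labels produced by this factory.
        TreeFactory treeFactory = new LabeledScoredTreeFactory(CoreLabel.factory());

        Tree leaf = treeFactory.newLeaf("gatos");
        if (leaf.label() instanceof HasWord) {
            ((HasWord) leaf.label()).setWord("gatos");
        }
        if (leaf.label() instanceof HasLemma) {
            ((HasLemma) leaf.label()).setLemma("gato");
        }

        Tree preterminal = treeFactory.newTreeNode("ncmp000", Collections.singletonList(leaf));
        if (preterminal.label() instanceof HasTag) {
            ((HasTag) preterminal.label()).setTag("ncmp000");
        }

        // Prints the two-node tree in Penn bracketing, e.g. (ncmp000 gatos)
        System.out.println(preterminal);
    }
}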

Aggregations

HasWord (edu.stanford.nlp.ling.HasWord): 57
CoreLabel (edu.stanford.nlp.ling.CoreLabel): 17
TaggedWord (edu.stanford.nlp.ling.TaggedWord): 15
ArrayList (java.util.ArrayList): 14
HasTag (edu.stanford.nlp.ling.HasTag): 13
Tree (edu.stanford.nlp.trees.Tree): 13
DocumentPreprocessor (edu.stanford.nlp.process.DocumentPreprocessor): 11
StringReader (java.io.StringReader): 11
Label (edu.stanford.nlp.ling.Label): 10
Word (edu.stanford.nlp.ling.Word): 10
List (java.util.List): 8
BufferedReader (java.io.BufferedReader): 6
MaxentTagger (edu.stanford.nlp.tagger.maxent.MaxentTagger): 5
File (java.io.File): 5
PrintWriter (java.io.PrintWriter): 5
ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint): 4
Pair (edu.stanford.nlp.util.Pair): 4
CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 3
HasIndex (edu.stanford.nlp.ling.HasIndex): 3
Sentence (edu.stanford.nlp.ling.Sentence): 3