Search in sources :

Example 41 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class CorefMentionFinder method safeHead.

/**
 * Picks a head leaf for {@code top} whose token position lies before {@code endIndex}.
 *
 * <p>The head terminal reported by {@code headFinder} is tried first. If it is missing,
 * carries no index annotation, or sits at or beyond {@code endIndex}, the leaves of
 * {@code top} are scanned from right to left and the first acceptable one is returned.
 *
 * @param top      the tree to find a head for
 * @param endIndex exclusive upper bound, compared against (index annotation - 1)
 * @return the chosen leaf, or {@code top} itself when no leaf qualifies
 */
private Tree safeHead(Tree top, int endIndex) {
    // The trees passed in do not have the CoordinationTransformer applied;
    // that only makes the SemanticHeadFinder results slightly worse.
    Tree preferred = top.headTerminal(headFinder);

    // An obscure failure case: the added period becomes the head. The bound
    // check against endIndex rejects it.
    if (preferred != null) {
        Integer annotatedIndex = ((CoreLabel) preferred.label()).get(CoreAnnotations.IndexAnnotation.class);
        if (annotatedIndex != null && annotatedIndex - 1 < endIndex) {
            return preferred;
        }
    }

    // No usable head: take the right-most leaf that is still inside the bound.
    List<Tree> leaves = top.getLeaves();
    for (int pos = leaves.size() - 1; pos >= 0; pos--) {
        Tree leaf = leaves.get(pos);
        Integer annotatedIndex = ((CoreLabel) leaf.label()).get(CoreAnnotations.IndexAnnotation.class);
        if (annotatedIndex != null && annotatedIndex - 1 < endIndex) {
            return leaf;
        }
    }

    // Last resort: hand back the tree that was passed in.
    return top;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint)

Example 42 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class DependencyCorefMentionFinder method extractMentionForHeadword.

/**
 * Registers the mention(s) headed by {@code headword}.
 *
 * <p>Headwords whose tag starts with {@code PRP} are delegated to
 * {@code extractPronounForHeadword}. Otherwise the span returned by {@code getNPSpan}
 * is trimmed (a trailing "," token and a leading token tagged {@code IN} are dropped)
 * and passed to {@code addMention}. If the headword has conjunct children in
 * {@code dep}, a second, shorter span ending before the earliest conjunct is
 * also offered to {@code addMention}.
 *
 * @param headword           head token of the candidate mention
 * @param dep                dependency graph used for the span and conjunct lookups
 * @param s                  sentence supplying tokens and the basic/enhanced graphs
 * @param mentions           passed through to the helper calls
 * @param mentionSpanSet     passed through to the helper calls
 * @param namedEntitySpanSet passed through to the helper calls
 */
private void extractMentionForHeadword(IndexedWord headword, SemanticGraph dep, CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
    SemanticGraph basic = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    SemanticGraph enhanced = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (enhanced == null) {
        // No enhanced graph on this sentence: use the basic one in its place.
        enhanced = basic;
    }

    // Pronouns are handled by their own extractor.
    if (headword.tag().startsWith("PRP")) {
        extractPronounForHeadword(headword, dep, s, mentions, mentionSpanSet, namedEntitySpanSet);
        return;
    }

    // Noun-phrase mention: getNPSpan's second component is inclusive, so add
    // one to obtain an exclusive end.
    IntPair npSpan = getNPSpan(headword, dep, sent);
    int beginIdx = npSpan.get(0);
    int endIdx = npSpan.get(1) + 1;

    // Avoid a span that ends with a comma.
    if (",".equals(sent.get(endIdx - 1).word())) {
        endIdx--;
    }
    // Avoid a span that starts with an IN-tagged token.
    if ("IN".equals(sent.get(beginIdx).tag())) {
        beginIdx++;
    }
    addMention(beginIdx, endIdx, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);

    // For a conjunction "A and B", the span above is "A and B"; below we also
    // extract just "A". ("B" is extracted when it is visited as a headword.)
    Set<IndexedWord> conjChildren = dep.getChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.CONJUNCT);
    if (conjChildren.isEmpty()) {
        return;
    }

    // Find the conjunct with the smallest index so we cut before the first one.
    IndexedWord firstConj = dep.getChildWithReln(headword, UniversalEnglishGrammaticalRelations.CONJUNCT);
    for (IndexedWord candidate : conjChildren) {
        if (candidate.index() < firstConj.index()) {
            firstConj = candidate;
        }
    }

    // Walk left from the first conjunct's left-most vertex, skipping tokens
    // tagged CC or ",", to find where the first element ends.
    IndexedWord left = SemanticGraphUtils.leftMostChildVertice(firstConj, dep);
    int firstElementEnd = left.index() - 1;
    while (firstElementEnd > beginIdx && sent.get(firstElementEnd - 1).tag().matches("CC|,")) {
        firstElementEnd--;
    }

    // Only add the shorter span if it is non-empty and still contains the headword.
    if (firstElementEnd > beginIdx && headword.index() - 1 < firstElementEnd) {
        addMention(beginIdx, firstElementEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);
    }
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) BasicDependenciesAnnotation(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.BasicDependenciesAnnotation) IndexedWord(edu.stanford.nlp.ling.IndexedWord) IntPair(edu.stanford.nlp.util.IntPair)

Example 43 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class DependencyCorefMentionFinder method findHeadInDependency.

// TODO: still errors in head finder
/**
 * Fills in {@code m.headIndex}, {@code m.headWord} and {@code m.headString} from the
 * basic dependency graph of {@code s} when {@code m.headWord} is not yet set.
 *
 * <p>Starting from the right-most token of the mention that has a node in the graph,
 * the method follows parent links for as long as the parent stays inside
 * {@code [m.startIndex, m.endIndex)}. The last token reached becomes the head. If no
 * token of the mention has a node, the last token of the mention is used.
 *
 * @param s sentence providing the tokens and the basic dependency graph
 * @param m mention to update; left untouched when it already has a head word
 */
public static void findHeadInDependency(CoreMap s, Mention m) {
    List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
    SemanticGraph basicDep = s.get(BasicDependenciesAnnotation.class);
    if (m.headWord != null) {
        return;
    }

    // Default head: the last token of the mention. Punctuation has no node in
    // the dependency graph, so scan right-to-left for the first token that does.
    int headIdx = m.endIndex - 1;
    IndexedWord node = null;
    for (int tokenIdx = m.endIndex - 1; tokenIdx >= m.startIndex; tokenIdx--) {
        node = basicDep.getNodeByIndexSafe(tokenIdx + 1);
        if (node != null) {
            headIdx = tokenIdx;
            break;
        }
    }

    // Climb towards the root while the parent is still inside the mention span.
    while (node != null) {
        IndexedWord parent = basicDep.getParent(node);
        if (parent == null) {
            break;
        }
        int parentIdx = parent.index() - 1;
        if (parentIdx < m.startIndex || parentIdx >= m.endIndex) {
            break;
        }
        headIdx = parentIdx;
        node = basicDep.getNodeByIndexSafe(parentIdx + 1);
    }

    m.headIndex = headIdx;
    m.headWord = sent.get(m.headIndex);
    m.headString = m.headWord.word().toLowerCase(Locale.ENGLISH);
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 44 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class HybridCorefMentionFinder method extractNPorPRP.

/**
 * Adds a mention for every subtree of the sentence parse that matches
 * {@code npOrPrpMentionPattern}.
 *
 * <p>A match is skipped when its span is already in {@code mentionSpanSet}, or when
 * {@code insideNE} reports it lies inside a named-entity span and the matched node's
 * value does not start with {@code PRP}. Multi-token mentions whose tokens are all
 * tagged {@code NNP*} are additionally recorded in {@code namedEntitySpanSet}.
 *
 * @param s                  sentence providing tokens, parse tree and dependency graphs
 * @param mentions           receives the newly created mentions
 * @param mentionSpanSet     spans already used; updated with each new span
 * @param namedEntitySpanSet named-entity spans; updated for all-NNP mentions
 */
private static void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    tree.indexLeaves();
    SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (enhancedDependency == null) {
        // No enhanced graph on this sentence: use the basic one in its place.
        enhancedDependency = basicDependency;
    }

    TregexMatcher matcher = npOrPrpMentionPattern.matcher(tree);
    while (matcher.find()) {
        Tree matched = matcher.getMatch();
        List<Tree> matchedLeaves = matched.getLeaves();
        Tree firstLeaf = matchedLeaves.get(0);
        Tree lastLeaf = matchedLeaves.get(matchedLeaves.size() - 1);

        // Begin is (index annotation - 1); end is the last leaf's index
        // annotation used as an exclusive bound.
        int beginIdx = ((CoreLabel) firstLeaf.label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
        int endIdx = ((CoreLabel) lastLeaf.label()).get(CoreAnnotations.IndexAnnotation.class);

        // Avoid a span that ends with a comma.
        if (",".equals(sent.get(endIdx - 1).word())) {
            endIdx--;
        }

        IntPair mSpan = new IntPair(beginIdx, endIdx);
        if (mentionSpanSet.contains(mSpan)) {
            continue;
        }
        // Spans inside a named entity are only kept for PRP matches.
        if (insideNE(mSpan, namedEntitySpanSet) && !matched.value().startsWith("PRP")) {
            continue;
        }

        int dummyMentionId = -1;
        List<CoreLabel> spanTokens = new ArrayList<>(sent.subList(beginIdx, endIdx));
        Mention m = new Mention(dummyMentionId, beginIdx, endIdx, sent, basicDependency, enhancedDependency, spanTokens, matched);
        mentions.add(m);
        mentionSpanSet.add(mSpan);

        if (m.originalSpan.size() > 1) {
            // Non-short-circuit on purpose: every token's tag is inspected.
            boolean allProperNouns = true;
            for (CoreLabel token : m.originalSpan) {
                allProperNouns &= token.tag().startsWith("NNP");
            }
            if (allProperNouns) {
                namedEntitySpanSet.add(mSpan);
            }
        }
    }
}
Also used : TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) IntPair(edu.stanford.nlp.util.IntPair) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher)

Example 45 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class HybridCorefMentionFinder method extractNamedEntityMentions.

/**
 * Creates one mention per maximal run of tokens that share the same non-"O" NER label.
 *
 * <p>A run is closed whenever the NER label changes. If the token that closes the run
 * is a possessive {@code 's} tagged {@code POS}, it is included in the run. A run that
 * is still open after the last token extends to the end of the sentence. Each new span
 * is added to {@code mentions}, {@code mentionSpanSet} and {@code namedEntitySpanSet},
 * unless the span is already present in {@code mentionSpanSet}.
 *
 * @param s                  sentence providing tokens and dependency graphs
 * @param mentions           receives the newly created mentions
 * @param mentionSpanSet     spans already used; updated with each new span
 * @param namedEntitySpanSet updated with each new span
 */
protected static void extractNamedEntityMentions(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
    SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (enhancedDependency == null) {
        // No enhanced graph on this sentence: use the basic one in its place.
        enhancedDependency = basicDependency;
    }

    String preNE = "O";
    int beginIndex = -1;
    for (CoreLabel w : sent) {
        String nerString = w.ner();
        if (nerString.equals(preNE)) {
            // Still inside the same run (or still outside any entity).
            continue;
        }

        // Label changed: the current token's position is the exclusive end of
        // the previous run.
        int endIndex = w.get(CoreAnnotations.IndexAnnotation.class) - 1;
        if (!preNE.matches("O")) {
            // Pull a following possessive marker into the entity span.
            if (w.get(CoreAnnotations.TextAnnotation.class).equals("'s") && w.tag().equals("POS")) {
                endIndex++;
            }
            IntPair mSpan = new IntPair(beginIndex, endIndex);
            // beginIndex < endIndex guards against an empty span. That can occur
            // when a 's token with its own NER label was already attached to the
            // previous entity by the heuristic above.
            if (beginIndex < endIndex && !mentionSpanSet.contains(mSpan)) {
                int dummyMentionId = -1;
                List<CoreLabel> spanTokens = new ArrayList<>(sent.subList(beginIndex, endIndex));
                Mention m = new Mention(dummyMentionId, beginIndex, endIndex, sent, basicDependency, enhancedDependency, spanTokens);
                mentions.add(m);
                mentionSpanSet.add(mSpan);
                namedEntitySpanSet.add(mSpan);
            }
        }
        beginIndex = endIndex;
        preNE = nerString;
    }

    // An entity still open after the last token runs to the end of the sentence.
    if (!preNE.matches("O")) {
        int sentenceEnd = sent.size();
        IntPair mSpan = new IntPair(beginIndex, sentenceEnd);
        if (!mentionSpanSet.contains(mSpan)) {
            int dummyMentionId = -1;
            List<CoreLabel> spanTokens = new ArrayList<>(sent.subList(beginIndex, sentenceEnd));
            Mention m = new Mention(dummyMentionId, beginIndex, sentenceEnd, sent, basicDependency, enhancedDependency, spanTokens);
            mentions.add(m);
            mentionSpanSet.add(mSpan);
            namedEntitySpanSet.add(mSpan);
        }
    }
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IntPair(edu.stanford.nlp.util.IntPair)

Aggregations

CoreLabel (edu.stanford.nlp.ling.CoreLabel)536 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)311 CoreMap (edu.stanford.nlp.util.CoreMap)103 ArrayList (java.util.ArrayList)102 Tree (edu.stanford.nlp.trees.Tree)98 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)96 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)63 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)53 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)41 IndexedWord (edu.stanford.nlp.ling.IndexedWord)38 List (java.util.List)33 Annotation (edu.stanford.nlp.pipeline.Annotation)32 Mention (edu.stanford.nlp.coref.data.Mention)29 Label (edu.stanford.nlp.ling.Label)28 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)26 Properties (java.util.Properties)25 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)21 StringReader (java.io.StringReader)20 CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation)19 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)18