Search in sources :

Example 26 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class MentionDetectionClassifier method extractFeatures.

/**
 * Builds the binary ("B-" prefixed) feature counter describing one candidate mention.
 *
 * <p>Features cover the NE tag of the head word, whether the normalized span is a
 * known NE string, the word/POS immediately before and after the span, the first and
 * last word/POS of the span, and whether the mention contains, or is contained in,
 * another mention from {@code shares}.
 *
 * @param p         the mention to featurize
 * @param shares    mentions compared against {@code p} for containment ({@code p} itself is skipped)
 * @param neStrings lowercase-normalized span strings treated as known named entities
 * @param dict      not used by this method
 * @param props     not used by this method
 * @return a counter with one count per fired feature
 */
public static Counter<String> extractFeatures(Mention p, Set<Mention> shares, Set<String> neStrings, Dictionaries dict, Properties props) {
    final Counter<String> feats = new ClassicCounter<>();
    final String normalizedSpan = p.lowercaseNormalizedSpanString();
    final String headNer = p.headWord.ner();
    final int start = p.startIndex;
    final int end = p.endIndex;
    final List<CoreLabel> tokens = p.sentenceWords;

    // Neighbours of the span inside the sentence; null at a sentence boundary.
    CoreLabel before = null;
    if (start != 0) {
        before = tokens.get(start - 1);
    }
    CoreLabel after = null;
    if (end != tokens.size()) {
        after = tokens.get(end);
    }
    final CoreLabel spanHead = p.originalSpan.get(0);
    final CoreLabel spanTail = p.originalSpan.get(p.originalSpan.size() - 1);

    feats.incrementCount("B-NETYPE-" + headNer);
    if (neStrings.contains(normalizedSpan)) {
        feats.incrementCount("B-NE-STRING-EXIST");
        // The span is a "full" NE when neither neighbour carries the head's NE tag.
        boolean continuesLeft = before != null && before.ner().equals(headNer);
        if (!continuesLeft) {
            boolean continuesRight = after != null && after.ner().equals(headNer);
            if (!continuesRight) {
                feats.incrementCount("B-NE-FULLSPAN");
            }
        }
    }

    if (before != null) {
        feats.incrementCount("B-PRECEDINGWORD-" + before.word());
    }
    if (after != null) {
        feats.incrementCount("B-FOLLOWINGWORD-" + after.word());
    }
    if (before != null) {
        feats.incrementCount("B-PRECEDINGPOS-" + before.tag());
    }
    if (after != null) {
        feats.incrementCount("B-FOLLOWINGPOS-" + after.tag());
    }

    feats.incrementCount("B-FIRSTWORD-" + spanHead.word());
    feats.incrementCount("B-FIRSTPOS-" + spanHead.tag());
    feats.incrementCount("B-LASTWORD-" + spanTail.word());
    // NOTE(review): this key carries the POS tag but reuses the "B-LASTWORD-" prefix
    // (one would expect "B-LASTPOS-"). Kept verbatim because existing models may be
    // keyed on this exact string -- confirm before renaming.
    feats.incrementCount("B-LASTWORD-" + spanTail.tag());

    // Does p contain some other mention from shares?
    if (shares.stream().anyMatch(other -> other != p && other.insideIn(p))) {
        feats.incrementCount("B-BIGGER-THAN-ANOTHER");
    }
    // Is p contained in some other mention from shares?
    if (shares.stream().anyMatch(other -> other != p && p.insideIn(other))) {
        feats.incrementCount("B-SMALLER-THAN-ANOTHER");
    }
    return feats;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter)

Example 27 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class MentionDetectionClassifier method classifyMentions.

/**
 * Prunes predicted mentions so that, within each sentence, at most one mention
 * remains per head index.
 *
 * <p>First collects the lowercase-normalized span strings of mentions whose head word
 * has a non-"O" NE tag and whose every span token carries that same tag. Then, for
 * each sentence, mentions sharing a head index are scored with {@code probabilityOf}
 * and all but the arg-max are removed from that sentence's list in place.
 *
 * <p>NOTE(review): previously the per-token NE check was a no-op (its {@code continue}
 * only affected the inner loop), so spans with mixed NE tags were also collected.
 * Models trained against that behaviour may see a slightly different distribution of
 * NE-string features -- confirm against the training pipeline.
 *
 * @param predictedMentions per-sentence mention lists; modified in place
 * @param dict              dictionaries forwarded to {@code probabilityOf}
 * @param props             properties forwarded to {@code probabilityOf}
 */
public void classifyMentions(List<List<Mention>> predictedMentions, Dictionaries dict, Properties props) {
    Set<String> neStrings = Generics.newHashSet();
    for (List<Mention> sentenceMentions : predictedMentions) {
        for (Mention m : sentenceMentions) {
            String ne = m.headWord.ner();
            if (ne.equals("O")) {
                continue;
            }
            // Only spans whose tokens all share the head's NE tag count as NE strings.
            boolean uniformNe = true;
            for (CoreLabel cl : m.originalSpan) {
                if (!ne.equals(cl.ner())) {
                    uniformNe = false;
                    break;
                }
            }
            if (uniformNe) {
                neStrings.add(m.lowercaseNormalizedSpanString());
            }
        }
    }
    for (List<Mention> predicts : predictedMentions) {
        // Group this sentence's mentions by the index of their head word.
        Map<Integer, Set<Mention>> headPositions = Generics.newHashMap();
        for (Mention p : predicts) {
            headPositions.computeIfAbsent(p.headIndex, k -> Generics.newHashSet()).add(p);
        }
        Set<Mention> remove = Generics.newHashSet();
        for (Set<Mention> shares : headPositions.values()) {
            if (shares.size() <= 1) {
                continue;
            }
            Counter<Mention> probs = new ClassicCounter<>();
            for (Mention p : shares) {
                double trueProb = probabilityOf(p, shares, neStrings, dict, props);
                probs.incrementCount(p, trueProb);
            }
            // Keep the highest-scoring mention; the span-string comparator is handed to
            // Counters.argmax (presumably as the tie-breaker -- verify against its contract).
            Mention keep = Counters.argmax(probs, (m1, m2) -> m1.spanToString().compareTo(m2.spanToString()));
            probs.remove(keep);
            remove.addAll(probs.keySet());
        }
        for (Mention r : remove) {
            predicts.remove(r);
        }
    }
}
Also used : Set(java.util.Set) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter)

Example 28 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class RuleBasedCorefMentionFinder method extractNamedEntityMentions.

/**
 * Adds one mention per maximal run of consecutive tokens that share an NE tag,
 * skipping runs tagged O, QUANTITY, CARDINAL, PERCENT, DATE, DURATION, TIME or SET.
 *
 * <p>A possessive {@code 's} (POS-tagged "POS") directly following a run is folded
 * into that run. Spans already present in {@code mentionSpanSet} are not added again.
 *
 * @param s                  the sentence; tokens and dependency graphs are read from it
 * @param mentions           receives the newly created mentions
 * @param mentionSpanSet     spans already extracted; new spans are added to it
 * @param namedEntitySpanSet receives the spans of the mentions created here
 */
protected static void extractNamedEntityMentions(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    final String skippedNeTypes = "O|QUANTITY|CARDINAL|PERCENT|DATE|DURATION|TIME|SET";

    List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
    SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (enhancedDependency == null) {
        // Fall back to the basic graph when no enhanced graph is attached.
        enhancedDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    }

    String runType = "O";
    int runStart = -1;
    for (CoreLabel token : tokens) {
        String tokenNe = token.ner();
        if (tokenNe.equals(runType)) {
            // Still inside the current run.
            continue;
        }
        // The NE tag changed at this token: the previous run ends just before it.
        int runEnd = token.get(CoreAnnotations.IndexAnnotation.class) - 1;
        if (!runType.matches(skippedNeTypes)) {
            boolean possessiveMarker = token.get(CoreAnnotations.TextAnnotation.class).equals("'s") && token.tag().equals("POS");
            if (possessiveMarker) {
                runEnd++;
            }
            IntPair span = new IntPair(runStart, runEnd);
            // The start < end guard skips empty spans; presumably these arise when a
            // possessive marker was already absorbed into the preceding run.
            if (runStart < runEnd && !mentionSpanSet.contains(span)) {
                int dummyMentionId = -1;
                List<CoreLabel> spanTokens = new ArrayList<>(tokens.subList(runStart, runEnd));
                Mention created = new Mention(dummyMentionId, runStart, runEnd, tokens, basicDependency, enhancedDependency, spanTokens);
                mentions.add(created);
                mentionSpanSet.add(span);
                namedEntitySpanSet.add(span);
            }
        }
        runStart = runEnd;
        runType = tokenNe;
    }

    // A run that is still open when the sentence ends.
    if (!runType.matches(skippedNeTypes)) {
        int sentenceEnd = tokens.size();
        IntPair span = new IntPair(runStart, sentenceEnd);
        if (!mentionSpanSet.contains(span)) {
            int dummyMentionId = -1;
            List<CoreLabel> spanTokens = new ArrayList<>(tokens.subList(runStart, sentenceEnd));
            Mention created = new Mention(dummyMentionId, runStart, sentenceEnd, tokens, basicDependency, enhancedDependency, spanTokens);
            mentions.add(created);
            mentionSpanSet.add(span);
            namedEntitySpanSet.add(span);
        }
    }
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IntPair(edu.stanford.nlp.util.IntPair)

Example 29 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class RuleBasedCorefMentionFinder method extractNPorPRP.

/**
 * Adds a mention for every subtree of the sentence's parse tree that matches
 * {@code npOrPrpMentionPattern}.
 *
 * <p>A match is skipped when its span is already in {@code mentionSpanSet}, or when
 * the language is not Chinese and {@code insideNE} reports the span as lying inside a
 * named-entity span.
 *
 * @param s                  the sentence; tokens, parse tree and dependency graphs are read from it
 * @param mentions           receives the newly created mentions
 * @param mentionSpanSet     spans already extracted; new spans are added to it
 * @param namedEntitySpanSet named-entity spans consulted via {@code insideNE}; not modified here
 */
public void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    parse.indexLeaves();
    SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (enhancedDependency == null) {
        // Fall back to the basic graph when no enhanced graph is attached.
        enhancedDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    }

    TregexMatcher matches = npOrPrpMentionPattern.matcher(parse);
    while (matches.find()) {
        Tree subtree = matches.getMatch();
        List<Tree> leaves = subtree.getLeaves();
        CoreLabel firstLeaf = (CoreLabel) leaves.get(0).label();
        CoreLabel lastLeaf = (CoreLabel) leaves.get(leaves.size() - 1).label();
        // The leaf index is shifted down by one for the start and used as-is for the
        // end, giving a [from, to) range suitable for subList on the token list.
        int from = firstLeaf.get(CoreAnnotations.IndexAnnotation.class) - 1;
        int to = lastLeaf.get(CoreAnnotations.IndexAnnotation.class);
        IntPair span = new IntPair(from, to);

        if (mentionSpanSet.contains(span)) {
            continue;
        }
        if (lang != Locale.CHINESE && insideNE(span, namedEntitySpanSet)) {
            continue;
        }

        int dummyMentionId = -1;
        List<CoreLabel> spanTokens = new ArrayList<>(tokens.subList(from, to));
        Mention created = new Mention(dummyMentionId, from, to, tokens, basicDependency, enhancedDependency, spanTokens, subtree);
        mentions.add(created);
        mentionSpanSet.add(span);
    }
}
Also used : TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) IntPair(edu.stanford.nlp.util.IntPair) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher)

Example 30 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class RuleBasedCorefMentionFinder method removeSpuriousMentionsEn.

/**
 * Filters spurious mentions out of each sentence's predicted mention list, in place.
 *
 * <p>A mention is dropped when any single-mention rule fires (pleonastic "it",
 * non-word head, negative first word, partitive, bare NP, "%" head, PERCENT/MONEY
 * head NE, adjectival demonym, stop list). Afterwards, of two surviving mentions in
 * the same sentence that share a head word and are nested, the inner one is dropped
 * unless the token right after it is POS-tagged "," or "CC".
 *
 * @param doc               the document; its sentence list is indexed in parallel with {@code predictedMentions}
 * @param predictedMentions per-sentence mention lists; modified in place
 * @param dict              dictionaries consulted by the individual rules
 */
@Override
public void removeSpuriousMentionsEn(Annotation doc, List<List<Mention>> predictedMentions, Dictionaries dict) {
    List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
    for (int sentIdx = 0; sentIdx < predictedMentions.size(); sentIdx++) {
        CoreMap sentence = sentences.get(sentIdx);
        List<Mention> mentions = predictedMentions.get(sentIdx);
        Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        Set<Mention> remove = Generics.newHashSet();

        for (Mention m : mentions) {
            String headPOS = m.headWord.get(CoreAnnotations.PartOfSpeechAnnotation.class);
            String headNE = m.headWord.get(CoreAnnotations.NamedEntityTagAnnotation.class);

            // Every rule is evaluated (non-short-circuit |=), mirroring independent checks.
            // pleonastic it
            boolean spurious = isPleonastic(m, tree);
            // non word such as 'hmm'
            spurious |= dict.nonWords.contains(m.headString);
            // span starting with a negative word
            if (!m.originalSpan.isEmpty()) {
                String firstWord = m.originalSpan.get(0).get(CoreAnnotations.TextAnnotation.class).toLowerCase(Locale.ENGLISH);
                spurious |= firstWord.matches("none|no|nothing|not");
            }
            // partitiveRule
            spurious |= partitiveRule(m, tokens, dict);
            // bareNPRule: NN head that is not a temporal, and the span is a single
            // token or starts with an adjective
            spurious |= headPOS.equals("NN") && !dict.temporals.contains(m.headString) && (m.originalSpan.size() == 1 || m.originalSpan.get(0).get(CoreAnnotations.PartOfSpeechAnnotation.class).equals("JJ"));
            // percent sign as head
            spurious |= m.headString.equals("%");
            // percent / money entities
            spurious |= headNE.equals("PERCENT") || headNE.equals("MONEY");
            // whole span is an adjectival demonym
            spurious |= dict.isAdjectivalDemonym(m.spanToString());
            // stop list (e.g., U.S., there)
            spurious |= inStopList(m);

            if (spurious) {
                remove.add(m);
            }
        }

        // nested mention with shared headword (except apposition, enumeration): pick larger one
        for (Mention outer : mentions) {
            for (Mention inner : mentions) {
                if (outer == inner || remove.contains(outer) || remove.contains(inner)) {
                    continue;
                }
                boolean nestedSameHead = outer.sentNum == inner.sentNum && outer.headWord == inner.headWord && inner.insideIn(outer);
                if (!nestedSameHead) {
                    continue;
                }
                if (inner.endIndex < tokens.size()) {
                    String followingPos = tokens.get(inner.endIndex).get(CoreAnnotations.PartOfSpeechAnnotation.class);
                    if (followingPos.equals(",") || followingPos.equals("CC")) {
                        // Followed by a comma or conjunction: keep the inner mention.
                        continue;
                    }
                }
                remove.add(inner);
            }
        }
        mentions.removeAll(remove);
    }
}
Also used : TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) CoreMap(edu.stanford.nlp.util.CoreMap) HashSet(java.util.HashSet)

Aggregations

Mention (edu.stanford.nlp.coref.data.Mention)62 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)27 CoreLabel (edu.stanford.nlp.ling.CoreLabel)27 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)21 ArrayList (java.util.ArrayList)20 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)17 CoreMap (edu.stanford.nlp.util.CoreMap)17 List (java.util.List)15 Tree (edu.stanford.nlp.trees.Tree)14 IntPair (edu.stanford.nlp.util.IntPair)14 CorefCluster (edu.stanford.nlp.coref.data.CorefCluster)12 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)10 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)9 EntityMention (edu.stanford.nlp.ie.machinereading.structure.EntityMention)7 RelationMention (edu.stanford.nlp.ie.machinereading.structure.RelationMention)7 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)7 HashMap (java.util.HashMap)7 HashSet (java.util.HashSet)7 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)6 Map (java.util.Map)6