Search in sources :

Example 16 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class HybridCorefPrinter method sentenceStringWithMention.

/**
 * Renders one sentence of {@code document} as a single line with mention boundaries marked.
 *
 * <p>The line starts with {@code "\tspeaker: <speaker> (<utterance>) "}, both read from the
 * sentence's first token. Tokens follow, separated by single spaces. A {@code "["} is emitted
 * before the token at each mention's {@code startIndex}, and {@code "]_<id>"} is emitted once the
 * token position reaches the mention's {@code endIndex} (i.e. {@code endIndex} is treated as
 * exclusive and 0-based).
 *
 * @param i index of the sentence within the document's {@code SentencesAnnotation} list
 * @param document document supplying the annotation, the gold/predicted mention lists and
 *     {@code predictedMentionsByID}
 * @param gold if {@code true}, use {@code document.goldMentions} and gold cluster ids; otherwise
 *     use {@code document.predictedMentions} and predicted cluster ids
 * @param printClusterID if {@code true}, the id after each {@code "]_"} is the cluster id;
 *     otherwise it is the mention id
 * @return the formatted sentence string
 */
public static String sentenceStringWithMention(int i, Document document, boolean gold, boolean printClusterID) {
    StringBuilder sentStr = new StringBuilder();
    List<CoreMap> sentences = document.annotation.get(CoreAnnotations.SentencesAnnotation.class);
    List<List<Mention>> allMentions = gold ? document.goldMentions : document.predictedMentions;
    CoreMap sentence = sentences.get(i);
    List<Mention> mentions = allMentions.get(i);
    List<CoreLabel> t = sentence.get(CoreAnnotations.TokensAnnotation.class);

    // Speaker and utterance are taken from the first token of the sentence.
    String speaker = t.get(0).get(SpeakerAnnotation.class);
    if (NumberMatchingRegex.isDecimalInteger(speaker)) {
        // A numeric speaker is looked up as a mention id and expanded with that mention's text.
        speaker = speaker + ": " + document.predictedMentionsByID.get(Integer.parseInt(speaker)).spanToString();
    }
    sentStr.append("\tspeaker: " + speaker + " (" + t.get(0).get(UtteranceAnnotation.class) + ") ");

    // Place each word at (token index - 1); assumes token indices are 1-based and cover
    // 1..t.size() -- TODO confirm against the tokenizer contract.
    String[] tokens = new String[t.size()];
    for (CoreLabel c : t) {
        tokens[c.index() - 1] = c.word();
    }

    // Number of mentions opening at each position, and the mentions closing at each position.
    Counter<Integer> startCounts = new ClassicCounter<>();
    Map<Integer, Deque<Mention>> endMentions = Generics.newHashMap();
    for (Mention m : mentions) {
        startCounts.incrementCount(m.startIndex);
        // push() puts later mentions first, so closers are emitted in reverse insertion order.
        endMentions.computeIfAbsent(m.endIndex, k -> new ArrayDeque<>()).push(m);
    }

    for (int j = 0; j < tokens.length; j++) {
        appendMentionClosers(sentStr, endMentions.get(j), gold, printClusterID);
        double opening = startCounts.getCount(j);
        for (int k = 0; k < opening; k++) {
            appendSpaceUnlessAfterOpenBracket(sentStr);
            sentStr.append("[");
        }
        appendSpaceUnlessAfterOpenBracket(sentStr);
        sentStr.append(tokens[j]);
    }
    // Mentions that end with the last token close after the loop.
    appendMentionClosers(sentStr, endMentions.get(tokens.length), gold, printClusterID);
    return sentStr.toString();
}

/** Appends {@code "]_<id>"} for every mention in {@code closing}; does nothing when it is {@code null}. */
private static void appendMentionClosers(StringBuilder sb, Deque<Mention> closing, boolean gold, boolean printClusterID) {
    if (closing == null) {
        return;
    }
    for (Mention m : closing) {
        int id;
        if (printClusterID) {
            id = gold ? m.goldCorefClusterID : m.corefClusterID;
        } else {
            id = m.mentionID;
        }
        sb.append("]_").append(id);
    }
}

/** Appends a single space unless {@code sb} is empty or its last character is {@code '['}. */
private static void appendSpaceUnlessAfterOpenBracket(StringBuilder sb) {
    if (sb.length() > 0 && sb.charAt(sb.length() - 1) != '[') {
        sb.append(" ");
    }
}
Also used : UtteranceAnnotation(edu.stanford.nlp.ling.CoreAnnotations.UtteranceAnnotation) Deque(java.util.Deque) ArrayDeque(java.util.ArrayDeque) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) ArrayList(java.util.ArrayList) List(java.util.List) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 17 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class HybridCorefMentionFinder method extractNPorPRP.

/**
 * Adds a mention for each parse-tree node matched by {@code npOrPrpMentionPattern}.
 *
 * <p>A match is turned into a mention only if its token span is not already in
 * {@code mentionSpanSet} and it is either not inside a known named-entity span or its node label
 * starts with {@code "PRP"}. New multi-token mentions whose tokens are all tagged {@code NNP*}
 * are additionally recorded in {@code namedEntitySpanSet}.
 *
 * @param s sentence providing tokens, parse tree and dependency graphs
 * @param mentions output list; new mentions are appended
 * @param mentionSpanSet spans already used by a mention; updated in place
 * @param namedEntitySpanSet spans treated as named entities; updated in place
 */
private static void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    parse.indexLeaves();
    SemanticGraph basicDeps = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    SemanticGraph enhancedDeps = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (enhancedDeps == null) {
        // No enhanced graph on this sentence: use the basic dependencies in its place.
        enhancedDeps = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    }

    TregexMatcher matcher = npOrPrpMentionPattern.matcher(parse);
    while (matcher.find()) {
        Tree node = matcher.getMatch();
        List<Tree> leaves = node.getLeaves();

        // Leaf indices are converted to a [begin, end) token span: first index - 1, last index as-is.
        CoreLabel firstLeaf = (CoreLabel) leaves.get(0).label();
        int beginIdx = firstLeaf.get(CoreAnnotations.IndexAnnotation.class) - 1;
        CoreLabel lastLeaf = (CoreLabel) leaves.get(leaves.size() - 1).label();
        int rawEndIdx = lastLeaf.get(CoreAnnotations.IndexAnnotation.class);

        // Avoid spans whose final token is a comma.
        int endIdx = ",".equals(tokens.get(rawEndIdx - 1).word()) ? rawEndIdx - 1 : rawEndIdx;

        IntPair span = new IntPair(beginIdx, endIdx);
        if (mentionSpanSet.contains(span) || (insideNE(span, namedEntitySpanSet) && !node.value().startsWith("PRP"))) {
            continue;
        }

        int dummyMentionId = -1;
        Mention mention = new Mention(dummyMentionId, beginIdx, endIdx, tokens, basicDeps, enhancedDeps, new ArrayList<>(tokens.subList(beginIdx, endIdx)), node);
        mentions.add(mention);
        mentionSpanSet.add(span);

        if (mention.originalSpan.size() > 1) {
            // Non-short-circuit on purpose: every token's tag is inspected.
            boolean allProperNouns = true;
            for (CoreLabel token : mention.originalSpan) {
                allProperNouns &= token.tag().startsWith("NNP");
            }
            if (allProperNouns) {
                namedEntitySpanSet.add(span);
            }
        }
    }
}
Also used : TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) IntPair(edu.stanford.nlp.util.IntPair) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher)

Example 18 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class HybridCorefMentionFinder method extractNamedEntityMentions.

/**
 * Adds one mention per maximal run of consecutive tokens that share the same non-{@code "O"} NER
 * label.
 *
 * <p>A run is closed when a token with a different NER label is reached. If that token is a
 * possessive {@code 's} (text {@code "'s"}, tag {@code POS}), it is folded into the run being
 * closed. Each new span is recorded in both {@code mentionSpanSet} and
 * {@code namedEntitySpanSet}. Empty spans and spans already in {@code mentionSpanSet} are
 * skipped, both inside the sentence and for a run that reaches the end of the sentence.
 *
 * @param s sentence providing tokens and dependency graphs
 * @param mentions output list; new mentions are appended
 * @param mentionSpanSet spans already used by a mention; updated in place
 * @param namedEntitySpanSet spans treated as named entities; updated in place
 */
protected static void extractNamedEntityMentions(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
    SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (enhancedDependency == null) {
        enhancedDependency = basicDependency;
    }
    String preNE = "O";
    int beginIndex = -1;
    for (CoreLabel w : sent) {
        String nerString = w.ner();
        if (nerString.equals(preNE)) {
            continue;
        }
        // The NER label changed at this token; its 0-based position ends the previous run.
        int endIndex = w.get(CoreAnnotations.IndexAnnotation.class) - 1;
        if (!"O".equals(preNE)) {
            if ("'s".equals(w.get(CoreAnnotations.TextAnnotation.class)) && "POS".equals(w.tag())) {
                // Keep the possessive marker inside the entity span.
                endIndex++;
            }
            addNamedEntityMentionIfNew(beginIndex, endIndex, sent, basicDependency, enhancedDependency, mentions, mentionSpanSet, namedEntitySpanSet);
        }
        beginIndex = endIndex;
        preNE = nerString;
    }
    // NE at the end of sentence
    if (!"O".equals(preNE)) {
        addNamedEntityMentionIfNew(beginIndex, sent.size(), sent, basicDependency, enhancedDependency, mentions, mentionSpanSet, namedEntitySpanSet);
    }
}

/**
 * Creates a mention over {@code [beginIndex, endIndex)} and records its span, unless the span is
 * empty or already present in {@code mentionSpanSet}.
 */
private static void addNamedEntityMentionIfNew(int beginIndex, int endIndex, List<CoreLabel> sent, SemanticGraph basicDependency, SemanticGraph enhancedDependency, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    // The span can be empty: a 's that carries its own NER label but was folded into the
    // preceding entity leaves beginIndex already past it, so the run it "starts" has no tokens.
    if (beginIndex >= endIndex) {
        return;
    }
    IntPair mSpan = new IntPair(beginIndex, endIndex);
    if (mentionSpanSet.contains(mSpan)) {
        return;
    }
    int dummyMentionId = -1;
    Mention m = new Mention(dummyMentionId, beginIndex, endIndex, sent, basicDependency, enhancedDependency, new ArrayList<>(sent.subList(beginIndex, endIndex)));
    mentions.add(m);
    mentionSpanSet.add(mSpan);
    namedEntitySpanSet.add(mSpan);
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IntPair(edu.stanford.nlp.util.IntPair)

Example 19 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class HybridCorefMentionFinder method findMentions.

/**
 * Detects candidate mentions for every sentence of {@code doc}.
 *
 * <p>Per sentence, the pre-marked, named-entity, NP/PRP and enumeration extractors are run in
 * that order. Named-entity modifiers are then extracted over the whole document, heads are
 * assigned, spurious mentions are removed, and, unless mention-detection training is enabled in
 * {@code props}, the result is passed to {@code mdClassifier}.
 *
 * @param doc annotated document; its {@code SentencesAnnotation} is read
 * @param dict dictionaries passed on to mention filtering and classification
 * @param props configuration consulted through {@code CorefProperties}
 * @return one mention list per sentence, in sentence order
 */
@Override
public List<List<Mention>> findMentions(Annotation doc, Dictionaries dict, Properties props) {
    List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
    List<List<Mention>> predictedMentions = new ArrayList<>();
    List<Set<IntPair>> spanSetsBySentence = Generics.newArrayList();
    Set<String> neStrings = Generics.newHashSet();

    // extract premarked mentions, NP/PRP, named entity, enumerations
    for (CoreMap sentence : sentences) {
        List<Mention> found = new ArrayList<>();
        Set<IntPair> usedSpans = Generics.newHashSet();
        Set<IntPair> neSpans = Generics.newHashSet();

        extractPremarkedEntityMentions(sentence, found, usedSpans, neSpans);
        extractNamedEntityMentions(sentence, found, usedSpans, neSpans);
        extractNPorPRP(sentence, found, usedSpans, neSpans);
        extractEnumerations(sentence, found, usedSpans, neSpans);
        addNamedEntityStrings(sentence, neStrings, neSpans);

        predictedMentions.add(found);
        spanSetsBySentence.add(usedSpans);
    }

    extractNamedEntityModifiers(sentences, spanSetsBySentence, predictedMentions, neStrings);

    // find head
    int sentenceCount = sentences.size();
    for (int idx = 0; idx < sentenceCount; idx++) {
        findHead(sentences.get(idx), predictedMentions.get(idx));
    }

    // mention selection based on document-wise info
    boolean removeNested = CorefProperties.removeNestedMentions(props);
    removeSpuriousMentions(doc, predictedMentions, dict, removeNested, lang);

    // if this is for MD training, skip classification
    boolean trainingMentionDetector = CorefProperties.isMentionDetectionTraining(props);
    if (!trainingMentionDetector) {
        mdClassifier.classifyMentions(predictedMentions, dict, props);
    }
    return predictedMentions;
}
Also used : Set(java.util.Set) ArrayList(java.util.ArrayList) IntPair(edu.stanford.nlp.util.IntPair) Mention(edu.stanford.nlp.coref.data.Mention) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) List(java.util.List) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 20 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class HybridCorefSystem method postProcessing.

/**
 * Prunes the coreference clusters of {@code document} in place.
 *
 * <p>When {@code HybridCorefProperties.REMOVE_APPOSITION_PREDICATENOMINATIVES} is set, every
 * mention with a non-empty apposition, predicate-nominative or relative-pronoun set is taken out
 * of its cluster, has its {@code corefClusterID} reset to its own {@code mentionID}, and is
 * removed from {@code document.positions}. When {@code HybridCorefProperties.REMOVE_SINGLETONS}
 * is set, clusters left with exactly one mention are removed from {@code document.corefClusters}.
 */
private static void postProcessing(Document document) {
    Set<Mention> detachedMentions = Generics.newHashSet();
    Set<Integer> singletonClusterIds = Generics.newHashSet();

    for (CorefCluster cluster : document.corefClusters.values()) {
        // Collected first and removed afterwards so the cluster is not modified while iterating.
        Set<Mention> toDetach = Generics.newHashSet();
        for (Mention mention : cluster.getCorefMentions()) {
            if (!HybridCorefProperties.REMOVE_APPOSITION_PREDICATENOMINATIVES) {
                continue;
            }
            boolean hasMarkedRelation = (mention.appositions != null && !mention.appositions.isEmpty())
                    || (mention.predicateNominatives != null && !mention.predicateNominatives.isEmpty())
                    || (mention.relativePronouns != null && !mention.relativePronouns.isEmpty());
            if (hasMarkedRelation) {
                toDetach.add(mention);
                detachedMentions.add(mention);
                mention.corefClusterID = mention.mentionID;
            }
        }
        cluster.corefMentions.removeAll(toDetach);

        // NOTE(review): only clusters of size exactly 1 are dropped; a cluster emptied by the
        // removal above stays in the map -- confirm this is intended.
        if (HybridCorefProperties.REMOVE_SINGLETONS && cluster.getCorefMentions().size() == 1) {
            singletonClusterIds.add(cluster.clusterID);
        }
    }

    document.corefClusters.keySet().removeAll(singletonClusterIds);

    for (Mention detached : detachedMentions) {
        document.positions.remove(detached);
    }
}
Also used : CorefCluster(edu.stanford.nlp.coref.data.CorefCluster) Mention(edu.stanford.nlp.coref.data.Mention)

Aggregations

Mention (edu.stanford.nlp.coref.data.Mention)62 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)27 CoreLabel (edu.stanford.nlp.ling.CoreLabel)27 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)21 ArrayList (java.util.ArrayList)20 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)17 CoreMap (edu.stanford.nlp.util.CoreMap)17 List (java.util.List)15 Tree (edu.stanford.nlp.trees.Tree)14 IntPair (edu.stanford.nlp.util.IntPair)14 CorefCluster (edu.stanford.nlp.coref.data.CorefCluster)12 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)10 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)9 EntityMention (edu.stanford.nlp.ie.machinereading.structure.EntityMention)7 RelationMention (edu.stanford.nlp.ie.machinereading.structure.RelationMention)7 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)7 HashMap (java.util.HashMap)7 HashSet (java.util.HashSet)7 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)6 Map (java.util.Map)6