Search in sources :

Example 51 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class CoNLLMentionExtractor method extractGoldMentions.

/**
 * Builds the gold mentions of a CoNLL document, grouped by sentence.
 *
 * <p>One (initially empty) list is created per sentence of the document. Every span in the
 * document's coref chain map becomes a {@link Mention} whose {@code goldCorefClusterID} is the
 * chain id. The first span of a chain reuses the chain id as its mention id and gets
 * {@code originalRef = -1}; every later span gets a fresh id (counting up from the largest
 * chain id plus one) and {@code originalRef} set to the chain id.
 *
 * <p>Side effect: raises the {@code maxID} field to the largest mention id handed out.
 *
 * @param conllDoc the CoNLL document providing the sentence annotation and the coref chain map
 * @return for each sentence index, the gold mentions located in that sentence
 * @throws NumberFormatException if a chain key is not a parseable integer
 */
public List<List<Mention>> extractGoldMentions(CoNLL2011DocumentReader.Document conllDoc) {
    final List<CoreMap> sentenceMaps = conllDoc.getAnnotation().get(CoreAnnotations.SentencesAnnotation.class);
    final CollectionValuedMap<String, CoreMap> chains = conllDoc.getCorefChainMap();

    // One bucket per sentence, filled below.
    final List<List<Mention>> goldBySentence = new ArrayList<>();
    for (CoreMap ignored : sentenceMaps) {
        goldBySentence.add(new ArrayList<Mention>());
    }

    // Fresh mention ids must not collide with any chain id, so start just above the largest one.
    int largestChainId = -1;
    for (String chainKey : chains.keySet()) {
        largestChainId = Math.max(largestChainId, Integer.parseInt(chainKey));
    }
    int nextFreeMentionId = largestChainId + 1;

    for (Map.Entry<String, Collection<CoreMap>> chain : chains.entrySet()) {
        final int chainId = Integer.parseInt(chain.getKey());
        boolean firstOfChain = true;
        for (CoreMap span : chain.getValue()) {
            final Mention gold = new Mention();
            gold.goldCorefClusterID = chainId;
            if (firstOfChain) {
                // The chain's first mention carries the chain id itself.
                gold.mentionID = chainId;
                gold.originalRef = -1;
                firstOfChain = false;
            } else {
                gold.mentionID = nextFreeMentionId++;
                gold.originalRef = chainId;
            }
            if (gold.mentionID > maxID) {
                maxID = gold.mentionID;
            }

            final int sentenceIdx = span.get(CoreAnnotations.SentenceIndexAnnotation.class);
            final CoreMap sentence = sentenceMaps.get(sentenceIdx);
            // Token offsets on the span are converted to sentence-relative indices.
            final int sentenceFirstToken = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
            gold.startIndex = span.get(CoreAnnotations.TokenBeginAnnotation.class) - sentenceFirstToken;
            gold.endIndex = span.get(CoreAnnotations.TokenEndAnnotation.class) - sentenceFirstToken;
            // Original note: "will be set by arrange".
            gold.originalSpan = span.get(CoreAnnotations.TokensAnnotation.class);
            // The mention shares the enhanced dependency graph of its sentence.
            gold.dependency = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
            goldBySentence.get(sentenceIdx).add(gold);
        }
    }
    return goldBySentence;
}
Also used : ArrayList(java.util.ArrayList) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) Collection(java.util.Collection) ArrayList(java.util.ArrayList) List(java.util.List) CoreMap(edu.stanford.nlp.util.CoreMap) CollectionValuedMap(edu.stanford.nlp.util.CollectionValuedMap) Map(java.util.Map) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 52 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class RuleBasedCorefMentionFinder method removeSpuriousMentionsEn.

/**
 * Filters spurious mentions out of each sentence's predicted mention list, in place.
 *
 * <p>A mention is removed when any of the following holds: it is a pleonastic "it"; its head
 * string is a listed non-word; its first word is one of none/no/nothing/not; the partitive rule
 * fires; it is a bare NN-headed NP (single token or starting with JJ) that is not a temporal;
 * its head string is "%"; its head NER tag is PERCENT or MONEY; its span is an adjectival
 * demonym; or it is in the stop list. Afterwards, of two surviving mentions in the same sentence
 * that share the same head word object and are nested, the inner one is removed unless it is
 * directly followed by a token tagged "," or "CC".
 *
 * <p>Tag comparisons are written constant-first so that a mention whose head lacks a POS or NER
 * tag is simply not matched by that rule instead of raising a NullPointerException.
 *
 * @param doc annotation whose sentence list is indexed in parallel with {@code predictedMentions}
 * @param predictedMentions per-sentence mention lists; spurious entries are removed from them
 * @param dict dictionaries consulted for non-words, temporals and adjectival demonyms
 */
@Override
public void removeSpuriousMentionsEn(Annotation doc, List<List<Mention>> predictedMentions, Dictionaries dict) {
    List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
    for (int i = 0; i < predictedMentions.size(); i++) {
        CoreMap s = sentences.get(i);
        List<Mention> mentions = predictedMentions.get(i);
        Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
        List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
        Set<Mention> remove = Generics.newHashSet();
        for (Mention m : mentions) {
            String headPOS = m.headWord.get(CoreAnnotations.PartOfSpeechAnnotation.class);
            String headNE = m.headWord.get(CoreAnnotations.NamedEntityTagAnnotation.class);
            // pleonastic it
            if (isPleonastic(m, tree)) {
                remove.add(m);
            }
            // non word such as 'hmm'
            if (dict.nonWords.contains(m.headString)) {
                remove.add(m);
            }
            // quantRule: drop mentions whose first word is a negative (none/no/nothing/not).
            // NOTE: a broader dict.quantifiers check was previously commented out at this point.
            if (m.originalSpan.size() > 0) {
                String firstWord = m.originalSpan.get(0).get(CoreAnnotations.TextAnnotation.class).toLowerCase(Locale.ENGLISH);
                // Plain equality instead of String.matches, which recompiles the regex on every call.
                if (firstWord.equals("none") || firstWord.equals("no") || firstWord.equals("nothing") || firstWord.equals("not")) {
                    remove.add(m);
                }
            }
            // partitiveRule
            if (partitiveRule(m, sent, dict)) {
                remove.add(m);
            }
            // bareNPRule
            if ("NN".equals(headPOS) && !dict.temporals.contains(m.headString) && (m.originalSpan.size() == 1 || "JJ".equals(m.originalSpan.get(0).get(CoreAnnotations.PartOfSpeechAnnotation.class)))) {
                remove.add(m);
            }
            if ("%".equals(m.headString)) {
                remove.add(m);
            }
            if ("PERCENT".equals(headNE) || "MONEY".equals(headNE)) {
                remove.add(m);
            }
            // adjectival demonym: the whole span is a demonym used as an adjective
            if (dict.isAdjectivalDemonym(m.spanToString())) {
                remove.add(m);
            }
            // stop list (e.g., U.S., there)
            if (inStopList(m)) {
                remove.add(m);
            }
        }
        // nested mention with shared headword (except apposition, enumeration): pick larger one
        for (Mention m1 : mentions) {
            for (Mention m2 : mentions) {
                if (m1 == m2 || remove.contains(m1) || remove.contains(m2)) {
                    continue;
                }
                // headWord is compared by identity: both mentions must point at the same token object.
                if (m1.sentNum == m2.sentNum && m1.headWord == m2.headWord && m2.insideIn(m1)) {
                    if (m2.endIndex < sent.size()) {
                        // A following comma or conjunction keeps the inner mention
                        // (the apposition/enumeration exception named above).
                        String followingPOS = sent.get(m2.endIndex).get(CoreAnnotations.PartOfSpeechAnnotation.class);
                        if (",".equals(followingPOS) || "CC".equals(followingPOS)) {
                            continue;
                        }
                    }
                    remove.add(m2);
                }
            }
        }
        mentions.removeAll(remove);
    }
}
Also used : TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) CoreMap(edu.stanford.nlp.util.CoreMap) HashSet(java.util.HashSet)

Example 53 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class RuleBasedCorefMentionFinder method filterPredictedMentions.

/**
 * Builds the predicted mentions when mention boundaries are given (gold mentions).
 *
 * <p>For every sentence the gold mentions are copied into a fresh list, {@code findHead} and
 * {@code setBarePlural} are applied to that copy, and finally {@code removeSpuriousMentions} is
 * run over all sentences. The input lists themselves are not modified by this method; the
 * {@link Mention} objects are shared between input and output.
 *
 * @param allGoldMentions gold mentions per sentence, indexed in parallel with the sentences of {@code doc}
 * @param doc the annotated document
 * @param dict dictionaries passed on to {@code removeSpuriousMentions}
 * @param props properties from which the remove-nested-mentions setting is read
 * @return the filtered mention lists, one per sentence
 */
public List<List<Mention>> filterPredictedMentions(List<List<Mention>> allGoldMentions, Annotation doc, Dictionaries dict, Properties props) {
    // The sentence list does not change inside the loop, so look it up once.
    List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
    List<List<Mention>> predictedMentions = new ArrayList<>(allGoldMentions.size());
    for (int i = 0; i < allGoldMentions.size(); i++) {
        List<Mention> mentions = new ArrayList<>(allGoldMentions.get(i));
        predictedMentions.add(mentions);
        findHead(sentences.get(i), mentions);
        // The mention-span and named-entity-span sets that used to be collected here were never
        // read (see the 2013 todo in the previous revision), so they are no longer built.
        setBarePlural(mentions);
    }
    removeSpuriousMentions(doc, predictedMentions, dict, CorefProperties.removeNestedMentions(props), lang);
    return predictedMentions;
}
Also used : Mention(edu.stanford.nlp.coref.data.Mention) ArrayList(java.util.ArrayList) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) ArrayList(java.util.ArrayList) List(java.util.List) CoreMap(edu.stanford.nlp.util.CoreMap) IntPair(edu.stanford.nlp.util.IntPair)

Example 54 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class Document method findDocType.

/**
 * Finds the document type: conversation or article.
 *
 * <p>Tokens are scanned in document order. The document is an article if no token has a non-zero
 * utterance index, or if the utterance index returns to 0 after a non-zero index has been seen.
 * Otherwise it is a conversation.
 *
 * <p>Side effect: raises the {@code maxUtter} field to the largest utterance index seen before
 * the method returns.
 *
 * @param dict currently unused; the first/second-person pronoun lookup that consumed it only fed
 *             a local set that was never read, so it has been removed
 * @return {@code DocType.ARTICLE} or {@code DocType.CONVERSATION}
 */
private DocType findDocType(Dictionaries dict) {
    boolean speakerChange = false;
    for (CoreMap sent : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel w : sent.get(CoreAnnotations.TokensAnnotation.class)) {
            int utterIndex = w.get(CoreAnnotations.UtteranceAnnotation.class);
            if (utterIndex != 0) {
                speakerChange = true;
            }
            // Falling back to utterance 0 after a change means the index is not monotonically
            // increasing, which is treated as an article.
            if (speakerChange && utterIndex == 0) {
                return DocType.ARTICLE;
            }
            if (maxUtter < utterIndex) {
                maxUtter = utterIndex;
            }
        }
    }
    if (!speakerChange) {
        return DocType.ARTICLE;
    }
    // in conversation, utter index keep increasing.
    return DocType.CONVERSATION;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 55 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class Document method findSpeakersInConversation.

/**
 * Identifies speakers for the utterances of a conversation.
 *
 * <p>First, for every predicted mention that has predicate nominatives, if one of them has the
 * span text "i" (compared case-insensitively), the mention's id is stored in {@code speakers}
 * under the utterance index of the mention's head word. Then the sentences are grouped into
 * consecutive runs ("paragraphs") that share the utterance index of their first token, and
 * {@code findParagraphSpeaker} is called once per run; its return value is passed into the call
 * for the following run.
 *
 * @param dict dictionaries passed on to {@code findParagraphSpeaker}
 */
private void findSpeakersInConversation(Dictionaries dict) {
    for (List<Mention> l : predictedOrderedMentionsBySentence) {
        for (Mention m : l) {
            if (m.predicateNominatives == null) {
                continue;
            }
            for (Mention a : m.predicateNominatives) {
                // Lower-case with a fixed locale: under a Turkish default locale "I" lower-cases
                // to a dotless i and would never equal "i".
                if (a.spanToString().toLowerCase(java.util.Locale.ENGLISH).equals("i")) {
                    speakers.put(m.headWord.get(CoreAnnotations.UtteranceAnnotation.class), Integer.toString(m.mentionID));
                }
            }
        }
    }
    List<CoreMap> paragraph = new ArrayList<>();
    int paragraphUtterIndex = 0;
    String nextParagraphSpeaker = "";
    // Number of sentences that precede the current paragraph.
    int paragraphOffset = 0;
    for (CoreMap sent : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        int currentUtter = sent.get(CoreAnnotations.TokensAnnotation.class).get(0).get(CoreAnnotations.UtteranceAnnotation.class);
        if (paragraphUtterIndex != currentUtter) {
            // The utterance changed: close the paragraph collected so far and start a new one.
            nextParagraphSpeaker = findParagraphSpeaker(paragraph, paragraphUtterIndex, nextParagraphSpeaker, paragraphOffset, dict);
            paragraphUtterIndex = currentUtter;
            paragraphOffset += paragraph.size();
            paragraph = new ArrayList<>();
        }
        paragraph.add(sent);
    }
    // Process the final paragraph, which the loop leaves open.
    findParagraphSpeaker(paragraph, paragraphUtterIndex, nextParagraphSpeaker, paragraphOffset, dict);
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Aggregations

CoreMap (edu.stanford.nlp.util.CoreMap): 253; CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 172; CoreLabel (edu.stanford.nlp.ling.CoreLabel): 102; SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 61; TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations): 53; ArrayList (java.util.ArrayList): 53; Annotation (edu.stanford.nlp.pipeline.Annotation): 49; Tree (edu.stanford.nlp.trees.Tree): 28; Properties (java.util.Properties): 23; StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP): 20; SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 20; List (java.util.List): 20; Mention (edu.stanford.nlp.coref.data.Mention): 17; ArrayCoreMap (edu.stanford.nlp.util.ArrayCoreMap): 17; CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations): 13; ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint): 12; SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation): 11; MachineReadingAnnotations (edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations): 9; IndexedWord (edu.stanford.nlp.ling.IndexedWord): 9; IntPair (edu.stanford.nlp.util.IntPair): 9