Search in sources :

Example 11 with IntPair

use of edu.stanford.nlp.util.IntPair in project CoreNLP by stanfordnlp.

the class HybridCorefMentionFinder method findMentions.

/**
 * Finds candidate coreference mentions in every sentence of {@code doc}.
 *
 * <p>For each sentence the premarked, named-entity, NP/PRP and enumeration extractors are run in
 * that order, all sharing one span set so that a span is only recorded once. Named-entity
 * modifiers are then extracted across the whole document, a head is located for every mention,
 * spurious mentions are removed and, unless mention-detection training is requested in
 * {@code props}, the remaining mentions are handed to {@code mdClassifier}.
 *
 * @param doc   annotated document; its sentences are read from {@code SentencesAnnotation}
 * @param dict  dictionaries forwarded to the spurious-mention filter and the classifier
 * @param props coref properties consulted for the nested-mention and MD-training switches
 * @return one mention list per sentence, in sentence order
 */
@Override
public List<List<Mention>> findMentions(Annotation doc, Dictionaries dict, Properties props) {
    List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
    List<List<Mention>> predictedMentions = new ArrayList<>();
    List<Set<IntPair>> mentionSpanSetList = Generics.newArrayList();
    Set<String> neStrings = Generics.newHashSet();

    // Pass 1: per-sentence extraction of premarked mentions, named entities, NP/PRP and enumerations.
    for (CoreMap sentence : sentences) {
        List<Mention> sentenceMentions = new ArrayList<>();
        predictedMentions.add(sentenceMentions);
        Set<IntPair> seenSpans = Generics.newHashSet();
        Set<IntPair> neSpans = Generics.newHashSet();

        extractPremarkedEntityMentions(sentence, sentenceMentions, seenSpans, neSpans);
        extractNamedEntityMentions(sentence, sentenceMentions, seenSpans, neSpans);
        extractNPorPRP(sentence, sentenceMentions, seenSpans, neSpans);
        extractEnumerations(sentence, sentenceMentions, seenSpans, neSpans);
        addNamedEntityStrings(sentence, neStrings, neSpans);

        mentionSpanSetList.add(seenSpans);
    }

    // Pass 2: modifiers of named entities, using the NE strings gathered over the whole document.
    extractNamedEntityModifiers(sentences, mentionSpanSetList, predictedMentions, neStrings);

    // Pass 3: locate the head of every mention, sentence by sentence.
    for (int sentIdx = 0; sentIdx < sentences.size(); sentIdx++) {
        CoreMap sentence = sentences.get(sentIdx);
        findHead(sentence, predictedMentions.get(sentIdx));
    }

    // Pass 4: document-level filtering.
    boolean dropNested = CorefProperties.removeNestedMentions(props);
    removeSpuriousMentions(doc, predictedMentions, dict, dropNested, lang);

    // Classification is skipped when the output is meant for mention-detection training.
    boolean trainingMentionDetection = CorefProperties.isMentionDetectionTraining(props);
    if (!trainingMentionDetection) {
        mdClassifier.classifyMentions(predictedMentions, dict, props);
    }
    return predictedMentions;
}
Also used : Set(java.util.Set) ArrayList(java.util.ArrayList) IntPair(edu.stanford.nlp.util.IntPair) Mention(edu.stanford.nlp.coref.data.Mention) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) ArrayList(java.util.ArrayList) List(java.util.List) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 12 with IntPair

use of edu.stanford.nlp.util.IntPair in project CoreNLP by stanfordnlp.

the class CoNLLDocumentReader method writeTabSep.

/**
 * Writes one sentence to {@code pw} as tab-separated rows, one row per token, followed by an
 * empty line.
 *
 * <p>Each row holds four columns: the token's word, its tag, its NER label and either
 * {@code MENTION} or {@code O}. A token is flagged {@code MENTION} when it carries a coref
 * annotation and its word equals the head recorded for one of the mentions seen so far in the
 * sentence. The head of a mention is the head terminal of the parse-tree constituent with exactly
 * the mention's span or, for a single-token mention without such a constituent, the token's own
 * word. When the flagged token is a possessive {@code 's}, the flag is moved to the preceding token.
 *
 * @param pw       destination writer
 * @param sentence sentence providing the tokens and the parse tree
 * @param chainmap not used by this method; kept for interface compatibility
 */
public static void writeTabSep(PrintWriter pw, CoreMap sentence, CollectionValuedMap<String, CoreMap> chainmap) {
    HeadFinder headFinder = new ModCollinsHeadFinder();
    List<CoreLabel> sentenceAnno = sentence.get(CoreAnnotations.TokensAnnotation.class);
    Tree sentenceTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
    Map<Pair<Integer, Integer>, String> sentenceInfo = Generics.newHashMap();
    Set<Tree> sentenceSubTrees = sentenceTree.subTrees();
    sentenceTree.setSpans();
    // Index every constituent by its (source, target) span so mentions can be looked up by span.
    Map<Pair<Integer, Integer>, Tree> treeSpanMap = Generics.newHashMap();
    for (Tree ctree : sentenceSubTrees) {
        IntPair span = ctree.getSpan();
        if (span != null) {
            treeSpanMap.put(Pair.makePair(span.getSource(), span.getTarget()), ctree);
        }
    }
    String[][] finalSentence = new String[sentenceAnno.size()][];
    Map<Pair<Integer, Integer>, String> allHeads = Generics.newHashMap();
    for (int index = 0; index < sentenceAnno.size(); index++) {
        CoreLabel newAnno = sentenceAnno.get(index);
        String word = newAnno.word();
        String tag = newAnno.tag();
        String cat = newAnno.ner();
        String coref = newAnno.get(CorefCoreAnnotations.CorefAnnotation.class);
        String mentionFlag;
        if (coref == null) {
            // Tokens without a coref annotation are still registered, with a null value.
            sentenceInfo.put(Pair.makePair(index, index), coref);
            mentionFlag = "O";
        } else {
            // A token may belong to several coref groups, separated by '|'.
            for (String corefG : coref.split("\\|")) {
                Pair<Integer, Integer> mention = getMention(index, corefG, sentenceAnno);
                if (!include(sentenceInfo, mention, corefG)) {
                    sentenceInfo.put(mention, corefG);
                    Tree mentionTree = treeSpanMap.get(mention);
                    String head = null;
                    if (mentionTree != null) {
                        head = mentionTree.headTerminal(headFinder).nodeString();
                    } else if (mention.first.equals(mention.second)) {
                        head = word;
                    }
                    allHeads.put(mention, head);
                }
            }
            mentionFlag = allHeads.containsValue(word) ? "MENTION" : "O";
        }
        finalSentence[index] = new String[] { word, tag, cat, mentionFlag };
    }
    for (int i = 0; i < finalSentence.length; i++) {
        String[] wordInfo = finalSentence[i];
        if (i < finalSentence.length - 1) {
            String[] nextWordInfo = finalSentence[i + 1];
            // Move the flag from a possessive 's onto the token it attaches to.
            // Constant-first equals keeps this safe if a token has no word.
            if ("MENTION".equals(nextWordInfo[3]) && "'s".equals(nextWordInfo[0])) {
                wordInfo[3] = "MENTION";
                nextWordInfo[3] = "O";
            }
        }
        pw.println(wordInfo[0] + "\t" + wordInfo[1] + "\t" + wordInfo[2] + "\t" + wordInfo[3]);
    }
    pw.println("");
}
Also used : ModCollinsHeadFinder(edu.stanford.nlp.trees.ModCollinsHeadFinder) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) IntPair(edu.stanford.nlp.util.IntPair) ModCollinsHeadFinder(edu.stanford.nlp.trees.ModCollinsHeadFinder) HeadFinder(edu.stanford.nlp.trees.HeadFinder) ChineseSemanticHeadFinder(edu.stanford.nlp.trees.international.pennchinese.ChineseSemanticHeadFinder) CoreLabel(edu.stanford.nlp.ling.CoreLabel) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) List(java.util.List) ArrayList(java.util.ArrayList) IntPair(edu.stanford.nlp.util.IntPair) Pair(edu.stanford.nlp.util.Pair)

Example 13 with IntPair

use of edu.stanford.nlp.util.IntPair in project CoreNLP by stanfordnlp.

the class CorefChain method deleteMention.

/**
   * Removes a mention from this coreference chain: it is taken out of the
   * mention list, and the mention-map entry keyed by the mention's
   * (sentence number, head index) position is dropped as well.
   *
   * @param m The mention to delete.
   */
public void deleteMention(CorefMention m) {
    mentions.remove(m);
    mentionMap.remove(new IntPair(m.sentNum, m.headIndex));
}
Also used : IntPair(edu.stanford.nlp.util.IntPair)

Example 14 with IntPair

use of edu.stanford.nlp.util.IntPair in project CoreNLP by stanfordnlp.

the class RuleBasedCorefMentionFinder method extractNamedEntityMentions.

/**
 * Turns each maximal run of tokens sharing the same NER label into a mention, unless the label
 * is one of O, QUANTITY, CARDINAL, PERCENT, DATE, DURATION, TIME or SET.
 *
 * <p>Spans are half-open token offsets {@code [begin, end)}. A possessive {@code 's} (tagged POS)
 * that directly follows a run is absorbed into that run. A span already present in
 * {@code mentionSpanSet} is skipped; each new span is added to both span sets.
 *
 * @param s                  sentence providing tokens and dependency graphs
 * @param mentions           receives the newly created mentions
 * @param mentionSpanSet     spans already claimed by a mention; updated in place
 * @param namedEntitySpanSet spans known to be named entities; updated in place
 */
protected static void extractNamedEntityMentions(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    // NER labels that never yield a mention (matched as a whole-string regex).
    final String nonMentionLabels = "O|QUANTITY|CARDINAL|PERCENT|DATE|DURATION|TIME|SET";

    List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
    SemanticGraph basicDeps = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    SemanticGraph enhancedDeps = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (enhancedDeps == null) {
        // No enhanced graph available: fall back to the basic dependencies.
        enhancedDeps = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    }

    String runLabel = "O";
    int runStart = -1;
    for (CoreLabel token : tokens) {
        String label = token.ner();
        if (label.equals(runLabel)) {
            // Still inside the current run.
            continue;
        }
        // The label changed, so the previous run ends just before this token.
        int runEnd = token.get(CoreAnnotations.IndexAnnotation.class) - 1;
        if (!runLabel.matches(nonMentionLabels)) {
            boolean possessive = token.get(CoreAnnotations.TextAnnotation.class).equals("'s") && token.tag().equals("POS");
            if (possessive) {
                runEnd++;
            }
            IntPair span = new IntPair(runStart, runEnd);
            // runStart < runEnd guards against an empty span, e.g. when a 's was already
            // attached to the previous run by the possessive heuristic above.
            if (runStart < runEnd && !mentionSpanSet.contains(span)) {
                List<CoreLabel> spanTokens = new ArrayList<>(tokens.subList(runStart, runEnd));
                mentions.add(new Mention(-1, runStart, runEnd, tokens, basicDeps, enhancedDeps, spanTokens));
                mentionSpanSet.add(span);
                namedEntitySpanSet.add(span);
            }
        }
        runStart = runEnd;
        runLabel = label;
    }

    // A run that reaches the end of the sentence is closed here.
    if (!runLabel.matches(nonMentionLabels)) {
        int sentenceEnd = tokens.size();
        IntPair span = new IntPair(runStart, sentenceEnd);
        if (!mentionSpanSet.contains(span)) {
            List<CoreLabel> spanTokens = new ArrayList<>(tokens.subList(runStart, tokens.size()));
            mentions.add(new Mention(-1, runStart, tokens.size(), tokens, basicDeps, enhancedDeps, spanTokens));
            mentionSpanSet.add(span);
            namedEntitySpanSet.add(span);
        }
    }
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IntPair(edu.stanford.nlp.util.IntPair)

Example 15 with IntPair

use of edu.stanford.nlp.util.IntPair in project CoreNLP by stanfordnlp.

the class RuleBasedCorefMentionFinder method extractNPorPRP.

/**
 * Adds a mention for every parse-tree node matched by {@code npOrPrpMentionPattern}.
 *
 * <p>The span of a match runs from its first leaf to its last leaf, as a half-open token range.
 * A match is skipped when its span is already in {@code mentionSpanSet} or, for any language
 * other than Chinese, when {@code insideNE} reports it as lying inside a named-entity span.
 * Accepted spans are added to {@code mentionSpanSet}; {@code namedEntitySpanSet} is only read.
 *
 * <p>Three earlier heuristics are intentionally left disabled: trimming a trailing comma from the
 * span, always admitting PRP matches even inside a named entity, and registering multi-token
 * all-NNP mentions as named-entity spans.
 *
 * @param s                  sentence providing tokens, parse tree and dependency graphs
 * @param mentions           receives the newly created mentions
 * @param mentionSpanSet     spans already claimed by a mention; updated in place
 * @param namedEntitySpanSet spans known to be named entities
 */
public void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    // Leaf indices are read back below to derive token offsets.
    parse.indexLeaves();

    SemanticGraph basicDeps = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    SemanticGraph enhancedDeps = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (enhancedDeps == null) {
        // No enhanced graph available: fall back to the basic dependencies.
        enhancedDeps = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    }

    TregexMatcher matches = npOrPrpMentionPattern.matcher(parse);
    while (matches.find()) {
        Tree node = matches.getMatch();
        List<Tree> leaves = node.getLeaves();

        // Leaf indices are 1-based: subtract one for the start, keep the last as exclusive end.
        CoreLabel firstLeaf = (CoreLabel) leaves.get(0).label();
        int start = firstLeaf.get(CoreAnnotations.IndexAnnotation.class) - 1;
        CoreLabel lastLeaf = (CoreLabel) leaves.get(leaves.size() - 1).label();
        int end = lastLeaf.get(CoreAnnotations.IndexAnnotation.class);

        IntPair span = new IntPair(start, end);
        if (mentionSpanSet.contains(span)) {
            continue;
        }
        // The named-entity containment filter is not applied to Chinese.
        if (lang != Locale.CHINESE && insideNE(span, namedEntitySpanSet)) {
            continue;
        }

        List<CoreLabel> spanTokens = new ArrayList<>(tokens.subList(start, end));
        mentions.add(new Mention(-1, start, end, tokens, basicDeps, enhancedDeps, spanTokens, node));
        mentionSpanSet.add(span);
    }
}
Also used : TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) IntPair(edu.stanford.nlp.util.IntPair) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher)

Aggregations

IntPair (edu.stanford.nlp.util.IntPair)37 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)19 CoreLabel (edu.stanford.nlp.ling.CoreLabel)17 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)16 Mention (edu.stanford.nlp.coref.data.Mention)14 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)11 CoreMap (edu.stanford.nlp.util.CoreMap)9 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)8 ArrayList (java.util.ArrayList)8 IndexedWord (edu.stanford.nlp.ling.IndexedWord)7 Tree (edu.stanford.nlp.trees.Tree)7 List (java.util.List)6 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)5 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)3 TregexMatcher (edu.stanford.nlp.trees.tregex.TregexMatcher)3 TregexPattern (edu.stanford.nlp.trees.tregex.TregexPattern)3 CollectionValuedMap (edu.stanford.nlp.util.CollectionValuedMap)3 Set (java.util.Set)3 BasicDependenciesAnnotation (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.BasicDependenciesAnnotation)2 Constituent (edu.stanford.nlp.trees.Constituent)2