Search in sources :

Example 11 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class CorefMentionFinder method extractEnumerations.

/**
 * Adds a mention for each sub-tree bound to the nodes {@code m1} and {@code m2} by
 * {@code enumerationsMentionPattern} in the sentence's parse tree.
 *
 * <p>A matched sub-tree becomes a mention only when its token span is not already in
 * {@code mentionSpanSet} and is not reported as lying inside a named entity by {@code insideNE}.
 * Every mention created here gets the placeholder id {@code -1}.
 *
 * @param s sentence annotation; its tokens, parse tree and dependency graphs are read
 * @param mentions output list; newly created mentions are appended to it
 * @param mentionSpanSet spans already claimed by a mention; spans of new mentions are added to it
 * @param namedEntitySpanSet named-entity spans handed to {@code insideNE}
 */
protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (enhancedDependency == null) {
        // No enhanced graph on this sentence: reuse the basic graph fetched above.
        enhancedDependency = basicDependency;
    }

    TregexMatcher matcher = enumerationsMentionPattern.matcher(tree);
    Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap();
    // Named nodes of the pattern, handled in this order for every match.
    String[] nodeNames = {"m1", "m2"};
    while (matcher.find()) {
        for (String nodeName : nodeNames) {
            Tree subTree = matcher.getNode(nodeName);
            List<Tree> leaves = subTree.getLeaves();
            // Span is [index of first leaf - 1, index of last leaf); it is later used as a
            // half-open range for sent.subList. Presumably IndexAnnotation is 1-based -- confirm.
            int beginIdx = ((CoreLabel) leaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
            int endIdx = ((CoreLabel) leaves.get(leaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
            spanToMentionSubTree.put(new IntPair(beginIdx, endIdx), subTree);
        }
    }

    for (Map.Entry<IntPair, Tree> spanMention : spanToMentionSubTree.entrySet()) {
        IntPair span = spanMention.getKey();
        if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
            continue;
        }
        int dummyMentionId = -1;
        List<CoreLabel> mentionTokens = new ArrayList<>(sent.subList(span.get(0), span.get(1)));
        Mention m = new Mention(dummyMentionId, span.get(0), span.get(1), sent, basicDependency, enhancedDependency, mentionTokens, spanMention.getValue());
        mentions.add(m);
        mentionSpanSet.add(span);
    }
}
Also used : TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) IntPair(edu.stanford.nlp.util.IntPair) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher) Map(java.util.Map) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 12 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class CoNLLDocumentReader method extractGoldMentions.

/**
 * Extracts the gold mentions of a CoNLL document, grouped by sentence.
 *
 * <p>For every entry of the document's coref chain map a {@link Mention} is created whose
 * {@code goldCorefClusterID} is the parsed map key, whose {@code startIndex}/{@code endIndex} are
 * the entry's token begin/end made relative to the token begin of its sentence, and whose
 * {@code originalSpan} is the entry's token list.
 *
 * @param conllDoc document providing the sentence annotations and the coref chain map
 * @return one list per sentence (same order as the sentences annotation) holding that sentence's gold mentions
 * @throws NumberFormatException if a coref chain key is not a parsable integer
 */
public List<List<Mention>> extractGoldMentions(CoNLLDocument conllDoc) {
    List<CoreMap> sentences = conllDoc.getAnnotation().get(CoreAnnotations.SentencesAnnotation.class);

    // Start with one empty bucket per sentence so mentions can be filed by sentence index.
    List<List<Mention>> goldMentionsBySentence = new ArrayList<>();
    for (CoreMap ignored : sentences) {
        goldMentionsBySentence.add(new ArrayList<>());
    }

    CollectionValuedMap<String, CoreMap> corefChainMap = conllDoc.getCorefChainMap();
    for (String clusterKey : corefChainMap.keySet()) {
        int clusterId = Integer.parseInt(clusterKey);
        for (CoreMap goldSpan : corefChainMap.get(clusterKey)) {
            Mention gold = new Mention();
            gold.goldCorefClusterID = clusterId;

            int sentenceIndex = goldSpan.get(CoreAnnotations.SentenceIndexAnnotation.class);
            // Token offsets on the span are shifted by the sentence's own token begin.
            int sentenceTokenBegin = sentences.get(sentenceIndex).get(CoreAnnotations.TokenBeginAnnotation.class);
            gold.startIndex = goldSpan.get(CoreAnnotations.TokenBeginAnnotation.class) - sentenceTokenBegin;
            gold.endIndex = goldSpan.get(CoreAnnotations.TokenEndAnnotation.class) - sentenceTokenBegin;
            gold.originalSpan = goldSpan.get(CoreAnnotations.TokensAnnotation.class);

            goldMentionsBySentence.get(sentenceIndex).add(gold);
        }
    }
    return goldMentionsBySentence;
}
Also used : Mention(edu.stanford.nlp.coref.data.Mention) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 13 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class HybridCorefPrinter method printErrorLogDcoref.

/**
 * Builds an error-log entry for a mention {@code m} and the antecedent {@code found} chosen for it.
 *
 * <p>Returns the empty string when {@code document.isCoref(m, found)} is true. Otherwise the entry
 * lists sentences 0 through {@code m.sentNum} twice (once per flag combination passed to
 * {@code sentenceStringWithMention}), the properties of {@code m}, and one row per ordered
 * antecedent with the oracle decision and the name of the first dcoref check that accepts it.
 *
 * @param m mention being reported
 * @param found antecedent compared with {@code m} through {@code document.isCoref}
 * @param document document supplying doc info, predicted mentions and coref clusters
 * @param dict dictionaries forwarded to the antecedent ordering and to every dcoref check
 * @param mIdx forwarded to {@code Sieve.getOrderedAntecedents}
 * @param whichResolver text printed after {@code "RESOLVER TYPE: "}
 * @return the formatted entry, or {@code ""} when the decision is correct
 * @throws Exception declared by the signature; presumably propagated from the sieve calls
 */
public static String printErrorLogDcoref(Mention m, Mention found, Document document, Dictionaries dict, int mIdx, String whichResolver) throws Exception {
    StringBuilder sb = new StringBuilder();
    sb.append("\nERROR START-----------------------------------------------------------------------\n");
    sb.append("RESOLVER TYPE: ").append(whichResolver).append("\n");
    sb.append("DOCUMENT: " + document.docInfo.get("DOC_ID") + ", " + document.docInfo.get("DOC_PART"));
    sb.append("\n");

    sb.append("\nGOLD CLUSTER ID\n");
    for (int sentIdx = 0; sentIdx <= m.sentNum; sentIdx++) {
        String sentence = "\tSENT " + sentIdx + "\t" + sentenceStringWithMention(sentIdx, document, true, true);
        sb.append(sentence).append("\n");
    }
    sb.append("\nMENTION ID\n");
    for (int sentIdx = 0; sentIdx <= m.sentNum; sentIdx++) {
        String sentence = "\tSENT " + sentIdx + "\t" + sentenceStringWithMention(sentIdx, document, false, false);
        sb.append(sentence).append("\n");
    }

    // Collect antecedents in dcoref order: the mention's own sentence first, then backwards to sentence 0.
    List<Mention> orderedAnts = new ArrayList<>();
    for (int sentIdx = m.sentNum; sentIdx >= 0; sentIdx--) {
        orderedAnts.addAll(Sieve.getOrderedAntecedents(m, sentIdx, mIdx, document.predictedMentions, dict));
    }
    // Position of each antecedent (by mention id) within that ordering.
    Map<Integer, Integer> orders = Generics.newHashMap();
    int position = 0;
    for (Mention candidate : orderedAnts) {
        orders.put(candidate.mentionID, position);
        position++;
    }

    CorefCluster mC = document.corefClusters.get(m.corefClusterID);
    boolean firstMention = isFirstMention(m, document);
    // we're printing only mentions that found coref antecedent
    boolean foundCorefAnt = true;
    boolean correctDecision = document.isCoref(m, found);
    if (correctDecision) {
        return "";
    }

    sb.append("\nMENTION: " + m.spanToString() + " (" + m.mentionID + ")"
        + "\tperson: " + m.person
        + "\tsingleton? " + (!m.hasTwin)
        + "\t\tisFirstMention? " + firstMention
        + "\t\tfoundAnt? " + foundCorefAnt
        + "\t\tcorrectDecision? " + correctDecision);
    sb.append("\n\ttype: " + m.mentionType
        + "\tHeadword: " + m.headWord.word()
        + "\tNEtype: " + m.nerString
        + "\tnumber: " + m.number
        + "\tgender: " + m.gender
        + "\tanimacy: " + m.animacy);
    sb.append("\n");
    if (m.contextParseTree != null) {
        sb.append(m.contextParseTree.pennString());
    }

    sb.append("\n\n\t\tOracle\t\tDcoref\t\t\tRF\t\tAntecedent\n");
    for (Mention ant : orderedAnts) {
        int antID = ant.mentionID;
        CorefCluster aC = document.corefClusters.get(ant.corefClusterID);
        boolean oracle = Sieve.isReallyCoref(document, m.mentionID, antID);
        int order = orders.get(antID);
        String oracleStr = oracle ? "coref   " : "notcoref";

        // The first check that accepts the pair names the row; the order of checks is significant.
        String dcorefStr;
        if (dcorefSpeaker.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-speaker";
        } else if (dcorefDiscourse.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-discourse";
        } else if (dcorefExactString.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-exactString";
        } else if (dcorefRelaxedExactString.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-relaxedExact";
        } else if (dcorefPreciseConstructs.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-preciseConstruct";
        } else if (dcorefHead1.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-head1";
        } else if (dcorefHead2.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-head2";
        } else if (dcorefHead3.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-head3";
        } else if (dcorefHead4.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-head4";
        } else if (dcorefRelaxedHead.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-relaxedHead";
        } else if (dcorefPronounSieve.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-pronounSieve";
        } else {
            dcorefStr = "notcoref";
        }
        dcorefStr = dcorefStr + "\t" + order;

        sb.append("\t\t" + oracleStr + "\t" + dcorefStr + "\t\t" + ant.spanToString() + " (" + ant.mentionID + ")\n");
    }
    sb.append("ERROR END -----------------------------------------------------------------------\n");
    return sb.toString();
}
Also used : CorefCluster(edu.stanford.nlp.coref.data.CorefCluster) Mention(edu.stanford.nlp.coref.data.Mention) ArrayList(java.util.ArrayList)

Example 14 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class HybridCorefPrinter method printMentionDetectionLog.

/**
 * Builds a per-sentence mention-detection log for a document.
 *
 * <p>For each sentence the log contains the gold and predicted sentence strings, one line per
 * predicted mention (prefixed with {@code SPURIOUS} when {@code hasTwin} is false), one entry per
 * gold mention whose {@code hasTwin} is false, the parse tree when present, and the sentence's
 * {@code BasicDependenciesAnnotation}.
 *
 * @param document document supplying the sentence annotations and the gold/predicted mentions
 * @return the formatted log
 */
public static String printMentionDetectionLog(Document document) {
    List<CoreMap> sentences = document.annotation.get(SentencesAnnotation.class);
    StringBuilder sbLog = new StringBuilder();
    sbLog.append("\nERROR START-----------------------------------------------------------------------\n");

    for (int i = 0; i < sentences.size(); i++) {
        sbLog.append("\nSENT ").append(i).append(" GOLD   : ");
        sbLog.append(HybridCorefPrinter.sentenceStringWithMention(i, document, true, false)).append("\n");
        sbLog.append("SENT ").append(i).append(" PREDICT: ");
        sbLog.append(HybridCorefPrinter.sentenceStringWithMention(i, document, false, false)).append("\n");

        // Predicted mentions; those without a gold twin are flagged as spurious.
        for (Mention p : document.predictedMentions.get(i)) {
            sbLog.append("\n");
            if (!p.hasTwin) {
                sbLog.append("\tSPURIOUS");
            }
            sbLog.append("\tmention: ").append(p.spanToString());
            sbLog.append("\t\t\theadword: ").append(p.headString);
            sbLog.append("\tPOS: ").append(p.headWord.tag());
            sbLog.append("\tmentiontype: ").append(p.mentionType);
            sbLog.append("\tnumber: ").append(p.number);
            sbLog.append("\tgender: ").append(p.gender);
            sbLog.append("\tanimacy: ").append(p.animacy);
            sbLog.append("\tperson: ").append(p.person);
            sbLog.append("\tNE: ").append(p.nerString);
        }
        sbLog.append("\n");

        // Gold mentions that have no predicted twin.
        for (Mention g : document.goldMentions.get(i)) {
            if (g.hasTwin) {
                continue;
            }
            sbLog.append("\tmissed gold: ").append(g.spanToString());
            sbLog.append("\tPOS: ").append(g.headWord.tag());
            sbLog.append("\tmentiontype: ").append(g.mentionType);
            sbLog.append("\theadword: ").append(g.headString);
            sbLog.append("\tnumber: ").append(g.number);
            sbLog.append("\tgender: ").append(g.gender);
            sbLog.append("\tanimacy: ").append(g.animacy);
            sbLog.append("\tperson: ").append(g.person);
            sbLog.append("\tNE: ").append(g.nerString).append("\n");

            // Show the token right after the mention when the sentence has one.
            if (g.sentenceWords != null && g.sentenceWords.size() > g.endIndex) {
                sbLog.append("\tnextword: ").append(g.sentenceWords.get(g.endIndex));
                sbLog.append("\t").append(g.sentenceWords.get(g.endIndex).tag()).append("\n");
            }
            if (g.contextParseTree != null) {
                sbLog.append(g.contextParseTree.pennString());
            }
            sbLog.append("\n\n");
        }

        CoreMap sentence = sentences.get(i);
        if (sentence.get(TreeAnnotation.class) != null) {
            sbLog.append("\n\tparse: \n").append(sentence.get(TreeAnnotation.class).pennString());
        }
        // NOTE(review): the label says "collapsedDependency" but the basic dependencies are printed -- confirm intent.
        sbLog.append("\n\tcollapsedDependency: \n").append(sentence.get(BasicDependenciesAnnotation.class));
    }

    sbLog.append("ERROR END -----------------------------------------------------------------------\n");
    return sbLog.toString();
}
Also used : Mention(edu.stanford.nlp.coref.data.Mention) BasicDependenciesAnnotation(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.BasicDependenciesAnnotation) TreeAnnotation(edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 15 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class HybridCorefPrinter method printErrorLog.

/**
 * Builds an error-log entry for a mention {@code m} resolved by a random-forest sieve.
 *
 * <p>An antecedent counts as found when {@code probs} is non-empty and its maximum exceeds
 * {@code sieve.thresMerge}. The decision counts as correct when either {@code m} is a first
 * mention and no antecedent was found, or an antecedent was found and the arg-max of
 * {@code probs} is really coreferent with {@code m}; in that case the empty string is returned.
 * Otherwise the entry lists sentences 0 through {@code m.sentNum} twice (marking the sentence
 * at distance {@code sieve.maxSentDist}), the properties of {@code m}, and one row per entry
 * of {@code Counters.toSortedList(probs)}.
 *
 * @param m mention being reported
 * @param document document supplying doc info, predicted mentions and coref clusters
 * @param probs scores keyed by antecedent mention id
 * @param mIdx forwarded to {@code Sieve.getOrderedAntecedents}
 * @param dict dictionaries forwarded to the antecedent ordering and to every dcoref check
 * @param sieve sieve whose {@code mType}, {@code aType}, {@code maxSentDist} and {@code thresMerge} are read
 * @return the formatted entry, or {@code ""} when the decision is correct
 * @throws Exception declared by the signature; presumably propagated from the sieve calls
 */
public static String printErrorLog(Mention m, Document document, Counter<Integer> probs, int mIdx, Dictionaries dict, RFSieve sieve) throws Exception {
    StringBuilder sb = new StringBuilder();
    sb.append("\nERROR START-----------------------------------------------------------------------\n");
    sb.append("RESOLVER TYPE: mType: " + sieve.mType + ", aType: " + sieve.aType);
    sb.append("\n");
    sb.append("DOCUMENT: " + document.docInfo.get("DOC_ID") + ", " + document.docInfo.get("DOC_PART"));
    sb.append("\n");

    sb.append("\nGOLD CLUSTER ID\n");
    for (int sentIdx = 0; sentIdx <= m.sentNum; sentIdx++) {
        // Mark the sentence that lies exactly maxSentDist before the mention's sentence.
        if (m.sentNum - sentIdx == sieve.maxSentDist) {
            sb.append("\tstart compare from here-------------\n");
        }
        String sentence = "\tSENT " + sentIdx + "\t" + sentenceStringWithMention(sentIdx, document, true, true);
        sb.append(sentence).append("\n");
    }
    sb.append("\nMENTION ID\n");
    for (int sentIdx = 0; sentIdx <= m.sentNum; sentIdx++) {
        if (m.sentNum - sentIdx == sieve.maxSentDist) {
            sb.append("\tstart compare from here-------------\n");
        }
        String sentence = "\tSENT " + sentIdx + "\t" + sentenceStringWithMention(sentIdx, document, false, false);
        sb.append(sentence).append("\n");
    }

    // Collect antecedents in dcoref order, nearest sentence first, up to maxSentDist sentences back.
    List<Mention> orderedAnts = new ArrayList<>();
    for (int sentDist = 0; sentDist <= Math.min(sieve.maxSentDist, m.sentNum); sentDist++) {
        orderedAnts.addAll(Sieve.getOrderedAntecedents(m, m.sentNum - sentDist, mIdx, document.predictedMentions, dict));
    }
    // Position of each antecedent (by mention id) within that ordering.
    Map<Integer, Integer> orders = Generics.newHashMap();
    int position = 0;
    for (Mention candidate : orderedAnts) {
        orders.put(candidate.mentionID, position);
        position++;
    }

    CorefCluster mC = document.corefClusters.get(m.corefClusterID);
    boolean firstMention = isFirstMention(m, document);
    boolean foundCorefAnt = probs.size() > 0 && Counters.max(probs) > sieve.thresMerge;
    boolean correctDecision = (firstMention && !foundCorefAnt)
        || (foundCorefAnt && Sieve.isReallyCoref(document, m.mentionID, Counters.argmax(probs)));
    boolean barePlural = m.originalSpan.size() == 1 && m.headWord.tag().equals("NNS");
    if (correctDecision) {
        return "";
    }

    sb.append("\nMENTION: " + m.spanToString() + " (" + m.mentionID + ")"
        + "\tperson: " + m.person
        + "\tsingleton? " + (!m.hasTwin)
        + "\t\tisFirstMention? " + firstMention
        + "\t\tfoundAnt? " + foundCorefAnt
        + "\t\tcorrectDecision? " + correctDecision
        + "\tbarePlural? " + barePlural);
    sb.append("\n\ttype: " + m.mentionType
        + "\tHeadword: " + m.headWord.word()
        + "\tNEtype: " + m.nerString
        + "\tnumber: " + m.number
        + "\tgender: " + m.gender
        + "\tanimacy: " + m.animacy);
    sb.append("\n");
    if (m.contextParseTree != null) {
        sb.append(m.contextParseTree.pennString());
    }

    sb.append("\n\n\t\tOracle\t\tDcoref\t\t\tRF\t\tAntecedent\n");
    for (int antID : Counters.toSortedList(probs)) {
        Mention ant = document.predictedMentionsByID.get(antID);
        CorefCluster aC = document.corefClusters.get(ant.corefClusterID);
        boolean oracle = Sieve.isReallyCoref(document, m.mentionID, antID);
        double prob = probs.getCount(antID);
        // Unboxing: throws NullPointerException if antID is missing from the ordering above.
        int order = orders.get(antID);
        String oracleStr = oracle ? "coref   " : "notcoref";

        // The first check that accepts the pair names the row; the order of checks is significant.
        String dcorefStr;
        if (dcorefDiscourse.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-discourse";
        } else if (dcorefExactString.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-exactString";
        } else if (dcorefRelaxedExactString.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-relaxedExact";
        } else if (dcorefPreciseConstructs.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-preciseConstruct";
        } else if (dcorefHead1.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-head1";
        } else if (dcorefHead2.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-head2";
        } else if (dcorefHead3.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-head3";
        } else if (dcorefHead4.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-head4";
        } else if (dcorefRelaxedHead.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-relaxedHead";
        } else if (dcorefPronounSieve.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-pronounSieve";
        } else if (dcorefSpeaker.coreferent(document, mC, aC, m, ant, dict, null)) {
            dcorefStr = "coref-speaker";
        } else {
            dcorefStr = "notcoref";
        }
        dcorefStr = dcorefStr + "\t" + order;

        String probStr = df.format(prob);
        sb.append("\t\t" + oracleStr + "\t" + dcorefStr + "\t" + probStr + "\t\t" + ant.spanToString() + " (" + ant.mentionID + ")\n");
    }
    sb.append("ERROR END -----------------------------------------------------------------------\n");
    return sb.toString();
}
Also used : CorefCluster(edu.stanford.nlp.coref.data.CorefCluster) Mention(edu.stanford.nlp.coref.data.Mention) ArrayList(java.util.ArrayList)

Aggregations

Mention (edu.stanford.nlp.coref.data.Mention): 62; CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 27; CoreLabel (edu.stanford.nlp.ling.CoreLabel): 27; SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 21; ArrayList (java.util.ArrayList): 20; TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations): 17; CoreMap (edu.stanford.nlp.util.CoreMap): 17; List (java.util.List): 15; Tree (edu.stanford.nlp.trees.Tree): 14; IntPair (edu.stanford.nlp.util.IntPair): 14; CorefCluster (edu.stanford.nlp.coref.data.CorefCluster): 12; SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 10; ClassicCounter (edu.stanford.nlp.stats.ClassicCounter): 9; EntityMention (edu.stanford.nlp.ie.machinereading.structure.EntityMention): 7; RelationMention (edu.stanford.nlp.ie.machinereading.structure.RelationMention): 7; ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint): 7; HashMap (java.util.HashMap): 7; HashSet (java.util.HashSet): 7; SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge): 6; Map (java.util.Map): 6