Use of edu.stanford.nlp.coref.data.Mention in the CoreNLP project (stanfordnlp):
class CorefMentionFinder, method extractEnumerations.
/**
 * Finds enumeration-style mention candidates (conjoined NPs matched by
 * {@code enumerationsMentionPattern}) in sentence {@code s} and adds a new
 * {@link Mention} for each span that is not already a known mention and does
 * not lie inside a named-entity span.
 *
 * @param s sentence annotation; must carry tokens, a parse tree, and dependency graphs
 * @param mentions output list that newly created mentions are appended to
 * @param mentionSpanSet spans already covered by a mention; updated in place
 * @param namedEntitySpanSet spans covered by named entities; used only to filter candidates
 */
protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
  Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  if (enhancedDependency == null) {
    // Fall back to the basic graph already fetched above (no need for a second annotation lookup).
    enhancedDependency = basicDependency;
  }
  TregexPattern tgrepPattern = enumerationsMentionPattern;
  TregexMatcher matcher = tgrepPattern.matcher(tree);
  Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap();
  while (matcher.find()) {
    matcher.getMatch();
    // Both conjuncts of the enumeration become candidate mention spans.
    recordSpan(matcher.getNode("m1"), spanToMentionSubTree);
    recordSpan(matcher.getNode("m2"), spanToMentionSubTree);
  }
  for (Map.Entry<IntPair, Tree> spanMention : spanToMentionSubTree.entrySet()) {
    IntPair span = spanMention.getKey();
    // Skip spans already claimed by another mention or contained in a named entity.
    if (!mentionSpanSet.contains(span) && !insideNE(span, namedEntitySpanSet)) {
      int dummyMentionId = -1;
      Mention m = new Mention(dummyMentionId, span.get(0), span.get(1), sent, basicDependency, enhancedDependency, new ArrayList<>(Arrays.asList(sent.subList(span.get(0), span.get(1)).toArray(new CoreLabel[0]))), spanMention.getValue());
      mentions.add(m);
      mentionSpanSet.add(span);
    }
  }
}

/**
 * Records the token span covered by {@code subTree} into {@code spanToMentionSubTree}.
 * The span is [begin, end) in 0-based token offsets, derived from the 1-based
 * {@code IndexAnnotation} on the first and last leaves.
 */
private static void recordSpan(Tree subTree, Map<IntPair, Tree> spanToMentionSubTree) {
  List<Tree> leaves = subTree.getLeaves();
  int beginIdx = ((CoreLabel) leaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
  int endIdx = ((CoreLabel) leaves.get(leaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
  spanToMentionSubTree.put(new IntPair(beginIdx, endIdx), subTree);
}
Use of edu.stanford.nlp.coref.data.Mention in the CoreNLP project (stanfordnlp):
class CoNLLDocumentReader, method extractGoldMentions.
/**
 * Extracts the gold mentions (mention span, mention ID, cluster ID) from the
 * CoNLL document's coref chain map, grouped per sentence.
 *
 * @param conllDoc the parsed CoNLL document carrying sentence annotations and gold chains
 * @return one list of gold {@link Mention}s per sentence, indexed by sentence position
 */
public List<List<Mention>> extractGoldMentions(CoNLLDocument conllDoc) {
  List<CoreMap> sentences = conllDoc.getAnnotation().get(CoreAnnotations.SentencesAnnotation.class);
  List<List<Mention>> allGoldMentions = new ArrayList<>();
  CollectionValuedMap<String, CoreMap> chains = conllDoc.getCorefChainMap();
  // One (initially empty) mention list per sentence.
  for (int i = 0; i < sentences.size(); i++) {
    allGoldMentions.add(new ArrayList<>());
  }
  for (String chainIdStr : chains.keySet()) {
    int clusterId = Integer.parseInt(chainIdStr);
    for (CoreMap span : chains.get(chainIdStr)) {
      int sentIdx = span.get(CoreAnnotations.SentenceIndexAnnotation.class);
      CoreMap sentence = sentences.get(sentIdx);
      // Document-level token offsets are rebased to sentence-local indices.
      int sentenceTokenBegin = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
      Mention gold = new Mention();
      gold.goldCorefClusterID = clusterId;
      gold.startIndex = span.get(CoreAnnotations.TokenBeginAnnotation.class) - sentenceTokenBegin;
      gold.endIndex = span.get(CoreAnnotations.TokenEndAnnotation.class) - sentenceTokenBegin;
      gold.originalSpan = span.get(CoreAnnotations.TokensAnnotation.class);
      allGoldMentions.get(sentIdx).add(gold);
    }
  }
  return allGoldMentions;
}
Use of edu.stanford.nlp.coref.data.Mention in the CoreNLP project (stanfordnlp):
class HybridCorefPrinter, method printErrorLogDcoref.
/**
 * Builds a human-readable error-log entry for a dcoref (rule-based) resolver
 * decision linking mention {@code m} to antecedent {@code found}.
 * Returns the empty string when {@code document.isCoref(m, found)} says the
 * decision was correct, so only erroneous decisions are logged.
 *
 * @param m the anaphoric mention under review
 * @param found the antecedent the resolver chose
 * @param document the coref document (clusters, predicted mentions, doc info)
 * @param dict dictionaries passed through to the sieves
 * @param mIdx index of {@code m} within its sentence's mention list
 * @param whichResolver label printed in the log header
 * @throws Exception propagated from the sieves' coreferent() calls
 */
public static String printErrorLogDcoref(Mention m, Mention found, Document document, Dictionaries dict, int mIdx, String whichResolver) throws Exception {
StringBuilder sb = new StringBuilder();
sb.append("\nERROR START-----------------------------------------------------------------------\n");
sb.append("RESOLVER TYPE: ").append(whichResolver).append("\n");
sb.append("DOCUMENT: " + document.docInfo.get("DOC_ID") + ", " + document.docInfo.get("DOC_PART")).append("\n");
List<Mention> orderedAnts = new ArrayList<>();
// Print sentences 0 .. m.sentNum (sentIdx runs forward as sentDist counts down),
// first annotated with gold cluster IDs, then with mention IDs.
// NOTE(review): the boolean flags to sentenceStringWithMention presumably select
// gold vs. predicted and cluster-ID vs. mention-ID rendering — confirm in its javadoc.
sb.append("\nGOLD CLUSTER ID\n");
for (int sentDist = m.sentNum; sentDist >= 0; sentDist--) {
int sentIdx = m.sentNum - sentDist;
sb.append("\tSENT " + sentIdx + "\t" + sentenceStringWithMention(sentIdx, document, true, true)).append("\n");
}
sb.append("\nMENTION ID\n");
for (int sentDist = m.sentNum; sentDist >= 0; sentDist--) {
int sentIdx = m.sentNum - sentDist;
sb.append("\tSENT " + sentIdx + "\t" + sentenceStringWithMention(sentIdx, document, false, false)).append("\n");
}
// get dcoref antecedents ordering
for (int sentDist = 0; sentDist <= m.sentNum; sentDist++) {
int sentIdx = m.sentNum - sentDist;
orderedAnts.addAll(Sieve.getOrderedAntecedents(m, sentIdx, mIdx, document.predictedMentions, dict));
}
// Map each candidate antecedent's mention ID to its rank in the dcoref ordering.
Map<Integer, Integer> orders = Generics.newHashMap();
for (int i = 0; i < orderedAnts.size(); i++) {
Mention ant = orderedAnts.get(i);
orders.put(ant.mentionID, i);
}
CorefCluster mC = document.corefClusters.get(m.corefClusterID);
boolean isFirstMention = isFirstMention(m, document);
// we're printing only mentions that found coref antecedent
boolean foundCorefAnt = true;
boolean correctDecision = document.isCoref(m, found);
// Correct decisions produce no log output.
if (correctDecision)
return "";
sb.append("\nMENTION: " + m.spanToString() + " (" + m.mentionID + ")\tperson: " + m.person + "\tsingleton? " + (!m.hasTwin) + "\t\tisFirstMention? " + isFirstMention + "\t\tfoundAnt? " + foundCorefAnt + "\t\tcorrectDecision? " + correctDecision);
sb.append("\n\ttype: " + m.mentionType + "\tHeadword: " + m.headWord.word() + "\tNEtype: " + m.nerString + "\tnumber: " + m.number + "\tgender: " + m.gender + "\tanimacy: " + m.animacy).append("\n");
if (m.contextParseTree != null)
sb.append(m.contextParseTree.pennString());
sb.append("\n\n\t\tOracle\t\tDcoref\t\t\tRF\t\tAntecedent\n");
// For every candidate antecedent, record the oracle judgement and the FIRST
// dcoref sieve (in the cascade below) that would link m and ant.
// NOTE(review): here the speaker sieve is tried first; printErrorLog tries it
// last — presumably intentional for the dcoref variant, but worth confirming.
for (Mention ant : orderedAnts) {
int antID = ant.mentionID;
CorefCluster aC = document.corefClusters.get(ant.corefClusterID);
boolean oracle = Sieve.isReallyCoref(document, m.mentionID, antID);
int order = orders.get(antID);
String oracleStr = (oracle) ? "coref " : "notcoref";
// String dcorefStr = (dcoref)? "coref " : "notcoref";
String dcorefStr = "notcoref";
// Dangling-else chain: the "else" below attaches to the following if, so this
// whole cascade is one if/else-if ladder starting with the speaker sieve.
if (dcorefSpeaker.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-speaker";
else // else if(dcorefChineseHeadMatch.coreferent(document, mC, aC, m, ant, dict, null)) dcorefStr = "coref-chineseHeadMatch";
if (dcorefDiscourse.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-discourse";
else if (dcorefExactString.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-exactString";
else if (dcorefRelaxedExactString.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-relaxedExact";
else if (dcorefPreciseConstructs.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-preciseConstruct";
else if (dcorefHead1.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-head1";
else if (dcorefHead2.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-head2";
else if (dcorefHead3.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-head3";
else if (dcorefHead4.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-head4";
else if (dcorefRelaxedHead.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-relaxedHead";
else if (dcorefPronounSieve.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-pronounSieve";
dcorefStr += "\t" + String.valueOf(order);
sb.append("\t\t" + oracleStr + "\t" + dcorefStr + "\t\t" + ant.spanToString() + " (" + ant.mentionID + ")\n");
}
sb.append("ERROR END -----------------------------------------------------------------------\n");
return sb.toString();
}
Use of edu.stanford.nlp.coref.data.Mention in the CoreNLP project (stanfordnlp):
class HybridCorefPrinter, method printMentionDetectionLog.
/**
 * Builds a per-sentence error log for mention detection: for each sentence it
 * prints the gold and predicted mention renderings, flags predicted mentions
 * without a gold twin as SPURIOUS, and lists gold mentions without a predicted
 * twin as "missed gold" together with contextual detail (next token, parse tree).
 *
 * @param document the coref document holding gold and predicted mentions
 * @return the formatted log text
 */
public static String printMentionDetectionLog(Document document) {
StringBuilder sbLog = new StringBuilder();
List<CoreMap> sentences = document.annotation.get(SentencesAnnotation.class);
sbLog.append("\nERROR START-----------------------------------------------------------------------\n");
for (int i = 0; i < sentences.size(); i++) {
sbLog.append("\nSENT ").append(i).append(" GOLD : ").append(HybridCorefPrinter.sentenceStringWithMention(i, document, true, false)).append("\n");
sbLog.append("SENT ").append(i).append(" PREDICT: ").append(HybridCorefPrinter.sentenceStringWithMention(i, document, false, false)).append("\n");
// Predicted mentions: hasTwin == false means no aligned gold mention, i.e. spurious.
for (Mention p : document.predictedMentions.get(i)) {
sbLog.append("\n");
if (!p.hasTwin)
sbLog.append("\tSPURIOUS");
sbLog.append("\tmention: ").append(p.spanToString()).append("\t\t\theadword: ").append(p.headString).append("\tPOS: ").append(p.headWord.tag()).append("\tmentiontype: ").append(p.mentionType).append("\tnumber: ").append(p.number).append("\tgender: ").append(p.gender).append("\tanimacy: ").append(p.animacy).append("\tperson: ").append(p.person).append("\tNE: ").append(p.nerString);
}
sbLog.append("\n");
// Gold mentions without a predicted twin were missed by mention detection.
for (Mention g : document.goldMentions.get(i)) {
if (!g.hasTwin) {
sbLog.append("\tmissed gold: ").append(g.spanToString()).append("\tPOS: ").append(g.headWord.tag()).append("\tmentiontype: ").append(g.mentionType).append("\theadword: ").append(g.headString).append("\tnumber: ").append(g.number).append("\tgender: ").append(g.gender).append("\tanimacy: ").append(g.animacy).append("\tperson: ").append(g.person).append("\tNE: ").append(g.nerString).append("\n");
// Token just past the mention's end index, when the sentence has one.
if (g.sentenceWords != null)
if (g.sentenceWords.size() > g.endIndex)
sbLog.append("\tnextword: ").append(g.sentenceWords.get(g.endIndex)).append("\t").append(g.sentenceWords.get(g.endIndex).tag()).append("\n");
if (g.contextParseTree != null)
sbLog.append(g.contextParseTree.pennString()).append("\n\n");
else
sbLog.append("\n\n");
}
}
if (sentences.get(i).get(TreeAnnotation.class) != null)
sbLog.append("\n\tparse: \n").append(sentences.get(i).get(TreeAnnotation.class).pennString());
// NOTE(review): the label says "collapsedDependency" but the value printed is the
// BASIC dependencies annotation — label and content disagree; confirm which is intended.
sbLog.append("\n\tcollapsedDependency: \n").append(sentences.get(i).get(BasicDependenciesAnnotation.class));
}
sbLog.append("ERROR END -----------------------------------------------------------------------\n");
return sbLog.toString();
}
Use of edu.stanford.nlp.coref.data.Mention in the CoreNLP project (stanfordnlp):
class HybridCorefPrinter, method printErrorLog.
/**
 * Builds a human-readable error-log entry for an RF (random-forest) sieve
 * decision on mention {@code m}. The decision counts as correct — and nothing
 * is logged — when either m is a first mention and no antecedent scored above
 * the merge threshold, or the argmax-probability antecedent is truly coreferent.
 *
 * @param m the anaphoric mention under review
 * @param document the coref document (clusters, predicted mentions, doc info)
 * @param probs merge probability per candidate antecedent mention ID
 * @param mIdx index of {@code m} within its sentence's mention list
 * @param dict dictionaries passed through to the sieves
 * @param sieve the RF sieve whose decision is being analyzed (supplies maxSentDist, thresMerge)
 * @throws Exception propagated from the sieves' coreferent() calls
 */
public static String printErrorLog(Mention m, Document document, Counter<Integer> probs, int mIdx, Dictionaries dict, RFSieve sieve) throws Exception {
StringBuilder sb = new StringBuilder();
sb.append("\nERROR START-----------------------------------------------------------------------\n");
sb.append("RESOLVER TYPE: mType: " + sieve.mType + ", aType: " + sieve.aType).append("\n");
sb.append("DOCUMENT: " + document.docInfo.get("DOC_ID") + ", " + document.docInfo.get("DOC_PART")).append("\n");
List<Mention> orderedAnts = new ArrayList<>();
// Print sentences 0 .. m.sentNum, marking where the sieve's sentence-distance
// window begins; first with gold cluster IDs, then with mention IDs.
sb.append("\nGOLD CLUSTER ID\n");
for (int sentDist = m.sentNum; sentDist >= 0; sentDist--) {
if (sentDist == sieve.maxSentDist)
sb.append("\tstart compare from here-------------\n");
int sentIdx = m.sentNum - sentDist;
sb.append("\tSENT " + sentIdx + "\t" + sentenceStringWithMention(sentIdx, document, true, true)).append("\n");
}
sb.append("\nMENTION ID\n");
for (int sentDist = m.sentNum; sentDist >= 0; sentDist--) {
if (sentDist == sieve.maxSentDist)
sb.append("\tstart compare from here-------------\n");
int sentIdx = m.sentNum - sentDist;
sb.append("\tSENT " + sentIdx + "\t" + sentenceStringWithMention(sentIdx, document, false, false)).append("\n");
}
// get dcoref antecedents ordering
// Only sentences within the sieve's maxSentDist window are considered.
for (int sentDist = 0; sentDist <= Math.min(sieve.maxSentDist, m.sentNum); sentDist++) {
int sentIdx = m.sentNum - sentDist;
orderedAnts.addAll(Sieve.getOrderedAntecedents(m, sentIdx, mIdx, document.predictedMentions, dict));
}
// Map each candidate antecedent's mention ID to its rank in the dcoref ordering.
Map<Integer, Integer> orders = Generics.newHashMap();
for (int i = 0; i < orderedAnts.size(); i++) {
Mention ant = orderedAnts.get(i);
orders.put(ant.mentionID, i);
}
CorefCluster mC = document.corefClusters.get(m.corefClusterID);
boolean isFirstMention = isFirstMention(m, document);
// An antecedent was "found" when the best probability clears the merge threshold.
boolean foundCorefAnt = (probs.size() > 0 && Counters.max(probs) > sieve.thresMerge);
boolean correctDecision = ((isFirstMention && !foundCorefAnt) || (foundCorefAnt && Sieve.isReallyCoref(document, m.mentionID, Counters.argmax(probs))));
// Bare plural: single-token span whose head is tagged NNS.
boolean barePlural = (m.originalSpan.size() == 1 && m.headWord.tag().equals("NNS"));
// Correct decisions produce no log output.
if (correctDecision)
return "";
sb.append("\nMENTION: " + m.spanToString() + " (" + m.mentionID + ")\tperson: " + m.person + "\tsingleton? " + (!m.hasTwin) + "\t\tisFirstMention? " + isFirstMention + "\t\tfoundAnt? " + foundCorefAnt + "\t\tcorrectDecision? " + correctDecision + "\tbarePlural? " + barePlural);
sb.append("\n\ttype: " + m.mentionType + "\tHeadword: " + m.headWord.word() + "\tNEtype: " + m.nerString + "\tnumber: " + m.number + "\tgender: " + m.gender + "\tanimacy: " + m.animacy).append("\n");
if (m.contextParseTree != null)
sb.append(m.contextParseTree.pennString());
sb.append("\n\n\t\tOracle\t\tDcoref\t\t\tRF\t\tAntecedent\n");
// For each scored antecedent (sorted by probability), record the oracle judgement,
// the first dcoref sieve that would link the pair, its dcoref rank, and the RF probability.
for (int antID : Counters.toSortedList(probs)) {
Mention ant = document.predictedMentionsByID.get(antID);
CorefCluster aC = document.corefClusters.get(ant.corefClusterID);
boolean oracle = Sieve.isReallyCoref(document, m.mentionID, antID);
double prob = probs.getCount(antID);
int order = orders.get(antID);
String oracleStr = (oracle) ? "coref " : "notcoref";
// String dcorefStr = (dcoref)? "coref " : "notcoref";
String dcorefStr = "notcoref";
// Dangling-else chain: the "else" below attaches to the following if, so this
// whole cascade is one if/else-if ladder (here the speaker sieve is tried LAST,
// unlike printErrorLogDcoref which tries it first).
if (dcorefDiscourse.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-discourse";
else // else if(dcorefChineseHeadMatch.coreferent(document, mC, aC, m, ant, dict, null)) dcorefStr = "coref-chineseHeadMatch";
if (dcorefExactString.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-exactString";
else if (dcorefRelaxedExactString.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-relaxedExact";
else if (dcorefPreciseConstructs.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-preciseConstruct";
else if (dcorefHead1.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-head1";
else if (dcorefHead2.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-head2";
else if (dcorefHead3.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-head3";
else if (dcorefHead4.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-head4";
else if (dcorefRelaxedHead.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-relaxedHead";
else if (dcorefPronounSieve.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-pronounSieve";
else if (dcorefSpeaker.coreferent(document, mC, aC, m, ant, dict, null))
dcorefStr = "coref-speaker";
dcorefStr += "\t" + String.valueOf(order);
String probStr = df.format(prob);
sb.append("\t\t" + oracleStr + "\t" + dcorefStr + "\t" + probStr + "\t\t" + ant.spanToString() + " (" + ant.mentionID + ")\n");
}
sb.append("ERROR END -----------------------------------------------------------------------\n");
return sb.toString();
}
Aggregations