Example use of edu.stanford.nlp.coref.data.Mention in the CoreNLP project by stanfordnlp.
From class HybridCorefPrinter, method linkDistanceAnalysis.
/**
 * Builds and prints histograms of the sentence distance between a predicted mention and the
 * first type-compatible candidate antecedent that shares its gold coreference cluster id.
 *
 * <p>Documents are pulled from the coref system's document maker until it returns {@code null}.
 * For each predicted mention, sentences are searched from the mention's own sentence backwards
 * (distance 0, 1, 2, ...); candidates for each sentence come from
 * {@code Sieve.getOrderedAntecedents}. The search for a mention stops at the first accepted
 * candidate, whose distance is tallied in the histogram for the mention's type. Four histograms
 * (PROPER, COMMON, PRONOUN, LIST) are written to standard output, sorted by key.
 *
 * @param args command-line arguments, converted to properties used to build the coref system
 * @throws Exception propagated from system construction or document processing
 */
public static void linkDistanceAnalysis(String[] args) throws Exception {
  Properties props = StringUtils.argsToProperties(args);
  HybridCorefSystem cs = new HybridCorefSystem(props);
  cs.docMaker.resetDocs();

  Counter<Integer> proper = new ClassicCounter<>();
  Counter<Integer> common = new ClassicCounter<>();
  Counter<Integer> pronoun = new ClassicCounter<>();
  Counter<Integer> list = new ClassicCounter<>();

  for (Document doc = cs.docMaker.nextDoc(); doc != null; doc = cs.docMaker.nextDoc()) {
    for (int sentNo = 0; sentNo < doc.predictedMentions.size(); sentNo++) {
      List<Mention> mentionsInSent = doc.predictedMentions.get(sentNo);
      for (int mentionPos = 0; mentionPos < mentionsInSent.size(); mentionPos++) {
        Mention mention = mentionsInSent.get(mentionPos);

        // Walk backwards one sentence at a time; stop as soon as one link has been tallied.
        boolean linked = false;
        for (int dist = 0; dist <= sentNo && !linked; dist++) {
          List<Mention> antecedents = Sieve.getOrderedAntecedents(
              mention, sentNo - dist, mentionPos, doc.predictedMentions, cs.dictionaries);
          for (Mention antecedent : antecedents) {
            if (antecedent == mention) {
              continue;
            }
            // ignore cataphora
            if (dist == 0 && mention.appearEarlierThan(antecedent)) {
              continue;
            }
            if (antecedent.goldCorefClusterID != mention.goldCorefClusterID) {
              continue;
            }
            Counter<Integer> histogram =
                histogramForLink(mention, antecedent, proper, common, pronoun, list);
            if (histogram != null) {
              histogram.incrementCount(dist);
              linked = true;
              break;
            }
          }
        }
      }
    }
  }

  System.out.println("PROPER -------------------------------------------");
  Counters.printCounterSortedByKeys(proper);
  System.out.println("COMMON -------------------------------------------");
  Counters.printCounterSortedByKeys(common);
  System.out.println("PRONOUN -------------------------------------------");
  Counters.printCounterSortedByKeys(pronoun);
  System.out.println("LIST -------------------------------------------");
  Counters.printCounterSortedByKeys(list);
  log.info();
}

/**
 * Picks the histogram that a (mention, antecedent) link should be tallied in, or {@code null}
 * when the antecedent's type is not accepted for the mention's type.
 */
private static Counter<Integer> histogramForLink(Mention mention, Mention antecedent,
    Counter<Integer> proper, Counter<Integer> common,
    Counter<Integer> pronoun, Counter<Integer> list) {
  MentionType antecedentType = antecedent.mentionType;
  switch (mention.mentionType) {
    case NOMINAL:
      // a nominal mention accepts nominal or proper antecedents
      return (antecedentType == MentionType.NOMINAL || antecedentType == MentionType.PROPER)
          ? common : null;
    case PROPER:
      return (antecedentType == MentionType.PROPER) ? proper : null;
    case PRONOMINAL:
      // a pronominal mention accepts an antecedent of any type
      return pronoun;
    case LIST:
      return (antecedentType == MentionType.LIST) ? list : null;
    default:
      return null;
  }
}
Example use of edu.stanford.nlp.coref.data.Mention in the CoreNLP project by stanfordnlp.
From class CorefPrinter, method printConllOutput.
/**
 * Renders a document as CoNLL-style text with a coreference column built from the given mentions.
 *
 * <p>Each output row repeats all but the last column of the corresponding row in
 * {@code document.conllDoc.sentenceWordLists} (tab-separated) and then appends a coreference
 * column: {@code (id} where a multi-token mention starts, {@code (id)} for a single-token
 * mention, {@code id)} where a multi-token mention ends, entries joined by {@code |}, or
 * {@code -} when no mention touches the token. Sentences are separated by a blank line and the
 * whole text is wrapped in {@code #begin document <docID>} / {@code #end document} lines.
 *
 * @param document the document supplying the annotation and the original CoNLL rows
 * @param orderedMentions mentions per sentence, indexed by sentence number
 * @param gold if true, gold cluster ids are written; otherwise predicted cluster ids
 * @return the formatted document text
 */
public static String printConllOutput(Document document, List<List<Mention>> orderedMentions, boolean gold) {
  Annotation annotation = document.annotation;
  List<List<String[]>> conllSentences = document.conllDoc.sentenceWordLists;
  String docID = annotation.get(CoreAnnotations.DocIDAnnotation.class);

  StringBuilder out = new StringBuilder();
  out.append("#begin document ").append(docID).append("\n");

  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  for (int s = 0; s < sentences.size(); s++) {
    List<CoreLabel> tokens = sentences.get(s).get(CoreAnnotations.TokensAnnotation.class);
    List<String[]> conllRows = conllSentences.get(s);
    int tokenCount = tokens.size();

    // Per-token buckets: mentions opening here, closing here, or doing both (single token).
    Map<Integer, Set<Mention>> opening = Generics.newHashMap();
    Map<Integer, Set<Mention>> closing = Generics.newHashMap();
    Map<Integer, Set<Mention>> singleToken = Generics.newHashMap();
    for (int t = 0; t < tokenCount; t++) {
      opening.put(t, new LinkedHashSet<>());
      closing.put(t, new LinkedHashSet<>());
      singleToken.put(t, new LinkedHashSet<>());
    }

    for (Mention m : orderedMentions.get(s)) {
      int lastToken = m.endIndex - 1; // endIndex is used as an exclusive bound here
      if (lastToken == m.startIndex) {
        singleToken.get(m.startIndex).add(m);
      } else {
        opening.get(m.startIndex).add(m);
        closing.get(lastToken).add(m);
      }
    }

    for (int t = 0; t < tokenCount; t++) {
      StringBuilder tags = new StringBuilder();
      appendMentionTags(tags, opening.get(t), gold, "(", "");
      appendMentionTags(tags, singleToken.get(t), gold, "(", ")");
      appendMentionTags(tags, closing.get(t), gold, "", ")");
      String corefColumn = (tags.length() == 0) ? "-" : tags.toString();

      // Copy every original column except the last one, which the new coref column replaces.
      String[] columns = conllRows.get(t);
      for (int c = 0; c < columns.length - 1; c++) {
        out.append(columns[c]).append("\t");
      }
      out.append(corefColumn).append("\n");
    }
    out.append("\n");
  }

  out.append("#end document").append("\n");
  return out.toString();
}

/**
 * Appends one tag per mention to {@code tags}, each as {@code open + clusterId + close},
 * inserting {@code |} before a tag whenever {@code tags} already has content.
 */
private static void appendMentionTags(StringBuilder tags, Set<Mention> mentions, boolean gold,
    String open, String close) {
  for (Mention m : mentions) {
    if (tags.length() > 0) {
      tags.append("|");
    }
    int clusterId = gold ? m.goldCorefClusterID : m.corefClusterID;
    tags.append(open).append(clusterId).append(close);
  }
}
Example use of edu.stanford.nlp.coref.data.Mention in the CoreNLP project by stanfordnlp.
From class CorefRules, method getSpeakerClusterId.
/**
 * Resolves a speaker string to a coref cluster id.
 *
 * <p>The speaker info registered in the document for {@code speakerString} is consulted first.
 * If that does not yield a non-negative id and the string is a decimal integer, the string is
 * treated as a mention id and looked up in the document's predicted mentions; a mention found
 * this way is also added to the speaker info when one exists.
 *
 * @param document The document to search in
 * @param speakerString The name (or numeric mention id) to search for; may be {@code null}
 * @return the cluster id found, or -1 when {@code speakerString} is {@code null} or neither
 *     lookup produces an id
 */
public static int getSpeakerClusterId(Document document, String speakerString) {
  if (speakerString == null) {
    return -1;
  }

  // First attempt: cluster id recorded on the speaker info, if any.
  int clusterId = -1;
  SpeakerInfo info = document.getSpeakerInfo(speakerString);
  if (info != null) {
    clusterId = info.getCorefClusterId();
  }
  if (clusterId >= 0 || !NumberMatchingRegex.isDecimalInteger(speakerString)) {
    return clusterId;
  }

  // Second attempt: a numeric speaker string is interpreted as a mention id.
  try {
    int mentionId = Integer.parseInt(speakerString);
    Mention speakerMention = document.predictedMentionsByID.get(mentionId);
    if (speakerMention != null) {
      clusterId = speakerMention.corefClusterID;
      if (info != null) {
        info.addMention(speakerMention);
      }
    }
  } catch (Exception ignored) {
    // Best-effort lookup: any failure here leaves clusterId at whatever was set so far.
  }
  return clusterId;
}
Example use of edu.stanford.nlp.coref.data.Mention in the CoreNLP project by stanfordnlp.
From class CorefRules, method entityAlias.
/**
 * Tests whether the representative mentions of two clusters are aliases of each other.
 *
 * <p>Only pairs in which both representative mentions are PROPER are considered; for those, the
 * decision is delegated via reflection to an {@code alias(Mention, Mention)} method looked up on
 * the runtime class of {@code semantics.wordnet}.
 *
 * @param mentionCluster cluster whose representative mention is checked
 * @param potentialAntecedent candidate antecedent cluster
 * @param semantics holder of the {@code wordnet} object that provides {@code alias}
 * @param dict not used by this method
 * @return {@code true} if both mentions are PROPER and the {@code alias} call returns true
 * @throws Exception if the reflective lookup or invocation fails
 */
public static boolean entityAlias(CorefCluster mentionCluster, CorefCluster potentialAntecedent, Semantics semantics, Dictionaries dict) throws Exception {
  Mention mention = mentionCluster.getRepresentativeMention();
  Mention antecedent = potentialAntecedent.getRepresentativeMention();

  boolean bothProper = mention.mentionType == MentionType.PROPER
      && antecedent.mentionType == MentionType.PROPER;
  if (!bothProper) {
    return false;
  }

  Object wordnet = semantics.wordnet;
  Method aliasMethod = wordnet.getClass().getMethod("alias", Mention.class, Mention.class);
  return (Boolean) aliasMethod.invoke(wordnet, mention, antecedent);
}
Example use of edu.stanford.nlp.coref.data.Mention in the CoreNLP project by stanfordnlp.
From class CorefUtils, method mergeCoreferenceClusters.
/**
 * Merges the coref clusters of the two mentions identified by {@code mentionPair}.
 *
 * <p>Does nothing when both mentions already carry the same cluster id. Otherwise the clusters
 * are handed to {@code CorefCluster.mergeClusters} (target: the second mention's cluster,
 * source: the first mention's cluster) and the entry under the first mention's original cluster
 * id is removed from {@code document.corefClusters}.
 *
 * @param mentionPair ids of the two predicted mentions to link
 * @param document document holding the mentions and the cluster map; its cluster map is modified
 */
public static void mergeCoreferenceClusters(Pair<Integer, Integer> mentionPair, Document document) {
  Mention first = document.predictedMentionsByID.get(mentionPair.first);
  Mention second = document.predictedMentionsByID.get(mentionPair.second);

  // Capture the ids before merging so the id to remove is the first mention's original one.
  int absorbedId = first.corefClusterID;
  int survivingId = second.corefClusterID;
  if (absorbedId != survivingId) {
    CorefCluster absorbed = document.corefClusters.get(absorbedId);
    CorefCluster surviving = document.corefClusters.get(survivingId);
    CorefCluster.mergeClusters(surviving, absorbed);
    document.corefClusters.remove(absorbedId);
  }
}
Aggregations