use of edu.stanford.nlp.util.IntTuple in project CoreNLP by stanfordnlp.
the class DocumentPreprocessor method findParagraphSpeaker.
/**
 * Makes sure the utterance {@code paragraphUtterIndex} has a speaker recorded in
 * {@code doc.speakers}, then delegates to {@code findNextParagraphSpeaker}.
 *
 * <p>Nothing is recorded if the utterance already has a speaker. Otherwise
 * {@code nextParagraphSpeaker} is used when it is non-empty. Failing that, the last sentence of
 * {@code paragraph} is scanned token by token: the scan stops at the first token whose POS tag
 * starts with "V"; before that, each token whose NER tag starts with "PER" and whose
 * (sentence, token) position is a key of {@code doc.mentionheadPositions} sets the candidate
 * speaker to that mention's ID (a later match replaces an earlier one). The candidate is stored
 * only when no verb was seen.
 *
 * <p>Tokens that carry no POS or NER annotation are skipped for the corresponding check instead
 * of raising a {@code NullPointerException}.
 *
 * @param doc document whose {@code speakers} map is read and updated
 * @param paragraph sentences of the paragraph; only the last one is inspected
 * @param paragraphUtterIndex key under which the speaker is stored in {@code doc.speakers}
 * @param nextParagraphSpeaker speaker to use directly when non-empty
 * @param paragraphOffset added to the in-paragraph sentence index to form the sentence
 *        component of the mention-head lookup key
 * @param dict passed through unchanged to {@code findNextParagraphSpeaker}
 * @return the empty string when no speaker is recorded, {@code nextParagraphSpeaker} is empty
 *         and {@code paragraph} is empty; otherwise the result of
 *         {@code findNextParagraphSpeaker(doc, paragraph, paragraphOffset, dict)}
 */
private static String findParagraphSpeaker(Document doc, List<CoreMap> paragraph, int paragraphUtterIndex, String nextParagraphSpeaker, int paragraphOffset, Dictionaries dict) {
  if (!doc.speakers.containsKey(paragraphUtterIndex)) {
    if (!nextParagraphSpeaker.isEmpty()) {
      doc.speakers.put(paragraphUtterIndex, nextParagraphSpeaker);
    } else {
      // cdm [Sept 2015] added this check to try to avoid crash
      if (paragraph.isEmpty()) {
        Redwood.log("debug-preprocessor", "Empty paragraph; skipping findParagraphSpeaker");
        return "";
      }
      int lastSentIndex = paragraph.size() - 1;
      // Look the token list up once rather than on every loop iteration.
      List<CoreLabel> tokens = paragraph.get(lastSentIndex).get(CoreAnnotations.TokensAnnotation.class);
      String speaker = "";
      boolean hasVerb = false;
      for (int i = 0; i < tokens.size(); i++) {
        CoreLabel w = tokens.get(i);
        String pos = w.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        String ner = w.get(CoreAnnotations.NamedEntityTagAnnotation.class);
        // A missing annotation comes back as null; treat it as "not a verb" / "not a person".
        if (pos != null && pos.startsWith("V")) {
          hasVerb = true;
          break;
        }
        if (ner != null && ner.startsWith("PER")) {
          IntTuple headPosition = new IntTuple(2);
          headPosition.set(0, lastSentIndex + paragraphOffset);
          headPosition.set(1, i);
          if (doc.mentionheadPositions.containsKey(headPosition)) {
            speaker = Integer.toString(doc.mentionheadPositions.get(headPosition).mentionID);
          }
        }
      }
      // Only a verb-less last sentence is taken as naming the speaker.
      if (!hasVerb && !speaker.isEmpty()) {
        doc.speakers.put(paragraphUtterIndex, speaker);
      }
    }
  }
  return findNextParagraphSpeaker(doc, paragraph, paragraphOffset, dict);
}
use of edu.stanford.nlp.util.IntTuple in project CoreNLP by stanfordnlp.
the class Document method initializeCorefCluster.
/**
 * Registers every predicted mention and gives each one its own singleton coref cluster.
 *
 * <p>For the mention at sentence index {@code s} and in-sentence index {@code k} this fills
 * {@code allPredictedMentions} (by mention ID), {@code positions} (mention to (s, k)),
 * {@code corefClusters} (cluster ID equal to the mention ID) and {@code mentionheadPositions}
 * ((s, headIndex) to mention), and sets the mention's {@code sentNum} and
 * {@code corefClusterID}. A mention ID that is already registered is logged as a warning and
 * then trips an assertion when assertions are enabled.
 */
private void initializeCorefCluster() {
  for (int sentIdx = 0; sentIdx < predictedOrderedMentionsBySentence.size(); sentIdx++) {
    List<Mention> sentenceMentions = predictedOrderedMentionsBySentence.get(sentIdx);
    for (int mentionIdx = 0; mentionIdx < sentenceMentions.size(); mentionIdx++) {
      Mention mention = sentenceMentions.get(mentionIdx);
      int id = mention.mentionID;

      if (allPredictedMentions.containsKey(id)) {
        Mention previous = allPredictedMentions.get(id);
        SieveCoreferenceSystem.logger.warning("WARNING: Already contain mention " + id);
        SieveCoreferenceSystem.logger.warning("OLD mention: " + previous.spanToString() + "[" + previous.startIndex + "," + previous.endIndex + "]");
        SieveCoreferenceSystem.logger.warning("NEW mention: " + mention.spanToString() + "[" + mention.startIndex + "," + mention.endIndex + "]");
      }
      assert (!allPredictedMentions.containsKey(id));
      allPredictedMentions.put(id, mention);

      // (sentence index, index of the mention within the sentence)
      IntTuple mentionPosition = new IntTuple(2);
      mentionPosition.set(0, sentIdx);
      mentionPosition.set(1, mentionIdx);
      positions.put(mention, mentionPosition);
      mention.sentNum = sentIdx;

      // every mention starts out alone in a cluster that carries its own ID
      assert (!corefClusters.containsKey(id));
      CorefCluster singleton = new CorefCluster(id, Generics.newHashSet(Collections.singletonList(mention)));
      corefClusters.put(id, singleton);
      mention.corefClusterID = id;

      // (sentence index, head token index) -> mention
      IntTuple headPosition = new IntTuple(2);
      headPosition.set(0, sentIdx);
      headPosition.set(1, mention.headIndex);
      mentionheadPositions.put(headPosition, mention);
    }
  }
}
use of edu.stanford.nlp.util.IntTuple in project CoreNLP by stanfordnlp.
the class Document method findSpeaker.
/**
 * Searches tokens {@code [startIndex, endIndex)} of sentence {@code sentNum} for a reporting
 * verb with an {@code nsubj} child and records that subject as the speaker of utterance
 * {@code utterNum}.
 *
 * <p>Only tokens whose {@code UtteranceAnnotation} is 0 are considered. A token counts as a
 * reporting verb when its lemma is in {@code dict.reportVerb}. The speaker stored in
 * {@code speakers} is the mention ID (as a string) when the subject's (sentence, token)
 * position is a key of {@code mentionheadPositions}, and the subject's word otherwise.
 *
 * @param utterNum key under which the speaker is stored
 * @param sentNum index into {@code sentences}
 * @param sentences sentences to read tokens and enhanced dependencies from
 * @param startIndex first token index to inspect (inclusive)
 * @param endIndex token index at which to stop (exclusive)
 * @param dict supplies the set of reporting-verb lemmas
 * @return {@code true} as soon as a speaker has been recorded, {@code false} if none was found
 */
private boolean findSpeaker(int utterNum, int sentNum, List<CoreMap> sentences, int startIndex, int endIndex, Dictionaries dict) {
  CoreMap sentence = sentences.get(sentNum);
  List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);

  for (int tokenIdx = startIndex; tokenIdx < endIndex; tokenIdx++) {
    CoreLabel token = tokens.get(tokenIdx);
    if (token.get(CoreAnnotations.UtteranceAnnotation.class) != 0) {
      continue;
    }

    String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
    String word = token.get(CoreAnnotations.TextAnnotation.class);
    if (!dict.reportVerb.contains(lemma)) {
      continue;
    }

    // find subject
    SemanticGraph dependency = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    // NOTE(review): the verb node is looked up by its word text, not by tokenIdx; if the same
    // word occurs more than once in the sentence the node found may be a different token -- confirm.
    IndexedWord verbNode = dependency.getNodeByWordPattern(word);
    if (verbNode == null) {
      SieveCoreferenceSystem.logger.warning("Cannot find node in dependency for word " + word);
      continue;
    }

    for (Pair<GrammaticalRelation, IndexedWord> child : dependency.childPairs(verbNode)) {
      if (!child.first().getShortName().equals("nsubj")) {
        continue;
      }
      IndexedWord subject = child.second();
      String subjectString = subject.word();

      // dependency indices start from 1, mention head positions from 0
      IntTuple headPosition = new IntTuple(2);
      headPosition.set(0, sentNum);
      headPosition.set(1, subject.index() - 1);

      String speaker = mentionheadPositions.containsKey(headPosition)
          ? Integer.toString(mentionheadPositions.get(headPosition).mentionID)
          : subjectString;
      speakers.put(utterNum, speaker);
      return true;
    }
  }
  return false;
}
use of edu.stanford.nlp.util.IntTuple in project CoreNLP by stanfordnlp.
the class DeterministicCorefAnnotator method addObsoleteCoreferenceAnnotations.
// TODO: Aim to get rid of this entirely
/**
 * Writes the older coreference annotations, kept for backward compatibility with a few old
 * things.
 *
 * <p>Two things are set:
 * <ul>
 *   <li>{@code CorefGraphAnnotation} on {@code annotation}: one (source, destination) pair per
 *       link from {@code SieveCoreferenceSystem.getLinks(result)}, each side being
 *       (sentence number, head token number).</li>
 *   <li>{@code CorefClusterAnnotation} on the head token of every mention in a chain that has at
 *       least two mentions: the set of all head tokens of that chain (one shared set per
 *       chain).</li>
 * </ul>
 *
 * @param annotation document annotation that receives the graph and whose sentences are read
 * @param orderedMentions mentions per sentence, used to map a link endpoint to its head token
 * @param result coref chains keyed by an integer ID
 */
private static void addObsoleteCoreferenceAnnotations(Annotation annotation, List<List<Mention>> orderedMentions, Map<Integer, CorefChain> result) {
  List<Pair<IntTuple, IntTuple>> links = SieveCoreferenceSystem.getLinks(result);

  if (VERBOSE) {
    System.err.printf("Found %d coreference links:\n", links.size());
    for (Pair<IntTuple, IntTuple> link : links) {
      System.err.printf("LINK (%d, %d) -> (%d, %d)\n", link.first.get(0), link.first.get(1), link.second.get(0), link.second.get(1));
    }
  }

  //
  // save the coref output as CorefGraphAnnotation -- the raw links found by the coref system
  //
  List<Pair<IntTuple, IntTuple>> graph = new ArrayList<>();
  for (Pair<IntTuple, IntTuple> link : links) {
    IntTuple from = link.first;
    IntTuple to = link.second;

    // Note: all offsets in the graph start at 1 (not at 0!)
    // we do this for consistency reasons, as indices for syntactic dependencies start at 1.
    // The link positions are used with -1 to index orderedMentions; headIndex gets +1.
    int srcSent = from.get(0);
    int srcTok = orderedMentions.get(srcSent - 1).get(from.get(1) - 1).headIndex + 1;
    int dstSent = to.get(0);
    int dstTok = orderedMentions.get(dstSent - 1).get(to.get(1) - 1).headIndex + 1;

    IntTuple src = new IntTuple(2);
    src.set(0, srcSent);
    src.set(1, srcTok);
    IntTuple dst = new IntTuple(2);
    dst.set(0, dstSent);
    dst.set(1, dstTok);
    graph.add(new Pair<>(src, dst));
  }
  annotation.set(CorefCoreAnnotations.CorefGraphAnnotation.class, graph);

  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  for (CorefChain chain : result.values()) {
    List<CorefMention> chainMentions = chain.getMentionsInTextualOrder();
    if (chainMentions.size() >= 2) {
      // collect the head token of every mention in the chain ...
      Set<CoreLabel> headTokens = Generics.newHashSet();
      for (CorefMention mention : chainMentions) {
        // sentNum and headIndex are used with -1 here to index the 0-based lists
        List<CoreLabel> sentenceTokens = sentences.get(mention.sentNum - 1).get(CoreAnnotations.TokensAnnotation.class);
        headTokens.add(sentenceTokens.get(mention.headIndex - 1));
      }
      // ... and let each of those tokens point at the whole set
      for (CoreLabel headToken : headTokens) {
        headToken.set(CorefCoreAnnotations.CorefClusterAnnotation.class, headTokens);
      }
    }
  }
}
use of edu.stanford.nlp.util.IntTuple in project CoreNLP by stanfordnlp.
the class Document method extractGoldLinks.
/**
 * Extract gold coref link information.
 *
 * <p>Builds a list of (source, destination) position pairs from {@code goldMentions} and stores
 * it in {@code goldLinks}. A position is (sentence index, index of the mention within that
 * sentence), both counted from 0 as they index {@code goldMentions}.
 *
 * <p>Side effect: when a mention refers to a mention at a later position, the
 * {@code originalRef} fields of both mentions are rewritten (see the while loop below).
 *
 * @throws RuntimeException if a mention's {@code originalRef} is non-negative but is not the ID
 *         of any mention in {@code goldMentions}
 */
protected void extractGoldLinks() {
// List<List<Mention>> orderedMentionsBySentence = this.getOrderedMentions();
List<Pair<IntTuple, IntTuple>> links = new ArrayList<>();
// position of each mention in the input matrix, by id
// (a local map, keyed by mention ID, built fresh on every call)
Map<Integer, IntTuple> positions = Generics.newHashMap();
// positions of antecedents
Map<Integer, List<IntTuple>> antecedents = Generics.newHashMap();
// First pass: record where every gold mention sits and give it an empty antecedent list.
for (int i = 0; i < goldMentions.size(); i++) {
for (int j = 0; j < goldMentions.get(i).size(); j++) {
Mention m = goldMentions.get(i).get(j);
int id = m.mentionID;
IntTuple pos = new IntTuple(2);
pos.set(0, i);
pos.set(1, j);
positions.put(id, pos);
antecedents.put(id, new ArrayList<>());
}
}
// SieveCoreferenceSystem.debugPrintMentions(System.err, "", goldOrderedMentionsBySentence);
// Second pass: for every mention that refers to another one, add links from it (src) to its
// referent (dst) and to the referent's already-known antecedents.
for (List<Mention> mentions : goldMentions) {
for (Mention m : mentions) {
int id = m.mentionID;
IntTuple src = positions.get(id);
assert (src != null);
// a negative originalRef means the mention refers to nothing; it gets no outgoing link here
if (m.originalRef >= 0) {
IntTuple dst = positions.get(m.originalRef);
if (dst == null) {
throw new RuntimeException("Cannot find gold mention with ID=" + m.originalRef);
}
// to deal with cataphoric annotation
// While dst sits at a later position than src (later sentence, or same sentence and later
// mention index), swap the direction: the later mention is made to refer to m, and m takes
// over whatever the later mention referred to. Repeats until dst is not after src or m has
// no referent left.
// NOTE(review): the dst re-read at the end of the loop body is not null-checked like the
// first lookup above; a missing ID would surface as a NullPointerException in the loop test.
while (dst.get(0) > src.get(0) || (dst.get(0) == src.get(0) && dst.get(1) > src.get(1))) {
Mention dstMention = goldMentions.get(dst.get(0)).get(dst.get(1));
m.originalRef = dstMention.originalRef;
dstMention.originalRef = id;
if (m.originalRef < 0)
break;
dst = positions.get(m.originalRef);
}
// the swap above may have left m without a referent
if (m.originalRef < 0)
continue;
// A B C: if A<-B, A<-C => make a link B<-C
// Walks every position from dst up to and including src; any position that already has a
// link to dst also becomes an antecedent of src. links is a list, so contains() is a scan.
for (int k = dst.get(0); k <= src.get(0); k++) {
for (int l = 0; l < goldMentions.get(k).size(); l++) {
// skip mentions before dst in dst's sentence
if (k == dst.get(0) && l < dst.get(1))
continue;
// stop after src in src's sentence
if (k == src.get(0) && l > src.get(1))
break;
IntTuple missed = new IntTuple(2);
missed.set(0, k);
missed.set(1, l);
if (links.contains(new Pair<>(missed, dst))) {
antecedents.get(id).add(missed);
links.add(new Pair<>(src, missed));
}
}
}
// the direct link src -> dst
links.add(new Pair<>(src, dst));
assert (antecedents.get(id) != null);
antecedents.get(id).add(dst);
// src also links to everything its referent was already linked to
List<IntTuple> ants = antecedents.get(m.originalRef);
assert (ants != null);
for (IntTuple ant : ants) {
antecedents.get(id).add(ant);
links.add(new Pair<>(src, ant));
}
}
}
}
goldLinks = links;
}
Aggregations