use of edu.stanford.nlp.util.IntTuple in project CoreNLP by stanfordnlp.
the class DocumentPreprocessor method fillMentionInfo.
/**
 * Fills in the per-mention bookkeeping for every predicted mention of a document.
 *
 * <p>For each mention this registers it in {@code doc.predictedMentionsByID}, records its
 * (sentence, index-in-sentence) position in {@code doc.positions} and its
 * (sentence, head index) position in {@code doc.mentionheadPositions}, attaches the
 * sentence's parse tree and dependency graphs, picks the mention subtree, and finally
 * calls {@code Mention.process}. When gold mentions are present, they are indexed by ID
 * and given their sentence number as well.
 *
 * @param doc document whose predicted (and optionally gold) mentions are filled in
 * @param dict dictionaries handed through to {@code Mention.process}
 * @param singletonPredictor classifier handed through to {@code Mention.process}
 * @param headFinder head finder used to compare the head terminal of candidate subtrees
 * @throws Exception declared by the original signature; presumably propagated from
 *         {@code Mention.process} (confirm against that method)
 */
private static void fillMentionInfo(Document doc, Dictionaries dict, LogisticClassifier<String, String> singletonPredictor, HeadFinder headFinder) throws Exception {
  final List<CoreMap> sentences = doc.annotation.get(SentencesAnnotation.class);

  for (int sentIdx = 0; sentIdx < doc.predictedMentions.size(); sentIdx++) {
    final CoreMap sentence = sentences.get(sentIdx);
    final List<Mention> sentenceMentions = doc.predictedMentions.get(sentIdx);

    for (int mentionIdx = 0; mentionIdx < sentenceMentions.size(); mentionIdx++) {
      final Mention m = sentenceMentions.get(mentionIdx);

      // Index the mention by its ID.
      doc.predictedMentionsByID.put(m.mentionID, m);

      // Remember where the mention sits: (sentence, index within sentence).
      final IntTuple mentionPos = new IntTuple(2);
      mentionPos.set(0, sentIdx);
      mentionPos.set(1, mentionIdx);
      doc.positions.put(m, mentionPos);

      m.sentNum = sentIdx;

      // Remember which mention owns the head token: (sentence, head index).
      final IntTuple headPos = new IntTuple(2);
      headPos.set(0, sentIdx);
      headPos.set(1, m.headIndex);
      doc.mentionheadPositions.put(headPos, m);

      // Attach the sentence-level syntactic structures.
      m.contextParseTree = sentence.get(TreeAnnotation.class);
      m.basicDependency = sentence.get(BasicDependenciesAnnotation.class);
      m.enhancedDependency = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
      if (m.enhancedDependency == null) {
        // No enhanced graph on the sentence: fall back to the basic dependencies.
        m.enhancedDependency = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
      }

      // With a constituency tree available, the mention subtree is the highest NP
      // whose head terminal is the mention's head leaf.
      if (m.contextParseTree != null) {
        final Tree parseTree = m.contextParseTree;
        final Tree headTree = parseTree.getLeaves().get(m.headIndex);
        if (headTree == null) {
          throw new RuntimeException("Missing head tree for a mention!");
        }

        // Walk upwards from the head leaf; once an NP has been chosen, stop at the
        // first ancestor that no longer qualifies.
        for (Tree ancestor = headTree.parent(parseTree); ancestor != null; ancestor = ancestor.parent(parseTree)) {
          final boolean isNpWithSameHead =
              ancestor.headTerminal(headFinder) == headTree && ancestor.value().equals("NP");
          if (isNpWithSameHead) {
            m.mentionSubTree = ancestor;
            continue;
          }
          if (m.mentionSubTree != null) {
            break;
          }
        }

        // No suitable NP found: use the head leaf itself.
        if (m.mentionSubTree == null) {
          m.mentionSubTree = headTree;
        }
      }

      m.process(dict, null, singletonPredictor);
    }
  }

  // Nothing more to do when the document carries no gold mentions.
  if (doc.goldMentions == null) {
    return;
  }

  doc.goldMentionsByID = Generics.newHashMap();
  int goldSentIdx = 0;
  for (List<Mention> sentenceGolds : doc.goldMentions) {
    for (Mention gold : sentenceGolds) {
      doc.goldMentionsByID.put(gold.mentionID, gold);
      gold.sentNum = goldSentIdx;
    }
    goldSentIdx++;
  }
}
use of edu.stanford.nlp.util.IntTuple in project CoreNLP by stanfordnlp.
the class DocumentPreprocessor method findSubject.
/**
 * Records the speaker of an utterance from the first {@code nsubj} child of a word.
 *
 * <p>If a mention is registered in {@code doc.mentionheadPositions} at the subject's
 * (sentence, token) position, the speaker is that mention's ID rendered as a string;
 * otherwise the speaker is the subject's word. The result is stored in
 * {@code doc.speakers} under {@code utterNum}.
 *
 * @param doc document whose speaker map is updated
 * @param dependency dependency graph in which the children of {@code w} are looked up
 * @param w word whose children are scanned for a subject
 * @param sentNum sentence number used as the first component of the head position
 * @param utterNum utterance number used as the key in {@code doc.speakers}
 * @return {@code true} if an {@code nsubj} child was found and a speaker recorded,
 *         {@code false} otherwise
 */
private static boolean findSubject(Document doc, SemanticGraph dependency, IndexedWord w, int sentNum, int utterNum) {
  for (Pair<GrammaticalRelation, IndexedWord> edge : dependency.childPairs(w)) {
    if (!edge.first().getShortName().equals("nsubj")) {
      continue;
    }

    final IndexedWord subject = edge.second();
    final String subjectText = subject.word();

    // Word indices start from 1, while head positions are stored 0-based.
    final IntTuple headKey = new IntTuple(2);
    headKey.set(0, sentNum);
    headKey.set(1, subject.index() - 1);

    final String speaker = doc.mentionheadPositions.containsKey(headKey)
        ? Integer.toString(doc.mentionheadPositions.get(headKey).mentionID)
        : subjectText;

    doc.speakers.put(utterNum, speaker);
    return true;
  }
  return false;
}
use of edu.stanford.nlp.util.IntTuple in project CoreNLP by stanfordnlp.
the class Document method extractGoldLinks.
/**
 * Extract gold coref link information.
 *
 * <p>Works in two passes over {@code goldMentions}:
 * <ol>
 *   <li>record the (sentence, index) position of every gold mention by its ID and create
 *       an empty antecedent list for it;</li>
 *   <li>for every mention with a non-negative {@code originalRef}, resolve the referenced
 *       position, re-point cataphoric references (a referent located after the mention)
 *       so that links always point backwards, and add links from the mention to its
 *       referent, to the referent's known antecedents, and to intermediate mentions that
 *       are already linked to the same referent.</li>
 * </ol>
 * The collected links are stored in {@code goldLinks}. Note that this method rewrites
 * {@code originalRef} on the mentions involved in cataphoric references.
 *
 * @throws RuntimeException if a mention's {@code originalRef} (initially, or after a
 *         cataphora swap) names an ID with no recorded gold mention
 */
protected void extractGoldLinks() {
  // List<List<Mention>> orderedMentionsBySentence = this.getOrderedMentions();
  List<Pair<IntTuple, IntTuple>> links = new ArrayList<>();
  // position of each mention in the input matrix, by id
  Map<Integer, IntTuple> positions = Generics.newHashMap();
  // positions of antecedents
  Map<Integer, List<IntTuple>> antecedents = Generics.newHashMap();
  for (int i = 0; i < goldMentions.size(); i++) {
    for (int j = 0; j < goldMentions.get(i).size(); j++) {
      Mention m = goldMentions.get(i).get(j);
      int id = m.mentionID;
      IntTuple pos = new IntTuple(2);
      pos.set(0, i);
      pos.set(1, j);
      positions.put(id, pos);
      antecedents.put(id, new ArrayList<>());
    }
  }
  // SieveCoreferenceSystem.debugPrintMentions(System.err, "", goldOrderedMentionsBySentence);
  for (List<Mention> mentions : goldMentions) {
    for (Mention m : mentions) {
      int id = m.mentionID;
      IntTuple src = positions.get(id);
      assert (src != null);
      if (m.originalRef >= 0) {
        IntTuple dst = positions.get(m.originalRef);
        if (dst == null) {
          throw new RuntimeException("Cannot find gold mention with ID=" + m.originalRef);
        }
        // to deal with cataphoric annotation: while the referent sits after this mention,
        // swap the direction of the reference and follow the referent's own reference
        while (dst.get(0) > src.get(0) || (dst.get(0) == src.get(0) && dst.get(1) > src.get(1))) {
          Mention dstMention = goldMentions.get(dst.get(0)).get(dst.get(1));
          m.originalRef = dstMention.originalRef;
          dstMention.originalRef = id;
          if (m.originalRef < 0) {
            break;
          }
          dst = positions.get(m.originalRef);
          // Same guard as for the first lookup: without it an unknown ID would surface
          // as a bare NullPointerException when the loop condition is re-evaluated.
          if (dst == null) {
            throw new RuntimeException("Cannot find gold mention with ID=" + m.originalRef);
          }
        }
        // the swap left this mention without a referent: nothing to link
        if (m.originalRef < 0) {
          continue;
        }
        // A B C: if A<-B, A<-C => make a link B<-C
        // scan every position from dst up to and including src
        for (int k = dst.get(0); k <= src.get(0); k++) {
          for (int l = 0; l < goldMentions.get(k).size(); l++) {
            // skip mentions before dst within dst's sentence
            if (k == dst.get(0) && l < dst.get(1)) {
              continue;
            }
            // stop after src within src's sentence
            if (k == src.get(0) && l > src.get(1)) {
              break;
            }
            IntTuple missed = new IntTuple(2);
            missed.set(0, k);
            missed.set(1, l);
            if (links.contains(new Pair<>(missed, dst))) {
              antecedents.get(id).add(missed);
              links.add(new Pair<>(src, missed));
            }
          }
        }
        links.add(new Pair<>(src, dst));
        assert (antecedents.get(id) != null);
        antecedents.get(id).add(dst);
        // inherit every antecedent already known for the referent
        List<IntTuple> ants = antecedents.get(m.originalRef);
        assert (ants != null);
        for (IntTuple ant : ants) {
          antecedents.get(id).add(ant);
          links.add(new Pair<>(src, ant));
        }
      }
    }
  }
  goldLinks = links;
}
use of edu.stanford.nlp.util.IntTuple in project CoreNLP by stanfordnlp.
the class Document method findNextParagraphSpeaker.
/**
 * Looks in the last sentence of a paragraph for a reporting verb (lemma "report" or
 * "say") whose {@code nsubj} child is the head of a mention with an NER string
 * starting with "PER", and returns that mention's ID as a string.
 *
 * <p>Every matching token is examined, so if several qualify the last one wins.
 * Tokens without a lemma are skipped, as are tokens for which the sentence has no
 * enhanced dependency graph or no matching graph node.
 *
 * @param paragraph sentences of the paragraph; the last one is inspected
 * @param paragraphOffset added to the last sentence's index within the paragraph to form
 *        the sentence component of the head position
 * @param dict not used by this method
 * @return the ID of the speaker mention as a string, or the empty string if none is found
 */
private String findNextParagraphSpeaker(List<CoreMap> paragraph, int paragraphOffset, Dictionaries dict) {
  CoreMap lastSent = paragraph.get(paragraph.size() - 1);
  String speaker = "";
  for (CoreLabel w : lastSent.get(CoreAnnotations.TokensAnnotation.class)) {
    String lemma = w.get(CoreAnnotations.LemmaAnnotation.class);
    // constant-first equals: a token without a lemma is skipped instead of throwing
    if (!"report".equals(lemma) && !"say".equals(lemma)) {
      continue;
    }
    String word = w.get(CoreAnnotations.TextAnnotation.class);
    SemanticGraph dependency = lastSent.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (dependency == null) {
      // no enhanced dependency graph on this sentence: no subject can be looked up
      continue;
    }
    // NOTE(review): the token text is handed over as a word pattern, so the node found
    // is presumably the first one matching that text, not necessarily this token - confirm
    IndexedWord t = dependency.getNodeByWordPattern(word);
    if (t == null) {
      // no graph node for this word: skip rather than querying children of null
      continue;
    }
    for (Pair<GrammaticalRelation, IndexedWord> child : dependency.childPairs(t)) {
      if (child.first().getShortName().equals("nsubj")) {
        // word indices start from 1, head positions are 0-based
        int subjectIndex = child.second().index();
        IntTuple headPosition = new IntTuple(2);
        headPosition.set(0, paragraph.size() - 1 + paragraphOffset);
        headPosition.set(1, subjectIndex - 1);
        if (mentionheadPositions.containsKey(headPosition) && mentionheadPositions.get(headPosition).nerString.startsWith("PER")) {
          speaker = Integer.toString(mentionheadPositions.get(headPosition).mentionID);
        }
      }
    }
  }
  return speaker;
}
use of edu.stanford.nlp.util.IntTuple in project CoreNLP by stanfordnlp.
the class Document method extractGoldLinks.
/**
 * Extract gold coref link information.
 *
 * <p>Works in two passes over {@code goldOrderedMentionsBySentence}:
 * <ol>
 *   <li>record the (sentence, index) position of every gold mention by its ID and create
 *       an empty antecedent list for it;</li>
 *   <li>for every mention with a non-negative {@code originalRef}, resolve the referenced
 *       position, re-point cataphoric references (a referent located after the mention)
 *       so that links always point backwards, and add links from the mention to its
 *       referent, to the referent's known antecedents, and to intermediate mentions that
 *       are already linked to the same referent.</li>
 * </ol>
 * The collected links are stored in {@code goldLinks}. Note that this method rewrites
 * {@code originalRef} on the mentions involved in cataphoric references.
 *
 * @throws RuntimeException if a mention's {@code originalRef} (initially, or after a
 *         cataphora swap) names an ID with no recorded gold mention
 */
protected void extractGoldLinks() {
  // List<List<Mention>> orderedMentionsBySentence = this.getOrderedMentions();
  List<Pair<IntTuple, IntTuple>> links = new ArrayList<>();
  // position of each mention in the input matrix, by id
  Map<Integer, IntTuple> positions = Generics.newHashMap();
  // positions of antecedents
  Map<Integer, List<IntTuple>> antecedents = Generics.newHashMap();
  for (int i = 0; i < goldOrderedMentionsBySentence.size(); i++) {
    for (int j = 0; j < goldOrderedMentionsBySentence.get(i).size(); j++) {
      Mention m = goldOrderedMentionsBySentence.get(i).get(j);
      int id = m.mentionID;
      IntTuple pos = new IntTuple(2);
      pos.set(0, i);
      pos.set(1, j);
      positions.put(id, pos);
      antecedents.put(id, new ArrayList<>());
    }
  }
  // SieveCoreferenceSystem.debugPrintMentions(System.err, "", goldOrderedMentionsBySentence);
  for (List<Mention> mentions : goldOrderedMentionsBySentence) {
    for (Mention m : mentions) {
      int id = m.mentionID;
      IntTuple src = positions.get(id);
      assert (src != null);
      if (m.originalRef >= 0) {
        IntTuple dst = positions.get(m.originalRef);
        if (dst == null) {
          throw new RuntimeException("Cannot find gold mention with ID=" + m.originalRef);
        }
        // to deal with cataphoric annotation: while the referent sits after this mention,
        // swap the direction of the reference and follow the referent's own reference
        while (dst.get(0) > src.get(0) || (dst.get(0) == src.get(0) && dst.get(1) > src.get(1))) {
          Mention dstMention = goldOrderedMentionsBySentence.get(dst.get(0)).get(dst.get(1));
          m.originalRef = dstMention.originalRef;
          dstMention.originalRef = id;
          if (m.originalRef < 0) {
            break;
          }
          dst = positions.get(m.originalRef);
          // Same guard as for the first lookup: without it an unknown ID would surface
          // as a bare NullPointerException when the loop condition is re-evaluated.
          if (dst == null) {
            throw new RuntimeException("Cannot find gold mention with ID=" + m.originalRef);
          }
        }
        // the swap left this mention without a referent: nothing to link
        if (m.originalRef < 0) {
          continue;
        }
        // A B C: if A<-B, A<-C => make a link B<-C
        // scan every position from dst up to and including src
        for (int k = dst.get(0); k <= src.get(0); k++) {
          for (int l = 0; l < goldOrderedMentionsBySentence.get(k).size(); l++) {
            // skip mentions before dst within dst's sentence
            if (k == dst.get(0) && l < dst.get(1)) {
              continue;
            }
            // stop after src within src's sentence
            if (k == src.get(0) && l > src.get(1)) {
              break;
            }
            IntTuple missed = new IntTuple(2);
            missed.set(0, k);
            missed.set(1, l);
            if (links.contains(new Pair<>(missed, dst))) {
              antecedents.get(id).add(missed);
              links.add(new Pair<>(src, missed));
            }
          }
        }
        links.add(new Pair<>(src, dst));
        assert (antecedents.get(id) != null);
        antecedents.get(id).add(dst);
        // inherit every antecedent already known for the referent
        List<IntTuple> ants = antecedents.get(m.originalRef);
        assert (ants != null);
        for (IntTuple ant : ants) {
          antecedents.get(id).add(ant);
          links.add(new Pair<>(src, ant));
        }
      }
    }
  }
  goldLinks = links;
}
Aggregations