use of edu.stanford.nlp.dcoref.CorefChain in project CoreNLP by stanfordnlp.
the class DeterministicCorefAnnotator method addObsoleteCoreferenceAnnotations.
/**
 * Writes the legacy coreference annotations, kept only for backward compatibility
 * with a few old consumers.
 * TODO: Aim to get rid of this entirely
 *
 * <p>Two things are stored: the raw link graph under {@code CorefGraphAnnotation} on the
 * annotation, and, for every chain with at least two mentions, the set of head tokens
 * under {@code CorefClusterAnnotation} on each of those head tokens.
 *
 * @param annotation      document annotation that receives the graph and whose sentence
 *                        tokens receive the cluster sets
 * @param orderedMentions mentions per sentence, indexed by the (1-based) positions found
 *                        in the links
 * @param result          coreference chains to convert
 */
private static void addObsoleteCoreferenceAnnotations(Annotation annotation, List<List<Mention>> orderedMentions, Map<Integer, CorefChain> result) {
  List<Pair<IntTuple, IntTuple>> links = SieveCoreferenceSystem.getLinks(result);

  if (VERBOSE) {
    System.err.printf("Found %d coreference links:\n", links.size());
    for (Pair<IntTuple, IntTuple> edge : links) {
      IntTuple from = edge.first;
      IntTuple to = edge.second;
      System.err.printf("LINK (%d, %d) -> (%d, %d)\n", from.get(0), from.get(1), to.get(0), to.get(1));
    }
  }

  // The graph stored in CorefGraphAnnotation holds the raw links found by the coref system.
  // Every offset in it is 1-based (not 0-based), for consistency with syntactic
  // dependency indices, which also start at 1.
  List<Pair<IntTuple, IntTuple>> rawGraph = new ArrayList<>();
  for (Pair<IntTuple, IntTuple> edge : links) {
    // Link tuples are (sentence, mention position), both 1-based; the mention's
    // headIndex is shifted by one to produce a 1-based token offset.
    int sourceSentence = edge.first.get(0);
    Mention sourceMention = orderedMentions.get(sourceSentence - 1).get(edge.first.get(1) - 1);
    int sourceToken = sourceMention.headIndex + 1;

    int targetSentence = edge.second.get(0);
    Mention targetMention = orderedMentions.get(targetSentence - 1).get(edge.second.get(1) - 1);
    int targetToken = targetMention.headIndex + 1;

    IntTuple target = new IntTuple(2);
    target.set(0, targetSentence);
    target.set(1, targetToken);

    IntTuple source = new IntTuple(2);
    source.set(0, sourceSentence);
    source.set(1, sourceToken);

    rawGraph.add(new Pair<>(source, target));
  }
  annotation.set(CorefCoreAnnotations.CorefGraphAnnotation.class, rawGraph);

  for (CorefChain chain : result.values()) {
    // Singleton (or empty) chains get no cluster annotation.
    if (chain.getMentionsInTextualOrder().size() >= 2) {
      Set<CoreLabel> clusterTokens = Generics.newHashSet();
      for (CorefMention member : chain.getMentionsInTextualOrder()) {
        List<CoreMap> documentSentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        CoreMap memberSentence = documentSentences.get(member.sentNum - 1);
        List<CoreLabel> sentenceTokens = memberSentence.get(CoreAnnotations.TokensAnnotation.class);
        clusterTokens.add(sentenceTokens.get(member.headIndex - 1));
      }
      // Every head token in the chain points at the same shared set instance.
      for (CoreLabel headToken : clusterTokens) {
        headToken.set(CorefCoreAnnotations.CorefClusterAnnotation.class, clusterTokens);
      }
    }
  }
}
use of edu.stanford.nlp.dcoref.CorefChain in project CoreNLP by stanfordnlp.
the class DeterministicCorefAnnotator method annotate.
/**
 * Runs coreference resolution on {@code annotation} and stores the resulting chains
 * under {@code edu.stanford.nlp.coref.CorefCoreAnnotations.CorefChainAnnotation}.
 *
 * <p>The named entity tag granularity is switched to "coarse" for the duration of the
 * call and switched back to "fine" in a {@code finally} block, so it is restored on
 * every exit path, including failures during mention detection.
 *
 * <p>If the annotation has no {@code SentencesAnnotation}, an error is logged and the
 * method returns without producing coreference output. When {@code OLD_FORMAT} is set,
 * the obsolete graph/cluster annotations are added as well.
 *
 * @param annotation the document to annotate; modified in place
 * @throws RuntimeException any unchecked exception from the underlying components is
 *         rethrown as is; checked exceptions are wrapped in a {@code RuntimeException}
 */
@Override
public void annotate(Annotation annotation) {
  // temporarily set the primary named entity tag to the coarse tag
  setNamedEntityTagGranularity(annotation, "coarse");
  try {
    // Mention detection runs inside the try block so that the finally clause below
    // restores the fine-grained tags even when the mention annotator throws.
    if (performMentionDetection) {
      mentionAnnotator.annotate(annotation);
    }

    List<Tree> trees = new ArrayList<>();
    List<List<CoreLabel>> sentences = new ArrayList<>();

    // extract trees and sentence words
    // we are only supporting the new annotation standard for this Annotator!
    boolean hasSpeakerAnnotations = false;
    if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
      for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        sentences.add(tokens);
        Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        trees.add(tree);

        // locking here is crucial for correct threading!
        // NOTE(review): the trailing `true` argument is presumably the lock flag -- confirm
        // against SemanticGraphFactory.makeFromTree.
        SemanticGraph dependencies = SemanticGraphFactory.makeFromTree(tree, Mode.COLLAPSED, Extras.NONE, null, true);
        sentence.set(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class, dependencies);

        if (!hasSpeakerAnnotations) {
          // check for speaker annotations; one annotated token anywhere is enough
          for (CoreLabel t : tokens) {
            if (t.get(CoreAnnotations.SpeakerAnnotation.class) != null) {
              hasSpeakerAnnotations = true;
              break;
            }
          }
        }
        MentionExtractor.mergeLabels(tree, tokens);
        MentionExtractor.initializeUtterance(tokens);
      }
    } else {
      log.error("this coreference resolution system requires SentencesAnnotation!");
      return;
    }
    if (hasSpeakerAnnotations) {
      annotation.set(CoreAnnotations.UseMarkedDiscourseAnnotation.class, true);
    }

    // extract all possible mentions
    // this is created for each new annotation because it is not threadsafe
    RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(allowReparsing);
    List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(annotation, 0, corefSystem.dictionaries());

    // add the relevant info to mentions and order them for coref
    Document document = mentionExtractor.arrange(annotation, sentences, trees, allUnprocessedMentions);
    List<List<Mention>> orderedMentions = document.getOrderedMentions();
    if (VERBOSE) {
      for (int i = 0; i < orderedMentions.size(); i++) {
        System.err.printf("Mentions in sentence #%d:%n", i);
        for (int j = 0; j < orderedMentions.get(i).size(); j++) {
          log.info("\tMention #" + j + ": " + orderedMentions.get(i).get(j).spanToString());
        }
      }
    }

    Map<Integer, edu.stanford.nlp.coref.data.CorefChain> result = corefSystem.corefReturnHybridOutput(document);
    annotation.set(edu.stanford.nlp.coref.CorefCoreAnnotations.CorefChainAnnotation.class, result);

    if (OLD_FORMAT) {
      // The obsolete annotations are built from the old-style chain map, which requires
      // a separate call into the coref system.
      Map<Integer, CorefChain> oldResult = corefSystem.coref(document);
      addObsoleteCoreferenceAnnotations(annotation, orderedMentions, oldResult);
    }
  } catch (RuntimeException e) {
    throw e;
  } catch (Exception e) {
    throw new RuntimeException(e);
  } finally {
    // restore to the fine-grained
    setNamedEntityTagGranularity(annotation, "fine");
  }
}
use of edu.stanford.nlp.dcoref.CorefChain in project CoreNLP by stanfordnlp.
the class DcorefPronounResolver method resolvePronouns.
/**
 * Resolves pronouns in a single sentence by running the coreference pipeline over it.
 *
 * <p>The tokens are wrapped in a one-sentence {@code Annotation} (sentence index 1) and
 * passed to {@code pipeline}. Within each resulting chain, mentions are visited in
 * textual order: the first mention whose head token is not tagged {@code PRP*} becomes
 * the referent, and every later pronoun head in that chain is mapped to it. Pronouns
 * that occur before any non-pronoun mention in their chain are left unresolved.
 *
 * @param tokens the tokens of the sentence; mention head indices from the chains are
 *               looked up in this list
 * @return map from the {@code index()} of a pronoun head token to the {@code index()} of
 *         its referent head token; empty when the pipeline produced no coreference chains
 */
@Override
protected HashMap<Integer, Integer> resolvePronouns(List<CoreLabel> tokens) {
  HashMap<Integer, Integer> pronPairs = new HashMap<Integer, Integer>(1);

  CoreMap sentence = new CoreLabel();
  sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
  sentence.set(CoreAnnotations.SentenceIndexAnnotation.class, 1);
  List<CoreMap> sentences = new ArrayList<CoreMap>(1);
  sentences.add(sentence);
  Annotation annotation = new Annotation(sentences);
  pipeline.annotate(annotation);

  Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  if (corefChains == null) {
    // The pipeline did not set any coreference chains; nothing can be resolved.
    return pronPairs;
  }

  for (CorefChain chain : corefChains.values()) {
    // First non-pronoun mention head seen so far in this chain.
    CoreLabel firstRef = null;
    for (CorefMention m : chain.getMentionsInTextualOrder()) {
      // headIndex is 1-based here, hence the shift when indexing into tokens.
      CoreLabel lbl = tokens.get(m.headIndex - 1);
      boolean isPronoun = lbl.tag().startsWith("PRP");
      if (isPronoun) {
        if (firstRef != null) {
          pronPairs.put(lbl.index(), firstRef.index());
        }
      } else if (firstRef == null) {
        firstRef = lbl;
      }
    }
  }
  return pronPairs;
}
Aggregations