Use of edu.stanford.nlp.dcoref.Document in project CoreNLP by stanfordnlp.
The method annotate of the class DeterministicCorefAnnotator.
/**
 * Runs the deterministic coreference system over {@code annotation} and stores the
 * resulting chains on it.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If {@code annotation} has no {@code SentencesAnnotation}, an error is logged and the
 *       method returns without modifying the annotation.</li>
 *   <li>For every sentence, its tokens and parse tree are collected, a collapsed
 *       {@code SemanticGraph} is built from the tree and stored on the sentence under
 *       {@code AlternativeDependenciesAnnotation}, and {@code MentionExtractor.mergeLabels} /
 *       {@code MentionExtractor.initializeUtterance} are applied to the tokens.</li>
 *   <li>If any token carries a non-null {@code SpeakerAnnotation},
 *       {@code UseMarkedDiscourseAnnotation} is set to {@code true} on the annotation.</li>
 *   <li>Mentions are extracted with a fresh {@code RuleBasedCorefMentionFinder}, arranged into
 *       a {@code Document}, and passed to {@code corefSystem.corefReturnHybridOutput}; the
 *       result is stored under {@code CorefChainAnnotation}.</li>
 *   <li>When {@code OLD_FORMAT} is set, {@code corefSystem.coref} is run as well and its
 *       result is handed to {@code addObsoleteCoreferenceAnnotations}.</li>
 * </ol>
 *
 * @param annotation the document annotation to read sentences from and write coreference
 *     results to
 * @throws RuntimeException any runtime exception from the pipeline is rethrown unchanged; any
 *     checked exception is wrapped in a {@code RuntimeException} with the original as cause
 */
@Override
public void annotate(Annotation annotation) {
  try {
    // we are only supporting the new annotation standard for this Annotator!
    if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
      log.error("this coreference resolution system requires SentencesAnnotation!");
      return;
    }

    // extract trees and sentence words
    List<Tree> trees = new ArrayList<>();
    List<List<CoreLabel>> sentences = new ArrayList<>();
    boolean hasSpeakerAnnotations = false;
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      sentences.add(tokens);
      Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
      trees.add(tree);

      // locking here is crucial for correct threading!
      // NOTE(review): no lock is taken in this method; verify whether makeFromTree
      // synchronizes internally or whether callers are expected to serialize access.
      SemanticGraph dependencies = SemanticGraphFactory.makeFromTree(tree, Mode.COLLAPSED, Extras.NONE, null, true);
      sentence.set(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class, dependencies);

      // check for speaker annotations; one hit is enough, so stop scanning once found
      if (!hasSpeakerAnnotations) {
        for (CoreLabel token : tokens) {
          if (token.get(CoreAnnotations.SpeakerAnnotation.class) != null) {
            hasSpeakerAnnotations = true;
            break;
          }
        }
      }

      // kept after the dependency graph is built, as in the original statement order
      MentionExtractor.mergeLabels(tree, tokens);
      MentionExtractor.initializeUtterance(tokens);
    }

    if (hasSpeakerAnnotations) {
      annotation.set(CoreAnnotations.UseMarkedDiscourseAnnotation.class, true);
    }

    // extract all possible mentions
    // this is created for each new annotation because it is not threadsafe
    RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(allowReparsing);
    List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(annotation, 0, corefSystem.dictionaries());

    // add the relevant info to mentions and order them for coref
    Document document = mentionExtractor.arrange(annotation, sentences, trees, allUnprocessedMentions);
    List<List<Mention>> orderedMentions = document.getOrderedMentions();

    if (VERBOSE) {
      // Both the sentence header and the mention lines go through the logger so the
      // diagnostic output stays together in one sink.
      for (int i = 0; i < orderedMentions.size(); i++) {
        List<Mention> sentenceMentions = orderedMentions.get(i);
        log.info("Mentions in sentence #" + i + ":");
        for (int j = 0; j < sentenceMentions.size(); j++) {
          log.info("\tMention #" + j + ": " + sentenceMentions.get(j).spanToString());
        }
      }
    }

    Map<Integer, edu.stanford.nlp.coref.data.CorefChain> result = corefSystem.corefReturnHybridOutput(document);
    annotation.set(edu.stanford.nlp.coref.CorefCoreAnnotations.CorefChainAnnotation.class, result);

    if (OLD_FORMAT) {
      // also publish the results in the legacy annotation format
      Map<Integer, CorefChain> oldResult = corefSystem.coref(document);
      addObsoleteCoreferenceAnnotations(annotation, orderedMentions, oldResult);
    }
  } catch (RuntimeException e) {
    // unchecked exceptions pass through untouched
    throw e;
  } catch (Exception e) {
    // checked exceptions are wrapped, preserving the cause
    throw new RuntimeException(e);
  }
}
Aggregations