Use of edu.stanford.nlp.quoteattribution.ChapterAnnotator in the CoreNLP project by stanfordnlp.
Shown below: the annotate method of the class QuoteAttributionAnnotator.
/**
 * Runs quote attribution over {@code annotation}, modifying it in place.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If {@code buildCharacterMapPerAnnotation} is set and the annotation carries a
 *       {@code MentionsAnnotation}, calls {@code entityMentionsToCharacterMap(annotation)}.</li>
 *   <li>Runs a {@code ParagraphAnnotator} configured with {@code paragraphBreak=one}.</li>
 *   <li>Preprocesses the annotation: coref setup, chapter annotation, enhanced sentences,
 *       and the dependency parse of quote-removed sentences.</li>
 *   <li>Runs every quote-to-mention sieve named in {@code qmSieveList}, then every
 *       mention-to-speaker sieve named in {@code msSieveList}, each in list order.</li>
 *   <li>For each quote whose {@code MentionBeginAnnotation} token belongs to an entity mention
 *       that has a canonical entity mention, stores the canonical mention's text and the
 *       {@code TokenBeginAnnotation} of its first and last tokens on the quote.</li>
 * </ol>
 *
 * <p>Entries of the comma-separated sieve lists are trimmed before lookup, so
 * {@code "a, b"} is treated like {@code "a,b"}.
 *
 * @param annotation the document annotation to process
 * @throws IllegalArgumentException if a sieve list contains a name that has no entry in the
 *     corresponding sieve mapping
 */
@Override
public void annotate(Annotation annotation) {
  if (buildCharacterMapPerAnnotation
      && annotation.containsKey(CoreAnnotations.MentionsAnnotation.class)) {
    // Per the original note: put all mentions from this key that are NER type PERSON
    // into the characterMap.
    entityMentionsToCharacterMap(annotation);
  }

  // 0. pre-preprocess the text with paragraph annotations
  // TODO: maybe move this out, definitely make it so that you can set paragraph breaks
  Properties propsPara = new Properties();
  propsPara.setProperty("paragraphBreak", "one");
  new ParagraphAnnotator(propsPara, false).annotate(annotation);

  // 1. preprocess the text
  // a) setup coref
  Map<Integer, String> pronounCorefMap =
      QuoteAttributionUtils.setupCoref(COREF_PATH, characterMap, annotation);
  // annotate chapter numbers in sentences. Useful for denoting chapter boundaries
  new ChapterAnnotator().annotate(annotation);
  // to incorporate sentences across paragraphs
  QuoteAttributionUtils.addEnhancedSentences(annotation, parser);
  // annotate depparse of quote-removed sentences
  QuoteAttributionUtils.annotateForDependencyParse(annotation, parser);

  // 2. Quote->Mention annotation
  Map<String, QMSieve> qmSieves = getQMMapping(annotation, pronounCorefMap);
  for (String rawName : qmSieveList.split(",")) {
    String sieveName = rawName.trim();
    QMSieve sieve = qmSieves.get(sieveName);
    if (sieve == null) {
      // Fail with the offending name instead of an anonymous NullPointerException.
      throw new IllegalArgumentException("Unknown quote-to-mention sieve '" + sieveName
          + "' in sieve list '" + qmSieveList + "'; known sieves: " + qmSieves.keySet());
    }
    sieve.doQuoteToMention(annotation);
  }

  // 3. Mention->Speaker annotation
  // The mapping is built only after the quote->mention sieves have run, as before.
  Map<String, MSSieve> msSieves = getMSMapping(annotation, pronounCorefMap);
  for (String rawName : msSieveList.split(",")) {
    String sieveName = rawName.trim();
    MSSieve sieve = msSieves.get(sieveName);
    if (sieve == null) {
      throw new IllegalArgumentException("Unknown mention-to-speaker sieve '" + sieveName
          + "' in sieve list '" + msSieveList + "'; known sieves: " + msSieves.keySet());
    }
    sieve.doMentionToSpeaker(annotation);
  }

  // see if any speakers could be matched to a canonical entity mention
  // Both lists are looked up once; they are only dereferenced when a quote actually needs them.
  List<CoreLabel> documentTokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
  List<CoreMap> entityMentions = annotation.get(CoreAnnotations.MentionsAnnotation.class);
  for (CoreMap quote : QuoteAnnotator.gatherQuotes(annotation)) {
    Integer firstSpeakerTokenIndex = quote.get(MentionBeginAnnotation.class);
    if (firstSpeakerTokenIndex == null) {
      continue;
    }
    CoreLabel firstSpeakerToken = documentTokens.get(firstSpeakerTokenIndex);
    Integer entityMentionIndex =
        firstSpeakerToken.get(CoreAnnotations.EntityMentionIndexAnnotation.class);
    if (entityMentionIndex == null) {
      continue;
    }
    CoreMap entityMention = entityMentions.get(entityMentionIndex);
    Integer canonicalIndex =
        entityMention.get(CoreAnnotations.CanonicalEntityMentionIndexAnnotation.class);
    if (canonicalIndex == null) {
      continue;
    }
    CoreMap canonicalMention = entityMentions.get(canonicalIndex);

    // add canonical entity mention info to quote
    quote.set(CanonicalMentionAnnotation.class,
        canonicalMention.get(CoreAnnotations.TextAnnotation.class));

    // set first and last tokens of canonical entity mention;
    // the end is stored as the TokenBeginAnnotation of the last token
    List<CoreLabel> canonicalTokens =
        canonicalMention.get(CoreAnnotations.TokensAnnotation.class);
    CoreLabel firstCanonicalToken = canonicalTokens.get(0);
    CoreLabel lastCanonicalToken = canonicalTokens.get(canonicalTokens.size() - 1);
    quote.set(CanonicalMentionBeginAnnotation.class,
        firstCanonicalToken.get(CoreAnnotations.TokenBeginAnnotation.class));
    quote.set(CanonicalMentionEndAnnotation.class,
        lastCanonicalToken.get(CoreAnnotations.TokenBeginAnnotation.class));
  }
}
Aggregations