Search in sources :

Example 1 with ChapterAnnotator

Use of edu.stanford.nlp.quoteattribution.ChapterAnnotator in the project CoreNLP by stanfordnlp.

From the class QuoteAttributionAnnotator, method annotate.

/**
 * Runs the quote-attribution pipeline over {@code annotation}, in place.
 *
 * <p>Steps, in order: optionally fill the character map from the document's entity
 * mentions, add paragraph annotations, set up the pronoun coref map, annotate chapters,
 * add enhanced sentences and dependency parses, run the configured quote-to-mention
 * sieves, run the configured mention-to-speaker sieves, and finally copy canonical
 * entity mention information onto every quote whose mention token belongs to an
 * entity mention that has a canonical entity mention.
 *
 * @param annotation the document annotation to process; it is modified in place
 */
@Override
public void annotate(Annotation annotation) {
    // When configured to build the character map per annotation, feed it from the
    // document's entity mentions (only possible if the mentions key is present).
    if (buildCharacterMapPerAnnotation && annotation.containsKey(CoreAnnotations.MentionsAnnotation.class)) {
        entityMentionsToCharacterMap(annotation);
    }

    // Step 0: paragraph annotations, using the "one" paragraph-break setting.
    // TODO: maybe move this out, definitely make it so that you can set paragraph breaks
    Properties paragraphProps = new Properties();
    paragraphProps.setProperty("paragraphBreak", "one");
    new ParagraphAnnotator(paragraphProps, false).annotate(annotation);

    // Step 1: preprocessing.
    // 1a) coref setup; the resulting map is handed to both sieve families below
    Map<Integer, String> corefByPronoun = QuoteAttributionUtils.setupCoref(COREF_PATH, characterMap, annotation);
    // 1b) chapter annotation (useful for denoting chapter boundaries)
    ChapterAnnotator chapterAnnotator = new ChapterAnnotator();
    chapterAnnotator.annotate(annotation);
    // 1c) enhanced sentences, to incorporate sentences across paragraphs
    QuoteAttributionUtils.addEnhancedSentences(annotation, parser);
    // 1d) dependency parses of the quote-removed sentences
    QuoteAttributionUtils.annotateForDependencyParse(annotation, parser);

    // Step 2: quote -> mention, running the sieves in the order listed in qmSieveList.
    Map<String, QMSieve> quoteToMentionSieves = getQMMapping(annotation, corefByPronoun);
    for (String name : qmSieveList.split(",")) {
        QMSieve sieve = quoteToMentionSieves.get(name);
        sieve.doQuoteToMention(annotation);
    }

    // Step 3: mention -> speaker, running the sieves in the order listed in msSieveList.
    // The mapping is built only after the quote -> mention sieves have run.
    Map<String, MSSieve> mentionToSpeakerSieves = getMSMapping(annotation, corefByPronoun);
    for (String name : msSieveList.split(",")) {
        MSSieve sieve = mentionToSpeakerSieves.get(name);
        sieve.doMentionToSpeaker(annotation);
    }

    // Step 4: see whether any speakers can be matched to a canonical entity mention.
    // The document-level token and mention lists are the same for every quote.
    List<CoreLabel> documentTokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
    List<CoreMap> documentMentions = annotation.get(CoreAnnotations.MentionsAnnotation.class);
    for (CoreMap quote : QuoteAnnotator.gatherQuotes(annotation)) {
        Integer mentionBegin = quote.get(MentionBeginAnnotation.class);
        if (mentionBegin == null) {
            // no mention recorded for this quote
            continue;
        }

        CoreLabel mentionFirstToken = documentTokens.get(mentionBegin);
        Integer mentionIndex = mentionFirstToken.get(CoreAnnotations.EntityMentionIndexAnnotation.class);
        if (mentionIndex == null) {
            // the mention's first token is not part of an entity mention
            continue;
        }

        CoreMap mention = documentMentions.get(mentionIndex);
        Integer canonicalIndex = mention.get(CoreAnnotations.CanonicalEntityMentionIndexAnnotation.class);
        if (canonicalIndex == null) {
            // the entity mention has no canonical entity mention
            continue;
        }

        // record the canonical entity mention's text on the quote
        CoreMap canonicalMention = documentMentions.get(canonicalIndex);
        String canonicalText = canonicalMention.get(CoreAnnotations.TextAnnotation.class);
        quote.set(CanonicalMentionAnnotation.class, canonicalText);

        // record the canonical entity mention's first and last token positions
        List<CoreLabel> canonicalTokens = canonicalMention.get(CoreAnnotations.TokensAnnotation.class);
        CoreLabel canonicalFirst = canonicalTokens.get(0);
        CoreLabel canonicalLast = canonicalTokens.get(canonicalTokens.size() - 1);
        quote.set(CanonicalMentionBeginAnnotation.class, canonicalFirst.get(CoreAnnotations.TokenBeginAnnotation.class));
        // NOTE(review): the end is taken from the LAST token's TokenBeginAnnotation,
        // which presumably makes it an inclusive end index -- confirm against consumers.
        quote.set(CanonicalMentionEndAnnotation.class, canonicalLast.get(CoreAnnotations.TokenBeginAnnotation.class));
    }
}
Also used : ChapterAnnotator(edu.stanford.nlp.quoteattribution.ChapterAnnotator) CoreAnnotation(edu.stanford.nlp.ling.CoreAnnotation) ParagraphAnnotator(edu.stanford.nlp.paragraphs.ParagraphAnnotator) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) MSSieve(edu.stanford.nlp.quoteattribution.Sieves.MSSieves.MSSieve)

Aggregations

CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)1 CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation)1 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)1 CoreLabel (edu.stanford.nlp.ling.CoreLabel)1 ParagraphAnnotator (edu.stanford.nlp.paragraphs.ParagraphAnnotator)1 ChapterAnnotator (edu.stanford.nlp.quoteattribution.ChapterAnnotator)1 MSSieve (edu.stanford.nlp.quoteattribution.Sieves.MSSieves.MSSieve)1