Search in sources :

Example 1 with ParagraphAnnotator

use of edu.stanford.nlp.paragraphs.ParagraphAnnotator in project CoreNLP by stanfordnlp.

From the class QuoteAttributionAnnotator, method annotate:

/**
 * Runs the quote attribution steps over {@code annotation}, modifying it in place.
 *
 * <p>In order: optionally feeds the annotation's entity mentions into the character map, applies
 * paragraph annotation, sets up the pronoun coref map, applies chapter annotation, adds enhanced
 * sentences and dependency parses, runs each configured quote-to-mention sieve, runs each
 * configured mention-to-speaker sieve, and finally copies canonical entity mention details onto
 * every quote whose mention-begin token belongs to an entity mention that has a canonical mention.
 *
 * @param annotation the document to process
 */
@Override
public void annotate(Annotation annotation) {
    // Only rebuild the character map from this document when that mode is enabled and the
    // document actually carries a mentions list.
    if (buildCharacterMapPerAnnotation && annotation.containsKey(CoreAnnotations.MentionsAnnotation.class)) {
        entityMentionsToCharacterMap(annotation);
    }

    // Step 0: paragraph annotation.
    // TODO: maybe move this out, definitely make it so that you can set paragraph breaks
    Properties paragraphProps = new Properties();
    paragraphProps.setProperty("paragraphBreak", "one");
    new ParagraphAnnotator(paragraphProps, false).annotate(annotation);

    // Step 1: preprocessing. The coref map is built first, then chapter numbers, then the
    // enhanced sentences, then the dependency parses; this order is kept from the original.
    Map<Integer, String> corefByPronoun = QuoteAttributionUtils.setupCoref(COREF_PATH, characterMap, annotation);
    new ChapterAnnotator().annotate(annotation);
    QuoteAttributionUtils.addEnhancedSentences(annotation, parser);
    QuoteAttributionUtils.annotateForDependencyParse(annotation, parser);

    // Step 2: quote -> mention, one sieve per comma-separated name, in the configured order.
    Map<String, QMSieve> quoteToMentionSieves = getQMMapping(annotation, corefByPronoun);
    for (String name : qmSieveList.split(",")) {
        quoteToMentionSieves.get(name).doQuoteToMention(annotation);
    }

    // Step 3: mention -> speaker, same scheme.
    Map<String, MSSieve> mentionToSpeakerSieves = getMSMapping(annotation, corefByPronoun);
    for (String name : msSieveList.split(",")) {
        mentionToSpeakerSieves.get(name).doMentionToSpeaker(annotation);
    }

    // Step 4: where possible, link each quote to a canonical entity mention.
    for (CoreMap quote : QuoteAnnotator.gatherQuotes(annotation)) {
        Integer mentionBegin = quote.get(MentionBeginAnnotation.class);
        if (mentionBegin == null) {
            continue;
        }
        CoreLabel mentionStartToken = annotation.get(CoreAnnotations.TokensAnnotation.class).get(mentionBegin);
        Integer mentionIdx = mentionStartToken.get(CoreAnnotations.EntityMentionIndexAnnotation.class);
        if (mentionIdx == null) {
            continue;
        }
        List<CoreMap> mentions = annotation.get(CoreAnnotations.MentionsAnnotation.class);
        Integer canonicalIdx = mentions.get(mentionIdx).get(CoreAnnotations.CanonicalEntityMentionIndexAnnotation.class);
        if (canonicalIdx == null) {
            continue;
        }
        CoreMap canonical = mentions.get(canonicalIdx);
        // Text of the canonical mention.
        quote.set(CanonicalMentionAnnotation.class, canonical.get(CoreAnnotations.TextAnnotation.class));
        // Begin/end are both recorded as the TokenBeginAnnotation of the canonical mention's
        // first and last token respectively.
        List<CoreLabel> canonicalTokens = canonical.get(CoreAnnotations.TokensAnnotation.class);
        CoreLabel head = canonicalTokens.get(0);
        CoreLabel tail = canonicalTokens.get(canonicalTokens.size() - 1);
        quote.set(CanonicalMentionBeginAnnotation.class, head.get(CoreAnnotations.TokenBeginAnnotation.class));
        quote.set(CanonicalMentionEndAnnotation.class, tail.get(CoreAnnotations.TokenBeginAnnotation.class));
    }
}
Also used : ChapterAnnotator(edu.stanford.nlp.quoteattribution.ChapterAnnotator) CoreAnnotation(edu.stanford.nlp.ling.CoreAnnotation) ParagraphAnnotator(edu.stanford.nlp.paragraphs.ParagraphAnnotator) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) MSSieve(edu.stanford.nlp.quoteattribution.Sieves.MSSieves.MSSieve)

Example 2 with ParagraphAnnotator

use of edu.stanford.nlp.paragraphs.ParagraphAnnotator in project CoreNLP by stanfordnlp.

From the class SupervisedSieveTraining, method main:

/**
 * Command-line entry point: loads a props file, annotates the document it points to, and trains.
 *
 * <p>The props file is read from {@code <base directory>/ExtractQuotesXMLScripts/<props file>}.
 * It must define {@code charactersPath}, {@code booknlpCoref} and {@code modelPath}, which are
 * forwarded to the {@code QuoteAttributionAnnotator}; its {@code file} entry names the XML input.
 *
 * @param args optional; {@code args[0]} overrides the base directory (with or without a trailing
 *     {@code /}) and {@code args[1]} overrides the props file name
 * @throws IllegalArgumentException if one of the forwarded properties is missing from the props file
 * @throws Exception if loading, annotation, or training fails
 */
public static void main(String[] args) throws Exception {
    String home = "/home/mjfang/action_grammars/";
    // make the first argument one for a base directory
    String specificFile = "1PPDevUncollapsed.props";
    if (args.length >= 1) {
        home = args[0];
    }
    if (args.length >= 2) {
        specificFile = args[1];
    }
    System.out.println("Base directory: " + home);
    // The path below is built by plain concatenation, so a base directory given without a
    // trailing separator would be glued onto the next path segment. An empty base directory is
    // left alone so it still resolves relative to the working directory.
    String baseDir = home;
    if (!baseDir.isEmpty() && !baseDir.endsWith("/")) {
        baseDir = baseDir + "/";
    }
    String propsPath = baseDir + "ExtractQuotesXMLScripts/" + specificFile;
    Properties props = StringUtils.propFileToProperties(propsPath);
    XMLToAnnotation.Data data = XMLToAnnotation.readXMLFormat(props.getProperty("file"));
    // NOTE(review): QuoteAttributionAnnotator.annotate may already run paragraph and chapter
    // annotation itself — confirm whether the explicit passes here are still needed.
    Properties propsPara = new Properties();
    propsPara.setProperty("paragraphBreak", "one");
    ParagraphAnnotator pa = new ParagraphAnnotator(propsPara, false);
    pa.annotate(data.doc);
    Properties annotatorProps = new Properties();
    // Forward the required settings (e.g. "characterList.txt", "model.ser"). Properties rejects
    // null values with a bare NullPointerException, so fail early with the offending key instead.
    for (String key : new String[] {"charactersPath", "booknlpCoref", "modelPath"}) {
        String value = props.getProperty(key);
        if (value == null) {
            throw new IllegalArgumentException("Missing required property '" + key + "' in " + propsPath);
        }
        annotatorProps.setProperty(key, value);
    }
    QuoteAttributionAnnotator qaa = new QuoteAttributionAnnotator(annotatorProps);
    qaa.annotate(data.doc);
    ChapterAnnotator ca = new ChapterAnnotator();
    ca.annotate(data.doc);
    train(data, annotatorProps);
}
Also used : ParagraphAnnotator(edu.stanford.nlp.paragraphs.ParagraphAnnotator) QuoteAttributionAnnotator(edu.stanford.nlp.pipeline.QuoteAttributionAnnotator)

Aggregations

ParagraphAnnotator (edu.stanford.nlp.paragraphs.ParagraphAnnotator): 2; CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations): 1; CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation): 1; CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 1; CoreLabel (edu.stanford.nlp.ling.CoreLabel): 1; QuoteAttributionAnnotator (edu.stanford.nlp.pipeline.QuoteAttributionAnnotator): 1; ChapterAnnotator (edu.stanford.nlp.quoteattribution.ChapterAnnotator): 1; MSSieve (edu.stanford.nlp.quoteattribution.Sieves.MSSieves.MSSieve): 1