Use of edu.stanford.nlp.paragraphs.ParagraphAnnotator in project CoreNLP by stanfordnlp.
The class QuoteAttributionAnnotator, method annotate:
@Override
public void annotate(Annotation annotation) {
  // boolean perDocumentCharacterMap = false;
  if (buildCharacterMapPerAnnotation) {
    if (annotation.containsKey(CoreAnnotations.MentionsAnnotation.class)) {
      // Put all mentions from this key that are NER type PERSON into the characterMap
      entityMentionsToCharacterMap(annotation);
    }
  }
  // 0. pre-preprocess the text with paragraph annotations
  // TODO: maybe move this out, definitely make it so that you can set paragraph breaks
  Properties propsPara = new Properties();
  propsPara.setProperty("paragraphBreak", "one");
  ParagraphAnnotator pa = new ParagraphAnnotator(propsPara, false);
  pa.annotate(annotation);
  // 1. preprocess the text
  // a) setup coref
  Map<Integer, String> pronounCorefMap = QuoteAttributionUtils.setupCoref(COREF_PATH, characterMap, annotation);
  // log.info("Pronoun coref map is " + pronounCorefMap);
  // annotate chapter numbers in sentences. Useful for denoting chapter boundaries
  new ChapterAnnotator().annotate(annotation);
  // to incorporate sentences across paragraphs
  QuoteAttributionUtils.addEnhancedSentences(annotation, parser);
  // annotate depparse of quote-removed sentences
  QuoteAttributionUtils.annotateForDependencyParse(annotation, parser);
  Annotation preprocessed = annotation;
  // 2. Quote->Mention annotation
  Map<String, QMSieve> qmSieves = getQMMapping(preprocessed, pronounCorefMap);
  for (String sieveName : qmSieveList.split(",")) {
    qmSieves.get(sieveName).doQuoteToMention(preprocessed);
  }
  // 3. Mention->Speaker annotation
  Map<String, MSSieve> msSieves = getMSMapping(preprocessed, pronounCorefMap);
  for (String sieveName : msSieveList.split(",")) {
    msSieves.get(sieveName).doMentionToSpeaker(preprocessed);
  }
  // see if any speakers could be matched to a canonical entity mention
  for (CoreMap quote : QuoteAnnotator.gatherQuotes(annotation)) {
    Integer firstSpeakerTokenIndex = quote.get(MentionBeginAnnotation.class);
    if (firstSpeakerTokenIndex != null) {
      CoreLabel firstSpeakerToken = annotation.get(CoreAnnotations.TokensAnnotation.class).get(firstSpeakerTokenIndex);
      Integer entityMentionIndex = firstSpeakerToken.get(CoreAnnotations.EntityMentionIndexAnnotation.class);
      if (entityMentionIndex != null) {
        // set speaker string
        CoreMap entityMention = annotation.get(CoreAnnotations.MentionsAnnotation.class).get(entityMentionIndex);
        Integer canonicalEntityMentionIndex = entityMention.get(CoreAnnotations.CanonicalEntityMentionIndexAnnotation.class);
        if (canonicalEntityMentionIndex != null) {
          CoreMap canonicalEntityMention = annotation.get(CoreAnnotations.MentionsAnnotation.class).get(canonicalEntityMentionIndex);
          // add canonical entity mention info to quote
          quote.set(CanonicalMentionAnnotation.class, canonicalEntityMention.get(CoreAnnotations.TextAnnotation.class));
          // set first and last tokens of canonical entity mention
          List<CoreLabel> canonicalEntityMentionTokens = canonicalEntityMention.get(CoreAnnotations.TokensAnnotation.class);
          CoreLabel canonicalEntityMentionFirstToken = canonicalEntityMentionTokens.get(0);
          CoreLabel canonicalEntityMentionLastToken = canonicalEntityMentionTokens.get(canonicalEntityMentionTokens.size() - 1);
          quote.set(CanonicalMentionBeginAnnotation.class, canonicalEntityMentionFirstToken.get(CoreAnnotations.TokenBeginAnnotation.class));
          quote.set(CanonicalMentionEndAnnotation.class, canonicalEntityMentionLastToken.get(CoreAnnotations.TokenBeginAnnotation.class));
        }
      }
    }
  }
}
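
For orientation, here is a minimal usage sketch of how this annotator is typically reached through the standard StanfordCoreNLP pipeline, where the "quote" annotator runs QuoteAttributionAnnotator when quote attribution is enabled. The annotator list, the quote.attributeQuotes property value, and the sample text below are illustrative assumptions, not taken from the code above.

import java.util.Properties;
import edu.stanford.nlp.pipeline.CoreDocument;
import edu.stanford.nlp.pipeline.CoreQuote;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class QuoteAttributionDemo {
  public static void main(String[] args) {
    Properties props = new Properties();
    // quote attribution relies on NER, dependency parses and coref computed upstream
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse,coref,quote");
    props.setProperty("quote.attributeQuotes", "true"); // assumed to be the default in recent versions
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    CoreDocument doc = new CoreDocument("\"I hope you will come,\" said Alice. \"Of course,\" Bob replied.");
    pipeline.annotate(doc);
    for (CoreQuote quote : doc.quotes()) {
      // speaker() is the attributed mention text; canonicalSpeaker() comes from the
      // canonical entity mention information set in the loop over gatherQuotes(...) above
      System.out.println(quote.text() + " -> " + quote.speaker().orElse("unknown")
          + " / canonical: " + quote.canonicalSpeaker().orElse("unknown"));
    }
  }
}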
Use of edu.stanford.nlp.paragraphs.ParagraphAnnotator in project CoreNLP by stanfordnlp.
The class SupervisedSieveTraining, method main:
public static void main(String[] args) throws Exception {
  String home = "/home/mjfang/action_grammars/";
  // make the first argument one for a base directory
  String specificFile = "1PPDevUncollapsed.props";
  if (args.length >= 1) {
    home = args[0];
  }
  if (args.length >= 2) {
    specificFile = args[1];
  }
  System.out.println("Base directory: " + home);
  Properties props = StringUtils.propFileToProperties(home + "ExtractQuotesXMLScripts/" + specificFile);
  XMLToAnnotation.Data data = XMLToAnnotation.readXMLFormat(props.getProperty("file"));
  Properties propsPara = new Properties();
  propsPara.setProperty("paragraphBreak", "one");
  ParagraphAnnotator pa = new ParagraphAnnotator(propsPara, false);
  pa.annotate(data.doc);
  Properties annotatorProps = new Properties();
  // "characterList.txt"
  annotatorProps.setProperty("charactersPath", props.getProperty("charactersPath"));
  annotatorProps.setProperty("booknlpCoref", props.getProperty("booknlpCoref"));
  // "model.ser"
  annotatorProps.setProperty("modelPath", props.getProperty("modelPath"));
  QuoteAttributionAnnotator qaa = new QuoteAttributionAnnotator(annotatorProps);
  qaa.annotate(data.doc);
  ChapterAnnotator ca = new ChapterAnnotator();
  ca.annotate(data.doc);
  train(data, annotatorProps);
}
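
Both examples run ParagraphAnnotator directly on an existing Annotation once sentences are available, with "paragraphBreak" set to "one" (a single newline ends a paragraph). Below is a minimal standalone sketch along the same lines, assuming a tokenize+ssplit pipeline and reading back CoreAnnotations.ParagraphIndexAnnotation; the sample text and that read-back are illustrative assumptions.

import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.paragraphs.ParagraphAnnotator;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class ParagraphAnnotatorDemo {
  public static void main(String[] args) {
    // tokenize and split sentences first; ParagraphAnnotator assigns paragraph indices to sentences
    Properties pipelineProps = new Properties();
    pipelineProps.setProperty("annotators", "tokenize,ssplit");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps);
    Annotation doc = new Annotation("First paragraph, one sentence.\nSecond paragraph. Another sentence.");
    pipeline.annotate(doc);

    Properties paraProps = new Properties();
    paraProps.setProperty("paragraphBreak", "one"); // a single newline starts a new paragraph, as above
    new ParagraphAnnotator(paraProps, false).annotate(doc);

    for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      System.out.println(sentence.get(CoreAnnotations.ParagraphIndexAnnotation.class) + "\t" + sentence);
    }
  }
}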