Use of edu.stanford.nlp.ie.machinereading.structure.EntityMention in project CoreNLP by stanfordnlp.
The class RelationExtractorAnnotator, method annotate().
@Override
public void annotate(Annotation annotation) {
  // extract entities and relations
  Annotation output = mr.annotate(annotation);
  // transfer entities/relations back to the original annotation
  List<CoreMap> outputSentences = output.get(SentencesAnnotation.class);
  List<CoreMap> origSentences = annotation.get(SentencesAnnotation.class);
  for (int i = 0; i < outputSentences.size(); i++) {
    CoreMap outSent = outputSentences.get(i);
    CoreMap origSent = origSentences.get(i);
    // set entities
    List<EntityMention> entities = outSent.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
    origSent.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, entities);
    if (verbose && entities != null) {
      log.info("Extracted the following entities:");
      for (EntityMention e : entities) {
        log.info("\t" + e);
      }
    }
    // set relations
    List<RelationMention> relations = outSent.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
    origSent.set(MachineReadingAnnotations.RelationMentionsAnnotation.class, relations);
    if (verbose && relations != null) {
      log.info("Extracted the following relations:");
      for (RelationMention r : relations) {
        if (!r.getType().equals(RelationMention.UNRELATED)) {
          log.info(r);
        }
      }
    }
  }
}
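For context, a minimal usage sketch showing how this annotator is typically reached. This is an assumption-laden sketch, not part of the snippet above: the demo class name and the input sentence are made up, and the annotator name "relation" plus the upstream annotator list follow conventional CoreNLP configuration for RelationExtractorAnnotator.

import java.util.List;
import java.util.Properties;
import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations;
import edu.stanford.nlp.ie.machinereading.structure.RelationMention;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class RelationExtractionDemo {  // hypothetical demo class
  public static void main(String[] args) {
    // "relation" runs RelationExtractorAnnotator; it needs tokens, sentences,
    // POS tags, lemmas, NER labels and parse trees from the earlier annotators.
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,relation");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation annotation = new Annotation("Barack Obama was born in Honolulu.");
    pipeline.annotate(annotation);

    // After annotate() above runs, each sentence carries the transferred mentions.
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
      List<RelationMention> relations =
          sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
      if (relations != null) {
        for (RelationMention r : relations) {
          System.out.println(r);
        }
      }
    }
  }
}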
Use of edu.stanford.nlp.ie.machinereading.structure.EntityMention in project CoreNLP by stanfordnlp.
The class TextOutputter, method print().
/**
 * The meat of the outputter
 */
private static void print(Annotation annotation, PrintWriter pw, Options options) throws IOException {
  double beam = options.beamPrintingOption;
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  // Display docid if available
  String docId = annotation.get(CoreAnnotations.DocIDAnnotation.class);
  if (docId != null) {
    List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
    int nSentences = (sentences != null) ? sentences.size() : 0;
    int nTokens = (tokens != null) ? tokens.size() : 0;
    pw.printf("Document: ID=%s (%d sentences, %d tokens)%n", docId, nSentences, nTokens);
  }
  // Display doctitle if available
  String docTitle = annotation.get(CoreAnnotations.DocTitleAnnotation.class);
  if (docTitle != null) {
    pw.printf("Document Title: %s%n", docTitle);
  }
  // Display docdate if available
  String docDate = annotation.get(CoreAnnotations.DocDateAnnotation.class);
  if (docDate != null) {
    pw.printf("Document Date: %s%n", docDate);
  }
  // Display doctype if available
  String docType = annotation.get(CoreAnnotations.DocTypeAnnotation.class);
  if (docType != null) {
    pw.printf("Document Type: %s%n", docType);
  }
  // Display docsourcetype if available
  String docSourceType = annotation.get(CoreAnnotations.DocSourceTypeAnnotation.class);
  if (docSourceType != null) {
    pw.printf("Document Source Type: %s%n", docSourceType);
  }
  // display each sentence in this annotation
  if (sentences != null) {
    for (int i = 0, sz = sentences.size(); i < sz; i++) {
      CoreMap sentence = sentences.get(i);
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      String sentiment = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
      if (sentiment == null) {
        sentiment = "";
      } else {
        sentiment = ", sentiment: " + sentiment;
      }
      pw.printf("Sentence #%d (%d tokens%s):%n", (i + 1), tokens.size(), sentiment);
      String text = sentence.get(CoreAnnotations.TextAnnotation.class);
      pw.println(text);
      // display the token-level annotations
      String[] tokenAnnotations = { "Text", "PartOfSpeech", "Lemma", "Answer", "NamedEntityTag", "CharacterOffsetBegin", "CharacterOffsetEnd", "NormalizedNamedEntityTag", "Timex", "TrueCase", "TrueCaseText", "SentimentClass", "WikipediaEntity" };
      for (CoreLabel token : tokens) {
        pw.print(token.toShorterString(tokenAnnotations));
        pw.println();
      }
      // display the parse tree for this sentence
      Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
      if (tree != null) {
        options.constituentTreePrinter.printTree(tree, pw);
      }
      // display the dependency graph, if present; dependencies may have been
      // turned off, or may be unavailable when running CoreNLP on a
      // language which doesn't have dependencies, for example
      if (sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class) != null) {
        pw.print(sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class).toList());
        pw.println();
      }
      // display MachineReading entities and relations
      List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
      if (entities != null) {
        pw.println("Extracted the following MachineReading entity mentions:");
        for (EntityMention e : entities) {
          pw.print('\t');
          pw.println(e);
        }
      }
      List<RelationMention> relations = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
      if (relations != null) {
        pw.println("Extracted the following MachineReading relation mentions:");
        for (RelationMention r : relations) {
          if (r.printableObject(beam)) {
            pw.println(r);
          }
        }
      }
      // display OpenIE triples
      Collection<RelationTriple> openieTriples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
      if (openieTriples != null && openieTriples.size() > 0) {
        pw.println("Extracted the following Open IE triples:");
        for (RelationTriple triple : openieTriples) {
          pw.println(OpenIE.tripleToString(triple, docId, sentence));
        }
      }
      // display KBP triples
      Collection<RelationTriple> kbpTriples = sentence.get(CoreAnnotations.KBPTriplesAnnotation.class);
      if (kbpTriples != null && kbpTriples.size() > 0) {
        pw.println("Extracted the following KBP triples:");
        for (RelationTriple triple : kbpTriples) {
          pw.println(triple.toString());
        }
      }
    }
  }
  // display the old-style doc-level coref annotations
  // this is not supported anymore!
  //String corefAnno = annotation.get(CorefPLAnnotation.class);
  //if(corefAnno != null) os.println(corefAnno);
  // display the new-style coreference graph
  Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  if (corefChains != null && sentences != null) {
    for (CorefChain chain : corefChains.values()) {
      CorefChain.CorefMention representative = chain.getRepresentativeMention();
      boolean outputHeading = false;
      for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
        if (mention == representative)
          continue;
        if (!outputHeading) {
          outputHeading = true;
          pw.println("Coreference set:");
        }
        // all offsets start at 1!
        pw.printf("\t(%d,%d,[%d,%d]) -> (%d,%d,[%d,%d]), that is: \"%s\" -> \"%s\"%n", mention.sentNum, mention.headIndex, mention.startIndex, mention.endIndex, representative.sentNum, representative.headIndex, representative.startIndex, representative.endIndex, mention.mentionSpan, representative.mentionSpan);
      }
    }
  }
  // display quotes if available
  if (annotation.get(CoreAnnotations.QuotationsAnnotation.class) != null) {
    pw.println("Extracted quotes: ");
    List<CoreMap> allQuotes = QuoteAnnotator.gatherQuotes(annotation);
    for (CoreMap quote : allQuotes) {
      pw.printf("[QuotationIndexAnnotation=%d, CharacterOffsetBegin=%d, Text=%s]%n", quote.get(CoreAnnotations.QuotationIndexAnnotation.class), quote.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class), quote.get(CoreAnnotations.TextAnnotation.class));
    }
  }
  pw.flush();
}
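This print method is private; callers normally reach it through the public pretty-print entry points. A minimal sketch of that path, assuming the standard StanfordCoreNLP.prettyPrint API (the demo class name and input sentence are made up):

import java.util.Properties;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class TextOutputDemo {  // hypothetical demo class
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation annotation = new Annotation("Stanford University is located in California.");
    pipeline.annotate(annotation);

    // prettyPrint delegates to TextOutputter, which ends up in print(...) above.
    pipeline.prettyPrint(annotation, System.out);
  }
}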
Use of edu.stanford.nlp.ie.machinereading.structure.EntityMention in project CoreNLP by stanfordnlp.
The class AceReader, method convertAceEntityMention().
/**
 * Converts an {@link AceEntityMention} to an {@link EntityMention}.
 *
 * @param entityMention {@link AceEntityMention} to convert
 * @param docId ID of the document containing this entity mention
 * @param sentence Sentence containing this mention
 * @param tokenOffset Offset of the sentence's first token within the document,
 *        used to convert extent positions to sentence-relative offsets
 *        (the ace.reader stores absolute token offsets from the beginning of the document,
 *        but we need token offsets from the beginning of the sentence, so we subtract tokenOffset)
 * @return The entity as an {@link EntityMention}
 */
private EntityMention convertAceEntityMention(AceEntityMention entityMention, String docId, CoreMap sentence, int tokenOffset) {
  //log.info("TYPE is " + entityMention.getParent().getType());
  //log.info("SUBTYPE is " + entityMention.getParent().getSubtype());
  //log.info("LDCTYPE is " + entityMention.getLdctype());
  AceCharSeq ext = entityMention.getExtent();
  AceCharSeq head = entityMention.getHead();
  int extStart = ext.getTokenStart() - tokenOffset;
  int extEnd = ext.getTokenEnd() - tokenOffset + 1;
  if (extStart < 0) {
    logger.severe("READER ERROR: Invalid extent start " + extStart + " for entity mention " + entityMention.getId() + " in document " + docId + " in sentence " + sentence);
    logger.severe("This may happen due to incorrect EOS detection. Adjusting entity extent.");
    extStart = 0;
  }
  if (extEnd > sentence.get(CoreAnnotations.TokensAnnotation.class).size()) {
    logger.severe("READER ERROR: Invalid extent end " + extEnd + " for entity mention " + entityMention.getId() + " in document " + docId + " in sentence " + sentence);
    logger.severe("This may happen due to incorrect EOS detection. Adjusting entity extent.");
    extEnd = sentence.get(CoreAnnotations.TokensAnnotation.class).size();
  }
  int headStart = head.getTokenStart() - tokenOffset;
  int headEnd = head.getTokenEnd() - tokenOffset + 1;
  if (headStart < 0) {
    logger.severe("READER ERROR: Invalid head start " + headStart + " for entity mention " + entityMention.getId() + " in document " + docId + " in sentence " + sentence);
    logger.severe("This may happen due to incorrect EOS detection. Adjusting entity head span.");
    headStart = 0;
  }
  if (headEnd > sentence.get(CoreAnnotations.TokensAnnotation.class).size()) {
    logger.severe("READER ERROR: Invalid head end " + headEnd + " for entity mention " + entityMention.getId() + " in document " + docId + " in sentence " + sentence);
    logger.severe("This may happen due to incorrect EOS detection. Adjusting entity head span.");
    headEnd = sentence.get(CoreAnnotations.TokensAnnotation.class).size();
  }
  // must adjust due to possible incorrect EOS detection
  if (headStart < extStart) {
    headStart = extStart;
  }
  if (headEnd > extEnd) {
    headEnd = extEnd;
  }
  assert (headStart < headEnd);
  // note: the ace.reader stores absolute token offset from the beginning of the document, but
  // we need token offsets from the beginning of the sentence => adjust by tokenOffset
  // note: in ace.reader the end token position is inclusive, but
  // in our setup the end token position is exclusive => add 1 to end
  EntityMention converted = new EntityMention(entityMention.getId(), sentence, new Span(extStart, extEnd), new Span(headStart, headEnd), entityMention.getParent().getType(), entityMention.getParent().getSubtype(), entityMention.getLdctype());
  return converted;
}
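The two adjustments noted in the comments above (document-absolute to sentence-relative, inclusive end to exclusive end) are easy to get wrong. A small worked example with made-up numbers, purely to illustrate the arithmetic:

public class SpanArithmeticDemo {  // hypothetical demo class
  public static void main(String[] args) {
    int tokenOffset = 10;    // the sentence's first token is document token 10
    int aceTokenStart = 12;  // ACE extent start: document-absolute, inclusive
    int aceTokenEnd = 14;    // ACE extent end: document-absolute, inclusive

    int extStart = aceTokenStart - tokenOffset;  // 2: sentence-relative start
    int extEnd = aceTokenEnd - tokenOffset + 1;  // 5: exclusive end
    // The extent therefore covers sentence tokens 2, 3 and 4.
    System.out.println("[" + extStart + ", " + extEnd + ")");
  }
}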
Use of edu.stanford.nlp.ie.machinereading.structure.EntityMention in project cogcomp-nlp by CogComp.
The class StanfordRelationsHandler, method addView().
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
  Annotation document = new Annotation(ta.text);
  pipeline.annotate(document);
  SpanLabelView vu = new SpanLabelView(viewName, ta);
  for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
    for (RelationMention rm : sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class)) {
      if (rm.getType().equals("_NR"))
        continue;
      Map<String, Double> scores = new HashMap<>();
      for (String label : rm.getTypeProbabilities().keySet()) {
        scores.put(label, rm.getTypeProbabilities().getCount(label));
      }
      Constituent c1 = createConstituentGivenMention(rm.getEntityMentionArgs().get(0), ta);
      Constituent c2 = createConstituentGivenMention(rm.getEntityMentionArgs().get(1), ta);
      Relation r = new Relation(scores, c1, c2);
      vu.addRelation(r);
      if (!vu.containsConstituent(c1))
        vu.addConstituent(c1);
      if (!vu.containsConstituent(c2))
        vu.addConstituent(c2);
    }
  }
  for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
    for (EntityMention rm : sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class)) {
      Constituent c = createConstituentGivenMention(rm, ta);
      if (!vu.containsConstituent(c))
        vu.addConstituent(c);
    }
  }
  ta.addView(viewName, vu);
}
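One detail worth calling out: rm.getTypeProbabilities() returns a Stanford Counter rather than a java.util.Map, hence the copy loop above. A self-contained sketch of that idiom (the labels and probabilities here are made up; "_NR" is the unrelated label skipped in addView above):

import java.util.HashMap;
import java.util.Map;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;

public class CounterToMapDemo {  // hypothetical demo class
  public static void main(String[] args) {
    // Stand-in for rm.getTypeProbabilities(): relation label -> probability.
    Counter<String> typeProbabilities = new ClassicCounter<>();
    typeProbabilities.setCount("Work_For", 0.7);
    typeProbabilities.setCount("_NR", 0.3);

    // Same copy idiom as in addView above.
    Map<String, Double> scores = new HashMap<>();
    for (String label : typeProbabilities.keySet()) {
      scores.put(label, typeProbabilities.getCount(label));
    }
    System.out.println(scores);
  }
}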