use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.
the class XMLOutputter method annotationToDoc.
/**
 * Converts the given annotation to an XML document using the specified options.
 *
 * <p>The document consists of a {@code root} element holding a single {@code document}
 * element. The {@code document} element receives the document-level metadata fields, a
 * {@code sentences} element with one {@code sentence} child per entry of the
 * {@code SentencesAnnotation}, and a {@code coreference} element when coref chains are present
 * and {@code addCorefGraphInfo} returns {@code true}.
 *
 * @param annotation the annotated document to render
 * @param options output options; {@code includeText}, {@code constituentTreePrinter} and
 *                {@code relationsBeam} are read here, and the whole object is handed to
 *                {@code addCorefGraphInfo}
 * @return the newly built XML document
 */
public static Document annotationToDoc(Annotation annotation, Options options) {
  //
  // create the XML document with the root node pointing to the namespace URL
  //
  Element root = new Element("root", NAMESPACE_URI);
  Document xmlDoc = new Document(root);
  ProcessingInstruction pi = new ProcessingInstruction("xml-stylesheet", "href=\"" + STYLESHEET_NAME + "\" type=\"text/xsl\"");
  xmlDoc.insertChild(pi, 0);
  Element docElem = new Element("document", NAMESPACE_URI);
  root.appendChild(docElem);

  //
  // document-level metadata
  //
  setSingleElement(docElem, "docId", NAMESPACE_URI, annotation.get(CoreAnnotations.DocIDAnnotation.class));
  setSingleElement(docElem, "docDate", NAMESPACE_URI, annotation.get(CoreAnnotations.DocDateAnnotation.class));
  setSingleElement(docElem, "docSourceType", NAMESPACE_URI, annotation.get(CoreAnnotations.DocSourceTypeAnnotation.class));
  setSingleElement(docElem, "docType", NAMESPACE_URI, annotation.get(CoreAnnotations.DocTypeAnnotation.class));
  setSingleElement(docElem, "author", NAMESPACE_URI, annotation.get(CoreAnnotations.AuthorAnnotation.class));
  setSingleElement(docElem, "location", NAMESPACE_URI, annotation.get(CoreAnnotations.LocationAnnotation.class));
  if (options.includeText) {
    setSingleElement(docElem, "text", NAMESPACE_URI, annotation.get(CoreAnnotations.TextAnnotation.class));
  }

  Element sentencesElem = new Element("sentences", NAMESPACE_URI);
  docElem.appendChild(sentencesElem);

  //
  // add one <sentence> element per sentence
  //
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  if (sentences != null) {
    // sentence ids in the output are 1-based
    int sentCount = 1;
    for (CoreMap sentence : sentences) {
      Element sentElem = new Element("sentence", NAMESPACE_URI);
      sentElem.addAttribute(new Attribute("id", Integer.toString(sentCount)));
      Integer lineNumber = sentence.get(CoreAnnotations.LineNumberAnnotation.class);
      if (lineNumber != null) {
        sentElem.addAttribute(new Attribute("line", Integer.toString(lineNumber)));
      }
      sentCount++;

      // add the word table with all token-level annotations
      Element wordTable = new Element("tokens", NAMESPACE_URI);
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      // A sentence without a TokensAnnotation yields an empty <tokens> element
      // rather than a NullPointerException.
      if (tokens != null) {
        for (int j = 0; j < tokens.size(); j++) {
          Element wordInfo = new Element("token", NAMESPACE_URI);
          // token ids in the output are 1-based
          addWordInfo(wordInfo, tokens.get(j), j + 1, NAMESPACE_URI);
          wordTable.appendChild(wordInfo);
        }
      }
      sentElem.appendChild(wordTable);

      // add tree info
      Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
      if (tree != null) {
        // add the constituent tree for this sentence
        Element parseInfo = new Element("parse", NAMESPACE_URI);
        addConstituentTreeInfo(parseInfo, tree, options.constituentTreePrinter);
        sentElem.appendChild(parseInfo);
      }

      // add the dependencies for this sentence; the other dependency flavours are only
      // written when the basic dependencies are available
      SemanticGraph basicDependencies = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
      if (basicDependencies != null) {
        Element depInfo = buildDependencyTreeInfo("basic-dependencies", basicDependencies, tokens, NAMESPACE_URI);
        if (depInfo != null) {
          sentElem.appendChild(depInfo);
        }
        depInfo = buildDependencyTreeInfo("collapsed-dependencies", sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class), tokens, NAMESPACE_URI);
        if (depInfo != null) {
          sentElem.appendChild(depInfo);
        }
        depInfo = buildDependencyTreeInfo("collapsed-ccprocessed-dependencies", sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class), tokens, NAMESPACE_URI);
        if (depInfo != null) {
          sentElem.appendChild(depInfo);
        }
        depInfo = buildDependencyTreeInfo("enhanced-dependencies", sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class), tokens, NAMESPACE_URI);
        if (depInfo != null) {
          sentElem.appendChild(depInfo);
        }
        depInfo = buildDependencyTreeInfo("enhanced-plus-plus-dependencies", sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class), tokens, NAMESPACE_URI);
        if (depInfo != null) {
          sentElem.appendChild(depInfo);
        }
      }

      // add Open IE triples
      Collection<RelationTriple> openieTriples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
      if (openieTriples != null) {
        Element openieElem = new Element("openie", NAMESPACE_URI);
        addTriples(openieTriples, openieElem, NAMESPACE_URI);
        sentElem.appendChild(openieElem);
      }

      // add KBP triples
      Collection<RelationTriple> kbpTriples = sentence.get(CoreAnnotations.KBPTriplesAnnotation.class);
      if (kbpTriples != null) {
        Element kbpElem = new Element("kbp", NAMESPACE_URI);
        addTriples(kbpTriples, kbpElem, NAMESPACE_URI);
        sentElem.appendChild(kbpElem);
      }

      // add the MR entities and relations; relations are only written alongside entities
      List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
      List<RelationMention> relations = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
      if (entities != null && !entities.isEmpty()) {
        Element mrElem = new Element("MachineReading", NAMESPACE_URI);
        Element entElem = new Element("entities", NAMESPACE_URI);
        addEntities(entities, entElem, NAMESPACE_URI);
        mrElem.appendChild(entElem);
        if (relations != null) {
          Element relElem = new Element("relations", NAMESPACE_URI);
          addRelations(relations, relElem, NAMESPACE_URI, options.relationsBeam);
          mrElem.appendChild(relElem);
        }
        sentElem.appendChild(mrElem);
      }

      // add sentiment as attributes of this sentence
      Tree sentimentTree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
      if (sentimentTree != null) {
        int sentiment = RNNCoreAnnotations.getPredictedClass(sentimentTree);
        sentElem.addAttribute(new Attribute("sentimentValue", Integer.toString(sentiment)));
        String sentimentClass = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
        // the class label is written without spaces
        sentElem.addAttribute(new Attribute("sentiment", sentimentClass.replaceAll(" ", "")));
      }

      // add the sentence to the root
      sentencesElem.appendChild(sentElem);
    }
  }

  //
  // add the coref graph
  //
  Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  if (corefChains != null) {
    Element corefInfo = new Element("coreference", NAMESPACE_URI);
    // only attach the element when the helper reports success
    if (addCorefGraphInfo(options, corefInfo, sentences, corefChains, NAMESPACE_URI)) {
      docElem.appendChild(corefInfo);
    }
  }
  return xmlDoc;
}
use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.
the class JSONOutputter method print.
/**
 * {@inheritDoc}
 *
 * <p>Emits the document as one JSON object: the document-level metadata first, followed by
 * "sentences", "corefs" and "quotes", each only when the corresponding annotation is present.
 * The underlying writer is flushed at the end but not closed.
 */
// It's lying; we need the "redundant" casts (as of 2014-09-08)
@SuppressWarnings("RedundantCast")
@Override
public void print(Annotation doc, OutputStream target, Options options) throws IOException {
  PrintWriter out = new PrintWriter(IOUtils.encodedOutputStreamWriter(target, options.encoding));
  JSONWriter json = new JSONWriter(out, options);
  json.object(docWriter -> {
    // ---- document-level metadata ----
    docWriter.set("docId", doc.get(CoreAnnotations.DocIDAnnotation.class));
    docWriter.set("docDate", doc.get(CoreAnnotations.DocDateAnnotation.class));
    docWriter.set("docSourceType", doc.get(CoreAnnotations.DocSourceTypeAnnotation.class));
    docWriter.set("docType", doc.get(CoreAnnotations.DocTypeAnnotation.class));
    docWriter.set("author", doc.get(CoreAnnotations.AuthorAnnotation.class));
    docWriter.set("location", doc.get(CoreAnnotations.LocationAnnotation.class));
    if (options.includeText) {
      docWriter.set("text", doc.get(CoreAnnotations.TextAnnotation.class));
    }

    // ---- sentences ----
    if (doc.get(CoreAnnotations.SentencesAnnotation.class) != null) {
      docWriter.set("sentences", doc.get(CoreAnnotations.SentencesAnnotation.class).stream().map(sentence -> (Consumer<Writer>) (Writer sentWriter) -> {
        sentWriter.set("id", sentence.get(CoreAnnotations.SentenceIDAnnotation.class));
        sentWriter.set("index", sentence.get(CoreAnnotations.SentenceIndexAnnotation.class));
        sentWriter.set("line", sentence.get(CoreAnnotations.LineNumberAnnotation.class));

        // Constituency parse: the default printer is swapped for a one-line printer
        TreePrint printer = (options.constituentTreePrinter == AnnotationOutputter.DEFAULT_CONSTITUENT_TREE_PRINTER)
            ? new TreePrint("oneline")
            : options.constituentTreePrinter;
        StringWriter parseBuffer = new StringWriter();
        printer.printTree(sentence.get(TreeCoreAnnotations.TreeAnnotation.class), new PrintWriter(parseBuffer, true));
        String parse = parseBuffer.toString().trim();
        // This marker string means there is no usable parse, so the key is omitted
        if (!"SENTENCE_SKIPPED_OR_UNPARSABLE".equals(parse)) {
          sentWriter.set("parse", parse);
        }

        // Dependency graphs
        sentWriter.set("basicDependencies", buildDependencyTree(sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class)));
        sentWriter.set("enhancedDependencies", buildDependencyTree(sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class)));
        sentWriter.set("enhancedPlusPlusDependencies", buildDependencyTree(sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class)));

        // Sentiment
        Tree sentimentTree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
        if (sentimentTree != null) {
          int predictedClass = RNNCoreAnnotations.getPredictedClass(sentimentTree);
          String sentimentLabel = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
          sentWriter.set("sentimentValue", Integer.toString(predictedClass));
          sentWriter.set("sentiment", sentimentLabel.replaceAll(" ", ""));
        }

        // Open IE triples
        Collection<RelationTriple> openIETriples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
        if (openIETriples != null) {
          sentWriter.set("openie", openIETriples.stream().map(extraction -> (Consumer<Writer>) (Writer extractionWriter) -> {
            extractionWriter.set("subject", extraction.subjectGloss());
            extractionWriter.set("subjectSpan", Span.fromPair(extraction.subjectTokenSpan()));
            extractionWriter.set("relation", extraction.relationGloss());
            extractionWriter.set("relationSpan", Span.fromPair(extraction.relationTokenSpan()));
            extractionWriter.set("object", extraction.objectGloss());
            extractionWriter.set("objectSpan", Span.fromPair(extraction.objectTokenSpan()));
          }));
        }

        // KBP triples (same shape as the Open IE triples)
        Collection<RelationTriple> kbpTriples = sentence.get(CoreAnnotations.KBPTriplesAnnotation.class);
        if (kbpTriples != null) {
          sentWriter.set("kbp", kbpTriples.stream().map(kbpTriple -> (Consumer<Writer>) (Writer kbpWriter) -> {
            kbpWriter.set("subject", kbpTriple.subjectGloss());
            kbpWriter.set("subjectSpan", Span.fromPair(kbpTriple.subjectTokenSpan()));
            kbpWriter.set("relation", kbpTriple.relationGloss());
            kbpWriter.set("relationSpan", Span.fromPair(kbpTriple.relationTokenSpan()));
            kbpWriter.set("object", kbpTriple.objectGloss());
            kbpWriter.set("objectSpan", Span.fromPair(kbpTriple.objectTokenSpan()));
          }));
        }

        // Entity mentions
        if (sentence.get(CoreAnnotations.MentionsAnnotation.class) != null) {
          Integer sentenceOffset = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
          sentWriter.set("entitymentions", sentence.get(CoreAnnotations.MentionsAnnotation.class).stream().map(entityMention -> (Consumer<Writer>) (Writer entityWriter) -> {
            Integer docTokenBegin = entityMention.get(CoreAnnotations.TokenBeginAnnotation.class);
            Integer docTokenEnd = entityMention.get(CoreAnnotations.TokenEndAnnotation.class);
            entityWriter.set("docTokenBegin", docTokenBegin);
            entityWriter.set("docTokenEnd", docTokenEnd);
            // Sentence-relative offsets need both the mention index and the sentence start
            if (sentenceOffset != null) {
              if (docTokenBegin != null) {
                entityWriter.set("tokenBegin", docTokenBegin - sentenceOffset);
              }
              if (docTokenEnd != null) {
                entityWriter.set("tokenEnd", docTokenEnd - sentenceOffset);
              }
            }
            entityWriter.set("text", entityMention.get(CoreAnnotations.TextAnnotation.class));
            entityWriter.set("characterOffsetBegin", entityMention.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
            entityWriter.set("characterOffsetEnd", entityMention.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
            entityWriter.set("ner", entityMention.get(CoreAnnotations.NamedEntityTagAnnotation.class));
            entityWriter.set("normalizedNER", entityMention.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
            entityWriter.set("entitylink", entityMention.get(CoreAnnotations.WikipediaEntityAnnotation.class));
            Timex mentionTimex = entityMention.get(TimeAnnotations.TimexAnnotation.class);
            if (mentionTimex != null) {
              Timex.Range mentionRange = mentionTimex.range();
              entityWriter.set("timex", (Consumer<Writer>) mentionTimexWriter -> {
                mentionTimexWriter.set("tid", mentionTimex.tid());
                mentionTimexWriter.set("type", mentionTimex.timexType());
                mentionTimexWriter.set("value", mentionTimex.value());
                mentionTimexWriter.set("altValue", mentionTimex.altVal());
                mentionTimexWriter.set("range", (mentionRange != null) ? (Consumer<Writer>) mentionRangeWriter -> {
                  mentionRangeWriter.set("begin", mentionRange.begin);
                  mentionRangeWriter.set("end", mentionRange.end);
                  mentionRangeWriter.set("duration", mentionRange.duration);
                } : null);
              });
            }
          }));
        }

        // Tokens
        if (sentence.get(CoreAnnotations.TokensAnnotation.class) != null) {
          sentWriter.set("tokens", sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(token -> (Consumer<Writer>) (Writer tokenWriter) -> {
            tokenWriter.set("index", token.index());
            tokenWriter.set("word", token.word());
            tokenWriter.set("originalText", token.originalText());
            tokenWriter.set("lemma", token.lemma());
            tokenWriter.set("characterOffsetBegin", token.beginPosition());
            tokenWriter.set("characterOffsetEnd", token.endPosition());
            tokenWriter.set("pos", token.tag());
            tokenWriter.set("ner", token.ner());
            tokenWriter.set("normalizedNER", token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
            tokenWriter.set("speaker", token.get(CoreAnnotations.SpeakerAnnotation.class));
            tokenWriter.set("truecase", token.get(CoreAnnotations.TrueCaseAnnotation.class));
            tokenWriter.set("truecaseText", token.get(CoreAnnotations.TrueCaseTextAnnotation.class));
            tokenWriter.set("before", token.get(CoreAnnotations.BeforeAnnotation.class));
            tokenWriter.set("after", token.get(CoreAnnotations.AfterAnnotation.class));
            tokenWriter.set("entitylink", token.get(CoreAnnotations.WikipediaEntityAnnotation.class));
            Timex tokenTimex = token.get(TimeAnnotations.TimexAnnotation.class);
            if (tokenTimex != null) {
              Timex.Range tokenRange = tokenTimex.range();
              tokenWriter.set("timex", (Consumer<Writer>) tokenTimexWriter -> {
                tokenTimexWriter.set("tid", tokenTimex.tid());
                tokenTimexWriter.set("type", tokenTimex.timexType());
                tokenTimexWriter.set("value", tokenTimex.value());
                tokenTimexWriter.set("altValue", tokenTimex.altVal());
                tokenTimexWriter.set("range", (tokenRange != null) ? (Consumer<Writer>) tokenRangeWriter -> {
                  tokenRangeWriter.set("begin", tokenRange.begin);
                  tokenRangeWriter.set("end", tokenRange.end);
                  tokenRangeWriter.set("duration", tokenRange.duration);
                } : null);
              });
            }
          }));
        }
      }));
    }

    // ---- coreference chains, keyed by chain id ----
    Map<Integer, CorefChain> corefChains = doc.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    if (corefChains != null) {
      docWriter.set("corefs", (Consumer<Writer>) corefsWriter -> {
        for (CorefChain chain : corefChains.values()) {
          CorefChain.CorefMention representativeMention = chain.getRepresentativeMention();
          corefsWriter.set(Integer.toString(chain.getChainID()), chain.getMentionsInTextualOrder().stream().map(corefMention -> (Consumer<Writer>) (Writer corefWriter) -> {
            corefWriter.set("id", corefMention.mentionID);
            corefWriter.set("text", corefMention.mentionSpan);
            corefWriter.set("type", corefMention.mentionType);
            corefWriter.set("number", corefMention.number);
            corefWriter.set("gender", corefMention.gender);
            corefWriter.set("animacy", corefMention.animacy);
            corefWriter.set("startIndex", corefMention.startIndex);
            corefWriter.set("endIndex", corefMention.endIndex);
            corefWriter.set("headIndex", corefMention.headIndex);
            corefWriter.set("sentNum", corefMention.sentNum);
            corefWriter.set("position", Arrays.stream(corefMention.position.elems()).boxed().collect(Collectors.toList()));
            // identity comparison: is this the chain's representative mention object?
            corefWriter.set("isRepresentativeMention", corefMention == representativeMention);
          }));
        }
      });
    }

    // ---- quotes ----
    if (doc.get(CoreAnnotations.QuotationsAnnotation.class) != null) {
      List<CoreMap> gatheredQuotes = QuoteAnnotator.gatherQuotes(doc);
      docWriter.set("quotes", gatheredQuotes.stream().map(quote -> (Consumer<Writer>) (Writer quoteWriter) -> {
        quoteWriter.set("id", quote.get(CoreAnnotations.QuotationIndexAnnotation.class));
        quoteWriter.set("text", quote.get(CoreAnnotations.TextAnnotation.class));
        quoteWriter.set("beginIndex", quote.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
        quoteWriter.set("endIndex", quote.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
        quoteWriter.set("beginToken", quote.get(CoreAnnotations.TokenBeginAnnotation.class));
        quoteWriter.set("endToken", quote.get(CoreAnnotations.TokenEndAnnotation.class));
        quoteWriter.set("beginSentence", quote.get(CoreAnnotations.SentenceBeginAnnotation.class));
        quoteWriter.set("endSentence", quote.get(CoreAnnotations.SentenceEndAnnotation.class));
      }));
    }
  });
  // flush
  json.writer.flush();
}
use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.
the class ProtobufAnnotationSerializer method fromProto.
/**
 * Rebuild a {@link RelationTriple} from its serialized (protocol buffer) form.
 * Both a document and a sentence index are needed:
 * (1) the document supplies the docid and tokens used to rebuild the dependency tree, and
 * (2) the subject, relation and object tokens are looked up in the document's sentences.
 *
 * @param proto The serialized relation triple.
 * @param doc The document being deserialized. It should already have its docid annotation
 *            set, if there is one.
 * @param sentenceIndex The index of the sentence this extraction should be attached to.
 *
 * @return The relation triple as a Java object, corresponding to the serialized proto.
 */
public static RelationTriple fromProto(CoreNLPProtos.RelationTriple proto, Annotation doc, int sentenceIndex) {
  if (Thread.interrupted()) {
    throw new RuntimeInterruptedException();
  }

  // Resolve the subject span: each location names a sentence and a token within it
  List<CoreLabel> subjectTokens = proto.getSubjectTokensList().stream()
      .map(loc -> doc.get(SentencesAnnotation.class).get(loc.getSentenceIndex()).get(TokensAnnotation.class).get(loc.getTokenIndex()))
      .collect(Collectors.toList());

  // Resolve the relation span, or make a dummy word if no real span was stored
  List<CoreLabel> relationTokens;
  if (proto.getRelationTokensCount() != 0) {
    relationTokens = proto.getRelationTokensList().stream()
        .map(loc -> doc.get(SentencesAnnotation.class).get(loc.getSentenceIndex()).get(TokensAnnotation.class).get(loc.getTokenIndex()))
        .collect(Collectors.toList());
  } else {
    relationTokens = Collections.singletonList(new CoreLabel(new Word(proto.getRelation())));
  }

  // Resolve the object span
  List<CoreLabel> objectTokens = proto.getObjectTokensList().stream()
      .map(loc -> doc.get(SentencesAnnotation.class).get(loc.getSentenceIndex()).get(TokensAnnotation.class).get(loc.getTokenIndex()))
      .collect(Collectors.toList());

  // Build the triple, with its dependency tree when one was serialized
  double confidence = proto.getConfidence();
  RelationTriple triple;
  if (proto.hasTree()) {
    List<CoreLabel> sentenceTokens = doc.get(SentencesAnnotation.class).get(sentenceIndex).get(TokensAnnotation.class);
    SemanticGraph tree = fromProto(proto.getTree(), sentenceTokens, doc.get(DocIDAnnotation.class), Optional.of(doc));
    triple = new RelationTriple.WithTree(subjectTokens, relationTokens, objectTokens, tree, confidence);
  } else {
    triple = new RelationTriple(subjectTokens, relationTokens, objectTokens, confidence);
  }

  // Copy over the optional flags that were present in the proto
  if (proto.hasIstmod()) {
    triple.istmod(proto.getIstmod());
  }
  if (proto.hasPrefixBe()) {
    triple.isPrefixBe(proto.getPrefixBe());
  }
  if (proto.hasSuffixBe()) {
    triple.isSuffixBe(proto.getSuffixBe());
  }
  if (proto.hasSuffixOf()) {
    triple.isSuffixOf(proto.getSuffixOf());
  }
  return triple;
}
use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.
the class ProtobufAnnotationSerializer method fromProto.
/**
 * Returns a complete document, intended to mimic a document passed as input to
 * {@link ProtobufAnnotationSerializer#toProto(Annotation)} as closely as possible.
 * That is, most common fields are serialized, but there is no guarantee that custom additions
 * will be saved and retrieved.
 *
 * <p>The document is rebuilt in stages: characters, the document-level token list, the
 * sentences (which receive sub-list views of the token list), document metadata, coref chains,
 * per-sentence annotations that need the whole document (dependency graphs, entailments,
 * Open IE triples, coref mentions), quotes, NER mentions, and finally the speaker info of
 * each coref mention.
 *
 * @param proto The protocol buffer to read the document from.
 * @return An Annotation corresponding to the read protobuf.
 * @throws RuntimeInterruptedException if the calling thread has been interrupted.
 */
@SuppressWarnings("deprecation")
public Annotation fromProto(CoreNLPProtos.Document proto) {
  if (Thread.interrupted()) {
    throw new RuntimeInterruptedException();
  }
  // Set text
  Annotation ann = new Annotation(proto.getText());
  // if there are characters, add characters
  if (proto.getCharacterCount() > 0) {
    List<CoreLabel> docChars = new ArrayList<>();
    for (CoreNLPProtos.Token c : proto.getCharacterList()) {
      docChars.add(fromProto(c));
    }
    ann.set(SegmenterCoreAnnotations.CharactersAnnotation.class, docChars);
  }
  // Add tokens
  List<CoreLabel> tokens = new ArrayList<>();
  if (proto.getSentenceCount() > 0) {
    // Populate the tokens from the sentence
    for (CoreNLPProtos.Sentence sentence : proto.getSentenceList()) {
      // It's conceivable that the sentences are not contiguous -- pad this with nulls
      while (sentence.hasTokenOffsetBegin() && tokens.size() < sentence.getTokenOffsetBegin()) {
        tokens.add(null);
      }
      // Read the sentence
      for (CoreNLPProtos.Token token : sentence.getTokenList()) {
        CoreLabel coreLabel = fromProto(token);
        // Set docid
        if (proto.hasDocID()) {
          coreLabel.setDocID(proto.getDocID());
        }
        if (token.hasTokenBeginIndex() && token.hasTokenEndIndex()) {
          // This is usually true, if enough annotators are defined.
          // Pad far enough to hold both the sentence and this token's own span, so the
          // writes below never fall outside the list.
          int requiredSize = Math.max(sentence.getTokenOffsetEnd(), token.getTokenEndIndex());
          while (tokens.size() < requiredSize) {
            tokens.add(null);
          }
          // Fill every slot the token spans. Writing only the begin slot would leave nulls
          // behind for multi-slot tokens, which the sentence loop below dereferences.
          for (int i = token.getTokenBeginIndex(); i < token.getTokenEndIndex(); ++i) {
            tokens.set(i, coreLabel);
          }
        } else {
          // Assume this token spans a single token, and just add it to the tokens list
          tokens.add(coreLabel);
        }
      }
    }
  } else if (proto.getSentencelessTokenCount() > 0) {
    // Eek -- no sentences. Try to recover tokens directly
    for (CoreNLPProtos.Token token : proto.getSentencelessTokenList()) {
      CoreLabel coreLabel = fromProto(token);
      // Set docid
      if (proto.hasDocID()) {
        coreLabel.setDocID(proto.getDocID());
      }
      tokens.add(coreLabel);
    }
  }
  if (!tokens.isEmpty()) {
    ann.set(TokensAnnotation.class, tokens);
  }
  // Add sentences
  List<CoreMap> sentences = new ArrayList<>(proto.getSentenceCount());
  for (int sentIndex = 0; sentIndex < proto.getSentenceCount(); ++sentIndex) {
    CoreNLPProtos.Sentence sentence = proto.getSentence(sentIndex);
    CoreMap map = fromProtoNoTokens(sentence);
    if (!tokens.isEmpty() && sentence.hasTokenOffsetBegin() && sentence.hasTokenOffsetEnd() && map.get(TokensAnnotation.class) == null) {
      // Set tokens for sentence
      int tokenBegin = sentence.getTokenOffsetBegin();
      int tokenEnd = sentence.getTokenOffsetEnd();
      assert tokenBegin <= tokens.size() && tokenBegin <= tokenEnd;
      assert tokenEnd <= tokens.size();
      // The sentence shares the document's token objects through a sub-list view
      map.set(TokensAnnotation.class, tokens.subList(tokenBegin, tokenEnd));
      // Set sentence index + token index + paragraph index
      for (int i = tokenBegin; i < tokenEnd; ++i) {
        tokens.get(i).setSentIndex(sentIndex);
        // token indices within a sentence are 1-based
        tokens.get(i).setIndex(i - sentence.getTokenOffsetBegin() + 1);
        if (sentence.hasParagraph()) {
          tokens.get(i).set(ParagraphAnnotation.class, sentence.getParagraph());
        }
      }
      // Set text
      int characterBegin = sentence.getCharacterOffsetBegin();
      int characterEnd = sentence.getCharacterOffsetEnd();
      if (characterEnd <= proto.getText().length()) {
        // The usual case -- get the text from the document text
        map.set(TextAnnotation.class, proto.getText().substring(characterBegin, characterEnd));
      } else {
        // The document text is wrong -- guess the text from the tokens
        map.set(TextAnnotation.class, recoverOriginalText(tokens.subList(tokenBegin, tokenEnd), sentence));
      }
    }
    // End iteration
    sentences.add(map);
  }
  if (!sentences.isEmpty()) {
    ann.set(SentencesAnnotation.class, sentences);
  }
  // Set DocID
  String docid = null;
  if (proto.hasDocID()) {
    docid = proto.getDocID();
    ann.set(DocIDAnnotation.class, docid);
  }
  // Set reference time
  if (proto.hasDocDate()) {
    ann.set(DocDateAnnotation.class, proto.getDocDate());
  }
  if (proto.hasCalendar()) {
    GregorianCalendar calendar = new GregorianCalendar();
    calendar.setTimeInMillis(proto.getCalendar());
    ann.set(CalendarAnnotation.class, calendar);
  }
  // Set coref chain
  Map<Integer, CorefChain> corefChains = new HashMap<>();
  for (CoreNLPProtos.CorefChain chainProto : proto.getCorefChainList()) {
    CorefChain chain = fromProto(chainProto, ann);
    corefChains.put(chain.getChainID(), chain);
  }
  if (!corefChains.isEmpty()) {
    ann.set(CorefChainAnnotation.class, corefChains);
  }
  // hashes to access Mentions , later in this method need to add speakerInfo to Mention
  // so we need to create id -> Mention, CoreNLPProtos.Mention maps to do this, since SpeakerInfo could reference
  // any Mention in doc
  Map<Integer, Mention> idToMention = new HashMap<>();
  Map<Integer, CoreNLPProtos.Mention> idToProtoMention = new HashMap<>();
  // Set things in the sentence that need a document context.
  for (int sentenceIndex = 0; sentenceIndex < proto.getSentenceCount(); ++sentenceIndex) {
    CoreNLPProtos.Sentence sentence = proto.getSentenceList().get(sentenceIndex);
    CoreMap map = sentences.get(sentenceIndex);
    List<CoreLabel> sentenceTokens = map.get(TokensAnnotation.class);
    // Set dependency graphs
    if (sentence.hasBasicDependencies()) {
      map.set(BasicDependenciesAnnotation.class, fromProto(sentence.getBasicDependencies(), sentenceTokens, docid));
    }
    if (sentence.hasCollapsedDependencies()) {
      map.set(CollapsedDependenciesAnnotation.class, fromProto(sentence.getCollapsedDependencies(), sentenceTokens, docid));
    }
    if (sentence.hasCollapsedCCProcessedDependencies()) {
      map.set(CollapsedCCProcessedDependenciesAnnotation.class, fromProto(sentence.getCollapsedCCProcessedDependencies(), sentenceTokens, docid));
    }
    if (sentence.hasAlternativeDependencies()) {
      map.set(AlternativeDependenciesAnnotation.class, fromProto(sentence.getAlternativeDependencies(), sentenceTokens, docid));
    }
    if (sentence.hasEnhancedDependencies()) {
      map.set(EnhancedDependenciesAnnotation.class, fromProto(sentence.getEnhancedDependencies(), sentenceTokens, docid));
    }
    if (sentence.hasEnhancedPlusPlusDependencies()) {
      map.set(EnhancedPlusPlusDependenciesAnnotation.class, fromProto(sentence.getEnhancedPlusPlusDependencies(), sentenceTokens, docid));
    }
    // Set entailed sentences (rebuilt against the enhanced++ dependency graph)
    if (sentence.getEntailedSentenceCount() > 0) {
      Set<SentenceFragment> entailedSentences = sentence.getEntailedSentenceList().stream().map(frag -> fromProto(frag, map.get(EnhancedPlusPlusDependenciesAnnotation.class))).collect(Collectors.toSet());
      map.set(NaturalLogicAnnotations.EntailedSentencesAnnotation.class, entailedSentences);
    }
    // Set entailed clauses (rebuilt against the collapsed dependency graph)
    if (sentence.getEntailedClauseCount() > 0) {
      Set<SentenceFragment> entailedClauses = sentence.getEntailedClauseList().stream().map(frag -> fromProto(frag, map.get(CollapsedDependenciesAnnotation.class))).collect(Collectors.toSet());
      map.set(NaturalLogicAnnotations.EntailedClausesAnnotation.class, entailedClauses);
    }
    // Set relation triples
    if (sentence.getOpenieTripleCount() > 0) {
      List<RelationTriple> triples = new ArrayList<>();
      for (CoreNLPProtos.RelationTriple triple : sentence.getOpenieTripleList()) {
        triples.add(fromProto(triple, ann, sentenceIndex));
      }
      map.set(NaturalLogicAnnotations.RelationTriplesAnnotation.class, triples);
    }
    // Redo some light annotation; done unless the proto explicitly says the sentence had
    // no numerized tokens
    if (map.containsKey(TokensAnnotation.class) && (!sentence.hasHasNumerizedTokensAnnotation() || sentence.getHasNumerizedTokensAnnotation())) {
      map.set(NumerizedTokensAnnotation.class, NumberNormalizer.findAndMergeNumbers(map));
    }
    // add the CoreLabel and IndexedWord info to each mention
    // when Mentions are serialized, just storing the index in the sentence for CoreLabels and IndexedWords
    // this is the point where the de-serialized sentence has tokens
    int mentionInt = 0;
    for (CoreNLPProtos.Mention protoMention : sentence.getMentionsForCorefList()) {
      // get the mention; proto mentions and deserialized mentions are matched by position
      Mention mentionToUpdate = map.get(CorefMentionsAnnotation.class).get(mentionInt);
      // store these in hash for more processing later in this method
      idToMention.put(mentionToUpdate.mentionID, mentionToUpdate);
      idToProtoMention.put(mentionToUpdate.mentionID, protoMention);
      // update the values; a negative token index means the field is left unset
      int headIndexedWordIndex = protoMention.getHeadIndexedWord().getTokenIndex();
      if (headIndexedWordIndex >= 0) {
        mentionToUpdate.headIndexedWord = new IndexedWord(sentenceTokens.get(protoMention.getHeadIndexedWord().getTokenIndex()));
        mentionToUpdate.headIndexedWord.setCopyCount(protoMention.getHeadIndexedWord().getCopyCount());
      }
      int dependingVerbIndex = protoMention.getDependingVerb().getTokenIndex();
      if (dependingVerbIndex >= 0) {
        mentionToUpdate.dependingVerb = new IndexedWord(sentenceTokens.get(protoMention.getDependingVerb().getTokenIndex()));
        mentionToUpdate.dependingVerb.setCopyCount(protoMention.getDependingVerb().getCopyCount());
      }
      int headWordIndex = protoMention.getHeadWord().getTokenIndex();
      if (headWordIndex >= 0) {
        mentionToUpdate.headWord = sentenceTokens.get(protoMention.getHeadWord().getTokenIndex());
      }
      mentionToUpdate.sentenceWords = new ArrayList<>();
      for (CoreNLPProtos.IndexedWord clp : protoMention.getSentenceWordsList()) {
        int ti = clp.getTokenIndex();
        mentionToUpdate.sentenceWords.add(sentenceTokens.get(ti));
      }
      mentionToUpdate.originalSpan = new ArrayList<>();
      for (CoreNLPProtos.IndexedWord clp : protoMention.getOriginalSpanList()) {
        int ti = clp.getTokenIndex();
        mentionToUpdate.originalSpan.add(sentenceTokens.get(ti));
      }
      if (protoMention.getHasBasicDependency()) {
        mentionToUpdate.basicDependency = map.get(BasicDependenciesAnnotation.class);
      }
      if (protoMention.getHasEnhancedDepenedncy()) {
        mentionToUpdate.enhancedDependency = map.get(EnhancedDependenciesAnnotation.class);
      }
      if (protoMention.getHasContextParseTree()) {
        mentionToUpdate.contextParseTree = map.get(TreeAnnotation.class);
      }
      // move on to next mention
      mentionInt++;
    }
  }
  // Set quotes
  List<CoreMap> quotes = proto.getQuoteList().stream().map(quote -> fromProto(quote, tokens)).collect(Collectors.toList());
  if (!quotes.isEmpty()) {
    ann.set(QuotationsAnnotation.class, quotes);
  }
  // Set NERmention
  List<CoreMap> mentions = proto.getMentionsList().stream().map(this::fromProto).collect(Collectors.toList());
  if (!mentions.isEmpty()) {
    ann.set(MentionsAnnotation.class, mentions);
  }
  // also add all the Set<Mention>
  for (int mentionID : idToMention.keySet()) {
    // this is the Mention message corresponding to this Mention
    Mention mentionToUpdate = idToMention.get(mentionID);
    CoreNLPProtos.Mention correspondingProtoMention = idToProtoMention.get(mentionID);
    if (!correspondingProtoMention.hasSpeakerInfo()) {
      // so just continue to next Mention
      continue;
    }
    // if we're here we know a speakerInfo was stored
    SpeakerInfo speakerInfo = fromProto(correspondingProtoMention.getSpeakerInfo());
    // MentionID is ID in document, 0, 1, 2, etc...
    for (int speakerInfoMentionID : correspondingProtoMention.getSpeakerInfo().getMentionsList()) {
      speakerInfo.addMention(idToMention.get(speakerInfoMentionID));
    }
    // now the SpeakerInfo for this Mention should be fully restored
    mentionToUpdate.speakerInfo = speakerInfo;
  }
  // Return
  return ann;
}
use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.
the class OpenIE method processDocument.
/**
 * Process a single file or line of standard in: annotate the text and print every
 * extracted relation triple to {@code OUTPUT}.
 *
 * <p>Blank documents are skipped. If the document produced no extractions at all, a message
 * is logged instead; it names the docid, or quotes the text itself when the docid is
 * {@code "stdin"}.
 *
 * @param pipeline The annotation pipeline to run the lines of the input through.
 * @param docid The docid of the document we are extracting.
 * @param document the document to annotate.
 */
@SuppressWarnings("SynchronizeOnNonFinalField")
private static void processDocument(AnnotationPipeline pipeline, String docid, String document) {
  // Error checks
  if (document.trim().isEmpty()) {
    return;
  }
  // Annotate the document
  Annotation ann = new Annotation(document);
  pipeline.annotate(ann);
  // Get the extractions
  boolean empty = true;
  // Hold the lock for the whole document so that its extractions are printed together
  synchronized (OUTPUT) {
    // Either annotation may be absent (e.g. the pipeline produced no sentences or no
    // triples for a sentence); treat that as "no extractions" instead of failing.
    if (ann.get(CoreAnnotations.SentencesAnnotation.class) != null) {
      for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
        if (sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class) == null) {
          continue;
        }
        for (RelationTriple extraction : sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class)) {
          // Print the extractions
          OUTPUT.println(tripleToString(extraction, docid, sentence));
          empty = false;
        }
      }
    }
  }
  if (empty) {
    log.info("No extractions in: " + ("stdin".equals(docid) ? document : docid));
  }
}
Aggregations