use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.
the class ProtobufAnnotationSerializer method toProtoBuilder.
/**
 * <p>
 *   The method to extend by subclasses of the Protobuf Annotator if custom additions are added to sentences.
 *   In contrast to {@link ProtobufAnnotationSerializer#toProto(edu.stanford.nlp.ling.CoreLabel)}, this function
 *   returns a builder that can be extended.
 * </p>
 *
 * <p>
 *   Every annotation that is written into the builder is also removed from {@code keysToSerialize},
 *   either directly or through {@code getAndRegister}.
 * </p>
 *
 * @param sentence The sentence to save to a protocol buffer. Must not be a {@link CoreLabel}.
 * @param keysToSerialize A set tracking which keys have been saved. It's important to remove any keys added to the proto
 *                        from this set, as the code tracks annotations to ensure lossless serialization.
 * @return A sentence builder populated from the annotations present on {@code sentence}.
 * @throws IllegalArgumentException If {@code sentence} is actually a {@link CoreLabel}.
 * @throws IllegalStateException If relation mentions are present on the sentence but entity mentions are not.
 */
@SuppressWarnings("deprecation")
protected CoreNLPProtos.Sentence.Builder toProtoBuilder(CoreMap sentence, Set<Class<?>> keysToSerialize) {
  // Error checks
  if (sentence instanceof CoreLabel) {
    throw new IllegalArgumentException("CoreMap is actually a CoreLabel");
  }
  CoreNLPProtos.Sentence.Builder builder = CoreNLPProtos.Sentence.newBuilder();

  // Remove items serialized elsewhere from the required list
  keysToSerialize.remove(TextAnnotation.class);
  keysToSerialize.remove(NumerizedTokensAnnotation.class);

  // Required fields
  builder.setTokenOffsetBegin(getAndRegister(sentence, keysToSerialize, TokenBeginAnnotation.class));
  builder.setTokenOffsetEnd(getAndRegister(sentence, keysToSerialize, TokenEndAnnotation.class));

  // Get key set of CoreMap
  Set<Class<?>> keySet;
  if (sentence instanceof ArrayCoreMap) {
    keySet = ((ArrayCoreMap) sentence).keySetNotNull();
  } else {
    keySet = new IdentityHashSet<>(sentence.keySet());
  }

  // Tokens
  if (sentence.containsKey(TokensAnnotation.class)) {
    for (CoreLabel tok : sentence.get(TokensAnnotation.class)) {
      builder.addToken(toProto(tok));
    }
    keysToSerialize.remove(TokensAnnotation.class);
  }

  // Characters
  if (sentence.containsKey(SegmenterCoreAnnotations.CharactersAnnotation.class)) {
    for (CoreLabel c : sentence.get(SegmenterCoreAnnotations.CharactersAnnotation.class)) {
      builder.addCharacter(toProto(c));
    }
    keysToSerialize.remove(SegmenterCoreAnnotations.CharactersAnnotation.class);
  }

  // Optional fields
  if (keySet.contains(SentenceIndexAnnotation.class)) {
    builder.setSentenceIndex(getAndRegister(sentence, keysToSerialize, SentenceIndexAnnotation.class));
  }
  if (keySet.contains(CharacterOffsetBeginAnnotation.class)) {
    builder.setCharacterOffsetBegin(getAndRegister(sentence, keysToSerialize, CharacterOffsetBeginAnnotation.class));
  }
  if (keySet.contains(CharacterOffsetEndAnnotation.class)) {
    builder.setCharacterOffsetEnd(getAndRegister(sentence, keysToSerialize, CharacterOffsetEndAnnotation.class));
  }
  if (keySet.contains(TreeAnnotation.class)) {
    builder.setParseTree(toProto(getAndRegister(sentence, keysToSerialize, TreeAnnotation.class)));
  }
  if (keySet.contains(BinarizedTreeAnnotation.class)) {
    builder.setBinarizedParseTree(toProto(getAndRegister(sentence, keysToSerialize, BinarizedTreeAnnotation.class)));
  }
  if (keySet.contains(KBestTreesAnnotation.class)) {
    for (Tree tree : sentence.get(KBestTreesAnnotation.class)) {
      builder.addKBestParseTrees(toProto(tree));
    }
    // Unregister once, after the loop, so that an empty k-best list is still marked as serialized.
    keysToSerialize.remove(KBestTreesAnnotation.class);
  }
  if (keySet.contains(SentimentCoreAnnotations.SentimentAnnotatedTree.class)) {
    builder.setAnnotatedParseTree(toProto(getAndRegister(sentence, keysToSerialize, SentimentCoreAnnotations.SentimentAnnotatedTree.class)));
  }
  if (keySet.contains(SentimentCoreAnnotations.SentimentClass.class)) {
    builder.setSentiment(getAndRegister(sentence, keysToSerialize, SentimentCoreAnnotations.SentimentClass.class));
  }
  if (keySet.contains(BasicDependenciesAnnotation.class)) {
    builder.setBasicDependencies(toProto(getAndRegister(sentence, keysToSerialize, BasicDependenciesAnnotation.class)));
  }
  if (keySet.contains(CollapsedDependenciesAnnotation.class)) {
    builder.setCollapsedDependencies(toProto(getAndRegister(sentence, keysToSerialize, CollapsedDependenciesAnnotation.class)));
  }
  if (keySet.contains(CollapsedCCProcessedDependenciesAnnotation.class)) {
    builder.setCollapsedCCProcessedDependencies(toProto(getAndRegister(sentence, keysToSerialize, CollapsedCCProcessedDependenciesAnnotation.class)));
  }
  if (keySet.contains(AlternativeDependenciesAnnotation.class)) {
    builder.setAlternativeDependencies(toProto(getAndRegister(sentence, keysToSerialize, AlternativeDependenciesAnnotation.class)));
  }
  if (keySet.contains(EnhancedDependenciesAnnotation.class)) {
    builder.setEnhancedDependencies(toProto(getAndRegister(sentence, keysToSerialize, EnhancedDependenciesAnnotation.class)));
  }
  if (keySet.contains(EnhancedPlusPlusDependenciesAnnotation.class)) {
    builder.setEnhancedPlusPlusDependencies(toProto(getAndRegister(sentence, keysToSerialize, EnhancedPlusPlusDependenciesAnnotation.class)));
  }

  // The paragraph of a sentence is read off its first token, when there is one and it carries a paragraph.
  if (keySet.contains(TokensAnnotation.class)
      && getAndRegister(sentence, keysToSerialize, TokensAnnotation.class).size() > 0
      && getAndRegister(sentence, keysToSerialize, TokensAnnotation.class).get(0).containsKey(ParagraphAnnotation.class)) {
    builder.setParagraph(getAndRegister(sentence, keysToSerialize, TokensAnnotation.class).get(0).get(ParagraphAnnotation.class));
  }

  // Only the presence of numerized tokens is recorded here; the key itself was unregistered above.
  builder.setHasNumerizedTokensAnnotation(keySet.contains(NumerizedTokensAnnotation.class));

  if (keySet.contains(NaturalLogicAnnotations.EntailedSentencesAnnotation.class)) {
    for (SentenceFragment entailedSentence : getAndRegister(sentence, keysToSerialize, NaturalLogicAnnotations.EntailedSentencesAnnotation.class)) {
      builder.addEntailedSentence(toProto(entailedSentence));
    }
  }
  if (keySet.contains(NaturalLogicAnnotations.EntailedClausesAnnotation.class)) {
    for (SentenceFragment entailedClause : getAndRegister(sentence, keysToSerialize, NaturalLogicAnnotations.EntailedClausesAnnotation.class)) {
      builder.addEntailedClause(toProto(entailedClause));
    }
  }
  if (keySet.contains(NaturalLogicAnnotations.RelationTriplesAnnotation.class)) {
    for (RelationTriple triple : getAndRegister(sentence, keysToSerialize, NaturalLogicAnnotations.RelationTriplesAnnotation.class)) {
      builder.addOpenieTriple(toProto(triple));
    }
  }
  if (keySet.contains(KBPTriplesAnnotation.class)) {
    for (RelationTriple triple : getAndRegister(sentence, keysToSerialize, KBPTriplesAnnotation.class)) {
      builder.addKbpTriple(toProto(triple));
    }
  }

  // Non-default annotators
  // The "has relation annotations" flag is driven by the presence of entity mentions.
  if (keySet.contains(EntityMentionsAnnotation.class)) {
    builder.setHasRelationAnnotations(true);
    for (EntityMention entity : getAndRegister(sentence, keysToSerialize, EntityMentionsAnnotation.class)) {
      builder.addEntity(toProto(entity));
    }
  } else {
    builder.setHasRelationAnnotations(false);
  }
  if (keySet.contains(RelationMentionsAnnotation.class)) {
    // Relation mentions are only accepted when entity mentions were registered just above.
    if (!builder.getHasRelationAnnotations()) {
      throw new IllegalStateException("Registered relation mentions without entity mentions");
    }
    for (RelationMention relation : getAndRegister(sentence, keysToSerialize, RelationMentionsAnnotation.class)) {
      builder.addRelation(toProto(relation));
    }
  }

  // add each of the mentions in the List<Mentions> for this sentence
  if (keySet.contains(CorefMentionsAnnotation.class)) {
    builder.setHasCorefMentionsAnnotation(true);
    for (Mention m : sentence.get(CorefMentionsAnnotation.class)) {
      builder.addMentionsForCoref(toProto(m));
    }
    keysToSerialize.remove(CorefMentionsAnnotation.class);
  }

  // Entity mentions
  if (keySet.contains(MentionsAnnotation.class)) {
    for (CoreMap mention : sentence.get(MentionsAnnotation.class)) {
      builder.addMentions(toProtoMention(mention));
    }
    keysToSerialize.remove(MentionsAnnotation.class);
  }

  // add a sentence id if it exists
  if (keySet.contains(SentenceIDAnnotation.class)) {
    builder.setSentenceID(getAndRegister(sentence, keysToSerialize, SentenceIDAnnotation.class));
  }

  // Return
  return builder;
}
use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.
the class TextOutputter method print.
/**
 * The meat of the outputter: writes a plain-text rendering of an annotation to the given writer.
 *
 * <p>
 *   In order, this prints the document-level metadata that is present (id, title, date, type, source type),
 *   then each sentence with its tokens, parse tree, enhanced++ dependencies, MachineReading entity and
 *   relation mentions, Open IE triples and KBP triples, then the coreference chains, and finally any quotes.
 *   Annotations that are absent are skipped. The writer is flushed, but not closed, before returning.
 * </p>
 *
 * @param annotation The annotation to print.
 * @param pw The writer to print to.
 * @param options Supplies the beam used to filter relation mentions and the constituency tree printer.
 * @throws IOException Declared by the signature; NOTE(review): which callee raises it is not visible in this method.
 */
private static void print(Annotation annotation, PrintWriter pw, Options options) throws IOException {
  double beam = options.beamPrintingOption;
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);

  // Display docid if available
  String docId = annotation.get(CoreAnnotations.DocIDAnnotation.class);
  if (docId != null) {
    List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
    int nSentences = (sentences != null) ? sentences.size() : 0;
    int nTokens = (tokens != null) ? tokens.size() : 0;
    pw.printf("Document: ID=%s (%d sentences, %d tokens)%n", docId, nSentences, nTokens);
  }

  // Display doctitle if available
  String docTitle = annotation.get(CoreAnnotations.DocTitleAnnotation.class);
  if (docTitle != null) {
    pw.printf("Document Title: %s%n", docTitle);
  }

  // Display docdate if available
  String docDate = annotation.get(CoreAnnotations.DocDateAnnotation.class);
  if (docDate != null) {
    pw.printf("Document Date: %s%n", docDate);
  }

  // Display doctype if available
  String docType = annotation.get(CoreAnnotations.DocTypeAnnotation.class);
  if (docType != null) {
    pw.printf("Document Type: %s%n", docType);
  }

  // Display docsourcetype if available
  String docSourceType = annotation.get(CoreAnnotations.DocSourceTypeAnnotation.class);
  if (docSourceType != null) {
    pw.printf("Document Source Type: %s%n", docSourceType);
  }

  // display each sentence in this annotation
  if (sentences != null) {
    // The token-level annotations to show are the same for every sentence, so the list is built once.
    String[] tokenAnnotations = { "Text", "PartOfSpeech", "Lemma", "Answer", "NamedEntityTag", "CharacterOffsetBegin", "CharacterOffsetEnd", "NormalizedNamedEntityTag", "Timex", "TrueCase", "TrueCaseText", "SentimentClass", "WikipediaEntity" };

    for (int i = 0, sz = sentences.size(); i < sz; i++) {
      CoreMap sentence = sentences.get(i);

      // A sentence without a tokens annotation is reported as having 0 tokens, mirroring the
      // document-level handling above, instead of failing with a NullPointerException.
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      int nTokens = (tokens != null) ? tokens.size() : 0;

      String sentiment = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
      if (sentiment == null) {
        sentiment = "";
      } else {
        sentiment = ", sentiment: " + sentiment;
      }
      pw.printf("Sentence #%d (%d tokens%s):%n", (i + 1), nTokens, sentiment);

      String text = sentence.get(CoreAnnotations.TextAnnotation.class);
      pw.println(text);

      // display the token-level annotations
      if (tokens != null) {
        for (CoreLabel token : tokens) {
          pw.print(token.toShorterString(tokenAnnotations));
          pw.println();
        }
      }

      // display the parse tree for this sentence
      Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
      if (tree != null) {
        options.constituentTreePrinter.printTree(tree, pw);
      }

      // Print the enhanced++ dependencies only when the sentence carries them; they can be absent,
      // for example for a language which doesn't have dependencies.
      if (sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class) != null) {
        pw.print(sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class).toList());
        pw.println();
      }

      // display MachineReading entities and relations
      List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
      if (entities != null) {
        pw.println("Extracted the following MachineReading entity mentions:");
        for (EntityMention e : entities) {
          pw.print('\t');
          pw.println(e);
        }
      }
      List<RelationMention> relations = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
      if (relations != null) {
        pw.println("Extracted the following MachineReading relation mentions:");
        for (RelationMention r : relations) {
          // Only relations that pass the beam check are shown.
          if (r.printableObject(beam)) {
            pw.println(r);
          }
        }
      }

      // display OpenIE triples
      Collection<RelationTriple> openieTriples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
      if (openieTriples != null && !openieTriples.isEmpty()) {
        pw.println("Extracted the following Open IE triples:");
        for (RelationTriple triple : openieTriples) {
          pw.println(OpenIE.tripleToString(triple, docId, sentence));
        }
      }

      // display KBP triples
      Collection<RelationTriple> kbpTriples = sentence.get(CoreAnnotations.KBPTriplesAnnotation.class);
      if (kbpTriples != null && !kbpTriples.isEmpty()) {
        pw.println("Extracted the following KBP triples:");
        for (RelationTriple triple : kbpTriples) {
          pw.println(triple.toString());
        }
      }
    }
  }

  // The old-style doc-level coref annotations are not supported anymore;
  // only the new-style coreference graph is displayed.
  Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  if (corefChains != null && sentences != null) {
    for (CorefChain chain : corefChains.values()) {
      CorefChain.CorefMention representative = chain.getRepresentativeMention();
      // The heading is printed lazily so that chains with no non-representative mention print nothing.
      boolean outputHeading = false;
      for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
        // Identity comparison: skip the representative mention object itself.
        if (mention == representative) {
          continue;
        }
        if (!outputHeading) {
          outputHeading = true;
          pw.println("Coreference set:");
        }
        // all offsets start at 1!
        pw.printf("\t(%d,%d,[%d,%d]) -> (%d,%d,[%d,%d]), that is: \"%s\" -> \"%s\"%n", mention.sentNum, mention.headIndex, mention.startIndex, mention.endIndex, representative.sentNum, representative.headIndex, representative.startIndex, representative.endIndex, mention.mentionSpan, representative.mentionSpan);
      }
    }
  }

  // display quotes if available
  if (annotation.get(CoreAnnotations.QuotationsAnnotation.class) != null) {
    pw.println("Extracted quotes: ");
    List<CoreMap> allQuotes = QuoteAnnotator.gatherQuotes(annotation);
    for (CoreMap quote : allQuotes) {
      pw.printf("[QuotationIndexAnnotation=%d, CharacterOffsetBegin=%d, Text=%s]%n", quote.get(CoreAnnotations.QuotationIndexAnnotation.class), quote.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class), quote.get(CoreAnnotations.TextAnnotation.class));
    }
  }

  pw.flush();
}
use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.
the class Sentence method openieTriples.
/**
 * Returns the OpenIE triples of this sentence, running the OpenIE annotator on the
 * enclosing document first.
 * Note that this function may be slower than you would expect, as it has to
 * convert the underlying Protobuf representation back into {@link CoreLabel}s.
 *
 * @param props The properties to use for the OpenIE annotator.
 * @return A collection of {@link RelationTriple} objects representing the OpenIE triples in the sentence.
 */
public Collection<RelationTriple> openieTriples(Properties props) {
  document.runOpenie(props);
  synchronized (impl) {
    // The returned labels are not used here. NOTE(review): the call is kept in case it has
    // side effects that the conversion below relies on -- confirm before removing.
    asCoreLabels();
    Annotation annotation = document.asAnnotation();
    return impl.getOpenieTripleList()
        .stream()
        .map(proto -> ProtobufAnnotationSerializer.fromProto(proto, annotation, sentenceIndex()))
        .collect(Collectors.toList());
  }
}
use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.
the class Sentence method kbpTriples.
/**
 * Returns the KBP triples of this sentence, running the KBP annotator on the
 * enclosing document first.
 * Note that this function may be slower than you would expect, as it has to
 * convert the underlying Protobuf representation back into {@link CoreLabel}s.
 *
 * @param props The properties to use for the KBP annotator.
 * @return A collection of {@link RelationTriple} objects representing the KBP triples in the sentence.
 */
public Collection<RelationTriple> kbpTriples(Properties props) {
  document.runKBP(props);
  synchronized (impl) {
    // The returned labels are not used here. NOTE(review): the call is kept in case it has
    // side effects that the conversion below relies on -- confirm before removing.
    asCoreLabels();
    Annotation annotation = document.asAnnotation();
    return impl.getKbpTripleList()
        .stream()
        .map(proto -> ProtobufAnnotationSerializer.fromProto(proto, annotation, sentenceIndex()))
        .collect(Collectors.toList());
  }
}
use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.
the class RelationTripleSegmenterTest method mkExtraction.
/**
 * Build a relation triple from a dependency tree given in a CoNLL-like format:
 * <pre>
 *   word_index  word  parent_index  incoming_relation
 * </pre>
 *
 * <p>
 *   The tree is first handed to {@code segment}; if that yields a triple and {@code listIndex} is 0,
 *   that triple is returned. Otherwise the result is picked from the list produced by {@code extract},
 *   with the index shifted down by one when {@code segment} also produced a triple.
 * </p>
 *
 * @param conll The CoNLL-like description of the tree.
 * @param listIndex Which extraction to return.
 * @param allNominals Passed through to the {@link RelationTripleSegmenter} constructor.
 * @return The selected triple, or empty if the extracted list has no more than {@code listIndex} entries.
 */
protected Optional<RelationTriple> mkExtraction(String conll, int listIndex, boolean allNominals) {
  Pair<SemanticGraph, List<CoreLabel>> parsed = mkTree(conll);
  SemanticGraph graph = parsed.first;
  List<CoreLabel> tokens = parsed.second;

  // First attempt: segment the whole tree directly.
  Optional<RelationTriple> direct = new RelationTripleSegmenter(allNominals).segment(graph, Optional.empty());
  boolean directHit = direct.isPresent();
  if (directHit && listIndex == 0) {
    return direct;
  }

  // Second attempt: the full list of extractions, from a fresh segmenter instance.
  List<RelationTriple> candidates = new RelationTripleSegmenter(allNominals).extract(graph, tokens);
  if (candidates.size() <= listIndex) {
    return Optional.empty();
  }
  // NOTE(review): the size guard above uses the unshifted listIndex, while the lookup below
  // uses the shifted one -- confirm this asymmetry is intended.
  int shift = directHit ? 1 : 0;
  return Optional.of(candidates.get(listIndex - shift));
}
Aggregations