Search in sources :

Example 6 with CorefChain

use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.

the class HybridCorefAnnotator method annotate.

/**
 * Runs the hybrid coreference system over {@code annotation} and stores the resulting
 * chains under {@code CorefCoreAnnotations.CorefChainAnnotation}.
 *
 * <p>If the annotation carries no {@code SentencesAnnotation}, an error is logged and the
 * method returns without modifying the annotation. Unchecked exceptions propagate
 * unchanged; any checked exception is wrapped in a {@link RuntimeException}.
 *
 * @param annotation the document annotation to enrich with coreference chains
 */
@Override
public void annotate(Annotation annotation) {
    try {
        boolean hasSentences = annotation.containsKey(CoreAnnotations.SentencesAnnotation.class);
        if (!hasSentences) {
            log.error("this coreference resolution system requires SentencesAnnotation!");
            return;
        }

        // Flag the document when speaker annotations are present.
        if (hasSpeakerAnnotations(annotation)) {
            annotation.set(CoreAnnotations.UseMarkedDiscourseAnnotation.class, true);
        }

        Document document = corefSystem.docMaker.makeDocument(annotation);
        Map<Integer, CorefChain> chains = corefSystem.coref(document);
        annotation.set(CorefCoreAnnotations.CorefChainAnnotation.class, chains);

        // for backward compatibility
        if (OLD_FORMAT) {
            annotateOldFormat(chains, document);
        }
    } catch (Exception e) {
        // Rethrow unchecked exceptions as-is; wrap everything else.
        if (e instanceof RuntimeException) {
            throw (RuntimeException) e;
        }
        throw new RuntimeException(e);
    }
}
Also used : CorefChain(edu.stanford.nlp.coref.data.CorefChain) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) Document(edu.stanford.nlp.coref.data.Document) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) IOException(java.io.IOException)

Example 7 with CorefChain

use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.

the class HybridCorefAnnotator method getLinks.

/**
 * Collects pairwise links between the mentions of every coreference chain.
 *
 * <p>For each chain, every ordered pair of mentions {@code (m1, m2)} for which
 * {@link CorefChain.CorefMentionComparator} ranks {@code m1} after {@code m2} contributes
 * one link {@code (m1.position, m2.position)}. A mention is never linked to itself or to
 * a mention that compares equal.
 *
 * @param result coreference chains keyed by chain id
 * @return the list of position pairs, in chain iteration order
 */
public static List<Pair<IntTuple, IntTuple>> getLinks(Map<Integer, CorefChain> result) {
    List<Pair<IntTuple, IntTuple>> links = new ArrayList<>();
    CorefChain.CorefMentionComparator comparator = new CorefChain.CorefMentionComparator();
    for (CorefChain chain : result.values()) {
        List<CorefMention> mentions = chain.getMentionsInTextualOrder();
        for (CorefMention m1 : mentions) {
            for (CorefMention m2 : mentions) {
                // The Comparator contract only guarantees the sign of the result, so test
                // for "greater than" with > 0 rather than relying on the exact value 1.
                if (comparator.compare(m1, m2) > 0) {
                    links.add(new Pair<>(m1.position, m2.position));
                }
            }
        }
    }
    return links;
}
Also used : CorefMention(edu.stanford.nlp.coref.data.CorefChain.CorefMention) CorefChain(edu.stanford.nlp.coref.data.CorefChain)

Example 8 with CorefChain

use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.

the class JSONOutputter method print.

/**
 * {@inheritDoc}
 *
 * <p>Writes {@code doc} to {@code target} through a {@code JSONWriter}: document-level
 * metadata first, then (when present) sentences, coreference chains and quotes.
 * Keys are written in the order of the statements below, so reordering statements
 * changes the output layout. The writer is flushed at the end but not closed.
 *
 * @param doc the annotation to serialize
 * @param target the stream that receives the output, encoded with {@code options.encoding}
 * @param options output options (encoding, whether to include text, tree printer)
 * @throws IOException declared by the overridden method
 */
// It's lying; we need the "redundant" casts (as of 2014-09-08)
@SuppressWarnings("RedundantCast")
@Override
public void print(Annotation doc, OutputStream target, Options options) throws IOException {
    PrintWriter writer = new PrintWriter(IOUtils.encodedOutputStreamWriter(target, options.encoding));
    JSONWriter l0 = new JSONWriter(writer, options);
    // Naming convention: l0 is the root writer, l1..l5 are the writers for each nesting level.
    l0.object(l1 -> {
        // Document-level metadata
        l1.set("docId", doc.get(CoreAnnotations.DocIDAnnotation.class));
        l1.set("docDate", doc.get(CoreAnnotations.DocDateAnnotation.class));
        l1.set("docSourceType", doc.get(CoreAnnotations.DocSourceTypeAnnotation.class));
        l1.set("docType", doc.get(CoreAnnotations.DocTypeAnnotation.class));
        l1.set("author", doc.get(CoreAnnotations.AuthorAnnotation.class));
        l1.set("location", doc.get(CoreAnnotations.LocationAnnotation.class));
        // The raw document text is only written when explicitly requested.
        if (options.includeText) {
            l1.set("text", doc.get(CoreAnnotations.TextAnnotation.class));
        }
        // Sentences: each sentence is mapped to a callback that writes its fields.
        if (doc.get(CoreAnnotations.SentencesAnnotation.class) != null) {
            l1.set("sentences", doc.get(CoreAnnotations.SentencesAnnotation.class).stream().map(sentence -> (Consumer<Writer>) (Writer l2) -> {
                l2.set("id", sentence.get(CoreAnnotations.SentenceIDAnnotation.class));
                l2.set("index", sentence.get(CoreAnnotations.SentenceIndexAnnotation.class));
                l2.set("line", sentence.get(CoreAnnotations.LineNumberAnnotation.class));
                // Constituency parse: render the tree to a string, replacing the default
                // printer with a "oneline" TreePrint.
                StringWriter treeStrWriter = new StringWriter();
                TreePrint treePrinter = options.constituentTreePrinter;
                if (treePrinter == AnnotationOutputter.DEFAULT_CONSTITUENT_TREE_PRINTER) {
                    treePrinter = new TreePrint("oneline");
                }
                treePrinter.printTree(sentence.get(TreeCoreAnnotations.TreeAnnotation.class), new PrintWriter(treeStrWriter, true));
                String treeStr = treeStrWriter.toString().trim();
                // Omit the "parse" key when the printer produced the skipped/unparsable marker.
                if (!"SENTENCE_SKIPPED_OR_UNPARSABLE".equals(treeStr)) {
                    l2.set("parse", treeStr);
                }
                // Dependency graphs
                l2.set("basicDependencies", buildDependencyTree(sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class)));
                l2.set("enhancedDependencies", buildDependencyTree(sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class)));
                l2.set("enhancedPlusPlusDependencies", buildDependencyTree(sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class)));
                // Sentiment, only when a sentiment-annotated tree is present.
                Tree sentimentTree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
                if (sentimentTree != null) {
                    int sentiment = RNNCoreAnnotations.getPredictedClass(sentimentTree);
                    String sentimentClass = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
                    l2.set("sentimentValue", Integer.toString(sentiment));
                    // Spaces are stripped from the class label.
                    // NOTE(review): sentimentClass is dereferenced without a null check; this
                    // assumes it is always set together with the sentiment tree -- confirm.
                    l2.set("sentiment", sentimentClass.replaceAll(" ", ""));
                }
                // OpenIE relation triples
                Collection<RelationTriple> openIETriples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
                if (openIETriples != null) {
                    l2.set("openie", openIETriples.stream().map(triple -> (Consumer<Writer>) (Writer tripleWriter) -> {
                        tripleWriter.set("subject", triple.subjectGloss());
                        tripleWriter.set("subjectSpan", Span.fromPair(triple.subjectTokenSpan()));
                        tripleWriter.set("relation", triple.relationGloss());
                        tripleWriter.set("relationSpan", Span.fromPair(triple.relationTokenSpan()));
                        tripleWriter.set("object", triple.objectGloss());
                        tripleWriter.set("objectSpan", Span.fromPair(triple.objectTokenSpan()));
                    }));
                }
                // KBP relation triples, written with the same fields as the OpenIE triples above.
                Collection<RelationTriple> kbpTriples = sentence.get(CoreAnnotations.KBPTriplesAnnotation.class);
                if (kbpTriples != null) {
                    l2.set("kbp", kbpTriples.stream().map(triple -> (Consumer<Writer>) (Writer tripleWriter) -> {
                        tripleWriter.set("subject", triple.subjectGloss());
                        tripleWriter.set("subjectSpan", Span.fromPair(triple.subjectTokenSpan()));
                        tripleWriter.set("relation", triple.relationGloss());
                        tripleWriter.set("relationSpan", Span.fromPair(triple.relationTokenSpan()));
                        tripleWriter.set("object", triple.objectGloss());
                        tripleWriter.set("objectSpan", Span.fromPair(triple.objectTokenSpan()));
                    }));
                }
                // Entity mentions
                if (sentence.get(CoreAnnotations.MentionsAnnotation.class) != null) {
                    Integer sentTokenBegin = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
                    l2.set("entitymentions", sentence.get(CoreAnnotations.MentionsAnnotation.class).stream().map(m -> (Consumer<Writer>) (Writer l3) -> {
                        Integer tokenBegin = m.get(CoreAnnotations.TokenBeginAnnotation.class);
                        Integer tokenEnd = m.get(CoreAnnotations.TokenEndAnnotation.class);
                        // docToken* carry the mention's own offsets; tokenBegin/tokenEnd are the
                        // same offsets shifted by the sentence's token begin, and are only
                        // written when both values needed for the subtraction are available.
                        l3.set("docTokenBegin", tokenBegin);
                        l3.set("docTokenEnd", tokenEnd);
                        if (tokenBegin != null && sentTokenBegin != null) {
                            l3.set("tokenBegin", tokenBegin - sentTokenBegin);
                        }
                        if (tokenEnd != null && sentTokenBegin != null) {
                            l3.set("tokenEnd", tokenEnd - sentTokenBegin);
                        }
                        l3.set("text", m.get(CoreAnnotations.TextAnnotation.class));
                        l3.set("characterOffsetBegin", m.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
                        l3.set("characterOffsetEnd", m.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
                        l3.set("ner", m.get(CoreAnnotations.NamedEntityTagAnnotation.class));
                        l3.set("normalizedNER", m.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
                        l3.set("entitylink", m.get(CoreAnnotations.WikipediaEntityAnnotation.class));
                        // Timex details, with a nested "range" entry that is null when no range exists.
                        Timex time = m.get(TimeAnnotations.TimexAnnotation.class);
                        if (time != null) {
                            Timex.Range range = time.range();
                            l3.set("timex", (Consumer<Writer>) l4 -> {
                                l4.set("tid", time.tid());
                                l4.set("type", time.timexType());
                                l4.set("value", time.value());
                                l4.set("altValue", time.altVal());
                                l4.set("range", (range != null) ? (Consumer<Writer>) l5 -> {
                                    l5.set("begin", range.begin);
                                    l5.set("end", range.end);
                                    l5.set("duration", range.duration);
                                } : null);
                            });
                        }
                    }));
                }
                // Tokens
                if (sentence.get(CoreAnnotations.TokensAnnotation.class) != null) {
                    l2.set("tokens", sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(token -> (Consumer<Writer>) (Writer l3) -> {
                        l3.set("index", token.index());
                        l3.set("word", token.word());
                        l3.set("originalText", token.originalText());
                        l3.set("lemma", token.lemma());
                        l3.set("characterOffsetBegin", token.beginPosition());
                        l3.set("characterOffsetEnd", token.endPosition());
                        l3.set("pos", token.tag());
                        l3.set("ner", token.ner());
                        l3.set("normalizedNER", token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
                        l3.set("speaker", token.get(CoreAnnotations.SpeakerAnnotation.class));
                        l3.set("truecase", token.get(CoreAnnotations.TrueCaseAnnotation.class));
                        l3.set("truecaseText", token.get(CoreAnnotations.TrueCaseTextAnnotation.class));
                        l3.set("before", token.get(CoreAnnotations.BeforeAnnotation.class));
                        l3.set("after", token.get(CoreAnnotations.AfterAnnotation.class));
                        l3.set("entitylink", token.get(CoreAnnotations.WikipediaEntityAnnotation.class));
                        // Token-level timex, written with the same fields as the mention-level timex above.
                        Timex time = token.get(TimeAnnotations.TimexAnnotation.class);
                        if (time != null) {
                            Timex.Range range = time.range();
                            l3.set("timex", (Consumer<Writer>) l4 -> {
                                l4.set("tid", time.tid());
                                l4.set("type", time.timexType());
                                l4.set("value", time.value());
                                l4.set("altValue", time.altVal());
                                l4.set("range", (range != null) ? (Consumer<Writer>) l5 -> {
                                    l5.set("begin", range.begin);
                                    l5.set("end", range.end);
                                    l5.set("duration", range.duration);
                                } : null);
                            });
                        }
                    }));
                }
            }));
        }
        // Coreference chains: one entry per chain, keyed by the chain id as a string.
        if (doc.get(CorefCoreAnnotations.CorefChainAnnotation.class) != null) {
            Map<Integer, CorefChain> corefChains = doc.get(CorefCoreAnnotations.CorefChainAnnotation.class);
            // This second null check repeats the guard on the enclosing if.
            if (corefChains != null) {
                l1.set("corefs", (Consumer<Writer>) chainWriter -> {
                    for (CorefChain chain : corefChains.values()) {
                        CorefChain.CorefMention representative = chain.getRepresentativeMention();
                        chainWriter.set(Integer.toString(chain.getChainID()), chain.getMentionsInTextualOrder().stream().map(mention -> (Consumer<Writer>) (Writer mentionWriter) -> {
                            mentionWriter.set("id", mention.mentionID);
                            mentionWriter.set("text", mention.mentionSpan);
                            mentionWriter.set("type", mention.mentionType);
                            mentionWriter.set("number", mention.number);
                            mentionWriter.set("gender", mention.gender);
                            mentionWriter.set("animacy", mention.animacy);
                            mentionWriter.set("startIndex", mention.startIndex);
                            mentionWriter.set("endIndex", mention.endIndex);
                            mentionWriter.set("headIndex", mention.headIndex);
                            mentionWriter.set("sentNum", mention.sentNum);
                            mentionWriter.set("position", Arrays.stream(mention.position.elems()).boxed().collect(Collectors.toList()));
                            // Reference comparison: true only for the very object returned
                            // by getRepresentativeMention().
                            mentionWriter.set("isRepresentativeMention", mention == representative);
                        }));
                    }
                });
            }
        }
        // Quotes: written when the document has a QuotationsAnnotation; the list itself
        // comes from QuoteAnnotator.gatherQuotes(doc).
        if (doc.get(CoreAnnotations.QuotationsAnnotation.class) != null) {
            List<CoreMap> quotes = QuoteAnnotator.gatherQuotes(doc);
            l1.set("quotes", quotes.stream().map(quote -> (Consumer<Writer>) (Writer l2) -> {
                l2.set("id", quote.get(CoreAnnotations.QuotationIndexAnnotation.class));
                l2.set("text", quote.get(CoreAnnotations.TextAnnotation.class));
                l2.set("beginIndex", quote.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
                l2.set("endIndex", quote.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
                l2.set("beginToken", quote.get(CoreAnnotations.TokenBeginAnnotation.class));
                l2.set("endToken", quote.get(CoreAnnotations.TokenEndAnnotation.class));
                l2.set("beginSentence", quote.get(CoreAnnotations.SentenceBeginAnnotation.class));
                l2.set("endSentence", quote.get(CoreAnnotations.SentenceEndAnnotation.class));
            }));
        }
    });
    // Flush buffered output; the writer is deliberately not closed here
    // (presumably because the caller owns the target stream -- confirm).
    l0.writer.flush();
}
Also used : java.util(java.util) CorefChain(edu.stanford.nlp.coref.data.CorefChain) SentenceUtils(edu.stanford.nlp.ling.SentenceUtils) Tree(edu.stanford.nlp.trees.Tree) NaturalLogicAnnotations(edu.stanford.nlp.naturalli.NaturalLogicAnnotations) TimeAnnotations(edu.stanford.nlp.time.TimeAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) RNNCoreAnnotations(edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) Pair(edu.stanford.nlp.util.Pair) CoreMap(edu.stanford.nlp.util.CoreMap) Timex(edu.stanford.nlp.time.Timex) IndexedWord(edu.stanford.nlp.ling.IndexedWord) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) TreePrint(edu.stanford.nlp.trees.TreePrint) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) IOUtils(edu.stanford.nlp.io.IOUtils) Pointer(edu.stanford.nlp.util.Pointer) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) DecimalFormat(java.text.DecimalFormat) StringOutputStream(edu.stanford.nlp.io.StringOutputStream) Collectors(java.util.stream.Collectors) Span(edu.stanford.nlp.ie.machinereading.structure.Span) Consumer(java.util.function.Consumer) Stream(java.util.stream.Stream) java.io(java.io) Generics(edu.stanford.nlp.util.Generics) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) TreePrint(edu.stanford.nlp.trees.TreePrint) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) Consumer(java.util.function.Consumer) CorefChain(edu.stanford.nlp.coref.data.CorefChain) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) Tree(edu.stanford.nlp.trees.Tree) TimeAnnotations(edu.stanford.nlp.time.TimeAnnotations) NaturalLogicAnnotations(edu.stanford.nlp.naturalli.NaturalLogicAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) 
CoreMap(edu.stanford.nlp.util.CoreMap) TreePrint(edu.stanford.nlp.trees.TreePrint) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) RNNCoreAnnotations(edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) Timex(edu.stanford.nlp.time.Timex)

Example 9 with CorefChain

use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.

the class ProtobufAnnotationSerializer method fromProto.

/**
   * Returns a complete document, intended to mimic a document passed as input to
   * {@link ProtobufAnnotationSerializer#toProto(Annotation)} as closely as possible.
   * That is, most common fields are serialized, but there is no guarantee that custom additions
   * will be saved and retrieved.
   *
   * <p>The document is rebuilt in this order: text, characters, tokens, sentences, document
   * id and dates, coreference chains, per-sentence data that needs document context
   * (dependency graphs, entailments, relation triples, coref mentions), quotes, NER
   * mentions, and finally the speaker info of each coref mention.
   *
   * @param proto The protocol buffer to read the document from.
   * @return An Annotation corresponding to the read protobuf.
   * @throws RuntimeInterruptedException if the calling thread has been interrupted.
   */
@SuppressWarnings("deprecation")
public Annotation fromProto(CoreNLPProtos.Document proto) {
    if (Thread.interrupted()) {
        throw new RuntimeInterruptedException();
    }
    // Set text
    Annotation ann = new Annotation(proto.getText());
    // if there are characters, add characters
    if (proto.getCharacterCount() > 0) {
        List<CoreLabel> docChars = new ArrayList<>();
        for (CoreNLPProtos.Token c : proto.getCharacterList()) {
            docChars.add(fromProto(c));
        }
        ann.set(SegmenterCoreAnnotations.CharactersAnnotation.class, docChars);
    }
    // Add tokens
    List<CoreLabel> tokens = new ArrayList<>();
    if (proto.getSentenceCount() > 0) {
        // Populate the tokens from the sentence
        for (CoreNLPProtos.Sentence sentence : proto.getSentenceList()) {
            // It's conceivable that the sentences are not contiguous -- pad this with nulls
            while (sentence.hasTokenOffsetBegin() && tokens.size() < sentence.getTokenOffsetBegin()) {
                tokens.add(null);
            }
            // Read the sentence
            for (CoreNLPProtos.Token token : sentence.getTokenList()) {
                CoreLabel coreLabel = fromProto(token);
                // Set docid
                if (proto.hasDocID()) {
                    coreLabel.setDocID(proto.getDocID());
                }
                if (token.hasTokenBeginIndex() && token.hasTokenEndIndex()) {
                    // This is usually true, if enough annotators are defined.
                    // Pad far enough that every slot covered by this token exists, even if the
                    // token's end index lies beyond the sentence's recorded token end.
                    int requiredSize = Math.max(sentence.getTokenOffsetEnd(), token.getTokenEndIndex());
                    while (tokens.size() < requiredSize) {
                        tokens.add(null);
                    }
                    // Fill every position the token spans. Writing only the begin index on each
                    // iteration would leave null entries for tokens covering several positions.
                    for (int i = token.getTokenBeginIndex(); i < token.getTokenEndIndex(); ++i) {
                        tokens.set(i, coreLabel);
                    }
                } else {
                    // Assume this token spans a single token, and just add it to the tokens list
                    tokens.add(coreLabel);
                }
            }
        }
    } else if (proto.getSentencelessTokenCount() > 0) {
        // Eek -- no sentences. Try to recover tokens directly
        for (CoreNLPProtos.Token token : proto.getSentencelessTokenList()) {
            CoreLabel coreLabel = fromProto(token);
            // Set docid
            if (proto.hasDocID()) {
                coreLabel.setDocID(proto.getDocID());
            }
            tokens.add(coreLabel);
        }
    }
    if (!tokens.isEmpty()) {
        ann.set(TokensAnnotation.class, tokens);
    }
    // Add sentences
    List<CoreMap> sentences = new ArrayList<>(proto.getSentenceCount());
    for (int sentIndex = 0; sentIndex < proto.getSentenceCount(); ++sentIndex) {
        CoreNLPProtos.Sentence sentence = proto.getSentence(sentIndex);
        CoreMap map = fromProtoNoTokens(sentence);
        if (!tokens.isEmpty() && sentence.hasTokenOffsetBegin() && sentence.hasTokenOffsetEnd() && map.get(TokensAnnotation.class) == null) {
            // Set tokens for sentence
            int tokenBegin = sentence.getTokenOffsetBegin();
            int tokenEnd = sentence.getTokenOffsetEnd();
            assert tokenBegin <= tokens.size() && tokenBegin <= tokenEnd;
            assert tokenEnd <= tokens.size();
            // The sentence's token list is a view onto the document token list.
            map.set(TokensAnnotation.class, tokens.subList(tokenBegin, tokenEnd));
            // Set sentence index + token index + paragraph index
            for (int i = tokenBegin; i < tokenEnd; ++i) {
                tokens.get(i).setSentIndex(sentIndex);
                // Token indices within a sentence are 1-based.
                tokens.get(i).setIndex(i - sentence.getTokenOffsetBegin() + 1);
                if (sentence.hasParagraph()) {
                    tokens.get(i).set(ParagraphAnnotation.class, sentence.getParagraph());
                }
            }
            // Set text
            int characterBegin = sentence.getCharacterOffsetBegin();
            int characterEnd = sentence.getCharacterOffsetEnd();
            if (characterEnd <= proto.getText().length()) {
                // The usual case -- get the text from the document text
                map.set(TextAnnotation.class, proto.getText().substring(characterBegin, characterEnd));
            } else {
                // The document text is wrong -- guess the text from the tokens
                map.set(TextAnnotation.class, recoverOriginalText(tokens.subList(tokenBegin, tokenEnd), sentence));
            }
        }
        // End iteration
        sentences.add(map);
    }
    if (!sentences.isEmpty()) {
        ann.set(SentencesAnnotation.class, sentences);
    }
    // Set DocID
    String docid = null;
    if (proto.hasDocID()) {
        docid = proto.getDocID();
        ann.set(DocIDAnnotation.class, docid);
    }
    // Set reference time
    if (proto.hasDocDate()) {
        ann.set(DocDateAnnotation.class, proto.getDocDate());
    }
    if (proto.hasCalendar()) {
        GregorianCalendar calendar = new GregorianCalendar();
        calendar.setTimeInMillis(proto.getCalendar());
        ann.set(CalendarAnnotation.class, calendar);
    }
    // Set coref chain
    Map<Integer, CorefChain> corefChains = new HashMap<>();
    for (CoreNLPProtos.CorefChain chainProto : proto.getCorefChainList()) {
        CorefChain chain = fromProto(chainProto, ann);
        corefChains.put(chain.getChainID(), chain);
    }
    if (!corefChains.isEmpty()) {
        ann.set(CorefChainAnnotation.class, corefChains);
    }
    // hashes to access Mentions , later in this method need to add speakerInfo to Mention
    // so we need to create id -> Mention, CoreNLPProtos.Mention maps to do this, since SpeakerInfo could reference
    // any Mention in doc
    HashMap<Integer, Mention> idToMention = new HashMap<>();
    HashMap<Integer, CoreNLPProtos.Mention> idToProtoMention = new HashMap<>();
    // Set things in the sentence that need a document context.
    for (int sentenceIndex = 0; sentenceIndex < proto.getSentenceCount(); ++sentenceIndex) {
        CoreNLPProtos.Sentence sentence = proto.getSentenceList().get(sentenceIndex);
        CoreMap map = sentences.get(sentenceIndex);
        List<CoreLabel> sentenceTokens = map.get(TokensAnnotation.class);
        // Set dependency graphs
        if (sentence.hasBasicDependencies()) {
            map.set(BasicDependenciesAnnotation.class, fromProto(sentence.getBasicDependencies(), sentenceTokens, docid));
        }
        if (sentence.hasCollapsedDependencies()) {
            map.set(CollapsedDependenciesAnnotation.class, fromProto(sentence.getCollapsedDependencies(), sentenceTokens, docid));
        }
        if (sentence.hasCollapsedCCProcessedDependencies()) {
            map.set(CollapsedCCProcessedDependenciesAnnotation.class, fromProto(sentence.getCollapsedCCProcessedDependencies(), sentenceTokens, docid));
        }
        if (sentence.hasAlternativeDependencies()) {
            map.set(AlternativeDependenciesAnnotation.class, fromProto(sentence.getAlternativeDependencies(), sentenceTokens, docid));
        }
        if (sentence.hasEnhancedDependencies()) {
            map.set(EnhancedDependenciesAnnotation.class, fromProto(sentence.getEnhancedDependencies(), sentenceTokens, docid));
        }
        if (sentence.hasEnhancedPlusPlusDependencies()) {
            map.set(EnhancedPlusPlusDependenciesAnnotation.class, fromProto(sentence.getEnhancedPlusPlusDependencies(), sentenceTokens, docid));
        }
        // Set entailed sentences
        if (sentence.getEntailedSentenceCount() > 0) {
            Set<SentenceFragment> entailedSentences = sentence.getEntailedSentenceList().stream().map(frag -> fromProto(frag, map.get(EnhancedPlusPlusDependenciesAnnotation.class))).collect(Collectors.toSet());
            map.set(NaturalLogicAnnotations.EntailedSentencesAnnotation.class, entailedSentences);
        }
        if (sentence.getEntailedClauseCount() > 0) {
            Set<SentenceFragment> entailedClauses = sentence.getEntailedClauseList().stream().map(frag -> fromProto(frag, map.get(CollapsedDependenciesAnnotation.class))).collect(Collectors.toSet());
            map.set(NaturalLogicAnnotations.EntailedClausesAnnotation.class, entailedClauses);
        }
        // Set relation triples
        if (sentence.getOpenieTripleCount() > 0) {
            List<RelationTriple> triples = new ArrayList<>();
            for (CoreNLPProtos.RelationTriple triple : sentence.getOpenieTripleList()) {
                triples.add(fromProto(triple, ann, sentenceIndex));
            }
            map.set(NaturalLogicAnnotations.RelationTriplesAnnotation.class, triples);
        }
        // Redo some light annotation
        if (map.containsKey(TokensAnnotation.class) && (!sentence.hasHasNumerizedTokensAnnotation() || sentence.getHasNumerizedTokensAnnotation())) {
            map.set(NumerizedTokensAnnotation.class, NumberNormalizer.findAndMergeNumbers(map));
        }
        // add the CoreLabel and IndexedWord info to each mention
        // when Mentions are serialized, just storing the index in the sentence for CoreLabels and IndexedWords
        // this is the point where the de-serialized sentence has tokens
        int mentionInt = 0;
        for (CoreNLPProtos.Mention protoMention : sentence.getMentionsForCorefList()) {
            // get the mention; proto mentions and deserialized mentions are matched by list position
            Mention mentionToUpdate = map.get(CorefMentionsAnnotation.class).get(mentionInt);
            // store these in hash for more processing later in this method
            idToMention.put(mentionToUpdate.mentionID, mentionToUpdate);
            idToProtoMention.put(mentionToUpdate.mentionID, protoMention);
            // update the values; a negative token index means the field is left untouched
            int headIndexedWordIndex = protoMention.getHeadIndexedWord().getTokenIndex();
            if (headIndexedWordIndex >= 0) {
                mentionToUpdate.headIndexedWord = new IndexedWord(sentenceTokens.get(protoMention.getHeadIndexedWord().getTokenIndex()));
                mentionToUpdate.headIndexedWord.setCopyCount(protoMention.getHeadIndexedWord().getCopyCount());
            }
            int dependingVerbIndex = protoMention.getDependingVerb().getTokenIndex();
            if (dependingVerbIndex >= 0) {
                mentionToUpdate.dependingVerb = new IndexedWord(sentenceTokens.get(protoMention.getDependingVerb().getTokenIndex()));
                mentionToUpdate.dependingVerb.setCopyCount(protoMention.getDependingVerb().getCopyCount());
            }
            int headWordIndex = protoMention.getHeadWord().getTokenIndex();
            if (headWordIndex >= 0) {
                mentionToUpdate.headWord = sentenceTokens.get(protoMention.getHeadWord().getTokenIndex());
            }
            mentionToUpdate.sentenceWords = new ArrayList<>();
            for (CoreNLPProtos.IndexedWord clp : protoMention.getSentenceWordsList()) {
                int ti = clp.getTokenIndex();
                mentionToUpdate.sentenceWords.add(sentenceTokens.get(ti));
            }
            mentionToUpdate.originalSpan = new ArrayList<>();
            for (CoreNLPProtos.IndexedWord clp : protoMention.getOriginalSpanList()) {
                int ti = clp.getTokenIndex();
                mentionToUpdate.originalSpan.add(sentenceTokens.get(ti));
            }
            if (protoMention.getHasBasicDependency()) {
                mentionToUpdate.basicDependency = map.get(BasicDependenciesAnnotation.class);
            }
            if (protoMention.getHasEnhancedDepenedncy()) {
                mentionToUpdate.enhancedDependency = map.get(EnhancedDependenciesAnnotation.class);
            }
            if (protoMention.getHasContextParseTree()) {
                mentionToUpdate.contextParseTree = map.get(TreeAnnotation.class);
            }
            // move on to next mention
            mentionInt++;
        }
    }
    // Set quotes
    List<CoreMap> quotes = proto.getQuoteList().stream().map(quote -> fromProto(quote, tokens)).collect(Collectors.toList());
    if (!quotes.isEmpty()) {
        ann.set(QuotationsAnnotation.class, quotes);
    }
    // Set NERmention
    List<CoreMap> mentions = proto.getMentionsList().stream().map(this::fromProto).collect(Collectors.toList());
    if (!mentions.isEmpty()) {
        ann.set(MentionsAnnotation.class, mentions);
    }
    // also add all the Set<Mention>
    for (Map.Entry<Integer, Mention> mentionEntry : idToMention.entrySet()) {
        // this is the Mention message corresponding to this Mention
        Mention mentionToUpdate = mentionEntry.getValue();
        CoreNLPProtos.Mention correspondingProtoMention = idToProtoMention.get(mentionEntry.getKey());
        if (!correspondingProtoMention.hasSpeakerInfo()) {
            // so just continue to next Mention
            continue;
        }
        // if we're here we know a speakerInfo was stored
        SpeakerInfo speakerInfo = fromProto(correspondingProtoMention.getSpeakerInfo());
        // MentionID is ID in document, 0, 1, 2, etc...
        for (int speakerInfoMentionID : correspondingProtoMention.getSpeakerInfo().getMentionsList()) {
            speakerInfo.addMention(idToMention.get(speakerInfoMentionID));
        }
        // now the SpeakerInfo for this Mention should be fully restored
        mentionToUpdate.speakerInfo = speakerInfo;
    }
    // Return
    return ann;
}
Also used : ExtractionObject(edu.stanford.nlp.ie.machinereading.structure.ExtractionObject) java.util(java.util) CorefChain(edu.stanford.nlp.coref.data.CorefChain) edu.stanford.nlp.util(edu.stanford.nlp.util) Tree(edu.stanford.nlp.trees.Tree) Dictionaries(edu.stanford.nlp.coref.data.Dictionaries) MachineReadingAnnotations(edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations) TimeAnnotations(edu.stanford.nlp.time.TimeAnnotations) RelationMention(edu.stanford.nlp.ie.machinereading.structure.RelationMention) Mention(edu.stanford.nlp.coref.data.Mention) CoreAnnotation(edu.stanford.nlp.ling.CoreAnnotation) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) Language(edu.stanford.nlp.international.Language) RNNCoreAnnotations(edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) LabeledScoredTreeNode(edu.stanford.nlp.trees.LabeledScoredTreeNode) Timex(edu.stanford.nlp.time.Timex) IndexedWord(edu.stanford.nlp.ling.IndexedWord) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) edu.stanford.nlp.naturalli(edu.stanford.nlp.naturalli) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) NumberNormalizer(edu.stanford.nlp.ie.NumberNormalizer) Collectors(java.util.stream.Collectors) EntityMention(edu.stanford.nlp.ie.machinereading.structure.EntityMention) SegmenterCoreAnnotations(edu.stanford.nlp.ling.SegmenterCoreAnnotations) SpeakerInfo(edu.stanford.nlp.coref.data.SpeakerInfo) Span(edu.stanford.nlp.ie.machinereading.structure.Span) Word(edu.stanford.nlp.ling.Word) java.io(java.io) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) 
CorefChain(edu.stanford.nlp.coref.data.CorefChain) RelationMention(edu.stanford.nlp.ie.machinereading.structure.RelationMention) Mention(edu.stanford.nlp.coref.data.Mention) EntityMention(edu.stanford.nlp.ie.machinereading.structure.EntityMention) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) SegmenterCoreAnnotations(edu.stanford.nlp.ling.SegmenterCoreAnnotations) CoreAnnotation(edu.stanford.nlp.ling.CoreAnnotation) CoreLabel(edu.stanford.nlp.ling.CoreLabel) SpeakerInfo(edu.stanford.nlp.coref.data.SpeakerInfo) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 10 with CorefChain

use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.

the class CorefAnnotator method getLinks.

/**
 * Collects pairwise links between mentions that belong to the same coreference chain.
 *
 * <p>For every chain in {@code result}, the chain's mentions (as returned by
 * {@code getMentionsInTextualOrder()}) are compared against each other with a
 * {@code CorefChain.CorefMentionComparator}. Whenever the comparator orders {@code m1}
 * after {@code m2}, the pair {@code (m1.position, m2.position)} is appended to the output.
 *
 * @param result the coreference chains to scan; only the map's values are read
 * @return a newly created list of position pairs, one per ordered mention pair found
 */
public static List<Pair<IntTuple, IntTuple>> getLinks(Map<Integer, CorefChain> result) {
    List<Pair<IntTuple, IntTuple>> links = new ArrayList<>();
    CorefChain.CorefMentionComparator comparator = new CorefChain.CorefMentionComparator();
    for (CorefChain chain : result.values()) {
        List<CorefMention> mentions = chain.getMentionsInTextualOrder();
        for (CorefMention m1 : mentions) {
            for (CorefMention m2 : mentions) {
                // The Comparator contract only guarantees the sign of the result, not the
                // exact value 1, so test for "greater than" rather than equality with 1.
                if (comparator.compare(m1, m2) > 0) {
                    links.add(new Pair<>(m1.position, m2.position));
                }
            }
        }
    }
    return links;
}
Also used : CorefMention(edu.stanford.nlp.coref.data.CorefChain.CorefMention) CorefChain(edu.stanford.nlp.coref.data.CorefChain) ArrayList(java.util.ArrayList) Pair(edu.stanford.nlp.util.Pair)

Aggregations

CorefChain (edu.stanford.nlp.coref.data.CorefChain)27 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)17 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)12 CoreLabel (edu.stanford.nlp.ling.CoreLabel)12 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)10 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)7 Tree (edu.stanford.nlp.trees.Tree)7 CoreMap (edu.stanford.nlp.util.CoreMap)7 RelationTriple (edu.stanford.nlp.ie.util.RelationTriple)6 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)6 CorefMention (edu.stanford.nlp.coref.data.CorefChain.CorefMention)5 Annotation (edu.stanford.nlp.pipeline.Annotation)5 SentimentCoreAnnotations (edu.stanford.nlp.sentiment.SentimentCoreAnnotations)4 java.util (java.util)4 Collectors (java.util.stream.Collectors)4 EntityMention (edu.stanford.nlp.ie.machinereading.structure.EntityMention)3 MachineReadingAnnotations (edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations)3 RelationMention (edu.stanford.nlp.ie.machinereading.structure.RelationMention)3 Span (edu.stanford.nlp.ie.machinereading.structure.Span)3 CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation)3