Search in sources :

Example 21 with CorefChain

use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.

Source: the CustomAnnotationSerializer class, loadCorefChains method.

/**
 * Loads the CorefChain objects from the serialized buffer.
 *
 * <p>Expected format: one line holding the cluster count; then, for each cluster,
 * a header line {@code "<clusterId> <mentionCount>"} followed by one
 * whitespace-separated line per mention; finally one terminating line that is
 * consumed and discarded.</p>
 *
 * @param reader the buffer to read from
 * @return A map from cluster id to clusters, or {@code null} if the section is empty
 * @throws IOException if the underlying stream cannot be read or ends prematurely
 */
private static Map<Integer, CorefChain> loadCorefChains(BufferedReader reader) throws IOException {
    String line = reader.readLine();
    // Fail with a meaningful IOException (rather than an NPE) on a truncated stream.
    if (line == null) {
        throw new IOException("Unexpected end of stream while reading the coref chains");
    }
    line = line.trim();
    // An empty line means no coref chains were serialized for this document.
    if (line.isEmpty())
        return null;
    int clusterCount = Integer.parseInt(line);
    Map<Integer, CorefChain> chains = Generics.newHashMap();
    // read each cluster
    for (int c = 0; c < clusterCount; c++) {
        line = reader.readLine();
        if (line == null) {
            throw new IOException("Unexpected end of stream while reading coref cluster " + c);
        }
        String[] bits = line.trim().split("\\s");
        int cid = Integer.parseInt(bits[0]);
        int mentionCount = Integer.parseInt(bits[1]);
        Map<IntPair, Set<CorefChain.CorefMention>> mentionMap = Generics.newHashMap();
        CorefChain.CorefMention representative = null;
        // read each mention in this cluster
        for (int m = 0; m < mentionCount; m++) {
            line = reader.readLine();
            if (line == null) {
                throw new IOException("Unexpected end of stream while reading mentions of coref cluster " + cid);
            }
            bits = line.split("\\s");
            // Mention key: (sentence index, head index) pair.
            IntPair key = new IntPair(Integer.parseInt(bits[0]), Integer.parseInt(bits[1]));
            boolean rep = bits[2].equals("1");
            Dictionaries.MentionType mentionType = parseMentionType(bits[3]);
            Dictionaries.Number number = parseNumber(bits[4]);
            Dictionaries.Gender gender = parseGender(bits[5]);
            Dictionaries.Animacy animacy = parseAnimacy(bits[6]);
            int startIndex = Integer.parseInt(bits[7]);
            int endIndex = Integer.parseInt(bits[8]);
            int headIndex = Integer.parseInt(bits[9]);
            int clusterID = Integer.parseInt(bits[10]);
            int mentionID = Integer.parseInt(bits[11]);
            int sentNum = Integer.parseInt(bits[12]);
            // Variable-length position tuple, preceded by its own length.
            int posLen = Integer.parseInt(bits[13]);
            int[] posElems = new int[posLen];
            for (int i = 0; i < posLen; i++) {
                posElems[i] = Integer.parseInt(bits[14 + i]);
            }
            IntTuple position = new IntTuple(posElems);
            // The mention text has its spaces escaped during serialization.
            String span = unescapeSpace(bits[14 + posLen]);
            CorefChain.CorefMention mention = new CorefChain.CorefMention(mentionType, number, gender, animacy, startIndex, endIndex, headIndex, clusterID, mentionID, sentNum, position, span);
            // Group mentions that share the same (sentence, head) position.
            Set<CorefChain.CorefMention> mentionsWithThisHead = mentionMap.computeIfAbsent(key, k -> Generics.newHashSet());
            mentionsWithThisHead.add(mention);
            if (rep)
                representative = mention;
        }
        // construct the cluster
        CorefChain chain = new CorefChain(cid, mentionMap, representative);
        chains.put(cid, chain);
    }
    // consume the line that terminates the coref-chain section
    reader.readLine();
    return chains;
}
Also used : Dictionaries(edu.stanford.nlp.coref.data.Dictionaries) CorefChain(edu.stanford.nlp.coref.data.CorefChain)

Example 22 with CorefChain

use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.

the class CustomAnnotationSerializer method read.

@Override
public Pair<Annotation, InputStream> read(InputStream is) throws IOException {
    // Transparently decompress when this serializer was configured with
    // compression and the caller passed a raw stream.
    if (compress && !(is instanceof GZIPInputStream))
        is = new GZIPInputStream(is);
    // NOTE(review): InputStreamReader uses the platform default charset here —
    // confirm this matches the charset the annotation was written with.
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    Annotation doc = new Annotation("");
    String line;
    // read the coref graph (new format)
    Map<Integer, CorefChain> chains = loadCorefChains(reader);
    if (chains != null)
        doc.set(CorefCoreAnnotations.CorefChainAnnotation.class, chains);
    // read the coref graph (old format)
    // NOTE(review): readLine() returns null at end of stream, which would NPE on
    // trim() — presumably the serialized format always contains this line; verify.
    line = reader.readLine().trim();
    if (line.length() > 0) {
        // Old format: a flat space-separated list of edges, four integers per
        // edge — (src sentence, src token, dst sentence, dst token).
        String[] bits = line.split(" ");
        if (bits.length % 4 != 0) {
            throw new RuntimeIOException("ERROR: Incorrect format for the serialized coref graph: " + line);
        }
        List<Pair<IntTuple, IntTuple>> corefGraph = new ArrayList<>();
        for (int i = 0; i < bits.length; i += 4) {
            IntTuple src = new IntTuple(2);
            IntTuple dst = new IntTuple(2);
            src.set(0, Integer.parseInt(bits[i]));
            src.set(1, Integer.parseInt(bits[i + 1]));
            dst.set(0, Integer.parseInt(bits[i + 2]));
            dst.set(1, Integer.parseInt(bits[i + 3]));
            corefGraph.add(new Pair<>(src, dst));
        }
        doc.set(CorefCoreAnnotations.CorefGraphAnnotation.class, corefGraph);
    }
    // read individual sentences
    List<CoreMap> sentences = new ArrayList<>();
    while ((line = reader.readLine()) != null) {
        CoreMap sentence = new Annotation("");
        // first line is the parse tree. construct it with CoreLabels in Tree nodes
        Tree tree = new PennTreeReader(new StringReader(line), new LabeledScoredTreeFactory(CoreLabel.factory())).readTree();
        sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
        // read the dependency graphs, stored in this fixed order:
        // collapsed, uncollapsed (basic), CC-processed
        IntermediateSemanticGraph intermCollapsedDeps = loadDependencyGraph(reader);
        IntermediateSemanticGraph intermUncollapsedDeps = loadDependencyGraph(reader);
        IntermediateSemanticGraph intermCcDeps = loadDependencyGraph(reader);
        // the remaining lines until empty line are tokens
        List<CoreLabel> tokens = new ArrayList<>();
        while ((line = reader.readLine()) != null) {
            if (line.length() == 0)
                break;
            CoreLabel token = loadToken(line, haveExplicitAntecedent);
            tokens.add(token);
        }
        sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
        // convert the intermediate graph to an actual SemanticGraph
        // (resolution needs the token list that was just read)
        SemanticGraph collapsedDeps = intermCollapsedDeps.convertIntermediateGraph(tokens);
        sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, collapsedDeps);
        SemanticGraph uncollapsedDeps = intermUncollapsedDeps.convertIntermediateGraph(tokens);
        sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, uncollapsedDeps);
        SemanticGraph ccDeps = intermCcDeps.convertIntermediateGraph(tokens);
        sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, ccDeps);
        sentences.add(sentence);
    }
    doc.set(CoreAnnotations.SentencesAnnotation.class, sentences);
    // Return the (possibly GZIP-wrapped) stream so the caller can keep reading.
    return Pair.makePair(doc, is);
}
Also used : CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) GZIPInputStream(java.util.zip.GZIPInputStream) CorefChain(edu.stanford.nlp.coref.data.CorefChain) Tree(edu.stanford.nlp.trees.Tree) LabeledScoredTreeFactory(edu.stanford.nlp.trees.LabeledScoredTreeFactory) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) PennTreeReader(edu.stanford.nlp.trees.PennTreeReader) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph)

Example 23 with CorefChain

use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.

the class KBPAnnotator method annotate.

/**
   * Annotate this document for KBP relations.
   * @param annotation The document to annotate.
   */
@Override
public void annotate(Annotation annotation) {
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    // Annotate with NER
    //casedNER.annotate(annotation);
    //caselessNER.annotate(annotation);
    // Annotate with Mentions
    entityMentionAnnotator.annotate(annotation);
    // Create simple document
    Document doc = new Document(kbpProperties, serializer.toProto(annotation));
    // Get the mentions in the document
    List<CoreMap> mentions = new ArrayList<>();
    for (CoreMap sentence : sentences) {
        mentions.addAll(sentence.get(CoreAnnotations.MentionsAnnotation.class));
    }
    List<CoreMap> pronounMentions = annotatePronominalMentions(annotation);
    mentions.addAll(pronounMentions);
    // Compute coreferent clusters
    // (map an index to a KBP mention)
    // Keyed by (sentence index, token index) for every token of every mention,
    // so any token position inside a mention resolves to that mention.
    Map<Pair<Integer, Integer>, CoreMap> mentionByStartIndex = new HashMap<>();
    for (CoreMap mention : mentions) {
        for (CoreLabel token : mention.get(CoreAnnotations.TokensAnnotation.class)) {
            mentionByStartIndex.put(Pair.makePair(token.sentIndex(), token.index()), mention);
        }
    }
    // (collect coreferent KBP mentions)
    // map from canonical mention -> other mentions
    Map<CoreMap, Set<CoreMap>> mentionsMap = new HashMap<>();
    if (annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class) != null) {
        for (Map.Entry<Integer, CorefChain> chain : annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class).entrySet()) {
            CoreMap firstMention = null;
            for (CorefChain.CorefMention mention : chain.getValue().getMentionsInTextualOrder()) {
                // Map each coref mention back to a KBP mention by scanning its
                // token span for any indexed position.
                // NOTE(review): sentNum appears to be 1-based here (hence the -1)
                // while token indices are used as-is — confirm against CorefMention.
                CoreMap kbpMention = null;
                for (int i = mention.startIndex; i < mention.endIndex; ++i) {
                    if (mentionByStartIndex.containsKey(Pair.makePair(mention.sentNum - 1, i))) {
                        kbpMention = mentionByStartIndex.get(Pair.makePair(mention.sentNum - 1, i));
                        break;
                    }
                }
                // The first resolvable mention of the chain becomes the canonical one.
                if (firstMention == null) {
                    firstMention = kbpMention;
                }
                if (kbpMention != null) {
                    if (!mentionsMap.containsKey(firstMention)) {
                        mentionsMap.put(firstMention, new LinkedHashSet<>());
                    }
                    mentionsMap.get(firstMention).add(kbpMention);
                }
            }
        }
    }
    // (coreference acronyms)
    acronymMatch(mentions, mentionsMap);
    // (ensure valid NER tag for canonical mention)
    // If the canonical mention has no NER tag, promote the first coreferent
    // mention that does; drop the chain entirely if none has one.
    for (CoreMap key : new HashSet<>(mentionsMap.keySet())) {
        if (key.get(CoreAnnotations.NamedEntityTagAnnotation.class) == null) {
            CoreMap newKey = null;
            for (CoreMap candidate : mentionsMap.get(key)) {
                if (candidate.get(CoreAnnotations.NamedEntityTagAnnotation.class) != null) {
                    newKey = candidate;
                    break;
                }
            }
            if (newKey != null) {
                mentionsMap.put(newKey, mentionsMap.remove(key));
            } else {
                // case: no mention in this chain has an NER tag.
                mentionsMap.remove(key);
            }
        }
    }
    // Propagate Entity Link
    // Copy the canonical mention's Wikipedia link onto every token of every
    // coreferent mention (may be null, which clears/sets null on the token).
    for (Map.Entry<CoreMap, Set<CoreMap>> entry : mentionsMap.entrySet()) {
        String entityLink = entry.getKey().get(CoreAnnotations.WikipediaEntityAnnotation.class);
        for (CoreMap mention : entry.getValue()) {
            for (CoreLabel token : mention.get(CoreAnnotations.TokensAnnotation.class)) {
                token.set(CoreAnnotations.WikipediaEntityAnnotation.class, entityLink);
            }
        }
    }
    // Create a canonical mention map
    Map<CoreMap, CoreMap> mentionToCanonicalMention = new HashMap<>();
    for (Map.Entry<CoreMap, Set<CoreMap>> entry : mentionsMap.entrySet()) {
        for (CoreMap mention : entry.getValue()) {
            // (set the NER tag + link to be axiomatically that of the canonical mention)
            mention.set(CoreAnnotations.NamedEntityTagAnnotation.class, entry.getKey().get(CoreAnnotations.NamedEntityTagAnnotation.class));
            mention.set(CoreAnnotations.WikipediaEntityAnnotation.class, entry.getKey().get(CoreAnnotations.WikipediaEntityAnnotation.class));
            // (add the mention (note: this must come after we set the NER!)
            mentionToCanonicalMention.put(mention, entry.getKey());
        }
    }
    // (add missing mentions)
    // Any mention not in a coref chain is its own canonical mention.
    mentions.stream().filter(mention -> mentionToCanonicalMention.get(mention) == null).forEach(mention -> mentionToCanonicalMention.put(mention, mention));
    // Cluster mentions by sentence
    @SuppressWarnings("unchecked") List<CoreMap>[] mentionsBySentence = new List[annotation.get(CoreAnnotations.SentencesAnnotation.class).size()];
    for (int i = 0; i < mentionsBySentence.length; ++i) {
        mentionsBySentence[i] = new ArrayList<>();
    }
    for (CoreMap mention : mentionToCanonicalMention.keySet()) {
        mentionsBySentence[mention.get(CoreAnnotations.SentenceIndexAnnotation.class)].add(mention);
    }
    // Classify
    // For every ordered (subject, object) pair of mentions in each sentence,
    // run the relation extractor and collect non-NO_RELATION predictions.
    for (int sentenceI = 0; sentenceI < mentionsBySentence.length; ++sentenceI) {
        // the annotations
        List<RelationTriple> triples = new ArrayList<>();
        List<CoreMap> candidates = mentionsBySentence[sentenceI];
        // determine sentence length
        int sentenceLength = annotation.get(CoreAnnotations.SentencesAnnotation.class).get(sentenceI).get(CoreAnnotations.TokensAnnotation.class).size();
        // check if sentence is too long, if it's too long don't run kbp
        if (maxLength != -1 && sentenceLength > maxLength) {
            // set the triples annotation to an empty list of RelationTriples
            annotation.get(CoreAnnotations.SentencesAnnotation.class).get(sentenceI).set(CoreAnnotations.KBPTriplesAnnotation.class, triples);
            // continue to next sentence
            continue;
        }
        // sentence isn't too long, so continue processing this sentence
        for (int subjI = 0; subjI < candidates.size(); ++subjI) {
            CoreMap subj = candidates.get(subjI);
            // Token spans: CoreLabel.index() is 1-based, so begin is index-1
            // (0-based inclusive) and end is the last token's index (0-based exclusive).
            int subjBegin = subj.get(CoreAnnotations.TokensAnnotation.class).get(0).index() - 1;
            int subjEnd = subj.get(CoreAnnotations.TokensAnnotation.class).get(subj.get(CoreAnnotations.TokensAnnotation.class).size() - 1).index();
            Optional<KBPRelationExtractor.NERTag> subjNER = KBPRelationExtractor.NERTag.fromString(subj.get(CoreAnnotations.NamedEntityTagAnnotation.class));
            if (subjNER.isPresent()) {
                for (int objI = 0; objI < candidates.size(); ++objI) {
                    if (subjI == objI) {
                        continue;
                    }
                    // Honor thread interruption during the O(n^2) candidate sweep.
                    if (Thread.interrupted()) {
                        throw new RuntimeInterruptedException();
                    }
                    CoreMap obj = candidates.get(objI);
                    int objBegin = obj.get(CoreAnnotations.TokensAnnotation.class).get(0).index() - 1;
                    int objEnd = obj.get(CoreAnnotations.TokensAnnotation.class).get(obj.get(CoreAnnotations.TokensAnnotation.class).size() - 1).index();
                    Optional<KBPRelationExtractor.NERTag> objNER = KBPRelationExtractor.NERTag.fromString(obj.get(CoreAnnotations.NamedEntityTagAnnotation.class));
                    if (objNER.isPresent() && KBPRelationExtractor.RelationType.plausiblyHasRelation(subjNER.get(), objNER.get())) {
                        // type check
                        KBPRelationExtractor.KBPInput input = new KBPRelationExtractor.KBPInput(new Span(subjBegin, subjEnd), new Span(objBegin, objEnd), subjNER.get(), objNER.get(), doc.sentence(sentenceI));
                        //  -- BEGIN Classify
                        Pair<String, Double> prediction = extractor.classify(input);
                        // Handle the classifier output
                        if (!KBPStatisticalExtractor.NO_RELATION.equals(prediction.first)) {
                            RelationTriple triple = new RelationTriple.WithLink(subj.get(CoreAnnotations.TokensAnnotation.class), mentionToCanonicalMention.get(subj).get(CoreAnnotations.TokensAnnotation.class), Collections.singletonList(new CoreLabel(new Word(prediction.first))), obj.get(CoreAnnotations.TokensAnnotation.class), mentionToCanonicalMention.get(obj).get(CoreAnnotations.TokensAnnotation.class), prediction.second, sentences.get(sentenceI).get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class), subj.get(CoreAnnotations.WikipediaEntityAnnotation.class), obj.get(CoreAnnotations.WikipediaEntityAnnotation.class));
                            triples.add(triple);
                        }
                    }
                }
            }
        }
        // Set triples
        annotation.get(CoreAnnotations.SentencesAnnotation.class).get(sentenceI).set(CoreAnnotations.KBPTriplesAnnotation.class, triples);
    }
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) java.util(java.util) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CorefChain(edu.stanford.nlp.coref.data.CorefChain) IOUtils(edu.stanford.nlp.io.IOUtils) edu.stanford.nlp.util(edu.stanford.nlp.util) Redwood(edu.stanford.nlp.util.logging.Redwood) IOException(java.io.IOException) Document(edu.stanford.nlp.simple.Document) Collectors(java.util.stream.Collectors) LinearClassifier(edu.stanford.nlp.classify.LinearClassifier) Classifier(edu.stanford.nlp.classify.Classifier) Span(edu.stanford.nlp.ie.machinereading.structure.Span) edu.stanford.nlp.ie(edu.stanford.nlp.ie) Word(edu.stanford.nlp.ling.Word) CoreAnnotation(edu.stanford.nlp.ling.CoreAnnotation) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) WordLists(edu.stanford.nlp.coref.data.WordLists) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) Word(edu.stanford.nlp.ling.Word) Document(edu.stanford.nlp.simple.Document) Span(edu.stanford.nlp.ie.machinereading.structure.Span) CorefChain(edu.stanford.nlp.coref.data.CorefChain) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations)

Example 24 with CorefChain

use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.

the class ProtobufAnnotationSerializer method toProtoBuilder.

/**
   * <p>
   *   The method to extend by subclasses of the Protobuf Annotator if custom additions are added to Tokens.
   *   In contrast to {@link ProtobufAnnotationSerializer#toProto(edu.stanford.nlp.ling.CoreLabel)}, this function
   *   returns a builder that can be extended.
   * </p>
   *
   * @param doc The sentence to save to a protocol buffer
   * @param keysToSerialize A set tracking which keys have been saved. It's important to remove any keys added to the proto
   *                        from this set, as the code tracks annotations to ensure lossless serialization.
   */
protected CoreNLPProtos.Document.Builder toProtoBuilder(Annotation doc, Set<Class<?>> keysToSerialize) {
    CoreNLPProtos.Document.Builder proto = CoreNLPProtos.Document.newBuilder();
    // The document text is the one mandatory field of the proto.
    proto.setText(doc.get(TextAnnotation.class));
    keysToSerialize.remove(TextAnnotation.class);
    // Serialize sentences when we have them; otherwise fall back to a flat,
    // sentence-less token list.
    if (doc.containsKey(SentencesAnnotation.class)) {
        for (CoreMap sent : doc.get(SentencesAnnotation.class)) {
            proto.addSentence(toProto(sent));
        }
        keysToSerialize.remove(SentencesAnnotation.class);
    } else if (doc.containsKey(TokensAnnotation.class)) {
        for (CoreLabel tok : doc.get(TokensAnnotation.class)) {
            proto.addSentencelessToken(toProto(tok));
        }
    }
    // Document-level metadata fields, each written only when present.
    if (doc.containsKey(DocIDAnnotation.class)) {
        proto.setDocID(doc.get(DocIDAnnotation.class));
        keysToSerialize.remove(DocIDAnnotation.class);
    }
    if (doc.containsKey(DocDateAnnotation.class)) {
        proto.setDocDate(doc.get(DocDateAnnotation.class));
        keysToSerialize.remove(DocDateAnnotation.class);
    }
    if (doc.containsKey(CalendarAnnotation.class)) {
        // The calendar is stored as epoch milliseconds.
        long epochMillis = doc.get(CalendarAnnotation.class).toInstant().toEpochMilli();
        proto.setCalendar(epochMillis);
        keysToSerialize.remove(CalendarAnnotation.class);
    }
    // Coreference chains.
    if (doc.containsKey(CorefChainAnnotation.class)) {
        for (Map.Entry<Integer, CorefChain> chainEntry : doc.get(CorefChainAnnotation.class).entrySet()) {
            proto.addCorefChain(toProto(chainEntry.getValue()));
        }
        keysToSerialize.remove(CorefChainAnnotation.class);
    }
    // Quotations.
    if (doc.containsKey(QuotationsAnnotation.class)) {
        for (CoreMap quotation : doc.get(QuotationsAnnotation.class)) {
            proto.addQuote(toProtoQuote(quotation));
        }
        keysToSerialize.remove(QuotationsAnnotation.class);
    }
    // Entity mentions.
    if (doc.containsKey(MentionsAnnotation.class)) {
        for (CoreMap entityMention : doc.get(MentionsAnnotation.class)) {
            proto.addMentions(toProtoMention(entityMention));
        }
        keysToSerialize.remove(MentionsAnnotation.class);
    }
    // Character-level info produced by the segmenter.
    if (doc.containsKey(SegmenterCoreAnnotations.CharactersAnnotation.class)) {
        for (CoreLabel ch : doc.get(SegmenterCoreAnnotations.CharactersAnnotation.class)) {
            proto.addCharacter(toProto(ch));
        }
        keysToSerialize.remove(SegmenterCoreAnnotations.CharactersAnnotation.class);
    }
    // Hand the builder back so subclasses can extend it further.
    return proto;
}
Also used : SegmenterCoreAnnotations(edu.stanford.nlp.ling.SegmenterCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CorefChain(edu.stanford.nlp.coref.data.CorefChain)

Example 25 with CorefChain

use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.

the class TextOutputter method print.

/**
 * The meat of the outputter: writes a human-readable dump of the annotation —
 * document metadata, then per-sentence tokens, parse trees, dependencies,
 * MachineReading/OpenIE/KBP relations, followed by coreference sets and quotes.
 *
 * @param annotation the annotated document to print
 * @param pw the writer to print to (flushed before returning)
 * @param options printing options (beam threshold, constituent tree printer)
 * @throws IOException declared for API compatibility
 */
private static void print(Annotation annotation, PrintWriter pw, Options options) throws IOException {
    double beam = options.beamPrintingOption;
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    // Display docid if available
    String docId = annotation.get(CoreAnnotations.DocIDAnnotation.class);
    if (docId != null) {
        List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
        int nSentences = (sentences != null) ? sentences.size() : 0;
        int nTokens = (tokens != null) ? tokens.size() : 0;
        pw.printf("Document: ID=%s (%d sentences, %d tokens)%n", docId, nSentences, nTokens);
    }
    // Display doctitle if available
    String docTitle = annotation.get(CoreAnnotations.DocTitleAnnotation.class);
    if (docTitle != null) {
        pw.printf("Document Title: %s%n", docTitle);
    }
    // Display docdate if available
    String docDate = annotation.get(CoreAnnotations.DocDateAnnotation.class);
    if (docDate != null) {
        pw.printf("Document Date: %s%n", docDate);
    }
    // Display doctype if available
    String docType = annotation.get(CoreAnnotations.DocTypeAnnotation.class);
    if (docType != null) {
        pw.printf("Document Type: %s%n", docType);
    }
    // Display docsourcetype if available
    String docSourceType = annotation.get(CoreAnnotations.DocSourceTypeAnnotation.class);
    if (docSourceType != null) {
        pw.printf("Document Source Type: %s%n", docSourceType);
    }
    // display each sentence in this annotation
    if (sentences != null) {
        for (int i = 0, sz = sentences.size(); i < sz; i++) {
            CoreMap sentence = sentences.get(i);
            List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
            String sentiment = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
            if (sentiment == null) {
                sentiment = "";
            } else {
                sentiment = ", sentiment: " + sentiment;
            }
            pw.printf("Sentence #%d (%d tokens%s):%n", (i + 1), tokens.size(), sentiment);
            String text = sentence.get(CoreAnnotations.TextAnnotation.class);
            pw.println(text);
            // display the token-level annotations
            String[] tokenAnnotations = { "Text", "PartOfSpeech", "Lemma", "Answer", "NamedEntityTag", "CharacterOffsetBegin", "CharacterOffsetEnd", "NormalizedNamedEntityTag", "Timex", "TrueCase", "TrueCaseText", "SentimentClass", "WikipediaEntity" };
            for (CoreLabel token : tokens) {
                pw.print(token.toShorterString(tokenAnnotations));
                pw.println();
            }
            // display the parse tree for this sentence
            Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            if (tree != null) {
                options.constituentTreePrinter.printTree(tree, pw);
            }
            // Dependencies may be absent — e.g. when annotating a
            // language which doesn't have dependencies, for example.
            if (sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class) != null) {
                pw.print(sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class).toList());
                pw.println();
            }
            // display MachineReading entities and relations
            List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
            if (entities != null) {
                pw.println("Extracted the following MachineReading entity mentions:");
                for (EntityMention e : entities) {
                    pw.print('\t');
                    pw.println(e);
                }
            }
            List<RelationMention> relations = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
            if (relations != null) {
                pw.println("Extracted the following MachineReading relation mentions:");
                for (RelationMention r : relations) {
                    // Only print relations above the configured beam threshold.
                    if (r.printableObject(beam)) {
                        pw.println(r);
                    }
                }
            }
            // display OpenIE triples
            Collection<RelationTriple> openieTriples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
            if (openieTriples != null && openieTriples.size() > 0) {
                pw.println("Extracted the following Open IE triples:");
                for (RelationTriple triple : openieTriples) {
                    pw.println(OpenIE.tripleToString(triple, docId, sentence));
                }
            }
            // display KBP triples
            Collection<RelationTriple> kbpTriples = sentence.get(CoreAnnotations.KBPTriplesAnnotation.class);
            if (kbpTriples != null && kbpTriples.size() > 0) {
                pw.println("Extracted the following KBP triples:");
                for (RelationTriple triple : kbpTriples) {
                    pw.println(triple.toString());
                }
            }
        }
    }
    // display the old-style doc-level coref annotations
    // this is not supported anymore!
    //String corefAnno = annotation.get(CorefPLAnnotation.class);
    //if(corefAnno != null) os.println(corefAnno);
    // display the new-style coreference graph
    // Each non-representative mention is printed as an arrow to the chain's
    // representative mention.
    Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    if (corefChains != null && sentences != null) {
        for (CorefChain chain : corefChains.values()) {
            CorefChain.CorefMention representative = chain.getRepresentativeMention();
            boolean outputHeading = false;
            for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
                if (mention == representative)
                    continue;
                if (!outputHeading) {
                    outputHeading = true;
                    pw.println("Coreference set:");
                }
                // all offsets start at 1!
                pw.printf("\t(%d,%d,[%d,%d]) -> (%d,%d,[%d,%d]), that is: \"%s\" -> \"%s\"%n", mention.sentNum, mention.headIndex, mention.startIndex, mention.endIndex, representative.sentNum, representative.headIndex, representative.startIndex, representative.endIndex, mention.mentionSpan, representative.mentionSpan);
            }
        }
    }
    // display quotes if available
    if (annotation.get(CoreAnnotations.QuotationsAnnotation.class) != null) {
        pw.println("Extracted quotes: ");
        List<CoreMap> allQuotes = QuoteAnnotator.gatherQuotes(annotation);
        for (CoreMap quote : allQuotes) {
            pw.printf("[QuotationIndexAnnotation=%d, CharacterOffsetBegin=%d, Text=%s]%n", quote.get(CoreAnnotations.QuotationIndexAnnotation.class), quote.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class), quote.get(CoreAnnotations.TextAnnotation.class));
        }
    }
    pw.flush();
}
Also used : RelationMention(edu.stanford.nlp.ie.machinereading.structure.RelationMention) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) MachineReadingAnnotations(edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations) EntityMention(edu.stanford.nlp.ie.machinereading.structure.EntityMention) CorefChain(edu.stanford.nlp.coref.data.CorefChain) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) Tree(edu.stanford.nlp.trees.Tree) NaturalLogicAnnotations(edu.stanford.nlp.naturalli.NaturalLogicAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Aggregations

CorefChain (edu.stanford.nlp.coref.data.CorefChain)27 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)17 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)12 CoreLabel (edu.stanford.nlp.ling.CoreLabel)12 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)10 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)7 Tree (edu.stanford.nlp.trees.Tree)7 CoreMap (edu.stanford.nlp.util.CoreMap)7 RelationTriple (edu.stanford.nlp.ie.util.RelationTriple)6 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)6 CorefMention (edu.stanford.nlp.coref.data.CorefChain.CorefMention)5 Annotation (edu.stanford.nlp.pipeline.Annotation)5 SentimentCoreAnnotations (edu.stanford.nlp.sentiment.SentimentCoreAnnotations)4 java.util (java.util)4 Collectors (java.util.stream.Collectors)4 EntityMention (edu.stanford.nlp.ie.machinereading.structure.EntityMention)3 MachineReadingAnnotations (edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations)3 RelationMention (edu.stanford.nlp.ie.machinereading.structure.RelationMention)3 Span (edu.stanford.nlp.ie.machinereading.structure.Span)3 CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation)3