Search in sources :

Example 1 with CorefChain

use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.

the class DcorefExactOutputITest method testCoref.

/**
 * Runs the pipeline over the STILLALONEWOLF sample document and compares the
 * coref chains it produces with the stored expected results.
 *
 * @throws IOException declared for the file-reading helpers used here
 */
public void testCoref() throws IOException {
    final String text = IOUtils.slurpFile("edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.sgm");
    // Process first, then load expectations, so failures surface in the same order as before.
    final Map<Integer, CorefChain> actualChains = pipeline.process(text).get(CorefCoreAnnotations.CorefChainAnnotation.class);
    final Map<Integer, List<ExpectedMention>> expectedChains = loadExpectedResults("edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.expectedcoref");
    compareResults(expectedChains, actualChains);
}
Also used : CorefChain(edu.stanford.nlp.coref.data.CorefChain) ArrayList(java.util.ArrayList) List(java.util.List) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 2 with CorefChain

use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.

the class DcorefExactOutputITest method compareResults.

/**
 * Asserts that the expected chains and the produced chains match one-to-one
 * according to {@code compareChain}, in both directions.
 *
 * @param expected expected mentions, keyed by chain id
 * @param chains   chains produced by the coref system, keyed by chain id
 */
public static void compareResults(Map<Integer, List<ExpectedMention>> expected, Map<Integer, CorefChain> chains) {
    assertEquals("Unexpected difference in number of chains", expected.size(), chains.size());
    // Chain ID numbers are deliberately ignored: a chain matches if ANY chain on the other side matches.
    // Direction 1: every expected chain must appear among the produced chains.
    for (Map.Entry<Integer, List<ExpectedMention>> wanted : expected.entrySet()) {
        List<ExpectedMention> wantedMentions = wanted.getValue();
        boolean matched = chains.values().stream().anyMatch(produced -> compareChain(wantedMentions, produced));
        assertTrue("Could not find expected coref chain " + wanted.getKey() + " " + wantedMentions + " in the results", matched);
    }
    // Direction 2: every produced chain must appear among the expected chains.
    for (CorefChain produced : chains.values()) {
        boolean matched = expected.values().stream().anyMatch(wantedMentions -> compareChain(wantedMentions, produced));
        assertTrue("Dcoref produced chain " + produced + " which was not in the expected results", matched);
    }
}
Also used : CorefChain(edu.stanford.nlp.coref.data.CorefChain) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map)

Example 3 with CorefChain

use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.

the class XMLOutputter method annotationToDoc.

/**
   * Converts the given annotation to an XML document using the specified options.
   * <p>
   * The document has a {@code root} element holding a single {@code document}
   * element. That element receives the document-level metadata, a
   * {@code sentences} element with one {@code sentence} child per sentence
   * (tokens, parse, dependencies, Open IE / KBP triples, machine-reading
   * entities and relations, sentiment), and, when coref chains are present and
   * {@code addCorefGraphInfo} reports success, a {@code coreference} element.
   *
   * @param annotation the annotation to serialize
   * @param options    output options; this method reads {@code includeText},
   *                   {@code constituentTreePrinter} and {@code relationsBeam},
   *                   and passes the whole object to {@code addCorefGraphInfo}
   * @return the newly built XML document
   */
public static Document annotationToDoc(Annotation annotation, Options options) {
    //
    // create the XML document with the root node pointing to the namespace URL
    //
    Element root = new Element("root", NAMESPACE_URI);
    Document xmlDoc = new Document(root);
    ProcessingInstruction pi = new ProcessingInstruction("xml-stylesheet", "href=\"" + STYLESHEET_NAME + "\" type=\"text/xsl\"");
    xmlDoc.insertChild(pi, 0);
    Element docElem = new Element("document", NAMESPACE_URI);
    root.appendChild(docElem);
    setSingleElement(docElem, "docId", NAMESPACE_URI, annotation.get(CoreAnnotations.DocIDAnnotation.class));
    setSingleElement(docElem, "docDate", NAMESPACE_URI, annotation.get(CoreAnnotations.DocDateAnnotation.class));
    setSingleElement(docElem, "docSourceType", NAMESPACE_URI, annotation.get(CoreAnnotations.DocSourceTypeAnnotation.class));
    setSingleElement(docElem, "docType", NAMESPACE_URI, annotation.get(CoreAnnotations.DocTypeAnnotation.class));
    setSingleElement(docElem, "author", NAMESPACE_URI, annotation.get(CoreAnnotations.AuthorAnnotation.class));
    setSingleElement(docElem, "location", NAMESPACE_URI, annotation.get(CoreAnnotations.LocationAnnotation.class));
    if (options.includeText) {
        setSingleElement(docElem, "text", NAMESPACE_URI, annotation.get(CoreAnnotations.TextAnnotation.class));
    }
    Element sentencesElem = new Element("sentences", NAMESPACE_URI);
    docElem.appendChild(sentencesElem);
    // Look the sentence list up once; it is needed both for the per-sentence output and for coref.
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    if (sentences != null) {
        // sentence ids in the output are 1-based
        int sentCount = 1;
        for (CoreMap sentence : sentences) {
            Element sentElem = new Element("sentence", NAMESPACE_URI);
            sentElem.addAttribute(new Attribute("id", Integer.toString(sentCount)));
            Integer lineNumber = sentence.get(CoreAnnotations.LineNumberAnnotation.class);
            if (lineNumber != null) {
                sentElem.addAttribute(new Attribute("line", Integer.toString(lineNumber)));
            }
            sentCount++;
            // add the word table with all token-level annotations
            Element wordTable = new Element("tokens", NAMESPACE_URI);
            List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
            for (int j = 0; j < tokens.size(); j++) {
                Element wordInfo = new Element("token", NAMESPACE_URI);
                // token ids in the output are 1-based as well
                addWordInfo(wordInfo, tokens.get(j), j + 1, NAMESPACE_URI);
                wordTable.appendChild(wordInfo);
            }
            sentElem.appendChild(wordTable);
            // add tree info
            Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            if (tree != null) {
                // add the constituent tree for this sentence
                Element parseInfo = new Element("parse", NAMESPACE_URI);
                addConstituentTreeInfo(parseInfo, tree, options.constituentTreePrinter);
                sentElem.appendChild(parseInfo);
            }
            SemanticGraph basicDependencies = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
            if (basicDependencies != null) {
                // add the dependencies for this sentence; the other flavours are only
                // attempted when basic dependencies exist, and each is skipped if the
                // builder returns null
                Element depInfo = buildDependencyTreeInfo("basic-dependencies", basicDependencies, tokens, NAMESPACE_URI);
                if (depInfo != null) {
                    sentElem.appendChild(depInfo);
                }
                depInfo = buildDependencyTreeInfo("collapsed-dependencies", sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class), tokens, NAMESPACE_URI);
                if (depInfo != null) {
                    sentElem.appendChild(depInfo);
                }
                depInfo = buildDependencyTreeInfo("collapsed-ccprocessed-dependencies", sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class), tokens, NAMESPACE_URI);
                if (depInfo != null) {
                    sentElem.appendChild(depInfo);
                }
                depInfo = buildDependencyTreeInfo("enhanced-dependencies", sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class), tokens, NAMESPACE_URI);
                if (depInfo != null) {
                    sentElem.appendChild(depInfo);
                }
                depInfo = buildDependencyTreeInfo("enhanced-plus-plus-dependencies", sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class), tokens, NAMESPACE_URI);
                if (depInfo != null) {
                    sentElem.appendChild(depInfo);
                }
            }
            // add Open IE triples
            Collection<RelationTriple> openieTriples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
            if (openieTriples != null) {
                Element openieElem = new Element("openie", NAMESPACE_URI);
                addTriples(openieTriples, openieElem, NAMESPACE_URI);
                sentElem.appendChild(openieElem);
            }
            // add KBP triples
            Collection<RelationTriple> kbpTriples = sentence.get(CoreAnnotations.KBPTriplesAnnotation.class);
            if (kbpTriples != null) {
                Element kbpElem = new Element("kbp", NAMESPACE_URI);
                addTriples(kbpTriples, kbpElem, NAMESPACE_URI);
                sentElem.appendChild(kbpElem);
            }
            // add the MR entities and relations (relations are only emitted alongside entities)
            List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
            List<RelationMention> relations = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
            if (entities != null && !entities.isEmpty()) {
                Element mrElem = new Element("MachineReading", NAMESPACE_URI);
                Element entElem = new Element("entities", NAMESPACE_URI);
                addEntities(entities, entElem, NAMESPACE_URI);
                mrElem.appendChild(entElem);
                if (relations != null) {
                    Element relElem = new Element("relations", NAMESPACE_URI);
                    addRelations(relations, relElem, NAMESPACE_URI, options.relationsBeam);
                    mrElem.appendChild(relElem);
                }
                sentElem.appendChild(mrElem);
            }
            // add sentiment as attributes of this sentence
            Tree sentimentTree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
            if (sentimentTree != null) {
                int sentiment = RNNCoreAnnotations.getPredictedClass(sentimentTree);
                sentElem.addAttribute(new Attribute("sentimentValue", Integer.toString(sentiment)));
                String sentimentClass = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
                // A sentiment tree can exist without a class label; skip the attribute
                // rather than fail with a NullPointerException.
                if (sentimentClass != null) {
                    // literal replacement: strip spaces from the label
                    sentElem.addAttribute(new Attribute("sentiment", sentimentClass.replace(" ", "")));
                }
            }
            // add the sentence to the root
            sentencesElem.appendChild(sentElem);
        }
    }
    //
    // add the coref graph
    //
    Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    if (corefChains != null) {
        Element corefInfo = new Element("coreference", NAMESPACE_URI);
        // only attach the element when the helper reports success
        if (addCorefGraphInfo(options, corefInfo, sentences, corefChains, NAMESPACE_URI)) {
            docElem.appendChild(corefInfo);
        }
    }
    return xmlDoc;
}
Also used : RelationMention(edu.stanford.nlp.ie.machinereading.structure.RelationMention) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) MachineReadingAnnotations(edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations) EntityMention(edu.stanford.nlp.ie.machinereading.structure.EntityMention) CorefChain(edu.stanford.nlp.coref.data.CorefChain) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) Tree(edu.stanford.nlp.trees.Tree) NaturalLogicAnnotations(edu.stanford.nlp.naturalli.NaturalLogicAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) TreePrint(edu.stanford.nlp.trees.TreePrint) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) RNNCoreAnnotations(edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 4 with CorefChain

use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.

the class StanfordCoreNlpDemo method main.

/**
 * Usage: java -cp "*" StanfordCoreNlpDemo [inputFile [outputTextFile [outputXmlFile]]]
 * <p>
 * Builds a pipeline, annotates either the given input file or a built-in
 * example sentence, pretty-prints the result, and then prints selected
 * analyses of the first sentence.
 * <p>
 * Both writers are closed in a {@code finally} block so that buffered output
 * is flushed on every exit path, including failures.
 *
 * @param args optional input file, text output file and XML output file
 * @throws IOException if an output file cannot be opened
 */
public static void main(String[] args) throws IOException {
    PrintWriter out = null;
    PrintWriter xmlOut = null;
    try {
        // set up optional output files
        if (args.length > 1) {
            out = new PrintWriter(args[1]);
        } else {
            out = new PrintWriter(System.out);
        }
        if (args.length > 2) {
            xmlOut = new PrintWriter(args[2]);
        }
        // Create a CoreNLP pipeline. To build the default pipeline, you can just use:
        //   StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        // Here's a more complex setup example:
        //   Properties props = new Properties();
        //   props.put("annotators", "tokenize, ssplit, pos, lemma, ner, depparse");
        //   props.put("ner.model", "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz");
        //   props.put("ner.applyNumericClassifiers", "false");
        //   StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        // Add in sentiment
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        // Initialize an Annotation with some text to be annotated. The text is the argument to the constructor.
        Annotation annotation;
        if (args.length > 0) {
            annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[0]));
        } else {
            annotation = new Annotation("Kosgi Santosh sent an email to Stanford University. He didn't get a reply.");
        }
        // run all the selected Annotators on this text
        pipeline.annotate(annotation);
        // this prints out the results of sentence analysis to file(s) in good formats
        pipeline.prettyPrint(annotation, out);
        if (xmlOut != null) {
            pipeline.xmlPrint(annotation, xmlOut);
        }
        // Access the Annotation in code
        // The toString() method on an Annotation just prints the text of the Annotation
        // But you can see what is in it with other methods like toShorterString()
        out.println();
        out.println("The top level annotation");
        out.println(annotation.toShorterString());
        out.println();
        // An Annotation is a Map with Class keys for the linguistic analysis types.
        // You can get and use the various analyses individually.
        // For instance, this gets the parse tree of the first sentence in the text.
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        if (sentences != null && !sentences.isEmpty()) {
            CoreMap sentence = sentences.get(0);
            out.println("The keys of the first sentence's CoreMap are:");
            out.println(sentence.keySet());
            out.println();
            out.println("The first sentence is:");
            out.println(sentence.toShorterString());
            out.println();
            out.println("The first sentence tokens are:");
            for (CoreMap token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                out.println(token.toShorterString());
            }
            Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            out.println();
            out.println("The first sentence parse tree is:");
            tree.pennPrint(out);
            out.println();
            out.println("The first sentence basic dependencies are:");
            out.println(sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class).toString(SemanticGraph.OutputFormat.LIST));
            out.println("The first sentence collapsed, CC-processed dependencies are:");
            SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
            out.println(graph.toString(SemanticGraph.OutputFormat.LIST));
            // Access coreference. In the coreference link graph,
            // each chain stores a set of mentions that co-refer with each other,
            // along with a method for getting the most representative mention.
            // Both sentence and token offsets start at 1!
            out.println("Coreference information");
            Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
            // Missing coref output only skips the chain listing; it no longer returns
            // early, which used to bypass the writer cleanup and the sentiment line.
            if (corefChains != null) {
                for (Map.Entry<Integer, CorefChain> entry : corefChains.entrySet()) {
                    out.println("Chain " + entry.getKey());
                    for (CorefChain.CorefMention m : entry.getValue().getMentionsInTextualOrder()) {
                        // We need to subtract one since the indices count from 1 but the Lists start from 0
                        List<CoreLabel> tokens = sentences.get(m.sentNum - 1).get(CoreAnnotations.TokensAnnotation.class);
                        // We subtract two for end: one for 0-based indexing, and one because we want last token of mention not one following.
                        out.println("  " + m + ", i.e., 0-based character offsets [" + tokens.get(m.startIndex - 1).beginPosition() + ", " + tokens.get(m.endIndex - 2).endPosition() + ")");
                    }
                }
            }
            out.println();
            out.println("The first sentence overall sentiment rating is " + sentence.get(SentimentCoreAnnotations.SentimentClass.class));
        }
    } finally {
        // Closing flushes the buffered writers; xmlOut (and out, if opening it failed) may be null here.
        IOUtils.closeIgnoringExceptions(out);
        IOUtils.closeIgnoringExceptions(xmlOut);
    }
}
Also used : SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) CorefChain(edu.stanford.nlp.coref.data.CorefChain) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph)

Example 5 with CorefChain

use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.

the class HybridCorefAnnotator method annotateOldFormat.

/**
 * Writes coref output onto the document's annotation in the older format:
 * a list of (source, destination) link pairs stored under
 * {@code CorefGraphAnnotation}, plus a {@code CorefClusterAnnotation} on the
 * head token of every mention belonging to a chain with at least two mentions.
 *
 * @param result   coref chains keyed by chain id
 * @param corefDoc the coref document whose annotation is updated
 */
private static void annotateOldFormat(Map<Integer, CorefChain> result, Document corefDoc) {
    List<Pair<IntTuple, IntTuple>> links = getLinks(result);
    Annotation annotation = corefDoc.annotation;
    if (VERBOSE) {
        System.err.printf("Found %d coreference links:%n", links.size());
        for (Pair<IntTuple, IntTuple> edge : links) {
            System.err.printf("LINK (%d, %d) -> (%d, %d)%n", edge.first.get(0), edge.first.get(1), edge.second.get(0), edge.second.get(1));
        }
    }

    // Raw links found by the coref system, saved as CorefGraphAnnotation.
    // All offsets in this graph are 1-based (not 0-based), for consistency
    // with the indices used for syntactic dependencies.
    List<Pair<IntTuple, IntTuple>> rawGraph = new ArrayList<>();
    for (Pair<IntTuple, IntTuple> edge : links) {
        // source endpoint: (sentence number, head index + 1)
        int fromSentence = edge.first.get(0);
        int fromToken = corefDoc.getOrderedMentions().get(fromSentence - 1).get(edge.first.get(1) - 1).headIndex + 1;
        // destination endpoint: (sentence number, head index + 1)
        int toSentence = edge.second.get(0);
        int toToken = corefDoc.getOrderedMentions().get(toSentence - 1).get(edge.second.get(1) - 1).headIndex + 1;

        IntTuple from = new IntTuple(2);
        from.set(0, fromSentence);
        from.set(1, fromToken);
        IntTuple to = new IntTuple(2);
        to.set(0, toSentence);
        to.set(1, toToken);
        rawGraph.add(new Pair<>(from, to));
    }
    annotation.set(CorefCoreAnnotations.CorefGraphAnnotation.class, rawGraph);

    // Singleton chains are ignored; for every other chain, each head token
    // is tagged with the shared set of head tokens of that chain.
    for (CorefChain chain : result.values()) {
        if (chain.getMentionsInTextualOrder().size() >= 2) {
            Set<CoreLabel> clusterHeads = Generics.newHashSet();
            for (CorefMention member : chain.getMentionsInTextualOrder()) {
                // sentNum and headIndex are used as 1-based here, hence the -1 on both lookups
                List<CoreMap> allSentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
                List<CoreLabel> sentenceTokens = allSentences.get(member.sentNum - 1).get(CoreAnnotations.TokensAnnotation.class);
                clusterHeads.add(sentenceTokens.get(member.headIndex - 1));
            }
            for (CoreLabel head : clusterHeads) {
                head.set(CorefCoreAnnotations.CorefClusterAnnotation.class, clusterHeads);
            }
        }
    }
}
Also used : CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) CoreAnnotation(edu.stanford.nlp.ling.CoreAnnotation) CorefChainAnnotation(edu.stanford.nlp.coref.CorefCoreAnnotations.CorefChainAnnotation) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CorefMention(edu.stanford.nlp.coref.data.CorefChain.CorefMention) CorefChain(edu.stanford.nlp.coref.data.CorefChain)

Aggregations

CorefChain (edu.stanford.nlp.coref.data.CorefChain)27 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)17 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)12 CoreLabel (edu.stanford.nlp.ling.CoreLabel)12 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)10 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)7 Tree (edu.stanford.nlp.trees.Tree)7 CoreMap (edu.stanford.nlp.util.CoreMap)7 RelationTriple (edu.stanford.nlp.ie.util.RelationTriple)6 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)6 CorefMention (edu.stanford.nlp.coref.data.CorefChain.CorefMention)5 Annotation (edu.stanford.nlp.pipeline.Annotation)5 SentimentCoreAnnotations (edu.stanford.nlp.sentiment.SentimentCoreAnnotations)4 java.util (java.util)4 Collectors (java.util.stream.Collectors)4 EntityMention (edu.stanford.nlp.ie.machinereading.structure.EntityMention)3 MachineReadingAnnotations (edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations)3 RelationMention (edu.stanford.nlp.ie.machinereading.structure.RelationMention)3 Span (edu.stanford.nlp.ie.machinereading.structure.Span)3 CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation)3