use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.
the class DcorefExactOutputITest method testCoref.
/**
 * Runs the test pipeline over the STILLALONEWOLF sample document and compares the
 * coref chains it produces against the stored expected-coref file.
 *
 * @throws IOException declared for the file-reading helpers used by the test
 */
public void testCoref() throws IOException {
  // Raw document text, read from the path given to IOUtils.slurpFile.
  final String text = IOUtils.slurpFile("edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.sgm");

  // Process the text and pull the chain map straight out of the resulting annotation.
  final Map<Integer, CorefChain> actualChains =
      pipeline.process(text).get(CorefCoreAnnotations.CorefChainAnnotation.class);

  // The reference chains live next to the document, under the ".expectedcoref" suffix.
  final Map<Integer, List<ExpectedMention>> expectedChains =
      loadExpectedResults("edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.expectedcoref");

  compareResults(expectedChains, actualChains);
}
use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.
the class DcorefExactOutputITest method compareResults.
/**
 * Asserts that the expected chains and the produced chains agree.
 *
 * <p>Three checks are made, in this order: both maps have the same number of entries;
 * every expected chain is matched (via {@code compareChain}) by at least one produced
 * chain; every produced chain is matched by at least one expected chain. The integer
 * keys of the two maps are never compared against each other.
 *
 * @param expected expected mentions, grouped by chain id
 * @param chains   chains produced by the coref system, keyed by chain id
 */
public static void compareResults(Map<Integer, List<ExpectedMention>> expected, Map<Integer, CorefChain> chains) {
  assertEquals("Unexpected difference in number of chains", expected.size(), chains.size());

  // Direction 1: each expected chain must show up somewhere in the output.
  // Chain ids are deliberately not required to line up, so search all produced chains.
  for (Map.Entry<Integer, List<ExpectedMention>> wanted : expected.entrySet()) {
    List<ExpectedMention> wantedMentions = wanted.getValue();
    boolean present = chains.values().stream()
        .anyMatch(actual -> compareChain(wantedMentions, actual));
    assertTrue("Could not find expected coref chain " + wanted.getKey() + " " + wantedMentions + " in the results", present);
  }

  // Direction 2: each produced chain must correspond to some expected chain.
  for (CorefChain actual : chains.values()) {
    boolean present = expected.values().stream()
        .anyMatch(wantedMentions -> compareChain(wantedMentions, actual));
    assertTrue("Dcoref produced chain " + actual + " which was not in the expected results", present);
  }
}
use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.
the class XMLOutputter method annotationToDoc.
/**
 * Converts the given annotation to an XML document using the specified options.
 *
 * <p>The document has a {@code root} element holding a single {@code document} element.
 * Document-level metadata (id, date, source type, type, author, location and, when
 * {@code options.includeText} is set, the text) is written through
 * {@code setSingleElement}. A {@code sentences} element is always appended; it receives one
 * {@code sentence} child per entry of the sentences annotation, when that annotation is
 * present. A {@code coreference} element is appended only when the annotation carries
 * coref chains and {@code addCorefGraphInfo} returns {@code true}.
 *
 * <p>Note that the collapsed / enhanced dependency graphs are only looked at when the
 * sentence has a basic dependency graph.
 *
 * @param annotation the annotation to serialize
 * @param options    output options; this method reads {@code includeText},
 *                   {@code constituentTreePrinter} and {@code relationsBeam}, and passes
 *                   the object on to {@code addCorefGraphInfo}
 * @return the newly built XML document
 */
public static Document annotationToDoc(Annotation annotation, Options options) {
  //
  // create the XML document with the root node pointing to the namespace URL
  //
  Element root = new Element("root", NAMESPACE_URI);
  Document xmlDoc = new Document(root);
  ProcessingInstruction pi = new ProcessingInstruction("xml-stylesheet", "href=\"" + STYLESHEET_NAME + "\" type=\"text/xsl\"");
  xmlDoc.insertChild(pi, 0);
  Element docElem = new Element("document", NAMESPACE_URI);
  root.appendChild(docElem);

  // document-level metadata
  setSingleElement(docElem, "docId", NAMESPACE_URI, annotation.get(CoreAnnotations.DocIDAnnotation.class));
  setSingleElement(docElem, "docDate", NAMESPACE_URI, annotation.get(CoreAnnotations.DocDateAnnotation.class));
  setSingleElement(docElem, "docSourceType", NAMESPACE_URI, annotation.get(CoreAnnotations.DocSourceTypeAnnotation.class));
  setSingleElement(docElem, "docType", NAMESPACE_URI, annotation.get(CoreAnnotations.DocTypeAnnotation.class));
  setSingleElement(docElem, "author", NAMESPACE_URI, annotation.get(CoreAnnotations.AuthorAnnotation.class));
  setSingleElement(docElem, "location", NAMESPACE_URI, annotation.get(CoreAnnotations.LocationAnnotation.class));
  if (options.includeText) {
    setSingleElement(docElem, "text", NAMESPACE_URI, annotation.get(CoreAnnotations.TextAnnotation.class));
  }

  Element sentencesElem = new Element("sentences", NAMESPACE_URI);
  docElem.appendChild(sentencesElem);

  //
  // add one <sentence> element per sentence
  //
  // Looked up once and reused below for the coref section.
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  if (sentences != null) {
    int sentCount = 1;  // sentence ids in the output are 1-based
    for (CoreMap sentence : sentences) {
      Element sentElem = new Element("sentence", NAMESPACE_URI);
      sentElem.addAttribute(new Attribute("id", Integer.toString(sentCount)));
      Integer lineNumber = sentence.get(CoreAnnotations.LineNumberAnnotation.class);
      if (lineNumber != null) {
        sentElem.addAttribute(new Attribute("line", Integer.toString(lineNumber)));
      }
      sentCount++;

      // add the word table with all token-level annotations
      Element wordTable = new Element("tokens", NAMESPACE_URI);
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      for (int j = 0; j < tokens.size(); j++) {
        Element wordInfo = new Element("token", NAMESPACE_URI);
        // token ids passed to addWordInfo are 1-based
        addWordInfo(wordInfo, tokens.get(j), j + 1, NAMESPACE_URI);
        wordTable.appendChild(wordInfo);
      }
      sentElem.appendChild(wordTable);

      // add tree info
      Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
      if (tree != null) {
        // add the constituent tree for this sentence
        Element parseInfo = new Element("parse", NAMESPACE_URI);
        addConstituentTreeInfo(parseInfo, tree, options.constituentTreePrinter);
        sentElem.appendChild(parseInfo);
      }

      SemanticGraph basicDependencies = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
      if (basicDependencies != null) {
        // add the dependencies for this sentence; each flavour is appended only when
        // buildDependencyTreeInfo returns a non-null element
        Element depInfo = buildDependencyTreeInfo("basic-dependencies", basicDependencies, tokens, NAMESPACE_URI);
        if (depInfo != null) {
          sentElem.appendChild(depInfo);
        }
        depInfo = buildDependencyTreeInfo("collapsed-dependencies", sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class), tokens, NAMESPACE_URI);
        if (depInfo != null) {
          sentElem.appendChild(depInfo);
        }
        depInfo = buildDependencyTreeInfo("collapsed-ccprocessed-dependencies", sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class), tokens, NAMESPACE_URI);
        if (depInfo != null) {
          sentElem.appendChild(depInfo);
        }
        depInfo = buildDependencyTreeInfo("enhanced-dependencies", sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class), tokens, NAMESPACE_URI);
        if (depInfo != null) {
          sentElem.appendChild(depInfo);
        }
        depInfo = buildDependencyTreeInfo("enhanced-plus-plus-dependencies", sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class), tokens, NAMESPACE_URI);
        if (depInfo != null) {
          sentElem.appendChild(depInfo);
        }
      }

      // add Open IE triples
      Collection<RelationTriple> openieTriples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
      if (openieTriples != null) {
        Element openieElem = new Element("openie", NAMESPACE_URI);
        addTriples(openieTriples, openieElem, NAMESPACE_URI);
        sentElem.appendChild(openieElem);
      }

      // add KBP triples
      Collection<RelationTriple> kbpTriples = sentence.get(CoreAnnotations.KBPTriplesAnnotation.class);
      if (kbpTriples != null) {
        Element kbpElem = new Element("kbp", NAMESPACE_URI);
        addTriples(kbpTriples, kbpElem, NAMESPACE_URI);
        sentElem.appendChild(kbpElem);
      }

      // add the MR entities and relations; relations are only written when there is
      // at least one entity
      List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
      List<RelationMention> relations = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
      if (entities != null && !entities.isEmpty()) {
        Element mrElem = new Element("MachineReading", NAMESPACE_URI);
        Element entElem = new Element("entities", NAMESPACE_URI);
        addEntities(entities, entElem, NAMESPACE_URI);
        mrElem.appendChild(entElem);
        if (relations != null) {
          Element relElem = new Element("relations", NAMESPACE_URI);
          addRelations(relations, relElem, NAMESPACE_URI, options.relationsBeam);
          mrElem.appendChild(relElem);
        }
        sentElem.appendChild(mrElem);
      }

      // add sentiment as attributes of this sentence
      Tree sentimentTree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
      if (sentimentTree != null) {
        int sentiment = RNNCoreAnnotations.getPredictedClass(sentimentTree);
        sentElem.addAttribute(new Attribute("sentimentValue", Integer.toString(sentiment)));
        String sentimentClass = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
        // The class label is a separate annotation from the tree; skip the attribute
        // rather than fail with a NullPointerException when it is absent.
        if (sentimentClass != null) {
          // literal replacement: strip spaces from the label, no regex needed
          sentElem.addAttribute(new Attribute("sentiment", sentimentClass.replace(" ", "")));
        }
      }

      // add the sentence to the root
      sentencesElem.appendChild(sentElem);
    }
  }

  //
  // add the coref graph
  //
  Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  if (corefChains != null) {
    Element corefInfo = new Element("coreference", NAMESPACE_URI);
    // only attach the element when addCorefGraphInfo reports success
    if (addCorefGraphInfo(options, corefInfo, sentences, corefChains, NAMESPACE_URI)) {
      docElem.appendChild(corefInfo);
    }
  }

  return xmlDoc;
}
use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.
the class StanfordCoreNlpDemo method main.
/**
 * Usage: java -cp "*" StanfordCoreNlpDemo [inputFile [outputTextFile [outputXmlFile]]]
 *
 * <p>Builds a pipeline, annotates either the given input file or a built-in sample
 * sentence, pretty-prints the result (and optionally an XML rendering), then walks
 * through the annotations of the first sentence and the coreference chains.
 *
 * <p>Both writers are closed in a {@code finally} block, so buffered output is flushed
 * even when the method returns early or an exception is thrown.
 *
 * @param args optional input file, text output file and XML output file, in that order
 * @throws IOException if an output file cannot be opened or the pipeline's printing fails
 */
public static void main(String[] args) throws IOException {
  // set up optional output files
  PrintWriter out = (args.length > 1) ? new PrintWriter(args[1]) : new PrintWriter(System.out);
  PrintWriter xmlOut = null;
  try {
    if (args.length > 2) {
      xmlOut = new PrintWriter(args[2]);
    }

    // Create a CoreNLP pipeline. To build the default pipeline, you can just use:
    //   StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    // Here's a more complex setup example:
    //   Properties props = new Properties();
    //   props.put("annotators", "tokenize, ssplit, pos, lemma, ner, depparse");
    //   props.put("ner.model", "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz");
    //   props.put("ner.applyNumericClassifiers", "false");
    //   StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // Add in sentiment
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // Initialize an Annotation with some text to be annotated. The text is the argument to the constructor.
    Annotation annotation;
    if (args.length > 0) {
      annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[0]));
    } else {
      annotation = new Annotation("Kosgi Santosh sent an email to Stanford University. He didn't get a reply.");
    }

    // run all the selected Annotators on this text
    pipeline.annotate(annotation);

    // this prints out the results of sentence analysis to file(s) in good formats
    pipeline.prettyPrint(annotation, out);
    if (xmlOut != null) {
      pipeline.xmlPrint(annotation, xmlOut);
    }

    // Access the Annotation in code
    // The toString() method on an Annotation just prints the text of the Annotation
    // But you can see what is in it with other methods like toShorterString()
    out.println();
    out.println("The top level annotation");
    out.println(annotation.toShorterString());
    out.println();

    // An Annotation is a Map with Class keys for the linguistic analysis types.
    // You can get and use the various analyses individually.
    // For instance, this gets the parse tree of the first sentence in the text.
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    if (sentences != null && !sentences.isEmpty()) {
      CoreMap sentence = sentences.get(0);
      out.println("The keys of the first sentence's CoreMap are:");
      out.println(sentence.keySet());
      out.println();
      out.println("The first sentence is:");
      out.println(sentence.toShorterString());
      out.println();
      out.println("The first sentence tokens are:");
      for (CoreMap token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
        out.println(token.toShorterString());
      }
      Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
      out.println();
      out.println("The first sentence parse tree is:");
      tree.pennPrint(out);
      out.println();
      out.println("The first sentence basic dependencies are:");
      out.println(sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class).toString(SemanticGraph.OutputFormat.LIST));
      out.println("The first sentence collapsed, CC-processed dependencies are:");
      SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
      out.println(graph.toString(SemanticGraph.OutputFormat.LIST));

      // Access coreference. In the coreference link graph,
      // each chain stores a set of mentions that co-refer with each other,
      // along with a method for getting the most representative mention.
      // Both sentence and token offsets start at 1!
      out.println("Coreference information");
      Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
      if (corefChains == null) {
        // Nothing more to show; the finally block below still flushes and closes the writers.
        return;
      }
      for (Map.Entry<Integer, CorefChain> entry : corefChains.entrySet()) {
        out.println("Chain " + entry.getKey());
        for (CorefChain.CorefMention m : entry.getValue().getMentionsInTextualOrder()) {
          // We need to subtract one since the indices count from 1 but the Lists start from 0
          List<CoreLabel> tokens = sentences.get(m.sentNum - 1).get(CoreAnnotations.TokensAnnotation.class);
          // We subtract two for end: one for 0-based indexing, and one because we want last token of mention not one following.
          out.println(" " + m + ", i.e., 0-based character offsets [" + tokens.get(m.startIndex - 1).beginPosition() + ", " + tokens.get(m.endIndex - 2).endPosition() + ")");
        }
      }
      out.println();
      out.println("The first sentence overall sentiment rating is " + sentence.get(SentimentCoreAnnotations.SentimentClass.class));
    }
  } finally {
    // Always release the writers: the PrintWriter around System.out does not auto-flush,
    // so skipping this would silently drop buffered output. xmlOut may be null here.
    IOUtils.closeIgnoringExceptions(out);
    IOUtils.closeIgnoringExceptions(xmlOut);
  }
}
use of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp.
the class HybridCorefAnnotator method annotateOldFormat.
/**
 * Writes the coref result onto the document's annotation in the older formats.
 *
 * <p>Two things are stored: a list of (source, destination) pairs under
 * {@code CorefGraphAnnotation}, built from the links returned by {@code getLinks}, and,
 * for every chain with at least two mentions, the set of head tokens of its mentions
 * under {@code CorefClusterAnnotation} on each of those tokens.
 *
 * @param result   coref chains keyed by chain id
 * @param corefDoc the coref document whose annotation is updated
 */
private static void annotateOldFormat(Map<Integer, CorefChain> result, Document corefDoc) {
  List<Pair<IntTuple, IntTuple>> links = getLinks(result);
  Annotation annotation = corefDoc.annotation;

  if (VERBOSE) {
    System.err.printf("Found %d coreference links:%n", links.size());
    for (Pair<IntTuple, IntTuple> each : links) {
      IntTuple from = each.first;
      IntTuple to = each.second;
      System.err.printf("LINK (%d, %d) -> (%d, %d)%n", from.get(0), from.get(1), to.get(0), to.get(1));
    }
  }

  //
  // save the coref output as CorefGraphAnnotation
  //
  // the raw links found by the coref system, re-expressed as (sentence, head token) pairs
  List<Pair<IntTuple, IntTuple>> rawGraph = new ArrayList<>();
  for (Pair<IntTuple, IntTuple> each : links) {
    IntTuple from = each.first;
    IntTuple to = each.second;

    //
    // Note: all offsets in the graph start at 1 (not at 0!)
    // we do this for consistency reasons, as indices for syntactic dependencies start at 1
    //
    // Position 0 of a link tuple is used as the sentence number and position 1 as the
    // mention's position within that sentence; both are decremented for list lookup.
    int fromSentence = from.get(0);
    int fromToken = corefDoc.getOrderedMentions().get(fromSentence - 1).get(from.get(1) - 1).headIndex + 1;
    int toSentence = to.get(0);
    int toToken = corefDoc.getOrderedMentions().get(toSentence - 1).get(to.get(1) - 1).headIndex + 1;

    IntTuple source = new IntTuple(2);
    source.set(0, fromSentence);
    source.set(1, fromToken);

    IntTuple target = new IntTuple(2);
    target.set(0, toSentence);
    target.set(1, toToken);

    rawGraph.add(new Pair<>(source, target));
  }
  annotation.set(CorefCoreAnnotations.CorefGraphAnnotation.class, rawGraph);

  //
  // tag every head token of a multi-mention chain with the chain's full set of head tokens
  //
  for (CorefChain chain : result.values()) {
    // chains with fewer than two mentions are left untouched
    if (chain.getMentionsInTextualOrder().size() >= 2) {
      Set<CoreLabel> clusterTokens = Generics.newHashSet();
      for (CorefMention member : chain.getMentionsInTextualOrder()) {
        // sentNum and headIndex are decremented by one for the list lookups
        List<CoreMap> allSentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        CoreMap owningSentence = allSentences.get(member.sentNum - 1);
        List<CoreLabel> sentenceTokens = owningSentence.get(CoreAnnotations.TokensAnnotation.class);
        clusterTokens.add(sentenceTokens.get(member.headIndex - 1));
      }
      // every token in the cluster shares the same Set instance
      for (CoreLabel headToken : clusterTokens) {
        headToken.set(CorefCoreAnnotations.CorefClusterAnnotation.class, clusterTokens);
      }
    }
  }
}
Aggregations