Usage of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp:
the annotate method of the OpenIE class.
/**
 * {@inheritDoc}
 *
 * <p>
 * This annotator will, in particular, set the {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.EntailedSentencesAnnotation}
 * and {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.RelationTriplesAnnotation} annotations.
 * </p>
 */
@Override
public void annotate(Annotation annotation) {
    // Accumulate Coref data: map each single-token mention to the canonical
    // mention (as a token list) chosen for its coreference chain.
    // IdentityHashMap is keyed on token object identity, not equals().
    Map<Integer, CorefChain> corefChains;
    Map<CoreLabel, List<CoreLabel>> canonicalMentionMap = new IdentityHashMap<>();
    if (resolveCoref && (corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class)) != null) {
        for (CorefChain chain : corefChains.values()) {
            // Make sure it's a real chain and not a singleton
            if (chain.getMentionsInTextualOrder().size() < 2) {
                continue;
            }
            // Metadata
            List<CoreLabel> canonicalMention = null;
            double canonicalMentionScore = Double.NEGATIVE_INFINITY;
            Set<CoreLabel> tokensToMark = new HashSet<>();
            List<CorefChain.CorefMention> mentions = chain.getMentionsInTextualOrder();
            // Iterate over mentions
            for (int i = 0; i < mentions.size(); ++i) {
                // Get some data on this mention (its tokens and a quality score)
                Pair<List<CoreLabel>, Double> info = grokCorefMention(annotation, mentions.get(i));
                // Figure out if it should be the canonical mention.
                // The score combines: the mention's own score, a position bonus
                // in [0, 1) that favors later mentions, and a +1.0 bonus if this
                // is the chain's representative mention.
                double score = info.second + ((double) i) / ((double) mentions.size()) + (mentions.get(i) == chain.getRepresentativeMention() ? 1.0 : 0.0);
                if (canonicalMention == null || score > canonicalMentionScore) {
                    canonicalMention = info.first;
                    canonicalMentionScore = score;
                }
                // Register the participating tokens
                if (info.first.size() == 1) {
                    // Only mark single-node tokens!
                    tokensToMark.addAll(info.first);
                }
            }
            // Mark the tokens as coreferent
            assert canonicalMention != null;
            for (CoreLabel token : tokensToMark) {
                List<CoreLabel> existingMention = canonicalMentionMap.get(token);
                // Only overwrite a previous mapping when it is absent, empty, or
                // has no named-entity tag ("O") on its first token.
                if (existingMention == null || existingMention.isEmpty() || "O".equals(existingMention.get(0).ner())) {
                    // Don't clobber existing good mentions
                    canonicalMentionMap.put(token, canonicalMention);
                }
            }
        }
    }
    // Annotate each sentence
    annotation.get(CoreAnnotations.SentencesAnnotation.class).forEach(x -> this.annotateSentence(x, canonicalMentionMap));
}
Usage of edu.stanford.nlp.coref.data.CorefChain in project cogcomp-nlp by CogComp:
the addView method of the StanfordCorefHandler class.
/**
 * Builds a coreference view for the given {@link TextAnnotation} by running the
 * Stanford pipeline over its text and copying every coref chain into a
 * {@link CoreferenceView}: one representative constituent per chain, linked to
 * the chain's remaining mentions.
 *
 * @param ta the text annotation to add the view to
 * @throws AnnotatorException declared by the Annotator contract
 */
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
    Annotation document = new Annotation(ta.text);
    pipeline.annotate(document);
    CoreferenceView vu = new CoreferenceView(viewName, ta);
    // Use the parameterized annotation type directly instead of a raw Map,
    // which required unchecked casts on every lookup.
    Map<Integer, CorefChain> corefChains = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    // The annotation may legitimately be absent (e.g. no coref annotator ran).
    if (corefChains != null) {
        for (CorefChain chain : corefChains.values()) {
            Constituent representative =
                    createConstituentGivenMention(document, chain, chain.getRepresentativeMention(), ta);
            List<Constituent> consList = new ArrayList<>();
            for (CorefChain.CorefMention m : chain.getMentionsInTextualOrder()) {
                consList.add(createConstituentGivenMention(document, chain, m, ta));
            }
            // remove the representative itself so it is not its own coreferent
            consList.remove(representative);
            vu.addCorefEdges(representative, consList);
        }
    }
    ta.addView(viewName, vu);
}
Usage of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp:
the testSameString method of the DeterministicCorefAnnotatorITest class.
/**
 * Tests named entities with exact string matches (also tests some more pronouns).
 *
 * <p>Note: cluster-ID comparisons use {@code assertEquals}/{@code assertNotEquals}
 * rather than {@code assertSame}/{@code assertNotSame}. The IDs are boxed
 * {@code Integer}s, and reference identity is only guaranteed for values in the
 * Integer cache (-128..127), so identity-based assertions could spuriously fail
 * (or spuriously pass for the negative case) on larger IDs.</p>
 *
 * @throws Exception if the pipeline fails
 */
public void testSameString() throws Exception {
    // create annotation with text
    String text = "Your mom thinks she lives in Denver, but it's a big city. She actually lives outside of Denver.";
    Annotation document = new Annotation(text);
    // annotate text with pipeline
    pipeline.annotate(document);
    // test CorefChainAnnotation
    Map<Integer, CorefChain> chains = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    Assert.assertNotNull(chains);
    // test chainID = m.corefClusterID
    for (int chainID : chains.keySet()) {
        CorefChain c = chains.get(chainID);
        for (CorefMention m : c.getMentionsInTextualOrder()) {
            Assert.assertEquals(m.corefClusterID, chainID);
        }
    }
    // test CorefClusterIdAnnotation on individual tokens
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    CoreLabel yourMomsToken = sentences.get(0).get(CoreAnnotations.TokensAnnotation.class).get(1);
    CoreLabel sheToken1 = sentences.get(0).get(CoreAnnotations.TokensAnnotation.class).get(3);
    CoreLabel sheToken2 = sentences.get(1).get(CoreAnnotations.TokensAnnotation.class).get(0);
    CoreLabel denverToken1 = sentences.get(0).get(CoreAnnotations.TokensAnnotation.class).get(6);
    CoreLabel denverToken2 = sentences.get(1).get(CoreAnnotations.TokensAnnotation.class).get(5);
    Integer yourMomsClusterId = yourMomsToken.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
    Integer she1ClusterId = sheToken1.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
    Integer she2ClusterId = sheToken2.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
    Integer denver1ClusterId = denverToken1.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
    Integer denver2ClusterId = denverToken2.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
    Assert.assertNotNull(yourMomsClusterId);
    Assert.assertNotNull(she1ClusterId);
    Assert.assertNotNull(she2ClusterId);
    Assert.assertNotNull(denver1ClusterId);
    Assert.assertNotNull(denver2ClusterId);
    // "mom" and both "she"s corefer; the two "Denver"s corefer; the two chains differ
    Assert.assertEquals(yourMomsClusterId, she1ClusterId);
    Assert.assertEquals(yourMomsClusterId, she2ClusterId);
    Assert.assertEquals(denver1ClusterId, denver2ClusterId);
    Assert.assertNotEquals(yourMomsClusterId, denver1ClusterId);
    // test CorefClusterAnnotation
    // Assert.assertEquals(yourMomsToken.get(CorefCoreAnnotations.CorefClusterAnnotation.class), sheToken1.get(CorefCoreAnnotations.CorefClusterAnnotation.class));
    // Assert.assertEquals(yourMomsToken.get(CorefCoreAnnotations.CorefClusterAnnotation.class), sheToken2.get(CorefCoreAnnotations.CorefClusterAnnotation.class));
    // Assert.assertEquals(denverToken1.get(CorefCoreAnnotations.CorefClusterAnnotation.class), denverToken2.get(CorefCoreAnnotations.CorefClusterAnnotation.class));
}
Usage of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp:
the main method of the DcorefExactOutputITest class.
/**
 * Command-line entry point: runs the dcoref pipeline over the file named by
 * args[0] and writes the resulting coref chains (the expected test output)
 * to args[1].
 *
 * @param args exactly two elements: input path, output path
 * @throws Exception on any pipeline or I/O failure
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Expected args <input> <output>");
        throw new IllegalArgumentException();
    }
    final String inputPath = args[0];
    final String outputPath = args[1];
    Properties properties = new Properties();
    properties.setProperty("annotators", "tokenize, cleanxml, ssplit, pos, lemma, ner, parse, dcoref");
    StanfordCoreNLP corenlp = new StanfordCoreNLP(properties);
    // for example
    // "edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.sgm"
    Annotation annotated = corenlp.process(IOUtils.slurpFile(inputPath));
    Map<Integer, CorefChain> chains = annotated.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    saveResults(outputPath, chains);
}
Usage of edu.stanford.nlp.coref.data.CorefChain in project CoreNLP by stanfordnlp:
the write method of the CustomAnnotationSerializer class.
/**
 * Serializes the annotation to the given stream in this serializer's custom
 * line-oriented text format, in this fixed order: coref chains (new format),
 * one line with the old-format coref graph, then per sentence the parse tree,
 * three dependency graphs, and the tokens, with a blank line after each
 * sentence. The output ordering is part of the format and must not change.
 *
 * @param corpus the annotation to serialize
 * @param os the destination stream; possibly wrapped in GZIP when compression is on
 * @return the (possibly wrapped) output stream actually written to
 * @throws IOException declared by the serializer contract
 */
@Override
public OutputStream write(Annotation corpus, OutputStream os) throws IOException {
    // Wrap in GZIP only if compression is requested and the caller has not
    // already wrapped the stream themselves.
    if (!(os instanceof GZIPOutputStream)) {
        if (compress)
            os = new GZIPOutputStream(os);
    }
    PrintWriter pw = new PrintWriter(os);
    // save the coref graph in the new format
    Map<Integer, CorefChain> chains = corpus.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    saveCorefChains(chains, pw);
    // save the coref graph on one line
    // Note: this is the old format!
    List<Pair<IntTuple, IntTuple>> corefGraph = corpus.get(CorefCoreAnnotations.CorefGraphAnnotation.class);
    if (corefGraph != null) {
        boolean first = true;
        for (Pair<IntTuple, IntTuple> arc : corefGraph) {
            if (!first)
                pw.print(" ");
            // each arc is four ints: source (sent, word) then target (sent, word)
            pw.printf("%d %d %d %d", arc.first.get(0), arc.first.get(1), arc.second.get(0), arc.second.get(1));
            first = false;
        }
    }
    // terminate the coref-graph line even when the graph was null/empty
    pw.println();
    // save sentences separated by an empty line
    List<CoreMap> sentences = corpus.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sent : sentences) {
        // save the parse tree first, on a single line
        Tree tree = sent.get(TreeCoreAnnotations.TreeAnnotation.class);
        if (tree != null) {
            String treeString = tree.toString();
            // no \n allowed in the parse tree string (might happen due to tokenization of HTML/XML/RDF tags)
            treeString = treeString.replaceAll("\n", " ");
            pw.println(treeString);
        } else
            // an empty line keeps the per-sentence record structure aligned
            pw.println();
        SemanticGraph collapsedDeps = sent.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        saveDependencyGraph(collapsedDeps, pw);
        SemanticGraph uncollapsedDeps = sent.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        saveDependencyGraph(uncollapsedDeps, pw);
        SemanticGraph ccDeps = sent.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
        saveDependencyGraph(ccDeps, pw);
        // save all sentence tokens
        List<CoreLabel> tokens = sent.get(CoreAnnotations.TokensAnnotation.class);
        if (tokens != null) {
            for (CoreLabel token : tokens) {
                saveToken(token, haveExplicitAntecedent, pw);
                pw.println();
            }
        }
        // add an empty line after every sentence
        pw.println();
    }
    // flush but do not close: the caller owns the stream (and GZIP wrapper)
    pw.flush();
    return os;
}
Aggregations