Use of edu.stanford.nlp.ie.machinereading.structure.EntityMention in the CoreNLP project by stanfordnlp: class XMLOutputter, method toXML.
/**
 * Serializes a single MachineReading relation mention to an XML element.
 *
 * @param relation the relation mention to serialize
 * @param curNS    the XML namespace URI used for every generated element
 * @return a {@code <relation>} element carrying the mention id attribute and
 *         child elements for type, optional subtype, entity-mention arguments,
 *         and the probability table
 */
private static Element toXML(RelationMention relation, String curNS) {
  Element top = new Element("relation", curNS);
  top.addAttribute(new Attribute("id", relation.getObjectId()));

  Element type = new Element("type", curNS);
  type.appendChild(relation.getType());
  // Bug fix: append the constructed <type> element. The original appended the
  // raw type string directly to <relation>, leaving the <type> element orphaned.
  top.appendChild(type);

  if (relation.getSubType() != null) {
    Element subtype = new Element("subtype", curNS);
    subtype.appendChild(relation.getSubType());
    // Bug fix: likewise append the <subtype> element rather than the raw string.
    top.appendChild(subtype);
  }

  // Wrap each entity-mention argument, serialized recursively, in <arguments>.
  List<EntityMention> mentions = relation.getEntityMentionArgs();
  Element args = new Element("arguments", curNS);
  for (EntityMention e : mentions) {
    args.appendChild(toXML(e, curNS));
  }
  top.appendChild(args);

  top.appendChild(makeProbabilitiesElement(relation, curNS));
  return top;
}
Use of edu.stanford.nlp.ie.machinereading.structure.EntityMention in the CoreNLP project by stanfordnlp: class XMLOutputter, method addEntities.
/**
* Generates the XML content for MachineReading entities.
*/
/**
 * Appends one XML element per MachineReading entity mention to the given parent.
 *
 * @param entities the entity mentions to serialize
 * @param top      the parent element receiving one child per mention
 * @param curNS    the XML namespace URI passed through to the serializer
 */
private static void addEntities(List<EntityMention> entities, Element top, String curNS) {
  for (EntityMention entity : entities) {
    top.appendChild(toXML(entity, curNS));
  }
}
Use of edu.stanford.nlp.ie.machinereading.structure.EntityMention in the CoreNLP project by stanfordnlp: class XMLOutputter, method annotationToDoc.
/**
* Converts the given annotation to an XML document using the specified options
*/
/**
 * Converts the given annotation to an XML document using the specified options.
 *
 * <p>Produces a {@code <root><document>...} tree containing document metadata,
 * per-sentence tokens, parse trees, dependency graphs, OpenIE/KBP triples,
 * MachineReading entities/relations, sentiment attributes, and finally the
 * document-level coreference graph.
 *
 * @param annotation the annotated document to serialize
 * @param options    output switches (raw text inclusion, tree printer, relation beam)
 * @return the assembled XML document
 */
public static Document annotationToDoc(Annotation annotation, Options options) {
  //
  // create the XML document with the root node pointing to the namespace URL
  //
  Element root = new Element("root", NAMESPACE_URI);
  Document xmlDoc = new Document(root);
  // Stylesheet processing instruction is inserted before the root element.
  ProcessingInstruction pi = new ProcessingInstruction("xml-stylesheet", "href=\"" + STYLESHEET_NAME + "\" type=\"text/xsl\"");
  xmlDoc.insertChild(pi, 0);
  Element docElem = new Element("document", NAMESPACE_URI);
  root.appendChild(docElem);
  // Document-level metadata pulled from CoreAnnotations.
  // NOTE(review): presumably setSingleElement skips null values — confirm against its definition.
  setSingleElement(docElem, "docId", NAMESPACE_URI, annotation.get(CoreAnnotations.DocIDAnnotation.class));
  setSingleElement(docElem, "docDate", NAMESPACE_URI, annotation.get(CoreAnnotations.DocDateAnnotation.class));
  setSingleElement(docElem, "docSourceType", NAMESPACE_URI, annotation.get(CoreAnnotations.DocSourceTypeAnnotation.class));
  setSingleElement(docElem, "docType", NAMESPACE_URI, annotation.get(CoreAnnotations.DocTypeAnnotation.class));
  setSingleElement(docElem, "author", NAMESPACE_URI, annotation.get(CoreAnnotations.AuthorAnnotation.class));
  setSingleElement(docElem, "location", NAMESPACE_URI, annotation.get(CoreAnnotations.LocationAnnotation.class));
  if (options.includeText) {
    setSingleElement(docElem, "text", NAMESPACE_URI, annotation.get(CoreAnnotations.TextAnnotation.class));
  }
  Element sentencesElem = new Element("sentences", NAMESPACE_URI);
  docElem.appendChild(sentencesElem);
  //
  // One <sentence> element per sentence, with 1-based ids.
  if (annotation.get(CoreAnnotations.SentencesAnnotation.class) != null) {
    int sentCount = 1;
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
      Element sentElem = new Element("sentence", NAMESPACE_URI);
      sentElem.addAttribute(new Attribute("id", Integer.toString(sentCount)));
      // Line number attribute is optional (only present when annotated).
      Integer lineNumber = sentence.get(CoreAnnotations.LineNumberAnnotation.class);
      if (lineNumber != null) {
        sentElem.addAttribute(new Attribute("line", Integer.toString(lineNumber)));
      }
      sentCount++;
      // add the word table with all token-level annotations
      Element wordTable = new Element("tokens", NAMESPACE_URI);
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      for (int j = 0; j < tokens.size(); j++) {
        Element wordInfo = new Element("token", NAMESPACE_URI);
        // Token ids are 1-based (j + 1).
        addWordInfo(wordInfo, tokens.get(j), j + 1, NAMESPACE_URI);
        wordTable.appendChild(wordInfo);
      }
      sentElem.appendChild(wordTable);
      // add tree info
      Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
      if (tree != null) {
        // add the constituent tree for this sentence
        Element parseInfo = new Element("parse", NAMESPACE_URI);
        addConstituentTreeInfo(parseInfo, tree, options.constituentTreePrinter);
        sentElem.appendChild(parseInfo);
      }
      // Dependency graphs are emitted only when basic dependencies exist; each
      // variant is skipped individually when buildDependencyTreeInfo returns null.
      SemanticGraph basicDependencies = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
      if (basicDependencies != null) {
        // add the dependencies for this sentence
        Element depInfo = buildDependencyTreeInfo("basic-dependencies", sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class), tokens, NAMESPACE_URI);
        if (depInfo != null) {
          sentElem.appendChild(depInfo);
        }
        depInfo = buildDependencyTreeInfo("collapsed-dependencies", sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class), tokens, NAMESPACE_URI);
        if (depInfo != null) {
          sentElem.appendChild(depInfo);
        }
        depInfo = buildDependencyTreeInfo("collapsed-ccprocessed-dependencies", sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class), tokens, NAMESPACE_URI);
        if (depInfo != null) {
          sentElem.appendChild(depInfo);
        }
        depInfo = buildDependencyTreeInfo("enhanced-dependencies", sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class), tokens, NAMESPACE_URI);
        if (depInfo != null) {
          sentElem.appendChild(depInfo);
        }
        depInfo = buildDependencyTreeInfo("enhanced-plus-plus-dependencies", sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class), tokens, NAMESPACE_URI);
        if (depInfo != null) {
          sentElem.appendChild(depInfo);
        }
      }
      // add Open IE triples
      Collection<RelationTriple> openieTriples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
      if (openieTriples != null) {
        Element openieElem = new Element("openie", NAMESPACE_URI);
        addTriples(openieTriples, openieElem, NAMESPACE_URI);
        sentElem.appendChild(openieElem);
      }
      // add KBP triples
      Collection<RelationTriple> kbpTriples = sentence.get(CoreAnnotations.KBPTriplesAnnotation.class);
      if (kbpTriples != null) {
        Element kbpElem = new Element("kbp", NAMESPACE_URI);
        addTriples(kbpTriples, kbpElem, NAMESPACE_URI);
        sentElem.appendChild(kbpElem);
      }
      // add the MR entities and relations
      List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
      List<RelationMention> relations = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
      // A <MachineReading> section exists only when entities are present;
      // relations are attached only alongside entities.
      if (entities != null && !entities.isEmpty()) {
        Element mrElem = new Element("MachineReading", NAMESPACE_URI);
        Element entElem = new Element("entities", NAMESPACE_URI);
        addEntities(entities, entElem, NAMESPACE_URI);
        mrElem.appendChild(entElem);
        if (relations != null) {
          Element relElem = new Element("relations", NAMESPACE_URI);
          addRelations(relations, relElem, NAMESPACE_URI, options.relationsBeam);
          mrElem.appendChild(relElem);
        }
        sentElem.appendChild(mrElem);
      }
      // Add sentiment as attributes of this sentence ("sentimentValue" is the
      // predicted class index; "sentiment" is the class label minus spaces).
      Tree sentimentTree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
      if (sentimentTree != null) {
        int sentiment = RNNCoreAnnotations.getPredictedClass(sentimentTree);
        sentElem.addAttribute(new Attribute("sentimentValue", Integer.toString(sentiment)));
        String sentimentClass = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
        sentElem.addAttribute(new Attribute("sentiment", sentimentClass.replaceAll(" ", "")));
      }
      // add the sentence to the root
      sentencesElem.appendChild(sentElem);
    }
  }
  //
  // add the coref graph
  //
  Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  if (corefChains != null) {
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    Element corefInfo = new Element("coreference", NAMESPACE_URI);
    // The <coreference> section is appended only when it ends up non-trivial.
    if (addCorefGraphInfo(options, corefInfo, sentences, corefChains, NAMESPACE_URI))
      docElem.appendChild(corefInfo);
  }
  return xmlDoc;
}
Use of edu.stanford.nlp.ie.machinereading.structure.EntityMention in the CoreNLP project by stanfordnlp: class ACEMentionExtractor, method extractGoldMentions.
/**
 * Converts the gold ACE entity mentions of one sentence into coreference
 * {@code Mention} objects, appending them (sorted by the given comparator)
 * to {@code allGoldMentions} and updating the instance-level {@code maxID}.
 *
 * @param s               the sentence whose gold entity mentions are extracted
 * @param allGoldMentions accumulator of per-sentence gold mention lists; a new
 *                        list for this sentence is always added, even if empty
 * @param comparator      ordering used to sort the gold mentions
 */
private void extractGoldMentions(CoreMap s, List<List<Mention>> allGoldMentions, EntityComparator comparator) {
  List<Mention> goldMentions = new ArrayList<>();
  allGoldMentions.add(goldMentions);
  List<EntityMention> goldMentionList = s.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
  List<CoreLabel> words = s.get(CoreAnnotations.TokensAnnotation.class);
  // TreeSet both sorts the mentions and drops comparator-equal duplicates.
  TreeSet<EntityMention> treeForSortGoldMentions = new TreeSet<>(comparator);
  if (goldMentionList != null)
    treeForSortGoldMentions.addAll(goldMentionList);
  if (!treeForSortGoldMentions.isEmpty()) {
    for (EntityMention e : treeForSortGoldMentions) {
      Mention men = new Mention();
      // Prefer collapsed dependencies; fall back to enhanced when absent.
      men.dependency = s.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
      if (men.dependency == null) {
        men.dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
      }
      men.startIndex = e.getExtentTokenStart();
      men.endIndex = e.getExtentTokenEnd();
      // Numeric mention id is the last dash-separated segment of the object id.
      String[] parseID = e.getObjectId().split("-");
      men.mentionID = Integer.parseInt(parseID[parseID.length - 1]);
      // Gold cluster id is the segment after the final "-E" in the coref id.
      String[] parseCorefID = e.getCorefID().split("-E");
      men.goldCorefClusterID = Integer.parseInt(parseCorefID[parseCorefID.length - 1]);
      men.originalRef = -1;
      // Scan all previously collected mentions (both loops descending); every
      // cluster match overwrites originalRef, so the final value is the
      // mentionID of the EARLIEST mention in the same gold cluster.
      for (int j = allGoldMentions.size() - 1; j >= 0; j--) {
        List<Mention> l = allGoldMentions.get(j);
        for (int k = l.size() - 1; k >= 0; k--) {
          Mention m = l.get(k);
          if (men.goldCorefClusterID == m.goldCorefClusterID) {
            men.originalRef = m.mentionID;
          }
        }
      }
      goldMentions.add(men);
      // Track the largest mention id seen so far (instance field).
      if (men.mentionID > maxID)
        maxID = men.mentionID;
      // set ner type
      // Propagate gold entity types onto the tokens covered by the mention
      // (extent end index is exclusive here).
      for (int j = e.getExtentTokenStart(); j < e.getExtentTokenEnd(); j++) {
        CoreLabel word = words.get(j);
        String ner = e.getType() + "-" + e.getSubType();
        if (Constants.USE_GOLD_NE) {
          word.set(CoreAnnotations.EntityTypeAnnotation.class, e.getMentionType());
          // Only named ("NAM") mentions overwrite the NE tag with TYPE-SUBTYPE.
          if (e.getMentionType().equals("NAM"))
            word.set(CoreAnnotations.NamedEntityTagAnnotation.class, ner);
        }
      }
    }
  }
}
Use of edu.stanford.nlp.ie.machinereading.structure.EntityMention in the CoreNLP project by stanfordnlp: class EntityExtractorResultsPrinter, method printResults.
@Override
/**
 * Scores extracted entity mentions against a gold standard and prints a
 * per-label precision/recall/F1 table followed by a micro-averaged total.
 *
 * <p>Each predicted mention may match at most one unmatched gold mention
 * (greedy first-match, tracked via {@code matchedGolds}); labels listed in
 * {@code excludedClasses} are skipped in all counts.
 *
 * @param pw              destination for the results table
 * @param goldStandard    gold sentences, aligned in place with the output
 * @param extractorOutput system sentences, same length after alignment
 */
public void printResults(PrintWriter pw, List<CoreMap> goldStandard, List<CoreMap> extractorOutput) {
  // Ensure gold and system sentence lists are index-aligned before pairing.
  ResultsPrinter.align(goldStandard, extractorOutput);
  Counter<String> correct = new ClassicCounter<>();
  Counter<String> predicted = new ClassicCounter<>();
  Counter<String> gold = new ClassicCounter<>();
  for (int i = 0; i < goldStandard.size(); i++) {
    CoreMap goldSent = goldStandard.get(i);
    CoreMap sysSent = extractorOutput.get(i);
    String sysText = sysSent.get(TextAnnotation.class);
    String goldText = goldSent.get(TextAnnotation.class);
    if (verbose) {
      log.info("SCORING THE FOLLOWING SENTENCE:");
      log.info(sysSent.get(CoreAnnotations.TokensAnnotation.class));
    }
    // Gold mentions already consumed by a predicted match (by object id).
    HashSet<String> matchedGolds = new HashSet<>();
    List<EntityMention> goldEntities = goldSent.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
    if (goldEntities == null) {
      goldEntities = new ArrayList<>();
    }
    // Tally gold counts per (non-excluded) label.
    for (EntityMention m : goldEntities) {
      String label = makeLabel(m);
      if (excludedClasses != null && excludedClasses.contains(label))
        continue;
      gold.incrementCount(label);
    }
    List<EntityMention> sysEntities = sysSent.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
    if (sysEntities == null) {
      sysEntities = new ArrayList<>();
    }
    for (EntityMention m : sysEntities) {
      String label = makeLabel(m);
      if (excludedClasses != null && excludedClasses.contains(label))
        continue;
      predicted.incrementCount(label);
      if (verbose)
        log.info("COMPARING PREDICTED MENTION: " + m);
      // Greedy matching: take the first unmatched gold mention that is
      // equal under the useSubTypes comparison, then stop.
      boolean found = false;
      for (EntityMention gm : goldEntities) {
        if (matchedGolds.contains(gm.getObjectId()))
          continue;
        if (verbose)
          log.info("\tagainst: " + gm);
        if (gm.equals(m, useSubTypes)) {
          if (verbose)
            log.info("\t\t\tMATCH!");
          found = true;
          matchedGolds.add(gm.getObjectId());
          if (verboseInstances) {
            log.info("TRUE POSITIVE: " + m + " matched " + gm);
            log.info("In sentence: " + sysText);
          }
          break;
        }
      }
      if (found) {
        correct.incrementCount(label);
      } else if (verboseInstances) {
        log.info("FALSE POSITIVE: " + m.toString());
        log.info("In sentence: " + sysText);
      }
    }
    // Remaining unmatched gold mentions are false negatives (logged only).
    if (verboseInstances) {
      for (EntityMention m : goldEntities) {
        String label = makeLabel(m);
        if (!matchedGolds.contains(m.getObjectId()) && (excludedClasses == null || !excludedClasses.contains(label))) {
          log.info("FALSE NEGATIVE: " + m.toString());
          log.info("In sentence: " + goldText);
        }
      }
    }
  }
  // Per-label table: labels come from gold counts, so trueCount >= 1 below
  // and the per-label recall division is safe.
  double totalCount = 0;
  double totalCorrect = 0;
  double totalPredicted = 0;
  pw.println("Label\tCorrect\tPredict\tActual\tPrecn\tRecall\tF");
  List<String> labels = new ArrayList<>(gold.keySet());
  Collections.sort(labels);
  for (String label : labels) {
    if (excludedClasses != null && excludedClasses.contains(label))
      continue;
    double numCorrect = correct.getCount(label);
    double numPredicted = predicted.getCount(label);
    double trueCount = gold.getCount(label);
    double precision = (numPredicted > 0) ? (numCorrect / numPredicted) : 0;
    double recall = numCorrect / trueCount;
    double f = (precision + recall > 0) ? 2 * precision * recall / (precision + recall) : 0.0;
    pw.println(StringUtils.padOrTrim(label, 21) + "\t" + numCorrect + "\t" + numPredicted + "\t" + trueCount + "\t" + FORMATTER.format(precision * 100) + "\t" + FORMATTER.format(100 * recall) + "\t" + FORMATTER.format(100 * f));
    totalCount += trueCount;
    totalCorrect += numCorrect;
    totalPredicted += numPredicted;
  }
  // Micro-averaged totals.
  // NOTE(review): if there are no gold mentions at all, totalCount is 0 and
  // this recall division yields NaN — confirm whether that case can occur.
  double precision = (totalPredicted > 0) ? (totalCorrect / totalPredicted) : 0;
  double recall = totalCorrect / totalCount;
  double f = (totalPredicted > 0 && totalCorrect > 0) ? 2 * precision * recall / (precision + recall) : 0.0;
  pw.println("Total\t" + totalCorrect + "\t" + totalPredicted + "\t" + totalCount + "\t" + FORMATTER.format(100 * precision) + "\t" + FORMATTER.format(100 * recall) + "\t" + FORMATTER.format(100 * f));
}
Aggregations