Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp:
the class Document, method setParagraphAnnotation.
/**
 * Assign a paragraph index to every token (based on character-offset gaps),
 * then propagate each head word's paragraph index to its mention.
 */
private void setParagraphAnnotation() {
  int currentParagraph = 0;
  int lastEndOffset = -10;
  for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      if (!token.containsKey(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) {
        // No character offsets available: mark the token as belonging to no paragraph.
        token.set(CoreAnnotations.ParagraphAnnotation.class, -1);
        continue;
      }
      // A gap of more than two characters since the previous token starts a new paragraph.
      if (token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class) > lastEndOffset + 2) {
        currentParagraph++;
      }
      token.set(CoreAnnotations.ParagraphAnnotation.class, currentParagraph);
      lastEndOffset = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    }
  }
  // Copy the paragraph index of each mention's head word onto the mention itself.
  for (List<Mention> mentionsInSentence : predictedOrderedMentionsBySentence) {
    for (Mention mention : mentionsInSentence) {
      mention.paragraph = mention.headWord.get(CoreAnnotations.ParagraphAnnotation.class);
    }
  }
  numParagraph = currentParagraph;
}
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp:
the class KBPTokensregexExtractor, method classify.
/**
 * Classify the relation between the subject and object spans of the given input.
 *
 * <p>Temporarily marks the subject/object tokens (and patches missing NER tags)
 * on the sentence so the tokensregex rules can match, and guarantees the marks
 * are removed again before returning — even if a rule extractor throws.
 *
 * @param input The sentence plus subject/object spans and types to classify.
 * @return A pair of (relation name, confidence); (NO_RELATION, 1.0) if no rule fires.
 */
@Override
public Pair<String, Double> classify(KBPInput input) {
  // Annotate sentence
  CoreMap sentenceAsMap = input.sentence.asCoreMap(Sentence::nerTags);
  List<CoreLabel> tokens = sentenceAsMap.get(CoreAnnotations.TokensAnnotation.class);
  // Annotate where the subject is
  for (int i : input.subjectSpan) {
    tokens.get(i).set(Subject.class, "true");
    if ("O".equals(tokens.get(i).ner())) {
      tokens.get(i).setNER(input.subjectType.name);
    }
  }
  // Annotate where the object is
  for (int i : input.objectSpan) {
    tokens.get(i).set(Object.class, "true");
    if ("O".equals(tokens.get(i).ner())) {
      tokens.get(i).setNER(input.objectType.name);
    }
  }
  // Run rules; the finally block un-annotates the sentence on every exit path,
  // replacing the duplicated cleanup code and covering exceptions as well.
  try {
    for (RelationType rel : RelationType.values()) {
      // Only try rules whose entity-type signature matches the input spans.
      if (rules.containsKey(rel) && rel.entityType == input.subjectType && rel.validNamedEntityLabels.contains(input.objectType)) {
        CoreMapExpressionExtractor extractor = rules.get(rel);
        @SuppressWarnings("unchecked") List<MatchedExpression> extractions = extractor.extractExpressions(sentenceAsMap);
        if (extractions != null && !extractions.isEmpty()) {
          MatchedExpression best = MatchedExpression.getBestMatched(extractions, MatchedExpression.EXPR_WEIGHT_SCORER);
          return Pair.makePair(rel.canonicalName, best.getWeight());
        }
      }
    }
    return Pair.makePair(NO_RELATION, 1.0);
  } finally {
    // Un-annotate sentence
    for (CoreLabel token : tokens) {
      token.remove(Subject.class);
      token.remove(Object.class);
    }
  }
}
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp:
the class BasicEntityExtractor, method annotate.
/**
 * Annotate a document with entities. This modifies the given
 * {@code Annotation} in place.
 *
 * <p>When {@code SAVE_CONLL_2003} is set, also dumps the gold entity mentions
 * to {@code test.conll} in CoNLL-2003 format before annotating.
 *
 * @param doc The document to label
 */
@Override
public void annotate(Annotation doc) {
  if (SAVE_CONLL_2003) {
    // Dump a file in CoNLL-2003 format. try-with-resources guarantees the
    // stream is closed even if saveCoNLL throws (the original leaked it).
    try (PrintStream os = new PrintStream(new FileOutputStream("test.conll"))) {
      List<List<CoreLabel>> labels = AnnotationUtils.entityMentionsToCoreLabels(doc, annotationsToSkip, useSubTypes, useBIO);
      BasicEntityExtractor.saveCoNLL(os, labels, true);
      // saveCoNLLFiles("/tmp/ace/test", doc, useSubTypes, useBIO);
    } catch (IOException e) {
      // Debug-dump failure is treated as fatal, preserving the original behavior.
      e.printStackTrace();
      System.exit(1);
    }
  }
  List<CoreMap> sents = doc.get(CoreAnnotations.SentencesAnnotation.class);
  int sentCount = 1;
  for (CoreMap sentence : sents) {
    if (useNERTags) {
      // Build entity mentions directly from existing NER tags.
      this.makeAnnotationFromAllNERTags(sentence);
    } else {
      extractEntities(sentence, sentCount);
    }
    sentCount++;
  }
  /*
  if(SAVE_CONLL_2003){
  try {
  saveCoNLLFiles("test_output/", doc, useSubTypes, useBIO);
  log.info("useBIO = " + useBIO);
  } catch (IOException e) {
  e.printStackTrace();
  System.exit(1);
  }
  }
  */
}
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp:
the class EntityExtractorResultsPrinter, method printResults.
/**
 * Score extractor output against a gold standard and print a per-label
 * precision/recall/F1 table, followed by micro-averaged totals.
 *
 * <p>Sentences are aligned positionally; each predicted mention may match at
 * most one gold mention (greedy first-match). Labels in {@code excludedClasses}
 * are skipped everywhere.
 *
 * @param pw Where to print the results table
 * @param goldStandard Gold-annotated sentences
 * @param extractorOutput System-annotated sentences, parallel to the gold list
 */
@Override
public void printResults(PrintWriter pw, List<CoreMap> goldStandard, List<CoreMap> extractorOutput) {
  ResultsPrinter.align(goldStandard, extractorOutput);
  Counter<String> correct = new ClassicCounter<>();
  Counter<String> predicted = new ClassicCounter<>();
  Counter<String> gold = new ClassicCounter<>();
  for (int i = 0; i < goldStandard.size(); i++) {
    CoreMap goldSent = goldStandard.get(i);
    CoreMap sysSent = extractorOutput.get(i);
    String sysText = sysSent.get(TextAnnotation.class);
    String goldText = goldSent.get(TextAnnotation.class);
    if (verbose) {
      log.info("SCORING THE FOLLOWING SENTENCE:");
      log.info(sysSent.get(CoreAnnotations.TokensAnnotation.class));
    }
    // Gold mentions already consumed by a match, so each matches at most once.
    HashSet<String> matchedGolds = new HashSet<>();
    List<EntityMention> goldEntities = goldSent.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
    if (goldEntities == null) {
      goldEntities = new ArrayList<>();
    }
    for (EntityMention m : goldEntities) {
      String label = makeLabel(m);
      if (excludedClasses != null && excludedClasses.contains(label))
        continue;
      gold.incrementCount(label);
    }
    List<EntityMention> sysEntities = sysSent.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
    if (sysEntities == null) {
      sysEntities = new ArrayList<>();
    }
    for (EntityMention m : sysEntities) {
      String label = makeLabel(m);
      if (excludedClasses != null && excludedClasses.contains(label))
        continue;
      predicted.incrementCount(label);
      if (verbose)
        log.info("COMPARING PREDICTED MENTION: " + m);
      boolean found = false;
      for (EntityMention gm : goldEntities) {
        if (matchedGolds.contains(gm.getObjectId()))
          continue;
        if (verbose)
          log.info("\tagainst: " + gm);
        if (gm.equals(m, useSubTypes)) {
          if (verbose)
            log.info("\t\t\tMATCH!");
          found = true;
          matchedGolds.add(gm.getObjectId());
          if (verboseInstances) {
            log.info("TRUE POSITIVE: " + m + " matched " + gm);
            log.info("In sentence: " + sysText);
          }
          break;
        }
      }
      if (found) {
        correct.incrementCount(label);
      } else if (verboseInstances) {
        log.info("FALSE POSITIVE: " + m.toString());
        log.info("In sentence: " + sysText);
      }
    }
    if (verboseInstances) {
      for (EntityMention m : goldEntities) {
        String label = makeLabel(m);
        if (!matchedGolds.contains(m.getObjectId()) && (excludedClasses == null || !excludedClasses.contains(label))) {
          log.info("FALSE NEGATIVE: " + m.toString());
          log.info("In sentence: " + goldText);
        }
      }
    }
  }
  double totalCount = 0;
  double totalCorrect = 0;
  double totalPredicted = 0;
  pw.println("Label\tCorrect\tPredict\tActual\tPrecn\tRecall\tF");
  List<String> labels = new ArrayList<>(gold.keySet());
  Collections.sort(labels);
  for (String label : labels) {
    if (excludedClasses != null && excludedClasses.contains(label))
      continue;
    double numCorrect = correct.getCount(label);
    double numPredicted = predicted.getCount(label);
    double trueCount = gold.getCount(label);
    double precision = (numPredicted > 0) ? (numCorrect / numPredicted) : 0;
    // Guard the division: trueCount should be positive for any label drawn from
    // the gold counter, but a 0/0 here would print NaN instead of 0.
    double recall = (trueCount > 0) ? (numCorrect / trueCount) : 0;
    double f = (precision + recall > 0) ? 2 * precision * recall / (precision + recall) : 0.0;
    pw.println(StringUtils.padOrTrim(label, 21) + "\t" + numCorrect + "\t" + numPredicted + "\t" + trueCount + "\t" + FORMATTER.format(precision * 100) + "\t" + FORMATTER.format(100 * recall) + "\t" + FORMATTER.format(100 * f));
    totalCount += trueCount;
    totalCorrect += numCorrect;
    totalPredicted += numPredicted;
  }
  double precision = (totalPredicted > 0) ? (totalCorrect / totalPredicted) : 0;
  // Guard against an empty gold standard: the original divided by zero here
  // and printed NaN for total recall.
  double recall = (totalCount > 0) ? (totalCorrect / totalCount) : 0;
  double f = (totalPredicted > 0 && totalCorrect > 0) ? 2 * precision * recall / (precision + recall) : 0.0;
  pw.println("Total\t" + totalCorrect + "\t" + totalPredicted + "\t" + totalCount + "\t" + FORMATTER.format(100 * precision) + "\t" + FORMATTER.format(100 * recall) + "\t" + FORMATTER.format(100 * f));
}
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp:
the class MachineReading, method removeSkippableRelations.
/**
 * Removes any relations with relation types in relationsToSkip from a dataset.
 * The dataset is modified in place; each sentence's relation-mention list is
 * replaced with a filtered copy.
 */
private static void removeSkippableRelations(Annotation dataset, Set<String> relationsToSkip) {
  if (relationsToSkip == null || relationsToSkip.isEmpty()) {
    return;
  }
  for (CoreMap sentence : dataset.get(CoreAnnotations.SentencesAnnotation.class)) {
    List<RelationMention> mentions = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
    if (mentions == null) {
      continue;
    }
    // Filter a fresh copy so the original list object is never mutated directly.
    List<RelationMention> kept = new ArrayList<>(mentions);
    kept.removeIf(mention -> relationsToSkip.contains(mention.getType()));
    sentence.set(MachineReadingAnnotations.RelationMentionsAnnotation.class, kept);
  }
}
End of aggregated usage examples.