Search in sources :

Example 56 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class Document method setParagraphAnnotation.

/**
 * Assigns a paragraph index to every token of the document and copies it onto each
 * predicted mention.
 *
 * <p>Tokens are visited in sentence order. A token that carries a character begin offset
 * starts a new paragraph when that offset is more than 2 characters past the end offset of
 * the previous offset-bearing token. Tokens without a begin offset are tagged with
 * {@code -1} and do not influence the running offset. After the pass, each predicted
 * mention takes the paragraph of its head word and {@code numParagraph} is set to the last
 * paragraph index that was assigned.
 */
private void setParagraphAnnotation() {
    int currentParagraph = 0;
    // Starts far enough below zero that the first token with a non-negative begin offset
    // opens paragraph 1.
    int lastEndOffset = -10;
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            if (!token.containsKey(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) {
                // No offset information: mark the token as belonging to no paragraph.
                token.set(CoreAnnotations.ParagraphAnnotation.class, -1);
                continue;
            }
            int beginOffset = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
            if (beginOffset > lastEndOffset + 2) {
                currentParagraph++;
            }
            token.set(CoreAnnotations.ParagraphAnnotation.class, currentParagraph);
            lastEndOffset = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
        }
    }
    // Propagate the token-level paragraph index to the mentions via their head words.
    for (List<Mention> sentenceMentions : predictedOrderedMentionsBySentence) {
        for (Mention mention : sentenceMentions) {
            mention.paragraph = mention.headWord.get(CoreAnnotations.ParagraphAnnotation.class);
        }
    }
    numParagraph = currentParagraph;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 57 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class KBPTokensregexExtractor method classify.

/**
 * Classifies the relation between the subject and object of {@code input} using the
 * TokensRegex rules registered in {@code rules}.
 *
 * <p>The sentence tokens are temporarily marked with {@code Subject} / {@code Object}
 * annotations so the rules can see where the two arguments are. Tokens inside either span
 * whose NER tag is {@code "O"} are given the type name of the subject or object
 * respectively. The temporary Subject/Object marks are always removed before this method
 * returns, including when rule extraction throws; the NER tags that were overwritten are
 * not restored.
 *
 * @param input the sentence together with the subject/object spans and types
 * @return the canonical name of the first relation type (in {@code RelationType.values()}
 *         order) whose rules produce a match, paired with the weight of the best matched
 *         expression; otherwise {@code NO_RELATION} with weight {@code 1.0}
 */
@Override
public Pair<String, Double> classify(KBPInput input) {
    // Annotate Sentence
    CoreMap sentenceAsMap = input.sentence.asCoreMap(Sentence::nerTags);
    List<CoreLabel> tokens = sentenceAsMap.get(CoreAnnotations.TokensAnnotation.class);
    try {
        // Annotate where the subject is
        for (int i : input.subjectSpan) {
            CoreLabel token = tokens.get(i);
            token.set(Subject.class, "true");
            if ("O".equals(token.ner())) {
                token.setNER(input.subjectType.name);
            }
        }
        // Annotate where the object is
        for (int i : input.objectSpan) {
            CoreLabel token = tokens.get(i);
            token.set(Object.class, "true");
            if ("O".equals(token.ner())) {
                token.setNER(input.objectType.name);
            }
        }
        // Run Rules
        for (RelationType rel : RelationType.values()) {
            if (rules.containsKey(rel) && rel.entityType == input.subjectType && rel.validNamedEntityLabels.contains(input.objectType)) {
                CoreMapExpressionExtractor extractor = rules.get(rel);
                @SuppressWarnings("unchecked") List<MatchedExpression> extractions = extractor.extractExpressions(sentenceAsMap);
                if (extractions != null && !extractions.isEmpty()) {
                    MatchedExpression best = MatchedExpression.getBestMatched(extractions, MatchedExpression.EXPR_WEIGHT_SCORER);
                    return Pair.makePair(rel.canonicalName, best.getWeight());
                }
            }
        }
        return Pair.makePair(NO_RELATION, 1.0);
    } finally {
        // Un-Annotate Sentence: done in one place so the temporary marks are removed on
        // every exit path, including an exception thrown by a rule extractor.
        for (CoreLabel token : tokens) {
            token.remove(Subject.class);
            token.remove(Object.class);
        }
    }
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreMapExpressionExtractor(edu.stanford.nlp.ling.tokensregex.CoreMapExpressionExtractor) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) Sentence(edu.stanford.nlp.simple.Sentence) MatchedExpression(edu.stanford.nlp.ling.tokensregex.MatchedExpression)

Example 58 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class BasicEntityExtractor method annotate.

/**
 * Annotates a document with entities. This will modify the document in place.
 *
 * <p>When {@code SAVE_CONLL_2003} is set, the document's entity mentions are first dumped
 * to the file {@code test.conll} in the working directory; an {@link IOException} during
 * that dump prints the stack trace and terminates the JVM with exit status 1.
 *
 * <p>Each sentence is then processed either by {@code makeAnnotationFromAllNERTags} (when
 * {@code useNERTags} is set) or by {@code extractEntities}, which also receives a running
 * sentence counter that starts at 1.
 *
 * @param doc The document to label
 */
@Override
public void annotate(Annotation doc) {
    if (SAVE_CONLL_2003) {
        // dump a file in CoNLL-2003 format
        // try-with-resources closes the stream even when building or saving the labels throws.
        try (PrintStream os = new PrintStream(new FileOutputStream("test.conll"))) {
            List<List<CoreLabel>> labels = AnnotationUtils.entityMentionsToCoreLabels(doc, annotationsToSkip, useSubTypes, useBIO);
            BasicEntityExtractor.saveCoNLL(os, labels, true);
            // saveCoNLLFiles("/tmp/ace/test", doc, useSubTypes, useBIO);
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }
    List<CoreMap> sents = doc.get(CoreAnnotations.SentencesAnnotation.class);
    int sentCount = 1;
    for (CoreMap sentence : sents) {
        if (useNERTags) {
            this.makeAnnotationFromAllNERTags(sentence);
        } else {
            extractEntities(sentence, sentCount);
        }
        sentCount++;
    }
/*
    if(SAVE_CONLL_2003){
      try {
        saveCoNLLFiles("test_output/", doc, useSubTypes, useBIO);
        log.info("useBIO = " + useBIO);
      } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
      }
    }
    */
}
Also used : PrintStream(java.io.PrintStream) FileOutputStream(java.io.FileOutputStream) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) ArrayList(java.util.ArrayList) List(java.util.List) IOException(java.io.IOException) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 59 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class EntityExtractorResultsPrinter method printResults.

/**
 * Scores extracted entity mentions against the gold standard and prints a per-label and
 * overall precision / recall / F table to {@code pw}.
 *
 * <p>The two sentence lists are first passed to {@code ResultsPrinter.align} and then
 * compared sentence by sentence at equal indices. Within a sentence, each predicted mention
 * is matched against the first not-yet-matched gold mention for which
 * {@code gm.equals(m, useSubTypes)} holds; every gold mention can be matched at most once.
 * Mentions whose label is in {@code excludedClasses} are ignored on both sides.
 *
 * <p>Only labels that occur in the gold standard get a row in the table and contribute to
 * the totals; a label that appears only in the system output is counted internally but not
 * reported.
 *
 * <p>Precision and recall are reported as 0 when their denominator is 0, so an evaluation
 * with no gold entities prints zeros rather than NaN.
 *
 * @param pw              destination for the results table
 * @param goldStandard    sentences carrying the gold entity mentions
 * @param extractorOutput sentences carrying the system entity mentions
 */
@Override
public void printResults(PrintWriter pw, List<CoreMap> goldStandard, List<CoreMap> extractorOutput) {
    ResultsPrinter.align(goldStandard, extractorOutput);
    Counter<String> correct = new ClassicCounter<>();
    Counter<String> predicted = new ClassicCounter<>();
    Counter<String> gold = new ClassicCounter<>();
    for (int i = 0; i < goldStandard.size(); i++) {
        CoreMap goldSent = goldStandard.get(i);
        CoreMap sysSent = extractorOutput.get(i);
        String sysText = sysSent.get(TextAnnotation.class);
        String goldText = goldSent.get(TextAnnotation.class);
        if (verbose) {
            log.info("SCORING THE FOLLOWING SENTENCE:");
            log.info(sysSent.get(CoreAnnotations.TokensAnnotation.class));
        }
        // Object ids of gold mentions already paired with a prediction in this sentence.
        HashSet<String> matchedGolds = new HashSet<>();
        List<EntityMention> goldEntities = goldSent.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
        if (goldEntities == null) {
            goldEntities = new ArrayList<>();
        }
        for (EntityMention m : goldEntities) {
            String label = makeLabel(m);
            if (excludedClasses != null && excludedClasses.contains(label)) {
                continue;
            }
            gold.incrementCount(label);
        }
        List<EntityMention> sysEntities = sysSent.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
        if (sysEntities == null) {
            sysEntities = new ArrayList<>();
        }
        for (EntityMention m : sysEntities) {
            String label = makeLabel(m);
            if (excludedClasses != null && excludedClasses.contains(label)) {
                continue;
            }
            predicted.incrementCount(label);
            if (verbose) {
                log.info("COMPARING PREDICTED MENTION: " + m);
            }
            boolean found = false;
            for (EntityMention gm : goldEntities) {
                if (matchedGolds.contains(gm.getObjectId())) {
                    continue;
                }
                if (verbose) {
                    log.info("\tagainst: " + gm);
                }
                if (gm.equals(m, useSubTypes)) {
                    if (verbose) {
                        log.info("\t\t\tMATCH!");
                    }
                    found = true;
                    matchedGolds.add(gm.getObjectId());
                    if (verboseInstances) {
                        log.info("TRUE POSITIVE: " + m + " matched " + gm);
                        log.info("In sentence: " + sysText);
                    }
                    break;
                }
            }
            if (found) {
                correct.incrementCount(label);
            } else if (verboseInstances) {
                log.info("FALSE POSITIVE: " + m.toString());
                log.info("In sentence: " + sysText);
            }
        }
        if (verboseInstances) {
            for (EntityMention m : goldEntities) {
                String label = makeLabel(m);
                if (!matchedGolds.contains(m.getObjectId()) && (excludedClasses == null || !excludedClasses.contains(label))) {
                    log.info("FALSE NEGATIVE: " + m.toString());
                    log.info("In sentence: " + goldText);
                }
            }
        }
    }
    double totalCount = 0;
    double totalCorrect = 0;
    double totalPredicted = 0;
    pw.println("Label\tCorrect\tPredict\tActual\tPrecn\tRecall\tF");
    List<String> labels = new ArrayList<>(gold.keySet());
    Collections.sort(labels);
    for (String label : labels) {
        if (excludedClasses != null && excludedClasses.contains(label)) {
            continue;
        }
        double numCorrect = correct.getCount(label);
        double numPredicted = predicted.getCount(label);
        double trueCount = gold.getCount(label);
        double precision = (numPredicted > 0) ? (numCorrect / numPredicted) : 0;
        double recall = (trueCount > 0) ? (numCorrect / trueCount) : 0;
        double f = (precision + recall > 0) ? 2 * precision * recall / (precision + recall) : 0.0;
        pw.println(StringUtils.padOrTrim(label, 21) + "\t" + numCorrect + "\t" + numPredicted + "\t" + trueCount + "\t" + FORMATTER.format(precision * 100) + "\t" + FORMATTER.format(100 * recall) + "\t" + FORMATTER.format(100 * f));
        totalCount += trueCount;
        totalCorrect += numCorrect;
        totalPredicted += numPredicted;
    }
    double precision = (totalPredicted > 0) ? (totalCorrect / totalPredicted) : 0;
    // Guard the division: with no gold entities totalCount is 0 and 0.0 / 0.0 would be NaN.
    double recall = (totalCount > 0) ? (totalCorrect / totalCount) : 0;
    double f = (totalPredicted > 0 && totalCorrect > 0) ? 2 * precision * recall / (precision + recall) : 0.0;
    pw.println("Total\t" + totalCorrect + "\t" + totalPredicted + "\t" + totalCount + "\t" + FORMATTER.format(100 * precision) + "\t" + FORMATTER.format(100 * recall) + "\t" + FORMATTER.format(100 * f));
}
Also used : ArrayList(java.util.ArrayList) MachineReadingAnnotations(edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations) EntityMention(edu.stanford.nlp.ie.machinereading.structure.EntityMention) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) CoreMap(edu.stanford.nlp.util.CoreMap) HashSet(java.util.HashSet)

Example 60 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class MachineReading method removeSkippableRelations.

/**
 * Drops every relation mention whose type is listed in {@code relationsToSkip}.
 *
 * <p>The dataset is modified in place: each sentence that has a relation-mention list gets
 * a fresh list containing only the mentions that were kept. Sentences without a
 * relation-mention list are left untouched, and nothing happens at all when
 * {@code relationsToSkip} is {@code null} or empty.
 *
 * @param dataset         the annotation whose sentences are filtered
 * @param relationsToSkip relation type names to remove; may be {@code null}
 */
private static void removeSkippableRelations(Annotation dataset, Set<String> relationsToSkip) {
    boolean nothingToSkip = relationsToSkip == null || relationsToSkip.isEmpty();
    if (nothingToSkip) {
        return;
    }
    for (CoreMap sentence : dataset.get(CoreAnnotations.SentencesAnnotation.class)) {
        List<RelationMention> existing = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
        if (existing != null) {
            List<RelationMention> kept = new ArrayList<>();
            for (RelationMention mention : existing) {
                if (relationsToSkip.contains(mention.getType())) {
                    continue;
                }
                kept.add(mention);
            }
            sentence.set(MachineReadingAnnotations.RelationMentionsAnnotation.class, kept);
        }
    }
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) ArrayList(java.util.ArrayList) CoreMap(edu.stanford.nlp.util.CoreMap)

Aggregations

CoreMap (edu.stanford.nlp.util.CoreMap)253 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)172 CoreLabel (edu.stanford.nlp.ling.CoreLabel)102 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)61 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)53 ArrayList (java.util.ArrayList)53 Annotation (edu.stanford.nlp.pipeline.Annotation)49 Tree (edu.stanford.nlp.trees.Tree)28 Properties (java.util.Properties)23 StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)20 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)20 List (java.util.List)20 Mention (edu.stanford.nlp.coref.data.Mention)17 ArrayCoreMap (edu.stanford.nlp.util.ArrayCoreMap)17 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)13 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)12 SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation)11 MachineReadingAnnotations (edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations)9 IndexedWord (edu.stanford.nlp.ling.IndexedWord)9 IntPair (edu.stanford.nlp.util.IntPair)9