Search in sources :

Example 61 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class ResultsPrinter method printResults.

/**
 * Scores the output of an information extractor against gold-standard annotations of the
 * same sentences and returns the resulting report as a string.
 *
 * <p>The sentence lists stored under {@code CoreAnnotations.SentencesAnnotation} are copied
 * into fresh lists before being passed to the writer-based {@code printResults} overload, so
 * that overload works on lists distinct from the ones held by the two arguments.
 *
 * @param goldStandard annotation holding the gold-standard sentences
 * @param extractorOutput annotation holding the sentences as labeled by the extractor
 * @return everything the writer-based overload wrote
 */
public String printResults(CoreMap goldStandard, CoreMap extractorOutput) {
    List<CoreMap> goldSentences = new ArrayList<>(goldStandard.get(CoreAnnotations.SentencesAnnotation.class));
    List<CoreMap> predictedSentences = new ArrayList<>(extractorOutput.get(CoreAnnotations.SentencesAnnotation.class));
    // collect the report in memory instead of sending it to a stream
    StringWriter report = new StringWriter();
    PrintWriter out = new PrintWriter(report, true);
    printResults(out, goldSentences, predictedSentences);
    return report.toString();
}
Also used : StringWriter(java.io.StringWriter) ArrayList(java.util.ArrayList) CoreMap(edu.stanford.nlp.util.CoreMap) PrintWriter(java.io.PrintWriter)

Example 62 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class NumberSequenceClassifier method classifyWithSUTime.

// todo [cdm, 2013]: Where does this call NumberNormalizer?  Is it the call buried in SUTime's TimeExpressionExtractorImpl?
/**
 * Modular classification using NumberNormalizer for numbers, SUTime for date/time.
 * Note: this is slower than classifyOld because it runs multiple passes
 *   over the tokens (one for numbers and dates, and others for money and ordinals).
 *   However, the slowdown is not substantial since the passes are fast. Plus,
 *   the code is much cleaner than before...
 *
 * Labels are only written to tokens whose AnswerAnnotation is still the background
 * symbol, so earlier passes take precedence over later ones: time expressions first,
 * then composite numbers, then tokens tagged "CD", then money/percent and ordinals.
 *
 * @param tokenSequence the tokens to label; they are modified in place
 * @param document passed through to runSUTime together with the sentence
 * @param sentence the sentence the tokens belong to; may be null, in which case a
 *   sentence is built from tokenSequence
 * @return the same tokenSequence list that was passed in
 */
private List<CoreLabel> classifyWithSUTime(List<CoreLabel> tokenSequence, final CoreMap document, final CoreMap sentence) {
    // give every token an answer up front so the equals() checks below never see null
    for (CoreLabel token : tokenSequence) {
        if (token.get(CoreAnnotations.AnswerAnnotation.class) == null) {
            token.set(CoreAnnotations.AnswerAnnotation.class, flags.backgroundSymbol);
        }
    }
    //
    // run SUTime
    // note: SUTime requires TextAnnotation to be set at document/sent level and
    //   that the Character*Offset annotations be aligned with the token words.
    //   This is guaranteed because here we work on a copy generated by copyTokens()
    //
    CoreMap timeSentence = (sentence != null ? alignSentence(sentence) : buildSentenceFromTokens(tokenSequence));
    List<CoreMap> timeExpressions = runSUTime(timeSentence, document);
    List<CoreMap> numbers = timeSentence.get(CoreAnnotations.NumerizedTokensAnnotation.class);
    //
    // Span indices are shifted down by the sentence's TokenBeginAnnotation (when present)
    // before indexing into tokenSequence. The shift is the same for every span, so it is
    // looked up once here rather than once per time expression and once per number.
    //
    int offset = 0;
    if (sentence != null && sentence.containsKey(CoreAnnotations.TokenBeginAnnotation.class)) {
        offset = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
    }
    //
    // dates and times found by SUTime
    //
    if (timeExpressions != null) {
        for (CoreMap timeExpression : timeExpressions) {
            // todo [cdm 2013]: We should also store these in the Sentence, but we've just got the list of tokens here
            int start = timeExpression.get(CoreAnnotations.TokenBeginAnnotation.class);
            int end = timeExpression.get(CoreAnnotations.TokenEndAnnotation.class);
            Timex timex = timeExpression.get(TimeAnnotations.TimexAnnotation.class);
            if (timex == null) {
                continue;
            }
            if (DEBUG) {
                log.info("FOUND DATE/TIME \"" + timeExpression + "\" with offsets " + start + " " + end + " and value " + timex);
                log.info("The above CoreMap has the following fields:");
                // for(Class key: timeExpression.keySet()) log.info("\t" + key + ": " + timeExpression.get(key));
            }
            String label = timex.timexType();
            for (int i = start; i < end; i++) {
                CoreLabel token = tokenSequence.get(i - offset);
                if (token.get(CoreAnnotations.AnswerAnnotation.class).equals(flags.backgroundSymbol)) {
                    token.set(CoreAnnotations.AnswerAnnotation.class, label);
                    token.set(TimeAnnotations.TimexAnnotation.class, timex);
                }
            }
        }
    }
    //
    // composite numbers attached to the sentence
    //
    if (numbers != null) {
        for (CoreMap number : numbers) {
            if (!number.containsKey(CoreAnnotations.NumericCompositeValueAnnotation.class)) {
                continue;
            }
            int start = number.get(CoreAnnotations.TokenBeginAnnotation.class);
            int end = number.get(CoreAnnotations.TokenEndAnnotation.class);
            String type = number.get(CoreAnnotations.NumericCompositeTypeAnnotation.class);
            Number value = number.get(CoreAnnotations.NumericCompositeValueAnnotation.class);
            if (type == null) {
                continue;
            }
            if (DEBUG) {
                log.info("FOUND NUMBER \"" + number + "\" with offsets " + start + " " + end + " and value " + value + " and type " + type);
            }
            for (int i = start; i < end; i++) {
                CoreLabel token = tokenSequence.get(i - offset);
                if (token.get(CoreAnnotations.AnswerAnnotation.class).equals(flags.backgroundSymbol)) {
                    token.set(CoreAnnotations.AnswerAnnotation.class, type);
                    if (value != null) {
                        token.set(CoreAnnotations.NumericCompositeValueAnnotation.class, value);
                    }
                }
            }
        }
    }
    // use inverted "CD".equals() because tag could be null (if no POS info available)
    for (CoreLabel token : tokenSequence) {
        if ("CD".equals(token.tag()) && token.get(CoreAnnotations.AnswerAnnotation.class).equals(flags.backgroundSymbol)) {
            token.set(CoreAnnotations.AnswerAnnotation.class, "NUMBER");
        }
    }
    // extract money and percents
    moneyAndPercentRecognizer(tokenSequence);
    // ordinals
    // NumberNormalizer probably catches these but let's be safe
    ordinalRecognizer(tokenSequence);
    return tokenSequence;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Timex(edu.stanford.nlp.time.Timex) TimeAnnotations(edu.stanford.nlp.time.TimeAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 63 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class AnnotationUtils method shuffleSentences.

/**
 * Randomly reorders all sentences in this dataset.
 * The list stored under {@code CoreAnnotations.SentencesAnnotation} is shuffled and then
 * set back on the dataset.
 *
 * @param dataset the annotation whose sentence list is shuffled
 */
public static void shuffleSentences(CoreMap dataset) {
    final List<CoreMap> reordered = dataset.get(CoreAnnotations.SentencesAnnotation.class);
    // a fixed seed keeps the resulting order reproducible from one experiment to the next
    final Random fixedSeedRandom = new Random(0);
    Collections.shuffle(reordered, fixedSeedRandom);
    dataset.set(CoreAnnotations.SentencesAnnotation.class, reordered);
}
Also used : Random(java.util.Random) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 64 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class AnnotationUtils method entityMentionsToCoreLabels.

/**
 * Converts the entity mention labels of every sentence in this dataset into a sequence of
 * CoreLabels, one sequence per sentence, in sentence order.
 *
 * <p>Each sentence is handed to {@code sentenceEntityMentionsToCoreLabels} with {@code true}
 * as its second argument and {@code null} as its fourth; the remaining arguments are
 * forwarded from this method's parameters unchanged.
 *
 * @param dataset the annotation whose sentences are converted
 * @param annotationsToSkip forwarded to the per-sentence conversion
 * @param useSubTypes forwarded to the per-sentence conversion
 * @param useBIO forwarded to the per-sentence conversion
 * @return one list of CoreLabels per sentence
 */
public static List<List<CoreLabel>> entityMentionsToCoreLabels(CoreMap dataset, Set<String> annotationsToSkip, boolean useSubTypes, boolean useBIO) {
    final List<List<CoreLabel>> labeledSentences = new ArrayList<>();
    for (CoreMap current : dataset.get(CoreAnnotations.SentencesAnnotation.class)) {
        final List<CoreLabel> labels = sentenceEntityMentionsToCoreLabels(current, true, annotationsToSkip, null, useSubTypes, useBIO);
        // the per-sentence conversion is expected to always produce a list
        assert (labels != null);
        labeledSentences.add(labels);
    }
    return labeledSentences;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) ArrayList(java.util.ArrayList) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) ArrayList(java.util.ArrayList) List(java.util.List) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 65 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class AnnotationUtils method datasetToString.

/**
 * Builds a string for the whole dataset by concatenating the result of
 * {@code sentenceToString} for each sentence, in order, with nothing added in between.
 *
 * @param dataset the annotation whose sentences are rendered
 * @return the concatenated sentence strings; the empty string when the dataset has no
 *   sentence list
 */
public static String datasetToString(CoreMap dataset) {
    List<CoreMap> sents = dataset.get(CoreAnnotations.SentencesAnnotation.class);
    if (sents == null) {
        return "";
    }
    // the builder never leaves this method, so the unsynchronized StringBuilder is enough
    StringBuilder b = new StringBuilder();
    for (CoreMap sent : sents) {
        b.append(sentenceToString(sent));
    }
    return b.toString();
}
Also used : TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Aggregations

CoreMap (edu.stanford.nlp.util.CoreMap)253 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)172 CoreLabel (edu.stanford.nlp.ling.CoreLabel)102 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)61 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)53 ArrayList (java.util.ArrayList)53 Annotation (edu.stanford.nlp.pipeline.Annotation)49 Tree (edu.stanford.nlp.trees.Tree)28 Properties (java.util.Properties)23 StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)20 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)20 List (java.util.List)20 Mention (edu.stanford.nlp.coref.data.Mention)17 ArrayCoreMap (edu.stanford.nlp.util.ArrayCoreMap)17 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)13 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)12 SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation)11 MachineReadingAnnotations (edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations)9 IndexedWord (edu.stanford.nlp.ling.IndexedWord)9 IntPair (edu.stanford.nlp.util.IntPair)9