Search in sources:

Example 61 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class NumberSequenceClassifier method buildSentenceFromTokens.

private static CoreMap buildSentenceFromTokens(List<CoreLabel> tokens, Integer characterOffsetStart, Integer characterOffsetEnd) {
    //
    // Recover the sentence text:
    // a) try to get it from TextAnnotation
    // b) if not present, build it from the OriginalTextAnnotation of each token
    // c) if not present, build it from the TextAnnotation of each token
    //
    boolean adjustCharacterOffsets = false;
    // try to recover the text from the original tokens
    String text = buildText(tokens, CoreAnnotations.OriginalTextAnnotation.class);
    if (text == null) {
        text = buildText(tokens, CoreAnnotations.TextAnnotation.class);
        // character offset will point to the original tokens
        //   so we need to align them to the text built from normalized tokens
        adjustCharacterOffsets = true;
        if (text == null) {
            throw new RuntimeException("ERROR: to use SUTime, sentences must have TextAnnotation set, or the individual tokens must have OriginalTextAnnotation or TextAnnotation set!");
        }
    }
    // make sure token character offsets are aligned with text
    List<CoreLabel> tokenSequence = copyTokens(tokens, adjustCharacterOffsets, false);
    Annotation newSentence = new Annotation(text);
    newSentence.set(CoreAnnotations.TokensAnnotation.class, tokenSequence);
    if (!adjustCharacterOffsets && characterOffsetStart != null && characterOffsetEnd != null) {
        newSentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, characterOffsetStart);
        newSentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, characterOffsetEnd);
    } else {
        int tokenCharStart = tokenSequence.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
        int tokenCharEnd = tokenSequence.get(tokenSequence.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
        newSentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, tokenCharStart);
        newSentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, tokenCharEnd);
    }
    // some default token offsets
    newSentence.set(CoreAnnotations.TokenBeginAnnotation.class, 0);
    newSentence.set(CoreAnnotations.TokenEndAnnotation.class, tokenSequence.size());
    return newSentence;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreAnnotation(edu.stanford.nlp.ling.CoreAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 62 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class MachineReading method makeDataSets.

/**
 * Fills the {@code datasets} and {@code predictions} fields from the given corpora.
 *
 * Without cross validation a single (train, test) pair is created; when an auxiliary
 * dataset is given, its sentences are appended to a fresh copy of the training sentences.
 * With cross validation the training sentences are shuffled and split into
 * {@code MachineReadingProperties.kfold} partitions; each partition in turn serves as
 * the test set while the remaining sentences (plus the auxiliary ones) form the train set.
 *
 * @param training the training corpus
 * @param testing the test corpus; only used when cross validation is off
 * @param auxDataset extra training sentences; may be {@code null}
 */
@SuppressWarnings("unchecked")
private void makeDataSets(Annotation training, Annotation testing, Annotation auxDataset) {
    if (!MachineReadingProperties.crossValidate) {
        // Single split: train on training (+ aux), evaluate on testing.
        datasets = new Pair[1];
        Annotation trainSet = training;
        if (auxDataset != null) {
            // Build a new corpus so the caller's training annotation is not extended.
            trainSet = new Annotation(training.get(TextAnnotation.class));
            for (int s = 0; s < AnnotationUtils.sentenceCount(training); s++) {
                AnnotationUtils.addSentence(trainSet, AnnotationUtils.getSentence(training, s));
            }
            for (int s = 0; s < AnnotationUtils.sentenceCount(auxDataset); s++) {
                AnnotationUtils.addSentence(trainSet, AnnotationUtils.getSentence(auxDataset, s));
            }
        }
        datasets[0] = new Pair<>(trainSet, testing);
        predictions = new Annotation[3][1];
        return;
    }

    // Cross validation: one (train, test) pair per fold.
    assert (MachineReadingProperties.kfold > 1);
    datasets = new Pair[MachineReadingProperties.kfold];
    AnnotationUtils.shuffleSentences(training);
    for (int fold = 0; fold < MachineReadingProperties.kfold; fold++) {
        int foldStart = AnnotationUtils.sentenceCount(training) * fold / MachineReadingProperties.kfold;
        int foldEnd = AnnotationUtils.sentenceCount(training) * (fold + 1) / MachineReadingProperties.kfold;
        MachineReadingProperties.logger.info("Creating partition #" + fold + " using offsets [" + foldStart + ", " + foldEnd + ") out of " + AnnotationUtils.sentenceCount(training));

        Annotation foldTrain = new Annotation("");
        Annotation foldTest = new Annotation("");
        for (int s = 0; s < AnnotationUtils.sentenceCount(training); s++) {
            // Sentences inside [foldStart, foldEnd) are held out; all others are trained on.
            boolean heldOut = s >= foldStart && s < foldEnd;
            AnnotationUtils.addSentence(heldOut ? foldTest : foldTrain, AnnotationUtils.getSentence(training, s));
        }

        // for learning curve experiments
        foldTrain = keepPercentage(foldTrain, MachineReadingProperties.percentageOfTrain);

        // The auxiliary sentences are added after the percentage cut.
        if (auxDataset != null) {
            for (int s = 0; s < AnnotationUtils.sentenceCount(auxDataset); s++) {
                AnnotationUtils.addSentence(foldTrain, AnnotationUtils.getSentence(auxDataset, s));
            }
        }
        datasets[fold] = new Pair<>(foldTrain, foldTest);
    }
    predictions = new Annotation[3][MachineReadingProperties.kfold];
}
Also used : SentencesAnnotation(edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation) TreeAnnotation(edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation) TextAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation) EntityMentionsAnnotation(edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations.EntityMentionsAnnotation) TokensAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 63 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class AnnotationUtils method deepMentionCopy.

/**
   * Copies a dataset sentence by sentence, passing every sentence through
   * {@link #sentenceDeepMentionCopy(Annotation)}. The returned annotation is built
   * around an empty text and receives only the copied sentences.
   *
   * @param dataset the dataset whose sentences are copied; a missing sentence list
   *                yields a result with no sentences added
   * @return a new annotation holding the copied sentences
   * @throws RuntimeException if a sentence is not an {@link Annotation}
   */
public static Annotation deepMentionCopy(CoreMap dataset) {
    Annotation copy = new Annotation("");
    List<CoreMap> sourceSentences = dataset.get(CoreAnnotations.SentencesAnnotation.class);
    List<CoreMap> copiedSentences = new ArrayList<>();
    if (sourceSentences != null) {
        for (CoreMap candidate : sourceSentences) {
            if (candidate instanceof Annotation) {
                copiedSentences.add(sentenceDeepMentionCopy((Annotation) candidate));
            } else {
                throw new RuntimeException("ERROR: Sentences must instantiate Annotation!");
            }
        }
    }
    addSentences(copy, copiedSentences);
    return copy;
}
Also used : TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) ArrayList(java.util.ArrayList) CoreMap(edu.stanford.nlp.util.CoreMap) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 64 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class AnnotationUtils method sentenceDeepMentionCopy.

/**
   * Copies a sentence so that the copy owns its own entity, relation and event
   * mention lists. The mention objects inside those lists, as well as the tokens,
   * tree, dependency graphs and document id, are shared with the original sentence.
   *
   * @param sentence the sentence to copy
   * @return a new annotation with the same text and the annotations listed above
   */
public static Annotation sentenceDeepMentionCopy(Annotation sentence) {
    String sentenceText = sentence.get(CoreAnnotations.TextAnnotation.class);
    Annotation copy = new Annotation(sentenceText);

    // Carried over by reference, in a fixed order.
    copy.set(CoreAnnotations.TokensAnnotation.class,
             sentence.get(CoreAnnotations.TokensAnnotation.class));
    copy.set(TreeCoreAnnotations.TreeAnnotation.class,
             sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
    copy.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class,
             sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class));
    copy.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class,
             sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class));
    copy.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class,
             sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class));
    copy.set(CoreAnnotations.DocIDAnnotation.class,
             sentence.get(CoreAnnotations.DocIDAnnotation.class));

    // Each mention list that is present gets a fresh list with the same elements.
    List<EntityMention> entityMentions = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
    if (entityMentions != null) {
        copy.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, new ArrayList<>(entityMentions));
    }

    List<RelationMention> relationMentions = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
    if (relationMentions != null) {
        copy.set(MachineReadingAnnotations.RelationMentionsAnnotation.class, new ArrayList<>(relationMentions));
    }

    List<EventMention> eventMentions = sentence.get(MachineReadingAnnotations.EventMentionsAnnotation.class);
    if (eventMentions != null) {
        copy.set(MachineReadingAnnotations.EventMentionsAnnotation.class, new ArrayList<>(eventMentions));
    }

    return copy;
}
Also used : SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) ArrayList(java.util.ArrayList) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) Annotation(edu.stanford.nlp.pipeline.Annotation) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)

Example 65 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class OpenIEDemo method main.

/**
 * Runs the OpenIE demo: annotates a text with a CoreNLP pipeline and prints, for
 * every sentence, its text, its enhanced dependency graph, the extracted relation
 * triples and the clauses found by the clause splitter.
 *
 * @param args optional; when present, {@code args[0]} is the path of a file whose
 *             contents are annotated instead of the built-in example text
 * @throws Exception if reading the input file or running the pipeline fails
 */
public static void main(String[] args) throws Exception {
    // Create the Stanford CoreNLP pipeline
    Properties props = PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,depparse,natlog,openie");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    // Annotate an example document.
    String text;
    if (args.length > 0) {
        text = IOUtils.slurpFile(args[0]);
    } else {
        text = "Obama was born in Hawaii. He is our president.";
    }
    Annotation doc = new Annotation(text);
    pipeline.annotate(doc);
    // The stand-alone OpenIE instance depends only on props, so it is created once
    // here instead of being re-created for every sentence inside the loop.
    OpenIE clauseSplitter = new OpenIE(props);
    // Loop over sentences in the document
    int sentNo = 0;
    for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
        System.out.println("Sentence #" + ++sentNo + ": " + sentence.get(CoreAnnotations.TextAnnotation.class));
        // Print SemanticGraph
        System.out.println(sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class).toString(SemanticGraph.OutputFormat.LIST));
        // Get the OpenIE triples for the sentence
        Collection<RelationTriple> triples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
        // Print the triples
        for (RelationTriple triple : triples) {
            System.out.println(triple.confidence + "\t" + triple.subjectLemmaGloss() + "\t" + triple.relationLemmaGloss() + "\t" + triple.objectLemmaGloss());
        }
        // Alternately, to only run e.g., the clause splitter:
        List<SentenceFragment> clauses = clauseSplitter.clausesInSentence(sentence);
        for (SentenceFragment clause : clauses) {
            System.out.println(clause.parseTree.toString(SemanticGraph.OutputFormat.LIST));
        }
        System.out.println();
    }
}
Also used : SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) Properties(java.util.Properties) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) Annotation(edu.stanford.nlp.pipeline.Annotation) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Aggregations

Annotation (edu.stanford.nlp.pipeline.Annotation)91 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)58 CoreMap (edu.stanford.nlp.util.CoreMap)50 CoreLabel (edu.stanford.nlp.ling.CoreLabel)30 StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)27 ArrayList (java.util.ArrayList)25 Properties (java.util.Properties)25 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)19 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)14 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)13 SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation)12 TreeAnnotation (edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation)12 List (java.util.List)11 Tree (edu.stanford.nlp.trees.Tree)10 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)8 IOException (java.io.IOException)8 TokensAnnotation (edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation)7 CorefChain (edu.stanford.nlp.coref.data.CorefChain)6 EntityMentionsAnnotation (edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations.EntityMentionsAnnotation)6 CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation)6