Search in sources :

Example 36 with Annotation

Use of edu.stanford.nlp.pipeline.Annotation in the CoreNLP project by stanfordnlp.

From the class MachineReading, method annotate.

/**
 * Annotates the given test corpus with the entity and/or relation extractors
 * (controlled by MachineReadingProperties flags), prints per-task results via
 * the configured ResultsPrinters, and stores the predicted annotations in the
 * predictions matrix.
 *
 * @param testing the gold test sentences for this partition
 * @param partition cross-validation partition index, or -1 when not cross-validating
 * @return the relation-level predictions for this partition; NOTE(review): this
 *         slot is read even when extractRelations is false, so the return value
 *         may be null in that case — confirm callers only use it when relation
 *         extraction is enabled
 */
protected Annotation annotate(Annotation testing, int partition) {
    // partition == -1 means "no cross-validation"; results go in slot 0
    int partitionIndex = (partition != -1 ? partition : 0);
    // entity extraction
    if (MachineReadingProperties.extractEntities) {
        assert (entityExtractor != null);
        // annotate a deep copy so the gold mentions in `testing` remain intact for scoring
        Annotation predicted = AnnotationUtils.deepMentionCopy(testing);
        entityExtractor.annotate(predicted);
        for (ResultsPrinter rp : entityResultsPrinterSet) {
            String msg = rp.printResults(testing, predicted);
            MachineReadingProperties.logger.info("Entity extraction results " + (partition != -1 ? "for partition #" + partition : "") + " using printer " + rp.getClass() + ":\n" + msg);
        }
        predictions[ENTITY_LEVEL][partitionIndex] = predicted;
    }
    // relation extraction
    if (MachineReadingProperties.extractRelations) {
        assert (relationExtractor != null);
        // either chain on the entity predictions stored above, or start from a fresh copy of the gold mentions
        Annotation predicted = (MachineReadingProperties.testRelationsUsingPredictedEntities ? predictions[ENTITY_LEVEL][partitionIndex] : AnnotationUtils.deepMentionCopy(testing));
        // make sure the entities have the syntactic head and span set. we need this for relation extraction features
        // TODO(AngledLuffa): this call to assignSyntacticHeadToEntities
        // is changing the annotations for the original annotation.
        // This is probably not right?  It can result in changes in the
        // dependencies when run in the pipeline.  For example:
        // "They are such as interested Thomas Aquinas and Bonaventura, Anselm and Bernard."
        // https://github.com/stanfordnlp/CoreNLP/issues/1053
        assignSyntacticHeadToEntities(predicted);
        relationExtractor.annotate(predicted);
        // post processor is instantiated lazily on first use
        if (relationExtractionPostProcessor == null) {
            relationExtractionPostProcessor = makeExtractor(MachineReadingProperties.relationExtractionPostProcessorClass);
        }
        if (relationExtractionPostProcessor != null) {
            MachineReadingProperties.logger.info("Using relation extraction post processor: " + MachineReadingProperties.relationExtractionPostProcessorClass);
            relationExtractionPostProcessor.annotate(predicted);
        }
        for (ResultsPrinter rp : getRelationResultsPrinterSet()) {
            String msg = rp.printResults(testing, predicted);
            MachineReadingProperties.logger.info("Relation extraction results " + (partition != -1 ? "for partition #" + partition : "") + " using printer " + rp.getClass() + ":\n" + msg);
        }
        // optional consistency checking (also lazily instantiated); entity and
        // relation results are re-printed afterwards so the effect is visible
        if (consistencyChecker == null) {
            consistencyChecker = makeExtractor(MachineReadingProperties.consistencyCheck);
        }
        if (consistencyChecker != null) {
            MachineReadingProperties.logger.info("Using consistency checker: " + MachineReadingProperties.consistencyCheck);
            consistencyChecker.annotate(predicted);
            for (ResultsPrinter rp : entityResultsPrinterSet) {
                String msg = rp.printResults(testing, predicted);
                MachineReadingProperties.logger.info("Entity extraction results AFTER consistency checks " + (partition != -1 ? "for partition #" + partition : "") + " using printer " + rp.getClass() + ":\n" + msg);
            }
            for (ResultsPrinter rp : getRelationResultsPrinterSet()) {
                String msg = rp.printResults(testing, predicted);
                MachineReadingProperties.logger.info("Relation extraction results AFTER consistency checks " + (partition != -1 ? "for partition #" + partition : "") + " using printer " + rp.getClass() + ":\n" + msg);
            }
        }
        predictions[RELATION_LEVEL][partitionIndex] = predicted;
    }
    return predictions[RELATION_LEVEL][partitionIndex];
}
Also used : SentencesAnnotation(edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation) TreeAnnotation(edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation) TextAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation) EntityMentionsAnnotation(edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations.EntityMentionsAnnotation) TokensAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 37 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

From the class MachineReading, method run.

/**
 * Performs extraction. This will train a new extraction model and evaluate
 * the model on the test set. Depending on the MachineReading instance's
 * parameters, it may skip training if a model already exists or skip
 * evaluation.
 *
 * @return results strings from the task printers; can be compared in a utest
 * @throws Exception if reading, training, evaluation, or serialization fails
 */
public List<String> run() throws Exception {
    // retrain unless an existing serialized model should be loaded
    this.forceRetraining = !MachineReadingProperties.loadModel;
    if (MachineReadingProperties.trainOnly) {
        this.forceRetraining = true;
    }
    List<String> retMsg = new ArrayList<>();
    boolean haveSerializedEntityExtractor = serializedModelExists(MachineReadingProperties.serializedEntityExtractorPath);
    boolean haveSerializedRelationExtractor = serializedModelExists(MachineReadingProperties.serializedRelationExtractorPath);
    boolean haveSerializedEventExtractor = serializedModelExists(MachineReadingProperties.serializedEventExtractorPath);
    Annotation training = null;
    Annotation aux = null;
    // training (and optional auxiliary) data is only loaded if at least one
    // enabled task lacks a serialized model, or retraining/cross-validation is forced
    if ((MachineReadingProperties.extractEntities && !haveSerializedEntityExtractor) || (MachineReadingProperties.extractRelations && !haveSerializedRelationExtractor) || (MachineReadingProperties.extractEvents && !haveSerializedEventExtractor) || this.forceRetraining || MachineReadingProperties.crossValidate) {
        // load training sentences
        training = loadOrMakeSerializedSentences(MachineReadingProperties.trainPath, reader, new File(MachineReadingProperties.serializedTrainingSentencesPath));
        if (auxReader != null) {
            MachineReadingProperties.logger.severe("Reading auxiliary dataset from " + MachineReadingProperties.auxDataPath + "...");
            aux = loadOrMakeSerializedSentences(MachineReadingProperties.auxDataPath, auxReader, new File(MachineReadingProperties.serializedAuxTrainingSentencesPath));
            MachineReadingProperties.logger.severe("Done reading auxiliary dataset.");
        }
    }
    Annotation testing = null;
    if (!MachineReadingProperties.trainOnly && !MachineReadingProperties.crossValidate) {
        // load test sentences
        File serializedTestSentences = new File(MachineReadingProperties.serializedTestSentencesPath);
        testing = loadOrMakeSerializedSentences(MachineReadingProperties.testPath, reader, serializedTestSentences);
    }
    // 
    // create the actual datasets to be used for training and annotation
    // 
    makeDataSets(training, testing, aux);
    // train and annotate each partition (a single partition unless cross-validating)
    for (int partition = 0; partition < datasets.length; partition++) {
        assert (datasets.length > partition);
        assert (datasets[partition] != null);
        assert (MachineReadingProperties.trainOnly || datasets[partition].second() != null);
        // train all models
        train(datasets[partition].first(), (MachineReadingProperties.crossValidate ? partition : -1));
        // annotate using all models
        if (!MachineReadingProperties.trainOnly) {
            MachineReadingProperties.logger.info("annotating partition " + partition);
            annotate(datasets[partition].second(), (MachineReadingProperties.crossValidate ? partition : -1));
        }
    }
    // score the merged predictions against the merged gold data
    if (!MachineReadingProperties.trainOnly) {
        // merge test sets for the gold data
        Annotation gold = new Annotation("");
        for (Pair<Annotation, Annotation> dataset : datasets) AnnotationUtils.addSentences(gold, dataset.second().get(SentencesAnnotation.class));
        // merge test sets with predicted annotations
        Annotation[] mergedPredictions = new Annotation[3];
        assert (predictions != null);
        for (int taskLevel = 0; taskLevel < mergedPredictions.length; taskLevel++) {
            mergedPredictions[taskLevel] = new Annotation("");
            for (int fold = 0; fold < predictions[taskLevel].length; fold++) {
                // a fold may be null when its task was disabled
                if (predictions[taskLevel][fold] == null)
                    continue;
                AnnotationUtils.addSentences(mergedPredictions[taskLevel], predictions[taskLevel][fold].get(CoreAnnotations.SentencesAnnotation.class));
            }
        }
        // print results per enabled task
        if (MachineReadingProperties.extractEntities && !entityResultsPrinterSet.isEmpty()) {
            retMsg.addAll(printTask("entity extraction", entityResultsPrinterSet, gold, mergedPredictions[ENTITY_LEVEL]));
        }
        if (MachineReadingProperties.extractRelations && !getRelationResultsPrinterSet().isEmpty()) {
            retMsg.addAll(printTask("relation extraction", getRelationResultsPrinterSet(), gold, mergedPredictions[RELATION_LEVEL]));
        }
        // optionally serialize the merged predictions to disk
        if (MachineReadingProperties.extractEntities && MachineReadingProperties.serializedEntityExtractionResults != null)
            IOUtils.writeObjectToFile(mergedPredictions[ENTITY_LEVEL], MachineReadingProperties.serializedEntityExtractionResults);
        if (MachineReadingProperties.extractRelations && MachineReadingProperties.serializedRelationExtractionResults != null)
            IOUtils.writeObjectToFile(mergedPredictions[RELATION_LEVEL], MachineReadingProperties.serializedRelationExtractionResults);
        if (MachineReadingProperties.extractEvents && MachineReadingProperties.serializedEventExtractionResults != null)
            IOUtils.writeObjectToFile(mergedPredictions[EVENT_LEVEL], MachineReadingProperties.serializedEventExtractionResults);
    }
    return retMsg;
}
Also used : SentencesAnnotation(edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation) ArrayList(java.util.ArrayList) File(java.io.File) SentencesAnnotation(edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation) TreeAnnotation(edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation) TextAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation) EntityMentionsAnnotation(edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations.EntityMentionsAnnotation) TokensAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 38 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

From the class MachineReading, method loadOrMakeSerializedSentences.

/**
 * Gets the serialized sentences for a data set. If the serialized sentences
 * are already on disk, it loads them from there. Otherwise, the data set is
 * read with the corpus reader and the serialized sentences are saved to disk
 * for future runs.
 *
 * @param sentencesPath Location of the raw data set
 * @param reader The corpus reader
 * @param serializedSentences Where the serialized sentences should be stored on disk
 * @return A list of RelationsSentences
 */
private Annotation loadOrMakeSerializedSentences(String sentencesPath, GenericDataSetReader reader, File serializedSentences) throws IOException, ClassNotFoundException {
    boolean cacheUsable = MachineReadingProperties.serializeCorpora && serializedSentences.exists() && !forceParseSentences;
    if (cacheUsable) {
        // fast path: deserialize previously parsed sentences from disk
        MachineReadingProperties.logger.info("Loaded serialized sentences from " + serializedSentences.getAbsolutePath() + "...");
        Annotation cached = IOUtils.readObjectFromFile(serializedSentences);
        MachineReadingProperties.logger.info("Done. Loaded " + cached.get(CoreAnnotations.SentencesAnnotation.class).size() + " sentences.");
        return cached;
    }
    // slow path: parse the raw corpus with the reader
    MachineReadingProperties.logger.info("Parsing corpus sentences...");
    if (MachineReadingProperties.serializeCorpora) {
        MachineReadingProperties.logger.info("These sentences will be serialized to " + serializedSentences.getAbsolutePath());
    }
    Annotation parsed = reader.parse(sentencesPath);
    MachineReadingProperties.logger.info("Done. Parsed " + AnnotationUtils.sentenceCount(parsed) + " sentences.");
    // persist the parsed sentences so later runs can take the fast path
    if (MachineReadingProperties.serializeCorpora) {
        MachineReadingProperties.logger.info("Serializing parsed sentences to " + serializedSentences.getAbsolutePath() + "...");
        IOUtils.writeObjectToFile(parsed, serializedSentences);
        MachineReadingProperties.logger.info("Done. Serialized " + AnnotationUtils.sentenceCount(parsed) + " sentences.");
    }
    return parsed;
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SentencesAnnotation(edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation) TreeAnnotation(edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation) TextAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation) EntityMentionsAnnotation(edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations.EntityMentionsAnnotation) TokensAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 39 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

From the class AceReader, method read.

/**
 * Reads in ACE*.apf.xml files and converts them to RelationSentence objects.
 * Note that you probably should call parse() instead.
 *
 * Currently, this ignores document boundaries (the list returned will include
 * sentences from all documents).
 *
 * @param path directory containing ACE files to read (e.g.
 *          "/home/mcclosky/scr/data/ACE2005/english_test"). This can also be
 *          the path to a single file. *
 * @return list of RelationSentence objects
 */
@Override
public Annotation read(String path) throws IOException, SAXException, ParserConfigurationException {
    File root = new File(path);
    assert root.exists();
    Annotation corpus = new Annotation("");
    List<CoreMap> sentences = new ArrayList<>();
    if (root.isDirectory()) {
        for (File aceFile : IOUtils.iterFilesRecursive(root, ".apf.xml")) {
            // skip the duplicate UPC1 variants of documents
            if (!aceFile.getName().endsWith(".UPC1.apf.xml")) {
                sentences.addAll(readDocument(aceFile, corpus));
            }
        }
    } else {
        // in case it's a file
        sentences.addAll(readDocument(root, corpus));
    }
    AnnotationUtils.addSentences(corpus, sentences);
    // quick stats
    if (VERBOSE) {
        printCounter(entityCounts, "entity mention");
        printCounter(relationCounts, "relation mention");
        printCounter(eventCounts, "event mention");
    }
    for (CoreMap sentence : sentences) {
        // check for entity mentions of the same type that are adjacent
        countAdjacentMentions(sentence);
        // count relations between two proper nouns
        countNameRelations(sentence);
        // count types of mentions
        countMentionTypes(sentence);
    }
    if (VERBOSE) {
        printCounter(adjacentEntityMentions, "adjacent entity mention");
        printCounter(nameRelationCounts, "name relation mention");
        printCounter(mentionTypeCounts, "mention type counts");
    }
    return corpus;
}
Also used : ArrayList(java.util.ArrayList) CoreMap(edu.stanford.nlp.util.CoreMap) File(java.io.File) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 40 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

From the class DependencyParserCoreNLPDemo, method main.

public static void main(String[] args) {
    // Use the first argument as an input file if given; otherwise fall back to a demo sentence.
    final String text = (args.length > 0)
            ? IOUtils.slurpFileNoExceptions(args[0], "utf-8")
            : "I can almost always tell when movies use fake dinosaurs.";
    Annotation document = new Annotation(text);
    Properties props = PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,depparse", "depparse.model", DependencyParser.DEFAULT_MODEL);
    AnnotationPipeline pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(document);
    // Log the basic dependency graph of each sentence, one edge per line.
    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        log.info(IOUtils.eolChar + graph.toString(SemanticGraph.OutputFormat.LIST));
    }
}
Also used : AnnotationPipeline(edu.stanford.nlp.pipeline.AnnotationPipeline) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) Properties(java.util.Properties) CoreMap(edu.stanford.nlp.util.CoreMap) Annotation(edu.stanford.nlp.pipeline.Annotation) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP)

Aggregations

Annotation (edu.stanford.nlp.pipeline.Annotation)138 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)84 CoreMap (edu.stanford.nlp.util.CoreMap)77 CoreLabel (edu.stanford.nlp.ling.CoreLabel)48 StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)43 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)31 ArrayList (java.util.ArrayList)31 Properties (java.util.Properties)28 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)21 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)18 Test (org.junit.Test)18 SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation)15 Tree (edu.stanford.nlp.trees.Tree)14 TokensAnnotation (edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation)12 TreeAnnotation (edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation)12 List (java.util.List)12 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)11 IOException (java.io.IOException)11 CorefChain (edu.stanford.nlp.coref.data.CorefChain)10 RNNCoreAnnotations (edu.stanford.nlp.neural.rnn.RNNCoreAnnotations)10