Search in sources :

Example 6 with EntityMention

use of edu.stanford.nlp.ie.machinereading.structure.EntityMention in project CoreNLP by stanfordnlp.

The class GenericDataSetReader, method preProcessSentences.

/**
 * Takes a dataset Annotation, generates its parse trees, and identifies the
 * syntactic heads (and head spans, if necessary) of every entity mention.
 *
 * @param dataset corpus-level Annotation whose sentences are (optionally)
 *                annotated by {@code processor} and then scanned for entity
 *                mentions needing syntactic heads
 * @throws RuntimeException if any sentence lacks a parse tree after the
 *                          (optional) processor has run — full syntactic
 *                          analysis is required downstream
 */
public void preProcessSentences(Annotation dataset) {
    logger.severe("GenericDataSetReader: Started pre-processing the corpus...");
    // run the processor, i.e., NER, parse etc.
    if (processor != null) {
        // we might already have syntactic annotation from offline files
        List<CoreMap> sentences = dataset.get(CoreAnnotations.SentencesAnnotation.class);
        if (!sentences.isEmpty() && !sentences.get(0).containsKey(TreeCoreAnnotations.TreeAnnotation.class)) {
            logger.info("Annotating dataset with " + processor);
            processor.annotate(dataset);
        } else {
            logger.info("Found existing syntactic annotations. Will not use the NLP processor.");
        }
    }
    List<CoreMap> sentences = dataset.get(CoreAnnotations.SentencesAnnotation.class);
    logger.fine("Extracted " + sentences.size() + " sentences.");
    for (CoreMap sentence : sentences) {
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        logger.fine("Processing sentence " + tokens);
        Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        if (tree == null)
            throw new RuntimeException("ERROR: MR requires full syntactic analysis!");
        // convert tree labels to CoreLabel if necessary
        // we need this because we store additional info in the CoreLabel, such as the spans of each tree
        convertToCoreLabels(tree);
        // store the tree spans, if not present already
        CoreLabel l = (CoreLabel) tree.label();
        if (forceGenerationOfIndexSpans || (!l.containsKey(CoreAnnotations.BeginIndexAnnotation.class) && !l.containsKey(CoreAnnotations.EndIndexAnnotation.class))) {
            tree.indexSpans(0);
            logger.fine("Index spans were generated.");
        } else {
            logger.fine("Index spans were NOT generated.");
        }
        logger.fine("Parse tree using CoreLabel:\n" + tree.pennString());
        // assign a syntactic head to every entity mention in this sentence
        if (sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class) != null) {
            for (EntityMention ent : sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class)) {
                logger.fine("Finding head for entity: " + ent);
                int headPos = assignSyntacticHead(ent, tree, tokens, calculateHeadSpan);
                logger.fine("Syntactic head of mention \"" + ent + "\" is: " + tokens.get(headPos).word());
                assert (ent.getExtent() != null);
                assert (ent.getHead() != null);
                assert (ent.getSyntacticHeadTokenPosition() >= 0);
            }
        }
    }
    logger.severe("GenericDataSetReader: Pre-processing complete.");
}
Also used : MachineReadingAnnotations(edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) EntityMention(edu.stanford.nlp.ie.machinereading.structure.EntityMention) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint)

Example 7 with EntityMention

use of edu.stanford.nlp.ie.machinereading.structure.EntityMention in project CoreNLP by stanfordnlp.

The class GenericDataSetReader, method modifyUsingCoreNLPNER.

/**
 * Overwrites the type of every entity mention in the document with the NER
 * label produced by a freshly-run CoreNLP pipeline (pos, lemma, ner): for each
 * mention, the NER tags of all tokens inside its extent are tallied and the
 * most frequent tag wins.
 */
private void modifyUsingCoreNLPNER(Annotation doc) {
    Properties props = new Properties();
    props.setProperty("annotators", "pos, lemma, ner");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false);
    pipeline.annotate(doc);
    for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
        List<EntityMention> mentions = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
        if (mentions == null) {
            continue;
        }
        List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        for (EntityMention mention : mentions) {
            // tally the NER tag of every token covered by this mention's extent
            Span extent = mention.getExtent();
            Counter<String> tagVotes = new ClassicCounter<>();
            for (int idx = extent.start(); idx < extent.end(); idx++) {
                tagVotes.incrementCount(sentenceTokens.get(idx).ner());
            }
            // the majority tag becomes the mention's new type
            mention.setType(Counters.argmax(tagVotes));
        }
    }
}
Also used : Properties(java.util.Properties) Span(edu.stanford.nlp.ie.machinereading.structure.Span) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint) MachineReadingAnnotations(edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) EntityMention(edu.stanford.nlp.ie.machinereading.structure.EntityMention) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 8 with EntityMention

use of edu.stanford.nlp.ie.machinereading.structure.EntityMention in project CoreNLP by stanfordnlp.

The class RothCONLL04Reader, method readSentence.

/**
 * Reads one sentence in Roth CoNLL04 format from the line iterator and converts
 * it into a sentence-level Annotation carrying the tokens, entity mentions, and
 * relation mentions.
 *
 * @param docId document identifier, stored under DocIDAnnotation
 * @param lineIterator iterator over the remaining lines of the corpus file;
 *                     consumed up to and including the second blank line that
 *                     ends this sentence's relation section
 * @return the populated sentence Annotation
 */
private Annotation readSentence(String docId, Iterator<String> lineIterator) {
    Annotation sentence = new Annotation("");
    sentence.set(CoreAnnotations.DocIDAnnotation.class, docId);
    sentence.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, new ArrayList<>());
    // we'll need to set things like the tokens and textContent after we've
    // fully read the sentence
    // contains the full text that we've read so far
    StringBuilder textContent = new StringBuilder();
    // how many tokens we've seen so far
    int tokenCount = 0;
    List<CoreLabel> tokens = new ArrayList<>();
    // when we've seen two blank lines in a row, this sentence is over (one
    // blank line separates the sentence's token section from its relations)
    int numBlankLinesSeen = 0;
    String sentenceID = null;
    // keeps track of entities we've seen so far, keyed by token index, for use by relations
    Map<String, EntityMention> indexToEntityMention = new HashMap<>();
    while (lineIterator.hasNext() && numBlankLinesSeen < 2) {
        String currentLine = lineIterator.next();
        // the corpus escapes literal commas as the token "COMMA"; undo that here
        currentLine = currentLine.replace("COMMA", ",");
        List<String> pieces = StringUtils.split(currentLine);
        String identifier;
        // the number of whitespace-separated fields determines the line type
        int size = pieces.size();
        switch(size) {
            case // blank line between sentences or relations
            1:
                numBlankLinesSeen++;
                break;
            case // relation
            3:
                // fields: arg1 token index, arg2 token index, relation type
                String type = pieces.get(2);
                List<ExtractionObject> args = new ArrayList<>();
                // NOTE(review): assumes both entity indices were defined earlier in this
                // sentence's token section; a malformed file would NPE on the Span below
                EntityMention entity1 = indexToEntityMention.get(pieces.get(0));
                EntityMention entity2 = indexToEntityMention.get(pieces.get(1));
                args.add(entity1);
                args.add(entity2);
                // the relation span stretches from the start of arg1 to the end of arg2
                Span span = new Span(entity1.getExtentTokenStart(), entity2.getExtentTokenEnd());
                // identifier = "relation" + sentenceID + "-" + sentence.getAllRelations().size();
                identifier = RelationMention.makeUniqueId();
                RelationMention relationMention = new RelationMention(identifier, sentence, span, type, null, args);
                AnnotationUtils.addRelationMention(sentence, relationMention);
                break;
            case // token
            9:
                /*
         * Roth token lines look like this:
         *
         * 19 Peop 9 O NNP/NNP Jamal/Ghosheh O O O
         */
                // Entities may be multiple words joined by '/'; we split these up
                List<String> words = StringUtils.split(pieces.get(5), "/");
                //List<String> postags = StringUtils.split(pieces.get(4),"/");
                String text = StringUtils.join(words, " ");
                identifier = "entity" + pieces.get(0) + '-' + pieces.get(2);
                // entity type of the word/expression
                String nerTag = getNormalizedNERTag(pieces.get(1));
                // the first field of the first token line doubles as the sentence ID
                if (sentenceID == null)
                    sentenceID = pieces.get(0);
                if (!nerTag.equals("O")) {
                    Span extentSpan = new Span(tokenCount, tokenCount + words.size());
                    // Temporarily sets the head span to equal the extent span.
                    // This is so the entity has a head (in particular, getValue() works) even if preprocessSentences isn't called.
                    // The head span is later modified if preprocessSentences is called.
                    EntityMention entity = new EntityMention(identifier, sentence, extentSpan, extentSpan, nerTag, null, null);
                    AnnotationUtils.addEntityMention(sentence, entity);
                    // we can get by using these indices as strings since we only use them
                    // as a hash key
                    String index = pieces.get(2);
                    indexToEntityMention.put(index, entity);
                }
                // int i =0;
                for (String word : words) {
                    CoreLabel label = new CoreLabel();
                    label.setWord(word);
                    //label.setTag(postags.get(i));
                    label.set(CoreAnnotations.TextAnnotation.class, word);
                    label.set(CoreAnnotations.ValueAnnotation.class, word);
                    // we don't set TokenBeginAnnotation or TokenEndAnnotation since we're
                    // not keeping track of character offsets
                    tokens.add(label);
                // i++;
                }
                textContent.append(text);
                textContent.append(' ');
                tokenCount += words.size();
                break;
        }
    }
    // the whole sentence has been read; attach the accumulated state
    sentence.set(CoreAnnotations.TextAnnotation.class, textContent.toString());
    sentence.set(CoreAnnotations.ValueAnnotation.class, textContent.toString());
    sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
    sentence.set(CoreAnnotations.SentenceIDAnnotation.class, sentenceID);
    return sentence;
}
Also used : RelationMention(edu.stanford.nlp.ie.machinereading.structure.RelationMention) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Span(edu.stanford.nlp.ie.machinereading.structure.Span) Annotation(edu.stanford.nlp.pipeline.Annotation) MachineReadingAnnotations(edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) EntityMention(edu.stanford.nlp.ie.machinereading.structure.EntityMention) ExtractionObject(edu.stanford.nlp.ie.machinereading.structure.ExtractionObject) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations)

Example 9 with EntityMention

use of edu.stanford.nlp.ie.machinereading.structure.EntityMention in project CoreNLP by stanfordnlp.

The class AceReader, method readDocument.

/**
   * Reads in a single ACE*.apf.xml file and converts it to sentence-level
   * annotations carrying entity, relation, and event mentions. However, you
   * probably should call parse() instead.
   *
   * @param prefix prefix of ACE filename to read (e.g.
   *          "/u/mcclosky/scr/data/ACE2005/english_test/bc/CNN_CF_20030827.1630.01")
   *          (no ".apf.xml" extension)
   * @param corpus corpus-level Annotation; not referenced in this method's body —
   *          presumably kept for signature symmetry with other readers, TODO confirm
   * @return list of sentence-level CoreMaps, one per non-SGML sentence in the document
   * @throws IOException if the ACE file cannot be read
   * @throws SAXException if the apf.xml content is malformed
   * @throws ParserConfigurationException if the XML parser cannot be configured
   */
private List<CoreMap> readDocument(String prefix, Annotation corpus) throws IOException, SAXException, ParserConfigurationException {
    logger.info("Reading document: " + prefix);
    List<CoreMap> results = new ArrayList<>();
    AceDocument aceDocument;
    // ACE2004 needs an explicit version argument; later versions use the default parse
    if (aceVersion.equals("ACE2004")) {
        aceDocument = AceDocument.parseDocument(prefix, false, aceVersion);
    } else {
        aceDocument = AceDocument.parseDocument(prefix, false);
    }
    String docId = aceDocument.getId();
    // map entity mention ID strings to their EntityMention counterparts
    Map<String, EntityMention> entityMentionMap = Generics.newHashMap();
    /*
    for (int sentenceIndex = 0; sentenceIndex < aceDocument.getSentenceCount(); sentenceIndex++) {
      List<AceToken> tokens = aceDocument.getSentence(sentenceIndex);
      StringBuffer b = new StringBuffer();
      for(AceToken t: tokens) b.append(t.getLiteral() + " " );
      logger.info("SENTENCE: " + b.toString());
    }
    */
    // tokenOffset is the document-level index of the current sentence's first token,
    // used so that mention spans can be converted to sentence-relative offsets
    int tokenOffset = 0;
    for (int sentenceIndex = 0; sentenceIndex < aceDocument.getSentenceCount(); sentenceIndex++) {
        List<AceToken> tokens = aceDocument.getSentence(sentenceIndex);
        List<CoreLabel> words = new ArrayList<>();
        StringBuilder textContent = new StringBuilder();
        for (int i = 0; i < tokens.size(); i++) {
            CoreLabel l = new CoreLabel();
            l.setWord(tokens.get(i).getLiteral());
            l.set(CoreAnnotations.ValueAnnotation.class, l.word());
            l.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, tokens.get(i).getByteStart());
            l.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, tokens.get(i).getByteEnd());
            words.add(l);
            if (i > 0)
                textContent.append(" ");
            textContent.append(tokens.get(i).getLiteral());
        }
        // skip "sentences" that are really just SGML tags (which come from using the RobustTokenizer)
        if (words.size() == 1) {
            String word = words.get(0).word();
            if (word.startsWith("<") && word.endsWith(">")) {
                tokenOffset += tokens.size();
                continue;
            }
        }
        CoreMap sentence = new Annotation(textContent.toString());
        sentence.set(CoreAnnotations.DocIDAnnotation.class, docId);
        sentence.set(CoreAnnotations.TokensAnnotation.class, words);
        logger.info("Reading sentence: \"" + textContent + "\"");
        List<AceEntityMention> entityMentions = aceDocument.getEntityMentions(sentenceIndex);
        List<AceRelationMention> relationMentions = aceDocument.getRelationMentions(sentenceIndex);
        List<AceEventMention> eventMentions = aceDocument.getEventMentions(sentenceIndex);
        // convert entity mentions
        for (AceEntityMention aceEntityMention : entityMentions) {
            // find the coreference chain (entity) this mention belongs to by linear scan
            String corefID = "";
            for (String entityID : aceDocument.getKeySetEntities()) {
                AceEntity e = aceDocument.getEntity(entityID);
                if (e.getMentions().contains(aceEntityMention)) {
                    corefID = entityID;
                    break;
                }
            }
            EntityMention convertedMention = convertAceEntityMention(aceEntityMention, docId, sentence, tokenOffset, corefID);
            //        EntityMention convertedMention = convertAceEntityMention(aceEntityMention, docId, sentence, tokenOffset);
            entityCounts.incrementCount(convertedMention.getType());
            logger.info("CONVERTED MENTION HEAD SPAN: " + convertedMention.getHead());
            logger.info("CONVERTED ENTITY MENTION: " + convertedMention);
            AnnotationUtils.addEntityMention(sentence, convertedMention);
            entityMentionMap.put(aceEntityMention.getId(), convertedMention);
        // TODO: make Entity objects as needed
        }
        // convert relation mentions
        for (AceRelationMention aceRelationMention : relationMentions) {
            RelationMention convertedMention = convertAceRelationMention(aceRelationMention, docId, sentence, entityMentionMap);
            if (convertedMention != null) {
                relationCounts.incrementCount(convertedMention.getType());
                logger.info("CONVERTED RELATION MENTION: " + convertedMention);
                AnnotationUtils.addRelationMention(sentence, convertedMention);
            }
        // TODO: make Relation objects
        }
        // convert EventMentions
        for (AceEventMention aceEventMention : eventMentions) {
            EventMention convertedMention = convertAceEventMention(aceEventMention, docId, sentence, entityMentionMap, tokenOffset);
            if (convertedMention != null) {
                eventCounts.incrementCount(convertedMention.getType());
                logger.info("CONVERTED EVENT MENTION: " + convertedMention);
                AnnotationUtils.addEventMention(sentence, convertedMention);
            }
        // TODO: make Event objects
        }
        results.add(sentence);
        tokenOffset += tokens.size();
    }
    return results;
}
Also used : EventMention(edu.stanford.nlp.ie.machinereading.structure.EventMention) AceEventMention(edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceEventMention) AceRelationMention(edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceRelationMention) RelationMention(edu.stanford.nlp.ie.machinereading.structure.RelationMention) ArrayList(java.util.ArrayList) AceEntity(edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceEntity) AceEventMention(edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceEventMention) Annotation(edu.stanford.nlp.pipeline.Annotation) AceDocument(edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceDocument) CoreLabel(edu.stanford.nlp.ling.CoreLabel) EntityMention(edu.stanford.nlp.ie.machinereading.structure.EntityMention) AceEntityMention(edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceEntityMention) AceToken(edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceToken) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) AceRelationMention(edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceRelationMention) AceEntityMention(edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceEntityMention) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 10 with EntityMention

use of edu.stanford.nlp.ie.machinereading.structure.EntityMention in project CoreNLP by stanfordnlp.

The class ProtobufAnnotationSerializer, method toProtoBuilder.

/**
   * <p>
   *   The method to extend by subclasses of the Protobuf Annotator if custom additions are added to Tokens.
   *   In contrast to {@link ProtobufAnnotationSerializer#toProto(edu.stanford.nlp.ling.CoreLabel)}, this function
   *   returns a builder that can be extended.
   * </p>
   *
   * @param sentence The sentence to save to a protocol buffer
   * @param keysToSerialize A set tracking which keys have been saved. It's important to remove any keys added to the proto
   *                        from this set, as the code tracks annotations to ensure lossless serialization.
   * @return a Sentence proto builder populated from the annotations present on {@code sentence}
   * @throws IllegalArgumentException if {@code sentence} is actually a CoreLabel
   * @throws IllegalStateException if relation mentions are present without entity mentions
   */
@SuppressWarnings("deprecation")
protected CoreNLPProtos.Sentence.Builder toProtoBuilder(CoreMap sentence, Set<Class<?>> keysToSerialize) {
    // Error checks
    if (sentence instanceof CoreLabel) {
        throw new IllegalArgumentException("CoreMap is actually a CoreLabel");
    }
    CoreNLPProtos.Sentence.Builder builder = CoreNLPProtos.Sentence.newBuilder();
    // Remove items serialized elsewhere from the required list
    keysToSerialize.remove(TextAnnotation.class);
    keysToSerialize.remove(NumerizedTokensAnnotation.class);
    // Required fields
    builder.setTokenOffsetBegin(getAndRegister(sentence, keysToSerialize, TokenBeginAnnotation.class));
    builder.setTokenOffsetEnd(getAndRegister(sentence, keysToSerialize, TokenEndAnnotation.class));
    // Get key set of CoreMap
    // ArrayCoreMap exposes its non-null keys directly; other CoreMaps get a copied view
    Set<Class<?>> keySet;
    if (sentence instanceof ArrayCoreMap) {
        keySet = ((ArrayCoreMap) sentence).keySetNotNull();
    } else {
        keySet = new IdentityHashSet<>(sentence.keySet());
    }
    // Tokens
    if (sentence.containsKey(TokensAnnotation.class)) {
        for (CoreLabel tok : sentence.get(TokensAnnotation.class)) {
            builder.addToken(toProto(tok));
        }
        keysToSerialize.remove(TokensAnnotation.class);
    }
    // Characters
    if (sentence.containsKey(SegmenterCoreAnnotations.CharactersAnnotation.class)) {
        for (CoreLabel c : sentence.get(SegmenterCoreAnnotations.CharactersAnnotation.class)) {
            builder.addCharacter(toProto(c));
        }
        keysToSerialize.remove(SegmenterCoreAnnotations.CharactersAnnotation.class);
    }
    // Optional fields
    if (keySet.contains(SentenceIndexAnnotation.class)) {
        builder.setSentenceIndex(getAndRegister(sentence, keysToSerialize, SentenceIndexAnnotation.class));
    }
    if (keySet.contains(CharacterOffsetBeginAnnotation.class)) {
        builder.setCharacterOffsetBegin(getAndRegister(sentence, keysToSerialize, CharacterOffsetBeginAnnotation.class));
    }
    if (keySet.contains(CharacterOffsetEndAnnotation.class)) {
        builder.setCharacterOffsetEnd(getAndRegister(sentence, keysToSerialize, CharacterOffsetEndAnnotation.class));
    }
    if (keySet.contains(TreeAnnotation.class)) {
        builder.setParseTree(toProto(getAndRegister(sentence, keysToSerialize, TreeAnnotation.class)));
    }
    if (keySet.contains(BinarizedTreeAnnotation.class)) {
        builder.setBinarizedParseTree(toProto(getAndRegister(sentence, keysToSerialize, BinarizedTreeAnnotation.class)));
    }
    if (keySet.contains(KBestTreesAnnotation.class)) {
        for (Tree tree : sentence.get(KBestTreesAnnotation.class)) {
            builder.addKBestParseTrees(toProto(tree));
            // NOTE(review): this remove runs once per tree; hoisting it out of the loop
            // would be equivalent (Set.remove is idempotent) — confirm before changing
            keysToSerialize.remove(KBestTreesAnnotation.class);
        }
    }
    if (keySet.contains(SentimentCoreAnnotations.SentimentAnnotatedTree.class)) {
        builder.setAnnotatedParseTree(toProto(getAndRegister(sentence, keysToSerialize, SentimentCoreAnnotations.SentimentAnnotatedTree.class)));
    }
    if (keySet.contains(SentimentCoreAnnotations.SentimentClass.class)) {
        builder.setSentiment(getAndRegister(sentence, keysToSerialize, SentimentCoreAnnotations.SentimentClass.class));
    }
    if (keySet.contains(BasicDependenciesAnnotation.class)) {
        builder.setBasicDependencies(toProto(getAndRegister(sentence, keysToSerialize, BasicDependenciesAnnotation.class)));
    }
    if (keySet.contains(CollapsedDependenciesAnnotation.class)) {
        builder.setCollapsedDependencies(toProto(getAndRegister(sentence, keysToSerialize, CollapsedDependenciesAnnotation.class)));
    }
    if (keySet.contains(CollapsedCCProcessedDependenciesAnnotation.class)) {
        builder.setCollapsedCCProcessedDependencies(toProto(getAndRegister(sentence, keysToSerialize, CollapsedCCProcessedDependenciesAnnotation.class)));
    }
    if (keySet.contains(AlternativeDependenciesAnnotation.class)) {
        builder.setAlternativeDependencies(toProto(getAndRegister(sentence, keysToSerialize, AlternativeDependenciesAnnotation.class)));
    }
    if (keySet.contains(EnhancedDependenciesAnnotation.class)) {
        builder.setEnhancedDependencies(toProto(getAndRegister(sentence, keysToSerialize, EnhancedDependenciesAnnotation.class)));
    }
    if (keySet.contains(EnhancedPlusPlusDependenciesAnnotation.class)) {
        builder.setEnhancedPlusPlusDependencies(toProto(getAndRegister(sentence, keysToSerialize, EnhancedPlusPlusDependenciesAnnotation.class)));
    }
    // Paragraph index is stored on the first token, not on the sentence itself
    if (keySet.contains(TokensAnnotation.class) && getAndRegister(sentence, keysToSerialize, TokensAnnotation.class).size() > 0 && getAndRegister(sentence, keysToSerialize, TokensAnnotation.class).get(0).containsKey(ParagraphAnnotation.class)) {
        builder.setParagraph(getAndRegister(sentence, keysToSerialize, TokensAnnotation.class).get(0).get(ParagraphAnnotation.class));
    }
    if (keySet.contains(NumerizedTokensAnnotation.class)) {
        builder.setHasNumerizedTokensAnnotation(true);
    } else {
        builder.setHasNumerizedTokensAnnotation(false);
    }
    if (keySet.contains(NaturalLogicAnnotations.EntailedSentencesAnnotation.class)) {
        for (SentenceFragment entailedSentence : getAndRegister(sentence, keysToSerialize, NaturalLogicAnnotations.EntailedSentencesAnnotation.class)) {
            builder.addEntailedSentence(toProto(entailedSentence));
        }
    }
    if (keySet.contains(NaturalLogicAnnotations.EntailedClausesAnnotation.class)) {
        for (SentenceFragment entailedClause : getAndRegister(sentence, keysToSerialize, NaturalLogicAnnotations.EntailedClausesAnnotation.class)) {
            builder.addEntailedClause(toProto(entailedClause));
        }
    }
    if (keySet.contains(NaturalLogicAnnotations.RelationTriplesAnnotation.class)) {
        for (RelationTriple triple : getAndRegister(sentence, keysToSerialize, NaturalLogicAnnotations.RelationTriplesAnnotation.class)) {
            builder.addOpenieTriple(toProto(triple));
        }
    }
    if (keySet.contains(KBPTriplesAnnotation.class)) {
        for (RelationTriple triple : getAndRegister(sentence, keysToSerialize, KBPTriplesAnnotation.class)) {
            builder.addKbpTriple(toProto(triple));
        }
    }
    // Non-default annotators
    if (keySet.contains(EntityMentionsAnnotation.class)) {
        builder.setHasRelationAnnotations(true);
        for (EntityMention entity : getAndRegister(sentence, keysToSerialize, EntityMentionsAnnotation.class)) {
            builder.addEntity(toProto(entity));
        }
    } else {
        builder.setHasRelationAnnotations(false);
    }
    if (keySet.contains(RelationMentionsAnnotation.class)) {
        // relation mentions are only valid alongside entity mentions
        if (!builder.getHasRelationAnnotations()) {
            throw new IllegalStateException("Registered entity mentions without relation mentions");
        }
        for (RelationMention relation : getAndRegister(sentence, keysToSerialize, RelationMentionsAnnotation.class)) {
            builder.addRelation(toProto(relation));
        }
    }
    // add each of the mentions in the List<Mentions> for this sentence
    if (keySet.contains(CorefMentionsAnnotation.class)) {
        builder.setHasCorefMentionsAnnotation(true);
        for (Mention m : sentence.get(CorefMentionsAnnotation.class)) {
            builder.addMentionsForCoref(toProto(m));
        }
        keysToSerialize.remove(CorefMentionsAnnotation.class);
    }
    // Entity mentions
    if (keySet.contains(MentionsAnnotation.class)) {
        for (CoreMap mention : sentence.get(MentionsAnnotation.class)) {
            builder.addMentions(toProtoMention(mention));
        }
        keysToSerialize.remove(MentionsAnnotation.class);
    }
    // add a sentence id if it exists
    if (keySet.contains(SentenceIDAnnotation.class))
        builder.setSentenceID(getAndRegister(sentence, keysToSerialize, SentenceIDAnnotation.class));
    // Return
    return builder;
}
Also used : RelationMention(edu.stanford.nlp.ie.machinereading.structure.RelationMention) EntityMention(edu.stanford.nlp.ie.machinereading.structure.EntityMention) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) RelationMention(edu.stanford.nlp.ie.machinereading.structure.RelationMention) Mention(edu.stanford.nlp.coref.data.Mention) EntityMention(edu.stanford.nlp.ie.machinereading.structure.EntityMention) Tree(edu.stanford.nlp.trees.Tree) SegmenterCoreAnnotations(edu.stanford.nlp.ling.SegmenterCoreAnnotations) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel)

Aggregations

EntityMention (edu.stanford.nlp.ie.machinereading.structure.EntityMention)14 MachineReadingAnnotations (edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations)9 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)9 CoreLabel (edu.stanford.nlp.ling.CoreLabel)8 CoreMap (edu.stanford.nlp.util.CoreMap)8 RelationMention (edu.stanford.nlp.ie.machinereading.structure.RelationMention)7 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)5 Tree (edu.stanford.nlp.trees.Tree)4 ArrayList (java.util.ArrayList)4 Span (edu.stanford.nlp.ie.machinereading.structure.Span)3 RelationTriple (edu.stanford.nlp.ie.util.RelationTriple)3 Annotation (edu.stanford.nlp.pipeline.Annotation)3 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)3 SentimentCoreAnnotations (edu.stanford.nlp.sentiment.SentimentCoreAnnotations)3 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)2 CorefChain (edu.stanford.nlp.coref.data.CorefChain)2 AceEntityMention (edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceEntityMention)2 NaturalLogicAnnotations (edu.stanford.nlp.naturalli.NaturalLogicAnnotations)2 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)2 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)2