Example 81 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class UniversalDependenciesFeatureAnnotator method addFeatures.

public void addFeatures(SemanticGraph sg, Tree t, boolean addLemma, boolean addUPOS) {
    Set<Integer> imperatives = t != null ? getImperatives(t) : new HashSet<>();
    for (IndexedWord word : sg.vertexListSorted()) {
        String posTag = word.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        String token = word.get(CoreAnnotations.TextAnnotation.class);
        Integer index = word.get(CoreAnnotations.IndexAnnotation.class);
        HashMap<String, String> wordFeatures = word.get(CoreAnnotations.CoNLLUFeats.class);
        if (wordFeatures == null) {
            wordFeatures = new HashMap<>();
            word.set(CoreAnnotations.CoNLLUFeats.class, wordFeatures);
        }
        /* Features that only depend on the word and the PTB POS tag. */
        wordFeatures.putAll(getPOSFeatures(token, posTag));
        /* Semantic graph features. */
        wordFeatures.putAll(getGraphFeatures(sg, word));
        /* Handle VBs. */
        if (imperatives.contains(index)) {
            /* Imperative */
            wordFeatures.put("VerbForm", "Fin");
            wordFeatures.put("Mood", "Imp");
        } else if (posTag.equals("VB")) {
            /* Infinitive */
            wordFeatures.put("VerbForm", "Inf");
        /* Subjunctive detection too unreliable. */
        //} else {
        //  /* Present subjunctive */
        //  wordFeatures.put("VerbForm", "Fin");
        //  wordFeatures.put("Tense", "Pres");
        //  wordFeatures.put("Mood", "Subj");
        //}
        }
        String lemma = word.get(CoreAnnotations.LemmaAnnotation.class);
        if (addLemma && (lemma == null || lemma.equals("_"))) {
            word.set(CoreAnnotations.LemmaAnnotation.class, morphology.lemma(token, posTag));
        }
    }
    if (addUPOS && t != null) {
        t = UniversalPOSMapper.mapTree(t);
        List<Label> uPOSTags = t.preTerminalYield();
        List<IndexedWord> yield = sg.vertexListSorted();
        for (IndexedWord word : yield) {
            Label uPOSTag = uPOSTags.get(word.index() - 1);
            word.set(CoreAnnotations.CoarseTagAnnotation.class, uPOSTag.value());
        }
    }
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Label(edu.stanford.nlp.ling.Label) CoreLabel(edu.stanford.nlp.ling.CoreLabel) IndexedWord(edu.stanford.nlp.ling.IndexedWord)
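
For context, a minimal driver for addFeatures might look like the sketch below. This is a sketch under assumptions, not documented usage: the no-argument constructor of UniversalDependenciesFeatureAnnotator and the edu.stanford.nlp.trees.ud import path are assumptions, and the pipeline must include a constituency parser so that both the tree and the basic dependency graph are available.

import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.trees.ud.UniversalDependenciesFeatureAnnotator;
import edu.stanford.nlp.util.CoreMap;

public class AddFeaturesDemo {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        Annotation doc = new Annotation("Take the dog out.");
        pipeline.annotate(doc);

        // Hypothetical wiring: the no-argument constructor is an assumption.
        UniversalDependenciesFeatureAnnotator featureAnnotator = new UniversalDependenciesFeatureAnnotator();
        for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
            Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            SemanticGraph sg = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
            // Adds UD morphological features, lemmas, and UPOS tags in place.
            featureAnnotator.addFeatures(sg, tree, true, true);
        }
    }
}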

Example 82 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class TSVUtils method parseTree.

/**
   * Parse a CoNLL formatted tree into a SemanticGraph.
   * @param conll The CoNLL tree to parse.
   * @param tokens The tokens of the sentence, to form the backing labels of the tree.
   * @return A semantic graph of the sentence, according to the given tree.
   */
public static SemanticGraph parseTree(String conll, List<CoreLabel> tokens) {
    SemanticGraph tree = new SemanticGraph();
    if (conll == null || conll.isEmpty()) {
        return tree;
    }
    String[] treeLines = newline.split(conll);
    IndexedWord[] vertices = new IndexedWord[tokens.size() + 2];
    // Add edges
    for (String line : treeLines) {
        // Parse row
        String[] fields = tab.split(line);
        int dependentIndex = Integer.parseInt(fields[0]);
        if (vertices[dependentIndex] == null) {
            if (dependentIndex > tokens.size()) {
                // Bizarre mismatch in sizes; the malt parser seems to do this often
                return new SemanticGraph();
            }
            vertices[dependentIndex] = new IndexedWord(tokens.get(dependentIndex - 1));
        }
        IndexedWord dependent = vertices[dependentIndex];
        int governorIndex = Integer.parseInt(fields[1]);
        if (governorIndex > tokens.size()) {
            // Bizarre mismatch in sizes; the malt parser seems to do this often
            return new SemanticGraph();
        }
        if (vertices[governorIndex] == null && governorIndex > 0) {
            vertices[governorIndex] = new IndexedWord(tokens.get(governorIndex - 1));
        }
        IndexedWord governor = vertices[governorIndex];
        String relation = fields[2];
        // Process row
        if (governorIndex == 0) {
            tree.addRoot(dependent);
        } else {
            tree.addVertex(dependent);
            if (!tree.containsVertex(governor)) {
                tree.addVertex(governor);
            }
            if (!"ref".equals(relation)) {
                tree.addEdge(governor, dependent, GrammaticalRelation.valueOf(Language.English, relation), Double.NEGATIVE_INFINITY, false);
            }
        }
    }
    return tree;
}
Also used : SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord)
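
Here is a hypothetical driver for parseTree. The three-column line format (dependent index, governor index with 0 for the root, relation name) is read directly off the parsing code above; the import path for TSVUtils is an assumption and may need adjusting to your CoreNLP version.

import java.util.ArrayList;
import java.util.List;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.util.TSVUtils; // package is an assumption; adjust if TSVUtils lives elsewhere

public class ParseTreeDemo {
    private static CoreLabel token(String word, int index) {
        CoreLabel label = new CoreLabel();
        label.setWord(word);
        label.setValue(word);
        label.setIndex(index); // 1-based, matching the CoNLL indices
        return label;
    }

    public static void main(String[] args) {
        List<CoreLabel> tokens = new ArrayList<>();
        tokens.add(token("Dogs", 1));
        tokens.add(token("chase", 2));
        tokens.add(token("cats", 3));
        // One line per token: dependent index <TAB> governor index (0 = root) <TAB> relation
        String conll = "1\t2\tnsubj\n2\t0\troot\n3\t2\tdobj";
        SemanticGraph graph = TSVUtils.parseTree(conll, tokens);
        System.out.println(graph);
    }
}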

Example 83 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class ProtobufAnnotationSerializer method fromProto.

/**
   * Returns a complete document, intended to mimic a document passed as input to
   * {@link ProtobufAnnotationSerializer#toProto(Annotation)} as closely as possible.
   * That is, most common fields are serialized, but there is no guarantee that custom additions
   * will be saved and retrieved.
   *
   * @param proto The protocol buffer to read the document from.
   * @return An Annotation corresponding to the read protobuf.
   */
@SuppressWarnings("deprecation")
public Annotation fromProto(CoreNLPProtos.Document proto) {
    if (Thread.interrupted()) {
        throw new RuntimeInterruptedException();
    }
    // Set text
    Annotation ann = new Annotation(proto.getText());
    // if there are characters, add characters
    if (proto.getCharacterCount() > 0) {
        List<CoreLabel> docChars = new ArrayList<>();
        for (CoreNLPProtos.Token c : proto.getCharacterList()) {
            docChars.add(fromProto(c));
        }
        ann.set(SegmenterCoreAnnotations.CharactersAnnotation.class, docChars);
    }
    // Add tokens
    List<CoreLabel> tokens = new ArrayList<>();
    if (proto.getSentenceCount() > 0) {
        // Populate the tokens from the sentence
        for (CoreNLPProtos.Sentence sentence : proto.getSentenceList()) {
            // It's conceivable that the sentences are not contiguous -- pad this with nulls
            while (sentence.hasTokenOffsetBegin() && tokens.size() < sentence.getTokenOffsetBegin()) {
                tokens.add(null);
            }
            // Read the sentence
            for (CoreNLPProtos.Token token : sentence.getTokenList()) {
                CoreLabel coreLabel = fromProto(token);
                // Set docid
                if (proto.hasDocID()) {
                    coreLabel.setDocID(proto.getDocID());
                }
                if (token.hasTokenBeginIndex() && token.hasTokenEndIndex()) {
                    // This is usually true, if enough annotators are defined
                    while (tokens.size() < sentence.getTokenOffsetEnd()) {
                        tokens.add(null);
                    }
                    for (int i = token.getTokenBeginIndex(); i < token.getTokenEndIndex(); ++i) {
                        // every position the multi-word token spans should point at the same label
                        tokens.set(i, coreLabel);
                    }
                } else {
                    // Assume this token spans a single token, and just add it to the tokens list
                    tokens.add(coreLabel);
                }
            }
        }
    } else if (proto.getSentencelessTokenCount() > 0) {
        // Eek -- no sentences. Try to recover tokens directly
        for (CoreNLPProtos.Token token : proto.getSentencelessTokenList()) {
            CoreLabel coreLabel = fromProto(token);
            // Set docid
            if (proto.hasDocID()) {
                coreLabel.setDocID(proto.getDocID());
            }
            tokens.add(coreLabel);
        }
    }
    if (!tokens.isEmpty()) {
        ann.set(TokensAnnotation.class, tokens);
    }
    // Add sentences
    List<CoreMap> sentences = new ArrayList<>(proto.getSentenceCount());
    for (int sentIndex = 0; sentIndex < proto.getSentenceCount(); ++sentIndex) {
        CoreNLPProtos.Sentence sentence = proto.getSentence(sentIndex);
        CoreMap map = fromProtoNoTokens(sentence);
        if (!tokens.isEmpty() && sentence.hasTokenOffsetBegin() && sentence.hasTokenOffsetEnd() && map.get(TokensAnnotation.class) == null) {
            // Set tokens for sentence
            int tokenBegin = sentence.getTokenOffsetBegin();
            int tokenEnd = sentence.getTokenOffsetEnd();
            assert tokenBegin <= tokens.size() && tokenBegin <= tokenEnd;
            assert tokenEnd <= tokens.size();
            map.set(TokensAnnotation.class, tokens.subList(tokenBegin, tokenEnd));
            // Set sentence index + token index + paragraph index
            for (int i = tokenBegin; i < tokenEnd; ++i) {
                tokens.get(i).setSentIndex(sentIndex);
                tokens.get(i).setIndex(i - sentence.getTokenOffsetBegin() + 1);
                if (sentence.hasParagraph()) {
                    tokens.get(i).set(ParagraphAnnotation.class, sentence.getParagraph());
                }
            }
            // Set text
            int characterBegin = sentence.getCharacterOffsetBegin();
            int characterEnd = sentence.getCharacterOffsetEnd();
            if (characterEnd <= proto.getText().length()) {
                // The usual case -- get the text from the document text
                map.set(TextAnnotation.class, proto.getText().substring(characterBegin, characterEnd));
            } else {
                // The document text is wrong -- guess the text from the tokens
                map.set(TextAnnotation.class, recoverOriginalText(tokens.subList(tokenBegin, tokenEnd), sentence));
            }
        }
        // End iteration
        sentences.add(map);
    }
    if (!sentences.isEmpty()) {
        ann.set(SentencesAnnotation.class, sentences);
    }
    // Set DocID
    String docid = null;
    if (proto.hasDocID()) {
        docid = proto.getDocID();
        ann.set(DocIDAnnotation.class, docid);
    }
    // Set reference time
    if (proto.hasDocDate()) {
        ann.set(DocDateAnnotation.class, proto.getDocDate());
    }
    if (proto.hasCalendar()) {
        GregorianCalendar calendar = new GregorianCalendar();
        calendar.setTimeInMillis(proto.getCalendar());
        ann.set(CalendarAnnotation.class, calendar);
    }
    // Set coref chain
    Map<Integer, CorefChain> corefChains = new HashMap<>();
    for (CoreNLPProtos.CorefChain chainProto : proto.getCorefChainList()) {
        CorefChain chain = fromProto(chainProto, ann);
        corefChains.put(chain.getChainID(), chain);
    }
    if (!corefChains.isEmpty()) {
        ann.set(CorefChainAnnotation.class, corefChains);
    }
    // Maps for accessing Mentions later in this method: speakerInfo must be added to each
    // Mention, and a SpeakerInfo can reference any Mention in the document, so build
    // id -> Mention and id -> CoreNLPProtos.Mention lookups first.
    HashMap<Integer, Mention> idToMention = new HashMap<>();
    HashMap<Integer, CoreNLPProtos.Mention> idToProtoMention = new HashMap<>();
    // Set things in the sentence that need a document context.
    for (int sentenceIndex = 0; sentenceIndex < proto.getSentenceCount(); ++sentenceIndex) {
        CoreNLPProtos.Sentence sentence = proto.getSentenceList().get(sentenceIndex);
        CoreMap map = sentences.get(sentenceIndex);
        List<CoreLabel> sentenceTokens = map.get(TokensAnnotation.class);
        // Set dependency graphs
        if (sentence.hasBasicDependencies()) {
            map.set(BasicDependenciesAnnotation.class, fromProto(sentence.getBasicDependencies(), sentenceTokens, docid));
        }
        if (sentence.hasCollapsedDependencies()) {
            map.set(CollapsedDependenciesAnnotation.class, fromProto(sentence.getCollapsedDependencies(), sentenceTokens, docid));
        }
        if (sentence.hasCollapsedCCProcessedDependencies()) {
            map.set(CollapsedCCProcessedDependenciesAnnotation.class, fromProto(sentence.getCollapsedCCProcessedDependencies(), sentenceTokens, docid));
        }
        if (sentence.hasAlternativeDependencies()) {
            map.set(AlternativeDependenciesAnnotation.class, fromProto(sentence.getAlternativeDependencies(), sentenceTokens, docid));
        }
        if (sentence.hasEnhancedDependencies()) {
            map.set(EnhancedDependenciesAnnotation.class, fromProto(sentence.getEnhancedDependencies(), sentenceTokens, docid));
        }
        if (sentence.hasEnhancedPlusPlusDependencies()) {
            map.set(EnhancedPlusPlusDependenciesAnnotation.class, fromProto(sentence.getEnhancedPlusPlusDependencies(), sentenceTokens, docid));
        }
        // Set entailed sentences
        if (sentence.getEntailedSentenceCount() > 0) {
            Set<SentenceFragment> entailedSentences = sentence.getEntailedSentenceList().stream().map(frag -> fromProto(frag, map.get(EnhancedPlusPlusDependenciesAnnotation.class))).collect(Collectors.toSet());
            map.set(NaturalLogicAnnotations.EntailedSentencesAnnotation.class, entailedSentences);
        }
        if (sentence.getEntailedClauseCount() > 0) {
            Set<SentenceFragment> entailedClauses = sentence.getEntailedClauseList().stream().map(frag -> fromProto(frag, map.get(CollapsedDependenciesAnnotation.class))).collect(Collectors.toSet());
            map.set(NaturalLogicAnnotations.EntailedClausesAnnotation.class, entailedClauses);
        }
        // Set relation triples
        if (sentence.getOpenieTripleCount() > 0) {
            List<RelationTriple> triples = new ArrayList<>();
            for (CoreNLPProtos.RelationTriple triple : sentence.getOpenieTripleList()) {
                triples.add(fromProto(triple, ann, sentenceIndex));
            }
            map.set(NaturalLogicAnnotations.RelationTriplesAnnotation.class, triples);
        }
        // Redo some light annotation
        if (map.containsKey(TokensAnnotation.class) && (!sentence.hasHasNumerizedTokensAnnotation() || sentence.getHasNumerizedTokensAnnotation())) {
            map.set(NumerizedTokensAnnotation.class, NumberNormalizer.findAndMergeNumbers(map));
        }
        // Add the CoreLabel and IndexedWord info to each mention. When Mentions are serialized,
        // only each token's index in the sentence is stored for CoreLabels and IndexedWords;
        // this is the first point where the de-serialized sentence has tokens, so restore them here.
        int mentionInt = 0;
        for (CoreNLPProtos.Mention protoMention : sentence.getMentionsForCorefList()) {
            // get the mention
            Mention mentionToUpdate = map.get(CorefMentionsAnnotation.class).get(mentionInt);
            // store these in hash for more processing later in this method
            idToMention.put(mentionToUpdate.mentionID, mentionToUpdate);
            idToProtoMention.put(mentionToUpdate.mentionID, protoMention);
            // update the values
            // A negative token index means the field was not set when serializing
            int headIndexedWordIndex = protoMention.getHeadIndexedWord().getTokenIndex();
            if (headIndexedWordIndex >= 0) {
                mentionToUpdate.headIndexedWord = new IndexedWord(sentenceTokens.get(headIndexedWordIndex));
                mentionToUpdate.headIndexedWord.setCopyCount(protoMention.getHeadIndexedWord().getCopyCount());
            }
            int dependingVerbIndex = protoMention.getDependingVerb().getTokenIndex();
            if (dependingVerbIndex >= 0) {
                mentionToUpdate.dependingVerb = new IndexedWord(sentenceTokens.get(dependingVerbIndex));
                mentionToUpdate.dependingVerb.setCopyCount(protoMention.getDependingVerb().getCopyCount());
            }
            int headWordIndex = protoMention.getHeadWord().getTokenIndex();
            if (headWordIndex >= 0) {
                mentionToUpdate.headWord = sentenceTokens.get(headWordIndex);
            }
            mentionToUpdate.sentenceWords = new ArrayList<>();
            for (CoreNLPProtos.IndexedWord clp : protoMention.getSentenceWordsList()) {
                int ti = clp.getTokenIndex();
                mentionToUpdate.sentenceWords.add(sentenceTokens.get(ti));
            }
            mentionToUpdate.originalSpan = new ArrayList<>();
            for (CoreNLPProtos.IndexedWord clp : protoMention.getOriginalSpanList()) {
                int ti = clp.getTokenIndex();
                mentionToUpdate.originalSpan.add(sentenceTokens.get(ti));
            }
            if (protoMention.getHasBasicDependency()) {
                mentionToUpdate.basicDependency = map.get(BasicDependenciesAnnotation.class);
            }
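            // (the "Depenedncy" spelling below matches the protobuf-generated accessor name)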
            if (protoMention.getHasEnhancedDepenedncy()) {
                mentionToUpdate.enhancedDependency = map.get(EnhancedDependenciesAnnotation.class);
            }
            if (protoMention.getHasContextParseTree()) {
                mentionToUpdate.contextParseTree = map.get(TreeAnnotation.class);
            }
            // move on to next mention
            mentionInt++;
        }
    }
    // Set quotes
    List<CoreMap> quotes = proto.getQuoteList().stream().map(quote -> fromProto(quote, tokens)).collect(Collectors.toList());
    if (!quotes.isEmpty()) {
        ann.set(QuotationsAnnotation.class, quotes);
    }
    // Set NERmention
    List<CoreMap> mentions = proto.getMentionsList().stream().map(this::fromProto).collect(Collectors.toList());
    if (!mentions.isEmpty()) {
        ann.set(MentionsAnnotation.class, mentions);
    }
    // Restore the SpeakerInfo for each Mention, including its Set<Mention>
    for (int mentionID : idToMention.keySet()) {
        // this is the Mention message corresponding to this Mention
        Mention mentionToUpdate = idToMention.get(mentionID);
        CoreNLPProtos.Mention correspondingProtoMention = idToProtoMention.get(mentionID);
        if (!correspondingProtoMention.hasSpeakerInfo()) {
            // no speakerInfo was stored for this Mention, so just continue to the next one
            continue;
        }
        // if we're here we know a speakerInfo was stored
        SpeakerInfo speakerInfo = fromProto(correspondingProtoMention.getSpeakerInfo());
        // MentionID is ID in document, 0, 1, 2, etc...
        for (int speakerInfoMentionID : correspondingProtoMention.getSpeakerInfo().getMentionsList()) {
            speakerInfo.addMention(idToMention.get(speakerInfoMentionID));
        }
        // now the SpeakerInfo for this Mention should be fully restored
        mentionToUpdate.speakerInfo = speakerInfo;
    }
    // Return
    return ann;
}
Also used : ExtractionObject(edu.stanford.nlp.ie.machinereading.structure.ExtractionObject) java.util(java.util) CorefChain(edu.stanford.nlp.coref.data.CorefChain) edu.stanford.nlp.util(edu.stanford.nlp.util) Tree(edu.stanford.nlp.trees.Tree) Dictionaries(edu.stanford.nlp.coref.data.Dictionaries) MachineReadingAnnotations(edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations) TimeAnnotations(edu.stanford.nlp.time.TimeAnnotations) RelationMention(edu.stanford.nlp.ie.machinereading.structure.RelationMention) Mention(edu.stanford.nlp.coref.data.Mention) CoreAnnotation(edu.stanford.nlp.ling.CoreAnnotation) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) Language(edu.stanford.nlp.international.Language) RNNCoreAnnotations(edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) LabeledScoredTreeNode(edu.stanford.nlp.trees.LabeledScoredTreeNode) Timex(edu.stanford.nlp.time.Timex) IndexedWord(edu.stanford.nlp.ling.IndexedWord) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) edu.stanford.nlp.naturalli(edu.stanford.nlp.naturalli) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) NumberNormalizer(edu.stanford.nlp.ie.NumberNormalizer) Collectors(java.util.stream.Collectors) EntityMention(edu.stanford.nlp.ie.machinereading.structure.EntityMention) SegmenterCoreAnnotations(edu.stanford.nlp.ling.SegmenterCoreAnnotations) SpeakerInfo(edu.stanford.nlp.coref.data.SpeakerInfo) Span(edu.stanford.nlp.ie.machinereading.structure.Span) Word(edu.stanford.nlp.ling.Word) java.io(java.io) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations)
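
A round-trip sketch for fromProto: annotate a small document, serialize it with toProto (the counterpart named in the Javadoc above), and read it back. The annotator list is illustrative; per the Javadoc, only common fields are expected to survive the round trip.

import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.CoreNLPProtos;
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class ProtoRoundTripDemo {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        Annotation original = new Annotation("CoreNLP documents round-trip through protobufs.");
        pipeline.annotate(original);

        ProtobufAnnotationSerializer serializer = new ProtobufAnnotationSerializer();
        CoreNLPProtos.Document proto = serializer.toProto(original);
        Annotation restored = serializer.fromProto(proto);
        // Common fields survive the round trip; custom annotations may not.
        System.out.println(restored.get(CoreAnnotations.TextAnnotation.class));
    }
}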

Example 84 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class CustomAnnotationSerializer method saveDependencyGraph.

/**
   * Saves all arcs in the graph on two lines: the first line contains the vertices, the second the edges.
   * @param graph The dependency graph to save; may be null, in which case two empty lines are printed.
   * @param pw The writer to save to.
   */
private static void saveDependencyGraph(SemanticGraph graph, PrintWriter pw) {
    if (graph == null) {
        pw.println();
        pw.println();
        return;
    }
    boolean outputHeader = false;
    for (IndexedWord node : graph.vertexSet()) {
        // indicate: docid, sentence index
        if (!outputHeader) {
            String docId = node.get(CoreAnnotations.DocIDAnnotation.class);
            if (docId != null && docId.length() > 0)
                pw.print(docId);
            else
                pw.print("-");
            pw.print("\t");
            pw.print(node.get(CoreAnnotations.SentenceIndexAnnotation.class));
            outputHeader = true;
        }
        pw.print("\t");
        pw.print(node.index());
        // These annotations are usually not set, so print them only if necessary
        if (node.copyCount() > 0) {
            pw.print("-");
            pw.print(node.copyCount());
        // System.out.println("FOUND COPY ANNOTATION: " + node.get(CoreAnnotations.CopyAnnotation.class));
        }
        if (graph.getRoots().contains(node)) {
            if (node.copyCount() > 0) {
                pw.print("-R");
            } else {
                pw.print("-0-R");
            }
        }
    }
    pw.println();
    // second line: all edges
    boolean first = true;
    for (SemanticGraphEdge edge : graph.edgeIterable()) {
        if (!first)
            pw.print("\t");
        String rel = edge.getRelation().toString();
        // no spaces allowed in the relation name
        // note that they might occur due to the tokenization of HTML/XML/RDF tags
        rel = rel.replaceAll("\\s+", "");
        pw.print(rel);
        pw.print(" ");
        pw.print(edge.getSource().index());
        pw.print(" ");
        pw.print(edge.getTarget().index());
        if (edge.isExtra() || edge.getSource().copyCount() > 0 || edge.getTarget().copyCount() > 0) {
            pw.print(" ");
            pw.print(edge.isExtra());
            pw.print(" ");
            pw.print(edge.getSource().copyCount());
            pw.print(" ");
            pw.print(edge.getTarget().copyCount());
        }
        first = false;
    }
    pw.println();
}
Also used : TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)
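
saveDependencyGraph is private, so it is exercised through the serializer's public API. A minimal sketch, assuming a no-argument CustomAnnotationSerializer constructor and the write/read signatures of the AnnotationSerializer base class (some versions may take constructor flags instead).

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.CustomAnnotationSerializer;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.Pair;

public class CustomSerializerDemo {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        Annotation doc = new Annotation("Each dependency graph is written on two lines.");
        pipeline.annotate(doc);

        // No-argument constructor is an assumption; adjust to your version.
        CustomAnnotationSerializer serializer = new CustomAnnotationSerializer();
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        serializer.write(doc, bytes).close();

        // Reading back returns the annotation plus the remainder of the stream.
        Pair<Annotation, InputStream> restored =
                serializer.read(new ByteArrayInputStream(bytes.toByteArray()));
        System.out.println(restored.first().get(CoreAnnotations.SentencesAnnotation.class).size());
    }
}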

Example 85 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class CoNLLOutputter method print.

@Override
public void print(Annotation doc, OutputStream target, Options options) throws IOException {
    PrintWriter writer = new PrintWriter(IOUtils.encodedOutputStreamWriter(target, options.encoding));
    // vv A bunch of nonsense to get tokens vv
    if (doc.get(CoreAnnotations.SentencesAnnotation.class) != null) {
        for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
            if (sentence.get(CoreAnnotations.TokensAnnotation.class) != null) {
                List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
                SemanticGraph depTree = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
                // Compute the set of root indices once per sentence rather than once per token
                Set<Integer> rootSet = depTree == null
                        ? java.util.Collections.emptySet()
                        : depTree.getRoots().stream().map(IndexedWord::index).collect(Collectors.toSet());
                for (int i = 0; i < tokens.size(); ++i) {
                    // Newline if applicable
                    if (i > 0) {
                        writer.println();
                    }
                    // Try to get the incoming dependency edge
                    int head = -1;
                    String deprel = null;
                    if (depTree != null) {
                        IndexedWord node = depTree.getNodeByIndexSafe(i + 1);
                        if (node != null) {
                            List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node);
                            if (!edgeList.isEmpty()) {
                                assert edgeList.size() == 1;
                                head = edgeList.get(0).getGovernor().index();
                                deprel = edgeList.get(0).getRelation().toString();
                            } else if (rootSet.contains(i + 1)) {
                                head = 0;
                                deprel = "ROOT";
                            }
                        }
                    }
                    // Write the token
                    writer.print(line(i + 1, tokens.get(i), head, deprel));
                }
            }
            writer.println();
            writer.println();
        }
    }
    writer.flush();
}
Also used : SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord) CoreMap(edu.stanford.nlp.util.CoreMap) PrintWriter(java.io.PrintWriter)
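
A short driver for the outputter, assuming the two-argument print(Annotation, OutputStream) inherited from AnnotationOutputter. A dependency parser is needed so the head and deprel columns are populated; without it they are left unset.

import java.util.Properties;

import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.CoNLLOutputter;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class CoNLLOutputDemo {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        Annotation doc = new Annotation("The cat sat on the mat.");
        pipeline.annotate(doc);
        // One token per line; head 0 and deprel ROOT mark the sentence root.
        new CoNLLOutputter().print(doc, System.out);
    }
}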

Aggregations

IndexedWord (edu.stanford.nlp.ling.IndexedWord): 204
SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 55
SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge): 53
GrammaticalRelation (edu.stanford.nlp.trees.GrammaticalRelation): 41
CoreLabel (edu.stanford.nlp.ling.CoreLabel): 38
CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 36
SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 24
SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher): 21
ArrayList (java.util.ArrayList): 16
SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern): 10
Tree (edu.stanford.nlp.trees.Tree): 10
Pair (edu.stanford.nlp.util.Pair): 10
CoreMap (edu.stanford.nlp.util.CoreMap): 8
IntPair (edu.stanford.nlp.util.IntPair): 8
java.util (java.util): 8
Collectors (java.util.stream.Collectors): 8
Span (edu.stanford.nlp.ie.machinereading.structure.Span): 7
Annotation (edu.stanford.nlp.pipeline.Annotation): 6
edu.stanford.nlp.util (edu.stanford.nlp.util): 6
Mention (edu.stanford.nlp.coref.data.Mention): 5