Search in sources :

Example 81 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class SieveCoreferenceSystem method printDiscourseStructure.

/**
 * Logs the discourse structure of a document at FINER level: the document
 * type, then for every run of tokens sharing one utterance index a header
 * line (utterance index and speaker) followed by the tokens of that run.
 *
 * <p>A header and the buffered text are flushed each time the utterance index
 * changes, and once more after the last token. Because the tracking index
 * starts at -1, the first flush emits a header for utterance -1 with an empty
 * speaker and empty text.
 *
 * @param document the document whose sentences, tokens and speakers are logged
 */
private static void printDiscourseStructure(Document document) {
    logger.finer("DISCOURSE STRUCTURE==============================");
    logger.finer("doc type: " + document.docType);
    int previousUtterIndex = -1;
    String previousSpeaker = "";
    StringBuilder sb = new StringBuilder();
    for (CoreMap s : document.annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel l : s.get(CoreAnnotations.TokensAnnotation.class)) {
            int utterIndex = l.get(CoreAnnotations.UtteranceAnnotation.class);
            String speaker = l.get(CoreAnnotations.SpeakerAnnotation.class);
            String word = l.get(CoreAnnotations.TextAnnotation.class);
            if (previousUtterIndex != utterIndex) {
                // Utterance boundary: flush what was collected for the previous utterance.
                logUtteranceHeader(document, previousUtterIndex, previousSpeaker);
                logger.finer(sb.toString());
                sb.setLength(0);
                previousUtterIndex = utterIndex;
                previousSpeaker = speaker;
            }
            sb.append(" ").append(word);
        }
        sb.append("\n");
    }
    // Flush the final utterance, which has no following boundary to trigger it.
    logUtteranceHeader(document, previousUtterIndex, previousSpeaker);
    logger.finer(sb.toString());
    logger.finer("END OF DISCOURSE STRUCTURE==============================");
}

/**
 * Logs the header line for one utterance. If the speaker string parses as an
 * integer and can be resolved through {@code document.allPredictedMentions},
 * the mention's span text is printed; otherwise the raw speaker string is.
 */
private static void logUtteranceHeader(Document document, int utterIndex, String speaker) {
    String speakerText;
    try {
        int speakerID = Integer.parseInt(speaker);
        speakerText = document.allPredictedMentions.get(speakerID).spanToString();
    } catch (Exception ignored) {
        // Best-effort lookup: a non-numeric (or null) speaker, or an id with no
        // resolvable mention, falls back to printing the speaker string as-is.
        speakerText = speaker;
    }
    logger.finer("\n<utter>: " + utterIndex + " <speaker>: " + speakerText);
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException)

Example 82 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class Document method findDocType.

/**
 * Find document type: conversation or article.
 *
 * <p>Scans every token's utterance index. The document is an ARTICLE when no
 * token has a non-zero utterance index, or when the index drops back to 0
 * after a non-zero index has been seen (in a conversation the utterance index
 * keeps increasing). Otherwise it is a CONVERSATION.
 *
 * <p>Side effect: raises the {@code maxUtter} field to the largest utterance
 * index seen among the tokens visited. On the early ARTICLE return, tokens
 * after the returning one are not visited.
 *
 * @param dict dictionaries; not consulted by this method, kept so the
 *             signature stays compatible with existing callers
 * @return the detected document type
 */
private DocType findDocType(Dictionaries dict) {
    boolean speakerChange = false;
    for (CoreMap sent : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel w : sent.get(CoreAnnotations.TokensAnnotation.class)) {
            int utterIndex = w.get(CoreAnnotations.UtteranceAnnotation.class);
            if (utterIndex != 0) {
                speakerChange = true;
            } else if (speakerChange) {
                // Index fell back to 0 after a speaker change: not a conversation.
                return DocType.ARTICLE;
            }
            if (maxUtter < utterIndex) {
                maxUtter = utterIndex;
            }
        }
    }
    return speakerChange ? DocType.CONVERSATION : DocType.ARTICLE;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 83 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class Document method setParagraphAnnotation.

/**
 * Assigns a paragraph index to every token and copies it onto each predicted
 * mention.
 *
 * <p>A token that carries a character begin offset starts a new paragraph when
 * that offset lies more than 2 characters past the end offset of the previous
 * offset-bearing token; it is then tagged with the current paragraph index.
 * Tokens without a begin offset are tagged -1. Each mention's
 * {@code paragraph} is then read from its head word, and {@code numParagraph}
 * is set to the last paragraph index assigned.
 */
private void setParagraphAnnotation() {
    int currentParagraph = 0;
    // Initial sentinel; presumably chosen so the first offset-bearing token opens paragraph 1.
    int lastEndOffset = -10;
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            if (!token.containsKey(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) {
                // No offset information: mark the paragraph as unknown.
                token.set(CoreAnnotations.ParagraphAnnotation.class, -1);
                continue;
            }
            int beginOffset = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
            if (beginOffset > lastEndOffset + 2) {
                currentParagraph++;
            }
            token.set(CoreAnnotations.ParagraphAnnotation.class, currentParagraph);
            lastEndOffset = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
        }
    }
    for (List<Mention> sentenceMentions : predictedOrderedMentionsBySentence) {
        for (Mention mention : sentenceMentions) {
            mention.paragraph = mention.headWord.get(CoreAnnotations.ParagraphAnnotation.class);
        }
    }
    numParagraph = currentParagraph;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 84 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class Document method findSpeaker.

/**
 * Looks for the speaker of an utterance among the tokens
 * {@code [startIndex, endIndex)} of one sentence.
 *
 * <p>Only tokens with utterance index 0 are considered. For each such token
 * whose lemma is in {@code dict.reportVerb}, the token's node is looked up in
 * the sentence's enhanced dependency graph and its first {@code nsubj} child
 * is taken as the speaker. If a mention head is registered at the subject's
 * position, the speaker is recorded as that mention's id (as a string);
 * otherwise the subject's word is recorded. The result is stored in
 * {@code speakers} under {@code utterNum}.
 *
 * @param utterNum   utterance number used as the key in {@code speakers}
 * @param sentNum    index of the sentence to search within {@code sentences}
 * @param sentences  all sentences of the document
 * @param startIndex first token index to inspect (inclusive)
 * @param endIndex   last token index to inspect (exclusive)
 * @param dict       dictionaries providing the report-verb lemmas
 * @return {@code true} if a speaker was found and recorded, {@code false} otherwise
 */
private boolean findSpeaker(int utterNum, int sentNum, List<CoreMap> sentences, int startIndex, int endIndex, Dictionaries dict) {
    CoreMap sentence = sentences.get(sentNum);
    List<CoreLabel> sent = sentence.get(CoreAnnotations.TokensAnnotation.class);
    for (int i = startIndex; i < endIndex; i++) {
        CoreLabel token = sent.get(i);
        if (token.get(CoreAnnotations.UtteranceAnnotation.class) != 0) {
            continue;
        }
        String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
        String word = token.get(CoreAnnotations.TextAnnotation.class);
        if (!dict.reportVerb.contains(lemma)) {
            continue;
        }
        // find subject
        SemanticGraph dependency = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
        // The lookup takes a regular expression; quote the token text so that
        // metacharacters in it are matched literally instead of being
        // interpreted (or rejected) as regex syntax.
        // NOTE(review): the lookup is by surface form, so a word occurring more
        // than once in the sentence may resolve to a node other than token i.
        IndexedWord w = dependency.getNodeByWordPattern(java.util.regex.Pattern.quote(word));
        if (w == null) {
            SieveCoreferenceSystem.logger.warning("Cannot find node in dependency for word " + word);
            continue;
        }
        for (Pair<GrammaticalRelation, IndexedWord> child : dependency.childPairs(w)) {
            if (!child.first().getShortName().equals("nsubj")) {
                continue;
            }
            IndexedWord subject = child.second();
            String subjectString = subject.word();
            // Node indices start from 1; head positions are 0-based.
            int subjectIndex = subject.index();
            IntTuple headPosition = new IntTuple(2);
            headPosition.set(0, sentNum);
            headPosition.set(1, subjectIndex - 1);
            String speaker;
            if (mentionheadPositions.containsKey(headPosition)) {
                speaker = Integer.toString(mentionheadPositions.get(headPosition).mentionID);
            } else {
                speaker = subjectString;
            }
            speakers.put(utterNum, speaker);
            return true;
        }
    }
    return false;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) IntTuple(edu.stanford.nlp.util.IntTuple) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 85 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class Rules method entitySameProperHeadLastWord.

/**
 * Check whether two mentions have the same proper head words.
 *
 * <p>Returns {@code false} unless the head strings are equal ignoring case,
 * both head tokens have a POS tag starting with "NNP", and each mention's
 * lower-cased text up to its head ends with its head string. Then the
 * "NNP"-tagged words between each mention's start and its head are compared:
 * the result is {@code false} only when each mention has a proper noun the
 * other lacks.
 *
 * @param m the mention under consideration
 * @param a the candidate antecedent mention
 * @return whether the two mentions are compatible by this rule
 */
public static boolean entitySameProperHeadLastWord(Mention m, Mention a) {
    if (!m.headString.equalsIgnoreCase(a.headString)) {
        return false;
    }
    String mHeadTag = m.sentenceWords.get(m.headIndex).get(CoreAnnotations.PartOfSpeechAnnotation.class);
    if (!mHeadTag.startsWith("NNP")) {
        return false;
    }
    String aHeadTag = a.sentenceWords.get(a.headIndex).get(CoreAnnotations.PartOfSpeechAnnotation.class);
    if (!aHeadTag.startsWith("NNP")) {
        return false;
    }
    if (!m.removePhraseAfterHead().toLowerCase().endsWith(m.headString)) {
        return false;
    }
    if (!a.removePhraseAfterHead().toLowerCase().endsWith(a.headString)) {
        return false;
    }
    Set<String> mProperNouns = properNounsBeforeHead(m);
    Set<String> aProperNouns = properNounsBeforeHead(a);
    // A side "has extra" when it holds a proper noun missing from the other side.
    boolean mHasExtra = !aProperNouns.containsAll(mProperNouns);
    boolean aHasExtra = !mProperNouns.containsAll(aProperNouns);
    return !(mHasExtra && aHasExtra);
}

/** Collects the text of every "NNP"-tagged word from the mention's start up to (excluding) its head. */
private static Set<String> properNounsBeforeHead(Mention mention) {
    Set<String> properNouns = Generics.newHashSet();
    for (CoreLabel token : mention.sentenceWords.subList(mention.startIndex, mention.headIndex)) {
        String tag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        if (tag.startsWith("NNP")) {
            properNouns.add(token.get(CoreAnnotations.TextAnnotation.class));
        }
    }
    return properNouns;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations)

Aggregations

CoreLabel (edu.stanford.nlp.ling.CoreLabel)536 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)311 CoreMap (edu.stanford.nlp.util.CoreMap)103 ArrayList (java.util.ArrayList)102 Tree (edu.stanford.nlp.trees.Tree)98 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)96 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)63 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)53 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)41 IndexedWord (edu.stanford.nlp.ling.IndexedWord)38 List (java.util.List)33 Annotation (edu.stanford.nlp.pipeline.Annotation)32 Mention (edu.stanford.nlp.coref.data.Mention)29 Label (edu.stanford.nlp.ling.Label)28 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)26 Properties (java.util.Properties)25 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)21 StringReader (java.io.StringReader)20 CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation)19 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)18