Search in sources :

Example 51 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

The method setUtteranceAndSpeakerAnnotation of the class DocumentPreprocessor.

/**
 * Walks over every token of the document, assigns it an utterance index and, where needed, a
 * placeholder speaker.
 *
 * <p>A fresh utterance index ({@code doc.maxUtter + 1}) is started whenever the speaker changes
 * or a quotation opens. When a quotation closes, the index falls back to the one that applies
 * outside the quotation. The token that opens a quotation is itself tagged with the
 * outside-quotation index.
 *
 * <p>A token's speaker is overwritten with {@code "PER" + utterance} when the token has no
 * speaker annotation, an empty one, one that already starts with {@code "PER"}, or when the
 * token lies inside a quotation.
 *
 * @param doc the document to annotate; its {@code speakerInfoGiven} and {@code maxUtter} fields
 *     are updated as a side effect
 */
private static void setUtteranceAndSpeakerAnnotation(Document doc) {
    doc.speakerInfoGiven = false;
    int currentUtterance = 0;
    // Utterance index that applies to the text outside of any quotation.
    int utteranceOutsideQuote = 0;
    boolean inQuote = false;
    List<CoreLabel> tokens = doc.annotation.get(CoreAnnotations.TokensAnnotation.class);
    String previousSpeaker = tokens.isEmpty() ? null : tokens.get(0).get(CoreAnnotations.SpeakerAnnotation.class);
    for (CoreLabel token : tokens) {
        String speaker = token.get(CoreAnnotations.SpeakerAnnotation.class);
        String text = token.get(CoreAnnotations.TextAnnotation.class);
        // Any speaker other than the "-" placeholder means the input carries speaker information.
        if (speaker != null && !"-".equals(speaker)) {
            doc.speakerInfoGiven = true;
        }
        boolean speakerChanged = doc.speakerInfoGiven && speaker != null && !speaker.equals(previousSpeaker);
        // A plain double quote opens or closes depending on whether we are already inside one.
        boolean opensQuote = text.equals("``") || (text.equals("\"") && !inQuote);
        boolean closesQuote = text.equals("''") || (text.equals("\"") && inQuote);

        // Both a speaker change and an opening quote start a new utterance.
        if (speakerChanged || opensQuote) {
            currentUtterance = doc.maxUtter + 1;
        }
        if (speakerChanged) {
            // If the new speaker starts with a quotation, the surrounding text gets the next index.
            utteranceOutsideQuote = opensQuote ? currentUtterance + 1 : currentUtterance;
            previousSpeaker = speaker;
        }
        if (closesQuote) {
            currentUtterance = utteranceOutsideQuote;
            inQuote = false;
        }
        if (currentUtterance > doc.maxUtter) {
            doc.maxUtter = currentUtterance;
        }

        // The opening quote token itself belongs to the utterance outside the quotation.
        token.set(CoreAnnotations.UtteranceAnnotation.class, opensQuote ? utteranceOutsideQuote : currentUtterance);

        boolean lacksSpeaker = !token.containsKey(CoreAnnotations.SpeakerAnnotation.class)
                || speaker.isEmpty()
                || speaker.startsWith("PER");
        if (lacksSpeaker || inQuote) {
            token.set(CoreAnnotations.SpeakerAnnotation.class, "PER" + currentUtterance);
        }

        // Only flip the flag now, so the opening quote token is not treated as inside the quote.
        if (opensQuote) {
            inQuote = true;
        }
    }
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) UtteranceAnnotation(edu.stanford.nlp.ling.CoreAnnotations.UtteranceAnnotation) SpeakerAnnotation(edu.stanford.nlp.ling.CoreAnnotations.SpeakerAnnotation)

Example 52 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

The method findSpeakers of the class DocumentPreprocessor.

/**
 * Speaker extraction.
 *
 * <p>Unless the document annotation has {@code UseMarkedDiscourseAnnotation} set to true, the
 * speakers are first searched with the conversation- or article-specific routine, depending on
 * {@code doc.docType}. Afterwards {@code doc.speakers} is filled so that every utterance index
 * seen on a token maps to the speaker of the first token found with that index; existing
 * entries are left untouched.
 *
 * @param doc the document whose {@code speakers} map is populated
 * @param dict dictionaries handed through to the docType-specific speaker finders
 */
private static void findSpeakers(Document doc, Dictionaries dict) {
    Boolean markedDiscourseFlag = doc.annotation.get(CoreAnnotations.UseMarkedDiscourseAnnotation.class);
    // A missing annotation counts as "not marked".
    boolean markedDiscourse = Boolean.TRUE.equals(markedDiscourseFlag);

    if (!markedDiscourse) {
        if (doc.docType == DocType.CONVERSATION) {
            findSpeakersInConversation(doc, dict);
        } else if (doc.docType == DocType.ARTICLE) {
            findSpeakersInArticle(doc, dict);
        }
    }

    for (CoreMap sentence : doc.annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            int utterance = token.get(CoreAnnotations.UtteranceAnnotation.class);
            if (doc.speakers.containsKey(utterance)) {
                // Keep the speaker that was registered first for this utterance.
                continue;
            }
            doc.speakers.put(utterance, token.get(CoreAnnotations.SpeakerAnnotation.class));
        }
    }
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) SpeakerAnnotation(edu.stanford.nlp.ling.CoreAnnotations.SpeakerAnnotation)

Example 53 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

The method findParagraphSpeaker of the class DocumentPreprocessor.

/**
 * Registers a speaker for the given paragraph utterance (if none is known yet) and returns the
 * result of {@code findNextParagraphSpeaker} for the same paragraph.
 *
 * <p>When {@code doc.speakers} has no entry for {@code paragraphUtterIndex}:
 * <ul>
 *   <li>a non-empty {@code nextParagraphSpeaker} is stored directly;</li>
 *   <li>otherwise the last sentence of the paragraph is scanned token by token, stopping at the
 *       first token whose POS tag starts with {@code "V"}. Tokens whose NER tag starts with
 *       {@code "PER"} and that are a registered mention head supply the mention ID as speaker
 *       candidate. The candidate is stored only if no verb was encountered.</li>
 * </ul>
 *
 * @param doc document holding the {@code speakers} and {@code mentionheadPositions} maps
 * @param paragraph sentences of the paragraph
 * @param paragraphUtterIndex utterance index the paragraph belongs to
 * @param nextParagraphSpeaker speaker carried over for this paragraph; may be empty
 * @param paragraphOffset added to the index of the paragraph's last sentence to build the
 *     sentence component of the mention head position
 * @param dict dictionaries handed through to {@code findNextParagraphSpeaker}
 * @return the empty string for an empty paragraph without a known or carried-over speaker,
 *     otherwise whatever {@code findNextParagraphSpeaker} returns
 */
private static String findParagraphSpeaker(Document doc, List<CoreMap> paragraph, int paragraphUtterIndex, String nextParagraphSpeaker, int paragraphOffset, Dictionaries dict) {
    boolean speakerKnown = doc.speakers.containsKey(paragraphUtterIndex);
    if (!speakerKnown && !nextParagraphSpeaker.isEmpty()) {
        doc.speakers.put(paragraphUtterIndex, nextParagraphSpeaker);
    } else if (!speakerKnown) {
        // cdm [Sept 2015] added this check to try to avoid crash
        if (paragraph.isEmpty()) {
            Redwood.log("debug-preprocessor", "Empty paragraph; skipping findParagraphSpeaker");
            return "";
        }
        int lastSentenceIndex = paragraph.size() - 1;
        List<CoreLabel> lastTokens = paragraph.get(lastSentenceIndex).get(CoreAnnotations.TokensAnnotation.class);
        String candidate = "";
        boolean verbSeen = false;
        for (int tokenIndex = 0; tokenIndex < lastTokens.size(); tokenIndex++) {
            CoreLabel token = lastTokens.get(tokenIndex);
            String posTag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
            String nerTag = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
            if (posTag.startsWith("V")) {
                // A verb disqualifies the sentence; no need to look further.
                verbSeen = true;
                break;
            }
            if (!nerTag.startsWith("PER")) {
                continue;
            }
            // Position key is (sentence index, token index).
            IntTuple headPosition = new IntTuple(2);
            headPosition.set(0, lastSentenceIndex + paragraphOffset);
            headPosition.set(1, tokenIndex);
            if (doc.mentionheadPositions.containsKey(headPosition)) {
                candidate = Integer.toString(doc.mentionheadPositions.get(headPosition).mentionID);
            }
        }
        if (!verbSeen && !candidate.isEmpty()) {
            doc.speakers.put(paragraphUtterIndex, candidate);
        }
    }
    return findNextParagraphSpeaker(doc, paragraph, paragraphOffset, dict);
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) IntTuple(edu.stanford.nlp.util.IntTuple) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) TokensAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation)

Example 54 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

The method entitySameProperHeadLastWord of the class CorefRules.

/**
 * Check whether two mentions have the same proper head words.
 *
 * <p>Both mentions must share the same head string (ignoring case), both head tokens must carry
 * a POS tag starting with {@code "NNP"}, and for each mention the lower-cased text returned by
 * {@code removePhraseAfterHead()} must end with its head string. Finally, the proper nouns
 * preceding the two heads are compared: the result is {@code false} only when each mention has
 * a proper noun the other one lacks.
 *
 * @param m the first mention
 * @param a the second mention
 * @return {@code true} if the mentions are compatible under the rules above
 */
public static boolean entitySameProperHeadLastWord(Mention m, Mention a) {
    if (!m.headString.equalsIgnoreCase(a.headString)) {
        return false;
    }
    if (!m.sentenceWords.get(m.headIndex).get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("NNP")) {
        return false;
    }
    if (!a.sentenceWords.get(a.headIndex).get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("NNP")) {
        return false;
    }
    if (!m.removePhraseAfterHead().toLowerCase().endsWith(m.headString)) {
        return false;
    }
    if (!a.removePhraseAfterHead().toLowerCase().endsWith(a.headString)) {
        return false;
    }

    Set<String> properNounsOfM = properNounsBeforeHead(m);
    Set<String> properNounsOfA = properNounsBeforeHead(a);

    // Compatible as long as one side's proper nouns are fully covered by the other side's.
    return properNounsOfA.containsAll(properNounsOfM) || properNounsOfM.containsAll(properNounsOfA);
}

/** Collects the text of every NNP-tagged token between the mention start and its head. */
private static Set<String> properNounsBeforeHead(Mention mention) {
    Set<String> properNouns = Generics.newHashSet();
    for (CoreLabel word : mention.sentenceWords.subList(mention.startIndex, mention.headIndex)) {
        if (word.get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("NNP")) {
            properNouns.add(word.get(CoreAnnotations.TextAnnotation.class));
        }
    }
    return properNouns;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations)

Example 55 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

The method extractGoldMentions of the class ACEMentionExtractor.

/**
 * Converts the gold entity mentions annotated on a sentence into {@link Mention} objects.
 *
 * <p>A new list for this sentence is appended to {@code allGoldMentions} (even when the sentence
 * has no gold mentions) and filled with one {@code Mention} per entity mention, visited in the
 * order defined by {@code comparator}. For each mention the method sets the dependency graph,
 * token span, mention ID (last {@code "-"}-separated part of the object ID), gold cluster ID
 * (last {@code "-E"}-separated part of the coref ID) and {@code originalRef}, and it updates
 * {@code maxID}. When {@code Constants.USE_GOLD_NE} is on, the tokens in the span additionally
 * receive the gold entity type, and the NER tag for {@code "NAM"} mentions.
 *
 * @param s the sentence to read gold mentions and tokens from
 * @param allGoldMentions per-sentence gold mention lists collected so far; extended in place
 * @param comparator ordering in which the sentence's entity mentions are processed
 */
private void extractGoldMentions(CoreMap s, List<List<Mention>> allGoldMentions, EntityComparator comparator) {
    List<Mention> sentenceMentions = new ArrayList<>();
    allGoldMentions.add(sentenceMentions);

    List<EntityMention> annotatedMentions = s.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
    List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);

    TreeSet<EntityMention> orderedMentions = new TreeSet<>(comparator);
    if (annotatedMentions != null) {
        orderedMentions.addAll(annotatedMentions);
    }

    for (EntityMention entity : orderedMentions) {
        Mention mention = new Mention();

        // Prefer collapsed dependencies; fall back to the enhanced ones when they are missing.
        mention.dependency = s.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        if (mention.dependency == null) {
            mention.dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
        }

        mention.startIndex = entity.getExtentTokenStart();
        mention.endIndex = entity.getExtentTokenEnd();

        String[] objectIdParts = entity.getObjectId().split("-");
        mention.mentionID = Integer.parseInt(objectIdParts[objectIdParts.length - 1]);
        String[] corefIdParts = entity.getCorefID().split("-E");
        mention.goldCorefClusterID = Integer.parseInt(corefIdParts[corefIdParts.length - 1]);

        // Scan all earlier mentions back to front; the last assignment wins, so originalRef ends
        // up as the ID of the earliest mention in the same gold cluster (or -1 if there is none).
        mention.originalRef = -1;
        for (int sentIdx = allGoldMentions.size() - 1; sentIdx >= 0; sentIdx--) {
            List<Mention> earlierMentions = allGoldMentions.get(sentIdx);
            for (int mentionIdx = earlierMentions.size() - 1; mentionIdx >= 0; mentionIdx--) {
                Mention earlier = earlierMentions.get(mentionIdx);
                if (earlier.goldCorefClusterID == mention.goldCorefClusterID) {
                    mention.originalRef = earlier.mentionID;
                }
            }
        }

        sentenceMentions.add(mention);
        if (maxID < mention.mentionID) {
            maxID = mention.mentionID;
        }

        // set ner type
        for (int tokenIdx = entity.getExtentTokenStart(); tokenIdx < entity.getExtentTokenEnd(); tokenIdx++) {
            CoreLabel token = tokens.get(tokenIdx);
            String nerLabel = entity.getType() + "-" + entity.getSubType();
            if (!Constants.USE_GOLD_NE) {
                continue;
            }
            token.set(CoreAnnotations.EntityTypeAnnotation.class, entity.getMentionType());
            if (entity.getMentionType().equals("NAM")) {
                token.set(CoreAnnotations.NamedEntityTagAnnotation.class, nerLabel);
            }
        }
    }
}
Also used : ArrayList(java.util.ArrayList) MachineReadingAnnotations(edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) EntityMention(edu.stanford.nlp.ie.machinereading.structure.EntityMention) TreeSet(java.util.TreeSet) EntityMention(edu.stanford.nlp.ie.machinereading.structure.EntityMention) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations)

Aggregations

CoreLabel (edu.stanford.nlp.ling.CoreLabel)536 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)311 CoreMap (edu.stanford.nlp.util.CoreMap)103 ArrayList (java.util.ArrayList)102 Tree (edu.stanford.nlp.trees.Tree)98 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)96 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)63 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)53 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)41 IndexedWord (edu.stanford.nlp.ling.IndexedWord)38 List (java.util.List)33 Annotation (edu.stanford.nlp.pipeline.Annotation)32 Mention (edu.stanford.nlp.coref.data.Mention)29 Label (edu.stanford.nlp.ling.Label)28 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)26 Properties (java.util.Properties)25 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)21 StringReader (java.io.StringReader)20 CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation)19 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)18