Search in sources :

Example 1 with IntTuple

use of edu.stanford.nlp.util.IntTuple in project CoreNLP by stanfordnlp.

The method findParagraphSpeaker of the class DocumentPreprocessor.

/**
 * Records a speaker for the utterance {@code paragraphUtterIndex} when {@code doc.speakers}
 * has no entry for it yet, then returns the result of {@code findNextParagraphSpeaker}.
 *
 * <p>When no speaker is known, a non-empty {@code nextParagraphSpeaker} is used directly.
 * Otherwise the last sentence of {@code paragraph} is scanned token by token: the scan stops
 * at the first token whose POS tag starts with "V", and every token whose NER tag starts with
 * "PER" and whose position is a key of {@code doc.mentionheadPositions} overwrites the
 * candidate with that mention's ID. The candidate is stored only if no such verb was met.
 *
 * @param doc the document whose {@code speakers} map may be updated
 * @param paragraph sentences of the current paragraph
 * @param paragraphUtterIndex utterance index used as the key in {@code doc.speakers}
 * @param nextParagraphSpeaker speaker carried over by the caller; may be empty
 * @param paragraphOffset added to the in-paragraph sentence index when building head positions
 * @param dict passed through to {@code findNextParagraphSpeaker}
 * @return {@code ""} if a speaker had to be searched for but the paragraph is empty; otherwise
 *     whatever {@code findNextParagraphSpeaker} returns
 */
private static String findParagraphSpeaker(Document doc, List<CoreMap> paragraph, int paragraphUtterIndex, String nextParagraphSpeaker, int paragraphOffset, Dictionaries dict) {
    final boolean speakerKnown = doc.speakers.containsKey(paragraphUtterIndex);
    if (!speakerKnown && !nextParagraphSpeaker.isEmpty()) {
        doc.speakers.put(paragraphUtterIndex, nextParagraphSpeaker);
    } else if (!speakerKnown) {
        // cdm [Sept 2015]: guard added to try to avoid a crash on empty paragraphs
        if (paragraph.isEmpty()) {
            Redwood.log("debug-preprocessor", "Empty paragraph; skipping findParagraphSpeaker");
            return "";
        }
        final int lastSentIndex = paragraph.size() - 1;
        final List<CoreLabel> tokens = paragraph.get(lastSentIndex).get(CoreAnnotations.TokensAnnotation.class);
        String candidate = "";
        boolean sawVerb = false;
        for (int tokenIndex = 0; tokenIndex < tokens.size(); tokenIndex++) {
            final CoreLabel token = tokens.get(tokenIndex);
            final String posTag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
            final String nerTag = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
            if (posTag.startsWith("V")) {
                // a verb disqualifies the whole sentence, so there is no point in scanning further
                sawVerb = true;
                break;
            }
            if (!nerTag.startsWith("PER")) {
                continue;
            }
            // key is (document-level sentence index, token index)
            final IntTuple headPosition = new IntTuple(2);
            headPosition.set(0, lastSentIndex + paragraphOffset);
            headPosition.set(1, tokenIndex);
            if (doc.mentionheadPositions.containsKey(headPosition)) {
                candidate = Integer.toString(doc.mentionheadPositions.get(headPosition).mentionID);
            }
        }
        if (!sawVerb && !candidate.isEmpty()) {
            doc.speakers.put(paragraphUtterIndex, candidate);
        }
    }
    return findNextParagraphSpeaker(doc, paragraph, paragraphOffset, dict);
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) IntTuple(edu.stanford.nlp.util.IntTuple) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) TokensAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation)

Example 2 with IntTuple

use of edu.stanford.nlp.util.IntTuple in project CoreNLP by stanfordnlp.

The method initializeCorefCluster of the class Document.

/**
 * Registers every predicted mention and places each one in its own singleton
 * {@code CorefCluster}.
 *
 * <p>For the mention at index {@code j} of list {@code i} in
 * {@code predictedOrderedMentionsBySentence} this method fills {@code allPredictedMentions},
 * {@code positions} (value {@code (i, j)}), {@code corefClusters} and
 * {@code mentionheadPositions} (key {@code (i, headIndex)}), and sets the mention's
 * {@code sentNum} and {@code corefClusterID}. A mention ID that is already registered is
 * logged as a warning and then trips an assertion when assertions are enabled.
 */
private void initializeCorefCluster() {
    for (int sentIdx = 0; sentIdx < predictedOrderedMentionsBySentence.size(); sentIdx++) {
        final List<Mention> sentenceMentions = predictedOrderedMentionsBySentence.get(sentIdx);
        for (int mentionIdx = 0; mentionIdx < sentenceMentions.size(); mentionIdx++) {
            final Mention mention = sentenceMentions.get(mentionIdx);
            final int id = mention.mentionID;

            if (allPredictedMentions.containsKey(id)) {
                final Mention previous = allPredictedMentions.get(id);
                SieveCoreferenceSystem.logger.warning("WARNING: Already contain mention " + id);
                SieveCoreferenceSystem.logger.warning("OLD mention: " + previous.spanToString() + "[" + previous.startIndex + "," + previous.endIndex + "]");
                SieveCoreferenceSystem.logger.warning("NEW mention: " + mention.spanToString() + "[" + mention.startIndex + "," + mention.endIndex + "]");
                // For deeper debugging, SieveCoreferenceSystem.debugPrintMentions can dump the
                // predicted and gold ordered mentions to System.err here.
            }
            assert (!allPredictedMentions.containsKey(id));
            allPredictedMentions.put(id, mention);

            // (list index, index within that list) of the mention
            final IntTuple mentionPosition = new IntTuple(2);
            mentionPosition.set(0, sentIdx);
            mentionPosition.set(1, mentionIdx);
            positions.put(mention, mentionPosition);
            mention.sentNum = sentIdx;

            // every mention starts out as the only member of a cluster named after its own ID
            assert (!corefClusters.containsKey(id));
            corefClusters.put(id, new CorefCluster(id, Generics.newHashSet(Collections.singletonList(mention))));
            mention.corefClusterID = id;

            // (list index, head index) -> mention, for lookup by head position
            final IntTuple headPosition = new IntTuple(2);
            headPosition.set(0, sentIdx);
            headPosition.set(1, mention.headIndex);
            mentionheadPositions.put(headPosition, mention);
        }
    }
}
Also used : IntTuple(edu.stanford.nlp.util.IntTuple)

Example 3 with IntTuple

use of edu.stanford.nlp.util.IntTuple in project CoreNLP by stanfordnlp.

The method findSpeaker of the class Document.

/**
 * Searches tokens {@code [startIndex, endIndex)} of sentence {@code sentNum} for a reporting
 * verb and, if it has an {@code nsubj} dependent, records that subject as the speaker of
 * utterance {@code utterNum}.
 *
 * <p>Tokens whose {@code UtteranceAnnotation} is non-zero are skipped (presumably these lie
 * inside a quoted utterance; confirm against the annotator that sets it). The speaker stored
 * in {@code speakers} is the mention ID (as a string) when the subject's position is a key of
 * {@code mentionheadPositions}, otherwise the subject's surface word.
 *
 * @param utterNum key under which the speaker is stored in {@code speakers}
 * @param sentNum index into {@code sentences}
 * @param sentences sentences of the document
 * @param startIndex first token index to inspect (inclusive)
 * @param endIndex last token index to inspect (exclusive)
 * @param dict supplies the {@code reportVerb} lemma set
 * @return {@code true} if a speaker was recorded, {@code false} otherwise
 */
private boolean findSpeaker(int utterNum, int sentNum, List<CoreMap> sentences, int startIndex, int endIndex, Dictionaries dict) {
    List<CoreLabel> sent = sentences.get(sentNum).get(CoreAnnotations.TokensAnnotation.class);
    for (int i = startIndex; i < endIndex; i++) {
        CoreLabel token = sent.get(i);
        if (token.get(CoreAnnotations.UtteranceAnnotation.class) != 0) {
            continue;
        }
        String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
        String word = token.get(CoreAnnotations.TextAnnotation.class);
        if (dict.reportVerb.contains(lemma)) {
            // find subject
            SemanticGraph dependency = sentences.get(sentNum).get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
            // getNodeByWordPattern interprets its argument as a regular expression, so the
            // token text is quoted to be matched literally; an unquoted token containing
            // regex metacharacters could fail to compile or match an unrelated node.
            IndexedWord w = dependency.getNodeByWordPattern(java.util.regex.Pattern.quote(word));
            if (w != null) {
                for (Pair<GrammaticalRelation, IndexedWord> child : dependency.childPairs(w)) {
                    if (child.first().getShortName().equals("nsubj")) {
                        String subjectString = child.second().word();
                        // dependency indices start from 1; head positions are 0-based
                        int subjectIndex = child.second().index();
                        IntTuple headPosition = new IntTuple(2);
                        headPosition.set(0, sentNum);
                        headPosition.set(1, subjectIndex - 1);
                        String speaker;
                        if (mentionheadPositions.containsKey(headPosition)) {
                            speaker = Integer.toString(mentionheadPositions.get(headPosition).mentionID);
                        } else {
                            speaker = subjectString;
                        }
                        speakers.put(utterNum, speaker);
                        return true;
                    }
                }
            } else {
                SieveCoreferenceSystem.logger.warning("Cannot find node in dependency for word " + word);
            }
        }
    }
    return false;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) IntTuple(edu.stanford.nlp.util.IntTuple) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 4 with IntTuple

use of edu.stanford.nlp.util.IntTuple in project CoreNLP by stanfordnlp.

The method addObsoleteCoreferenceAnnotations of the class DeterministicCorefAnnotator.

// Writes the legacy coreference annotations; kept for backward compatibility with a few old things.
// Sets CorefGraphAnnotation on the document and CorefClusterAnnotation on the head token of
// every mention that belongs to a chain with at least two mentions.
// TODO: Aim to get rid of this entirely
private static void addObsoleteCoreferenceAnnotations(Annotation annotation, List<List<Mention>> orderedMentions, Map<Integer, CorefChain> result) {
    final List<Pair<IntTuple, IntTuple>> links = SieveCoreferenceSystem.getLinks(result);
    if (VERBOSE) {
        System.err.printf("Found %d coreference links:\n", links.size());
        for (Pair<IntTuple, IntTuple> link : links) {
            final IntTuple from = link.first;
            final IntTuple to = link.second;
            System.err.printf("LINK (%d, %d) -> (%d, %d)\n", from.get(0), from.get(1), to.get(0), to.get(1));
        }
    }

    // Save the coref output as CorefGraphAnnotation: the raw links found by the coref system.
    // (cdm 2013: an earlier block that collected each sentence's token list here was dropped
    // because it didn't seem to be doing anything needed.)
    final List<Pair<IntTuple, IntTuple>> graph = new ArrayList<>();
    for (Pair<IntTuple, IntTuple> link : links) {
        final IntTuple rawSrc = link.first;
        final IntTuple rawDst = link.second;
        // Note: all offsets in the graph start at 1 (not at 0!). This is done for consistency,
        // as indices for syntactic dependencies start at 1.
        final int srcSent = rawSrc.get(0);
        final int srcTok = orderedMentions.get(srcSent - 1).get(rawSrc.get(1) - 1).headIndex + 1;
        final int dstSent = rawDst.get(0);
        final int dstTok = orderedMentions.get(dstSent - 1).get(rawDst.get(1) - 1).headIndex + 1;

        final IntTuple src = new IntTuple(2);
        src.set(0, srcSent);
        src.set(1, srcTok);
        final IntTuple dst = new IntTuple(2);
        dst.set(0, dstSent);
        dst.set(1, dstTok);
        graph.add(new Pair<>(src, dst));
    }
    annotation.set(CorefCoreAnnotations.CorefGraphAnnotation.class, graph);

    for (CorefChain corefChain : result.values()) {
        final List<CorefMention> chainMentions = corefChain.getMentionsInTextualOrder();
        if (chainMentions.size() >= 2) {
            final List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
            // collect the head token of every mention in the chain
            final Set<CoreLabel> coreferentTokens = Generics.newHashSet();
            for (CorefMention mention : chainMentions) {
                final List<CoreLabel> sentenceTokens = sentences.get(mention.sentNum - 1).get(CoreAnnotations.TokensAnnotation.class);
                coreferentTokens.add(sentenceTokens.get(mention.headIndex - 1));
            }
            // every head token points at the same shared set
            for (CoreLabel headToken : coreferentTokens) {
                headToken.set(CorefCoreAnnotations.CorefClusterAnnotation.class, coreferentTokens);
            }
        }
    }
}
Also used : ArrayList(java.util.ArrayList) CorefCoreAnnotations(edu.stanford.nlp.dcoref.CorefCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CorefMention(edu.stanford.nlp.dcoref.CorefChain.CorefMention) IntTuple(edu.stanford.nlp.util.IntTuple) CorefChain(edu.stanford.nlp.dcoref.CorefChain) CoreMap(edu.stanford.nlp.util.CoreMap) Pair(edu.stanford.nlp.util.Pair)

Example 5 with IntTuple

use of edu.stanford.nlp.util.IntTuple in project CoreNLP by stanfordnlp.

The method extractGoldLinks of the class Document.

/**
 * Extract gold coref link information.
 *
 * <p>Builds a list of (source position, antecedent position) pairs from the
 * {@code originalRef} field of every mention in {@code goldMentions} and stores it in
 * {@code goldLinks}. A position is an {@code IntTuple} holding (index of the list within
 * {@code goldMentions}, index of the mention within that list).
 *
 * <p>Side effect: when a mention refers to a mention positioned after it, the
 * {@code originalRef} fields of both mentions are rewritten so that the reference points
 * backwards.
 *
 * @throws RuntimeException if a mention's initial {@code originalRef} is not the ID of any
 *     gold mention
 */
protected void extractGoldLinks() {
    //    List<List<Mention>> orderedMentionsBySentence = this.getOrderedMentions();
    List<Pair<IntTuple, IntTuple>> links = new ArrayList<>();
    // position of each mention in the input matrix, by id
    Map<Integer, IntTuple> positions = Generics.newHashMap();
    // positions of antecedents
    Map<Integer, List<IntTuple>> antecedents = Generics.newHashMap();
    // First pass: index every gold mention by ID and give it an empty antecedent list.
    for (int i = 0; i < goldMentions.size(); i++) {
        for (int j = 0; j < goldMentions.get(i).size(); j++) {
            Mention m = goldMentions.get(i).get(j);
            int id = m.mentionID;
            IntTuple pos = new IntTuple(2);
            pos.set(0, i);
            pos.set(1, j);
            positions.put(id, pos);
            antecedents.put(id, new ArrayList<>());
        }
    }
    //    SieveCoreferenceSystem.debugPrintMentions(System.err, "", goldOrderedMentionsBySentence);
    // Second pass: turn each mention's originalRef into links.
    for (List<Mention> mentions : goldMentions) {
        for (Mention m : mentions) {
            int id = m.mentionID;
            IntTuple src = positions.get(id);
            assert (src != null);
            // a negative originalRef means the mention has no antecedent to link to
            if (m.originalRef >= 0) {
                IntTuple dst = positions.get(m.originalRef);
                if (dst == null) {
                    throw new RuntimeException("Cannot find gold mention with ID=" + m.originalRef);
                }
                // to deal with cataphoric annotation
                // While the referent is positioned after this mention, reverse the reference:
                // this mention takes over the referent's own originalRef and the referent is
                // made to point back at this mention.
                // NOTE(review): dst is not null-checked after being reassigned inside the loop;
                // confirm every originalRef reachable here is a known gold mention ID.
                while (dst.get(0) > src.get(0) || (dst.get(0) == src.get(0) && dst.get(1) > src.get(1))) {
                    Mention dstMention = goldMentions.get(dst.get(0)).get(dst.get(1));
                    m.originalRef = dstMention.originalRef;
                    dstMention.originalRef = id;
                    if (m.originalRef < 0)
                        break;
                    dst = positions.get(m.originalRef);
                }
                // the reversal may have left this mention without an antecedent
                if (m.originalRef < 0)
                    continue;
                // A B C: if A<-B, A<-C => make a link B<-C
                // Walk every position from dst to src (both inclusive); any position that is
                // already linked to dst also becomes an antecedent of this mention.
                for (int k = dst.get(0); k <= src.get(0); k++) {
                    for (int l = 0; l < goldMentions.get(k).size(); l++) {
                        // skip positions before dst within dst's own list
                        if (k == dst.get(0) && l < dst.get(1))
                            continue;
                        // stop after src within src's own list
                        if (k == src.get(0) && l > src.get(1))
                            break;
                        IntTuple missed = new IntTuple(2);
                        missed.set(0, k);
                        missed.set(1, l);
                        if (links.contains(new Pair<>(missed, dst))) {
                            antecedents.get(id).add(missed);
                            links.add(new Pair<>(src, missed));
                        }
                    }
                }
                // the direct link to the antecedent itself
                links.add(new Pair<>(src, dst));
                assert (antecedents.get(id) != null);
                antecedents.get(id).add(dst);
                // also link this mention to every antecedent recorded so far for its antecedent
                List<IntTuple> ants = antecedents.get(m.originalRef);
                assert (ants != null);
                for (IntTuple ant : ants) {
                    antecedents.get(id).add(ant);
                    links.add(new Pair<>(src, ant));
                }
            }
        }
    }
    goldLinks = links;
}
Also used : ArrayList(java.util.ArrayList) IntTuple(edu.stanford.nlp.util.IntTuple) List(java.util.List) ArrayList(java.util.ArrayList) Pair(edu.stanford.nlp.util.Pair)

Aggregations

IntTuple (edu.stanford.nlp.util.IntTuple)11 CoreLabel (edu.stanford.nlp.ling.CoreLabel)6 CoreMap (edu.stanford.nlp.util.CoreMap)6 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)5 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)5 IndexedWord (edu.stanford.nlp.ling.IndexedWord)4 GrammaticalRelation (edu.stanford.nlp.trees.GrammaticalRelation)4 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)3 Pair (edu.stanford.nlp.util.Pair)3 BasicDependenciesAnnotation (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.BasicDependenciesAnnotation)2 ArrayList (java.util.ArrayList)2 CorefChain (edu.stanford.nlp.dcoref.CorefChain)1 CorefMention (edu.stanford.nlp.dcoref.CorefChain.CorefMention)1 CorefCoreAnnotations (edu.stanford.nlp.dcoref.CorefCoreAnnotations)1 TokensAnnotation (edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation)1 Tree (edu.stanford.nlp.trees.Tree)1 TreeAnnotation (edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation)1 IntPair (edu.stanford.nlp.util.IntPair)1 List (java.util.List)1