Search in sources:

Example 1 with CorefChain

use of edu.stanford.nlp.dcoref.CorefChain in project CoreNLP by stanfordnlp.

In the class DeterministicCorefAnnotator, method addObsoleteCoreferenceAnnotations.

/**
 * Writes the legacy coreference annotations (the coref graph on the document and the
 * per-token coreference clusters) derived from the given chains.
 *
 * <p>Kept for backward compatibility with a few old things.
 * TODO: Aim to get rid of this entirely
 *
 * @param annotation      document annotation that receives the CorefGraphAnnotation and whose
 *                        sentence tokens receive the CorefClusterAnnotation
 * @param orderedMentions per-sentence mention lists; addressed with the 1-based
 *                        (sentence, mention) positions carried by each link
 * @param result          coreference chains from which the links and clusters are read
 */
private static void addObsoleteCoreferenceAnnotations(Annotation annotation, List<List<Mention>> orderedMentions, Map<Integer, CorefChain> result) {
    List<Pair<IntTuple, IntTuple>> rawLinks = SieveCoreferenceSystem.getLinks(result);
    if (VERBOSE) {
        System.err.printf("Found %d coreference links:\n", rawLinks.size());
        for (Pair<IntTuple, IntTuple> rawLink : rawLinks) {
            IntTuple from = rawLink.first;
            IntTuple to = rawLink.second;
            System.err.printf("LINK (%d, %d) -> (%d, %d)\n", from.get(0), from.get(1), to.get(0), to.get(1));
        }
    }

    // Save the coref output as CorefGraphAnnotation: the raw links found by the coref system,
    // re-expressed as (sentence, head token) pairs.
    // (cdm 2013: an older block that collected the token list of every sentence was disabled
    // here because it did not seem to be doing anything needed.)
    List<Pair<IntTuple, IntTuple>> headGraph = new ArrayList<>();
    for (Pair<IntTuple, IntTuple> rawLink : rawLinks) {
        // Note: all offsets in the graph start at 1 (not at 0!), for consistency with the
        // indices of syntactic dependencies, which also start at 1. Hence the "+ 1" on the
        // mention's headIndex below.
        IntTuple from = rawLink.first;
        int fromSentence = from.get(0);
        Mention fromMention = orderedMentions.get(fromSentence - 1).get(from.get(1) - 1);
        int fromHead = fromMention.headIndex + 1;

        IntTuple to = rawLink.second;
        int toSentence = to.get(0);
        Mention toMention = orderedMentions.get(toSentence - 1).get(to.get(1) - 1);
        int toHead = toMention.headIndex + 1;

        IntTuple source = new IntTuple(2);
        source.set(0, fromSentence);
        source.set(1, fromHead);
        IntTuple target = new IntTuple(2);
        target.set(0, toSentence);
        target.set(1, toHead);
        headGraph.add(new Pair<>(source, target));
    }
    annotation.set(CorefCoreAnnotations.CorefGraphAnnotation.class, headGraph);

    // Attach to every head token of a multi-mention chain the set of all head tokens in that chain.
    for (CorefChain chain : result.values()) {
        List<CorefMention> chainMentions = chain.getMentionsInTextualOrder();
        if (chainMentions.size() >= 2) {
            Set<CoreLabel> cluster = Generics.newHashSet();
            for (CorefMention chainMention : chainMentions) {
                // sentNum and headIndex are used as 1-based positions here
                List<CoreMap> allSentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
                CoreMap mentionSentence = allSentences.get(chainMention.sentNum - 1);
                List<CoreLabel> sentenceTokens = mentionSentence.get(CoreAnnotations.TokensAnnotation.class);
                cluster.add(sentenceTokens.get(chainMention.headIndex - 1));
            }
            // every member shares the same set instance
            for (CoreLabel member : cluster) {
                member.set(CorefCoreAnnotations.CorefClusterAnnotation.class, cluster);
            }
        }
    }
}
Also used : ArrayList(java.util.ArrayList) CorefCoreAnnotations(edu.stanford.nlp.dcoref.CorefCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CorefMention(edu.stanford.nlp.dcoref.CorefChain.CorefMention) IntTuple(edu.stanford.nlp.util.IntTuple) CorefChain(edu.stanford.nlp.dcoref.CorefChain) CoreMap(edu.stanford.nlp.util.CoreMap) Pair(edu.stanford.nlp.util.Pair)

Example 2 with CorefChain

use of edu.stanford.nlp.dcoref.CorefChain in project CoreNLP by stanfordnlp.

In the class DeterministicCorefAnnotator, method annotate.

/**
 * Runs coreference resolution over the given annotation and stores the resulting chains under
 * the CorefChainAnnotation key; when {@code OLD_FORMAT} is set, the obsolete coreference
 * annotations are written as well.
 *
 * <p>The primary named entity tag is switched to the coarse tag for the duration of the call and
 * switched back to the fine-grained tag in a {@code finally} block. Mention detection runs
 * inside that guarded region, so the tags are restored even when mention detection fails.
 *
 * <p>If the annotation has no SentencesAnnotation, an error is logged and the method returns
 * without producing coreference output.
 *
 * @param annotation the document annotation to process; it is modified in place
 * @throws RuntimeException any runtime exception from the underlying components is propagated
 *         unchanged; any checked exception is wrapped in a RuntimeException with the original
 *         as its cause
 */
@Override
public void annotate(Annotation annotation) {
    // temporarily set the primary named entity tag to the coarse tag
    setNamedEntityTagGranularity(annotation, "coarse");
    try {
        // Run inside the try so that the finally block below restores the fine-grained tags
        // even if mention detection throws.
        if (performMentionDetection) {
            mentionAnnotator.annotate(annotation);
        }
        List<Tree> trees = new ArrayList<>();
        List<List<CoreLabel>> sentences = new ArrayList<>();
        // extract trees and sentence words
        // we are only supporting the new annotation standard for this Annotator!
        boolean hasSpeakerAnnotations = false;
        if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
            for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
                sentences.add(tokens);
                Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
                trees.add(tree);
                // locking here is crucial for correct threading!
                SemanticGraph dependencies = SemanticGraphFactory.makeFromTree(tree, Mode.COLLAPSED, Extras.NONE, null, true);
                sentence.set(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class, dependencies);
                if (!hasSpeakerAnnotations) {
                    // check for speaker annotations; one annotated token is enough
                    for (CoreLabel t : tokens) {
                        if (t.get(CoreAnnotations.SpeakerAnnotation.class) != null) {
                            hasSpeakerAnnotations = true;
                            break;
                        }
                    }
                }
                MentionExtractor.mergeLabels(tree, tokens);
                MentionExtractor.initializeUtterance(tokens);
            }
        } else {
            log.error("this coreference resolution system requires SentencesAnnotation!");
            // the finally block still restores the fine-grained tags
            return;
        }
        if (hasSpeakerAnnotations) {
            annotation.set(CoreAnnotations.UseMarkedDiscourseAnnotation.class, true);
        }
        // extract all possible mentions
        // this is created for each new annotation because it is not threadsafe
        RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(allowReparsing);
        List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(annotation, 0, corefSystem.dictionaries());
        // add the relevant info to mentions and order them for coref
        Document document = mentionExtractor.arrange(annotation, sentences, trees, allUnprocessedMentions);
        List<List<Mention>> orderedMentions = document.getOrderedMentions();
        if (VERBOSE) {
            for (int i = 0; i < orderedMentions.size(); i++) {
                System.err.printf("Mentions in sentence #%d:%n", i);
                for (int j = 0; j < orderedMentions.get(i).size(); j++) {
                    log.info("\tMention #" + j + ": " + orderedMentions.get(i).get(j).spanToString());
                }
            }
        }
        Map<Integer, edu.stanford.nlp.coref.data.CorefChain> result = corefSystem.corefReturnHybridOutput(document);
        annotation.set(edu.stanford.nlp.coref.CorefCoreAnnotations.CorefChainAnnotation.class, result);
        if (OLD_FORMAT) {
            // the obsolete annotations are built from the old-style chains, so coref is run again
            Map<Integer, CorefChain> oldResult = corefSystem.coref(document);
            addObsoleteCoreferenceAnnotations(annotation, orderedMentions, oldResult);
        }
    } catch (RuntimeException e) {
        throw e;
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        // restore to the fine-grained
        setNamedEntityTagGranularity(annotation, "fine");
    }
}
Also used : ArrayList(java.util.ArrayList) Document(edu.stanford.nlp.dcoref.Document) CorefCoreAnnotations(edu.stanford.nlp.dcoref.CorefCoreAnnotations) RuleBasedCorefMentionFinder(edu.stanford.nlp.dcoref.RuleBasedCorefMentionFinder) CorefChain(edu.stanford.nlp.dcoref.CorefChain) Tree(edu.stanford.nlp.trees.Tree) ArrayList(java.util.ArrayList) List(java.util.List) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CorefCoreAnnotations(edu.stanford.nlp.dcoref.CorefCoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 3 with CorefChain

use of edu.stanford.nlp.dcoref.CorefChain in project CoreNLP by stanfordnlp.

In the class DcorefPronounResolver, method resolvePronouns.

/**
 * Resolves pronouns in a single sentence by running the coreference pipeline over it.
 *
 * <p>The tokens are wrapped in a one-sentence annotation and passed through {@code pipeline}.
 * Within each resulting chain, the first mention (in textual order) whose head token's tag does
 * not start with "PRP" becomes the referent; every later mention whose head token's tag starts
 * with "PRP" is mapped to it. Pronominal mentions that precede the referent are not mapped.
 *
 * @param tokens the tokens of the sentence; mention head indices are treated as 1-based
 *               positions into this list
 * @return a map from the index of each resolved pronoun token to the index of its referent
 *         token; empty when the pipeline produced no coreference chains
 */
@Override
protected HashMap<Integer, Integer> resolvePronouns(List<CoreLabel> tokens) {
    HashMap<Integer, Integer> pronPairs = new HashMap<>(1);
    CoreMap sentence = new CoreLabel();
    sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
    sentence.set(CoreAnnotations.SentenceIndexAnnotation.class, 1);
    List<CoreMap> sentences = new ArrayList<>(1);
    sentences.add(sentence);
    Annotation annotation = new Annotation(sentences);
    pipeline.annotate(annotation);
    Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    if (corefChains == null) {
        // The lookup yields null when the pipeline did not set the chain annotation;
        // treat that as "nothing to resolve" instead of failing with a NullPointerException.
        return pronPairs;
    }
    for (CorefChain chain : corefChains.values()) {
        CoreLabel firstRef = null;
        for (CorefMention m : chain.getMentionsInTextualOrder()) {
            CoreLabel lbl = tokens.get(m.headIndex - 1);
            boolean isPronoun = lbl.tag().startsWith("PRP");
            if (isPronoun) {
                if (firstRef != null) {
                    pronPairs.put(lbl.index(), firstRef.index());
                }
            } else if (firstRef == null) {
                firstRef = lbl;
            }
        }
    }
    return pronPairs;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CorefCoreAnnotations(edu.stanford.nlp.dcoref.CorefCoreAnnotations) Annotation(edu.stanford.nlp.pipeline.Annotation) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CorefMention(edu.stanford.nlp.dcoref.CorefChain.CorefMention) CorefChain(edu.stanford.nlp.dcoref.CorefChain) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.dcoref.CorefCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Aggregations

CorefChain (edu.stanford.nlp.dcoref.CorefChain)3 CorefCoreAnnotations (edu.stanford.nlp.dcoref.CorefCoreAnnotations)3 CoreLabel (edu.stanford.nlp.ling.CoreLabel)3 CoreMap (edu.stanford.nlp.util.CoreMap)3 ArrayList (java.util.ArrayList)3 CorefMention (edu.stanford.nlp.dcoref.CorefChain.CorefMention)2 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)2 Document (edu.stanford.nlp.dcoref.Document)1 RuleBasedCorefMentionFinder (edu.stanford.nlp.dcoref.RuleBasedCorefMentionFinder)1 Annotation (edu.stanford.nlp.pipeline.Annotation)1 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)1 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)1 Tree (edu.stanford.nlp.trees.Tree)1 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)1 IntTuple (edu.stanford.nlp.util.IntTuple)1 Pair (edu.stanford.nlp.util.Pair)1 HashMap (java.util.HashMap)1 List (java.util.List)1