Search in sources :

Example 36 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class CorefMentionFinder method addGoldMentions.

/**
 * Temporary debugging aid: for every sentence, adds each gold mention whose
 * (startIndex, endIndex) span is not yet in that sentence's span set to the
 * sentence's predicted-mention list, and records the span as seen.
 *
 * <p>The four lists are indexed in parallel by sentence position. Added
 * mentions get the placeholder id {@code -1}. The enhanced dependency graph is
 * used when the sentence has one; otherwise the basic graph is passed in its
 * place.
 *
 * @param sentences          sentences to process
 * @param mentionSpanSetList per-sentence sets of known mention spans; updated in place
 * @param predictedMentions  per-sentence predicted mentions; updated in place
 * @param allGoldMentions    per-sentence gold mentions to merge in
 */
protected static void addGoldMentions(List<CoreMap> sentences, List<Set<IntPair>> mentionSpanSetList, List<List<Mention>> predictedMentions, List<List<Mention>> allGoldMentions) {
    final int sentenceCount = sentences.size();
    for (int sentIdx = 0; sentIdx < sentenceCount; sentIdx++) {
        List<Mention> predicted = predictedMentions.get(sentIdx);
        CoreMap sentence = sentences.get(sentIdx);
        List<CoreLabel> words = sentence.get(TokensAnnotation.class);
        Set<IntPair> knownSpans = mentionSpanSetList.get(sentIdx);
        List<Mention> goldMentions = allGoldMentions.get(sentIdx);

        for (Mention gold : goldMentions) {
            IntPair span = new IntPair(gold.startIndex, gold.endIndex);
            if (knownSpans.contains(span)) {
                // Span already covered by an existing mention; nothing to add.
                continue;
            }
            Mention added = new Mention(
                    -1, // placeholder mention id
                    gold.startIndex,
                    gold.endIndex,
                    words,
                    sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class),
                    // Fall back to the basic graph when no enhanced graph is attached.
                    sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class) == null
                            ? sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class)
                            : sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class),
                    new ArrayList<>(words.subList(gold.startIndex, gold.endIndex)));
            predicted.add(added);
            knownSpans.add(span);
        }
    }
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) IntPair(edu.stanford.nlp.util.IntPair) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint)

Example 37 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class CorefMentionFinder method extractEnumerations.

/**
 * Finds enumeration members in the sentence's parse tree and adds each one as
 * a mention, unless its span is already known or lies inside a named entity.
 *
 * <p>Every match of {@code enumerationsMentionPattern} contributes two
 * subtrees, the nodes named {@code m1} and {@code m2}. A subtree's span runs
 * from its first leaf's IndexAnnotation minus one to its last leaf's
 * IndexAnnotation. Spans are collected in a map first, so a span produced by
 * several matches yields a single mention.
 *
 * @param s                  sentence carrying tokens, parse tree and dependency graphs
 * @param mentions           mention list for the sentence; updated in place
 * @param mentionSpanSet     spans already covered by a mention; updated in place
 * @param namedEntitySpanSet named-entity spans passed to {@code insideNE}
 */
protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> words = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    SemanticGraph basicGraph = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    SemanticGraph enhancedGraph = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (enhancedGraph == null) {
        // No enhanced graph on this sentence: use the basic one in its place.
        enhancedGraph = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    }

    TregexPattern pattern = enumerationsMentionPattern;
    TregexMatcher matcher = pattern.matcher(parse);
    Map<IntPair, Tree> subtreeBySpan = Generics.newHashMap();
    while (matcher.find()) {
        // Result intentionally unused; call retained from the original implementation.
        matcher.getMatch();
        Tree[] members = { matcher.getNode("m1"), matcher.getNode("m2") };
        for (Tree member : members) {
            List<Tree> leaves = member.getLeaves();
            // Subtracting one from the first leaf's index gives the inclusive start;
            // the last leaf's index is used as-is for the exclusive end.
            int spanStart = ((CoreLabel) leaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
            int spanEnd = ((CoreLabel) leaves.get(leaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
            subtreeBySpan.put(new IntPair(spanStart, spanEnd), member);
        }
    }

    for (Map.Entry<IntPair, Tree> entry : subtreeBySpan.entrySet()) {
        IntPair span = entry.getKey();
        if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
            continue;
        }
        int from = span.get(0);
        int to = span.get(1);
        // -1 is a placeholder mention id.
        mentions.add(new Mention(-1, from, to, words, basicGraph, enhancedGraph, new ArrayList<>(words.subList(from, to)), entry.getValue()));
        mentionSpanSet.add(span);
    }
}
Also used : TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) IntPair(edu.stanford.nlp.util.IntPair) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher) Map(java.util.Map) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 38 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class HybridCorefPrinter method sentenceStringWithMention.

/**
 * Renders sentence {@code i} of the document as one line of text with its
 * mentions bracketed.
 *
 * <p>The line starts with a {@code "\tspeaker: ..."} header built from the first
 * token's speaker and utterance annotations. If the speaker string is a decimal
 * integer it is treated as a mention id and the span text of that predicted
 * mention is appended to it. Each mention is then drawn as {@code [ ... ]_id}:
 * a {@code [} is emitted before the token at the mention's {@code startIndex}
 * and {@code ]_id} before the token at its {@code endIndex} (or after the last
 * token when {@code endIndex} equals the token count).
 *
 * <p>Unlike earlier revisions, this method no longer reads the last token's
 * CharacterOffsetEndAnnotation. That value was never used, and unboxing it
 * threw a NullPointerException for tokens without character offsets.
 *
 * @param i              index of the sentence within the document
 * @param document       document holding the annotation and the mention lists
 * @param gold           if true use gold mentions and gold cluster ids, otherwise predicted ones
 * @param printClusterID if true label brackets with the cluster id, otherwise with the mention id
 * @return the formatted sentence
 */
public static String sentenceStringWithMention(int i, Document document, boolean gold, boolean printClusterID) {
    StringBuilder sentStr = new StringBuilder();
    List<CoreMap> sentences = document.annotation.get(CoreAnnotations.SentencesAnnotation.class);
    List<List<Mention>> allMentions;
    if (gold) {
        allMentions = document.goldMentions;
    } else {
        allMentions = document.predictedMentions;
    }
    CoreMap sentence = sentences.get(i);
    List<Mention> mentions = allMentions.get(i);
    List<CoreLabel> t = sentence.get(CoreAnnotations.TokensAnnotation.class);

    // Header: speaker (resolved to a mention's text when it is a numeric id) and utterance.
    String speaker = t.get(0).get(SpeakerAnnotation.class);
    if (NumberMatchingRegex.isDecimalInteger(speaker)) {
        speaker = speaker + ": " + document.predictedMentionsByID.get(Integer.parseInt(speaker)).spanToString();
    }
    sentStr.append("\tspeaker: ").append(speaker)
            .append(" (").append(t.get(0).get(UtteranceAnnotation.class)).append(") ");

    // Token indices are 1-based, so shift by one to fill the 0-based array.
    String[] tokens = new String[t.size()];
    for (CoreLabel c : t) {
        tokens[c.index() - 1] = c.word();
    }

    // startCounts: how many mentions open at each token position.
    // endMentions: the mentions closing at each position; push() puts the most
    // recently seen mention first, which is the order they are printed in.
    Counter<Integer> startCounts = new ClassicCounter<>();
    Map<Integer, Deque<Mention>> endMentions = Generics.newHashMap();
    for (Mention m : mentions) {
        startCounts.incrementCount(m.startIndex);
        Deque<Mention> closing = endMentions.get(m.endIndex);
        if (closing == null) {
            closing = new ArrayDeque<>();
            endMentions.put(m.endIndex, closing);
        }
        closing.push(m);
    }

    for (int j = 0; j < tokens.length; j++) {
        appendMentionClosers(sentStr, endMentions.get(j), gold, printClusterID);
        for (int k = 0; k < startCounts.getCount(j); k++) {
            appendTokenSeparator(sentStr);
            sentStr.append("[");
        }
        appendTokenSeparator(sentStr);
        sentStr.append(tokens[j]);
    }
    // Mentions that extend to the end of the sentence close after the last token.
    appendMentionClosers(sentStr, endMentions.get(tokens.length), gold, printClusterID);
    return sentStr.toString();
}

/** Appends {@code ]_id} for every mention in {@code closing}; does nothing when it is null. */
private static void appendMentionClosers(StringBuilder sentStr, Deque<Mention> closing, boolean gold, boolean printClusterID) {
    if (closing == null) {
        return;
    }
    for (Mention m : closing) {
        int clusterId = gold ? m.goldCorefClusterID : m.corefClusterID;
        int id = printClusterID ? clusterId : m.mentionID;
        sentStr.append("]_").append(id);
    }
}

/** Appends a single space unless the buffer is empty or currently ends with an opening bracket. */
private static void appendTokenSeparator(StringBuilder sentStr) {
    if (sentStr.length() > 0 && sentStr.charAt(sentStr.length() - 1) != '[') {
        sentStr.append(" ");
    }
}
Also used : UtteranceAnnotation(edu.stanford.nlp.ling.CoreAnnotations.UtteranceAnnotation) Deque(java.util.Deque) ArrayDeque(java.util.ArrayDeque) ArrayDeque(java.util.ArrayDeque) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) ArrayList(java.util.ArrayList) List(java.util.List) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 39 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class CorefMentionFinder method findTreeWithSpan.

/**
 * Depth-first search for the first subtree whose label carries exactly the
 * given BeginIndexAnnotation / EndIndexAnnotation pair.
 *
 * <p>A node that has both index annotations is returned on an exact match. Its
 * subtree is skipped when {@code end} lies before the node's begin index or
 * {@code start} is at or past its end index. Nodes lacking either annotation
 * are searched through their children.
 *
 * @param tree  root of the subtree to search
 * @param start begin index to look for
 * @param end   end index to look for
 * @return the matching subtree, or {@code null} if none is found
 */
private static Tree findTreeWithSpan(Tree tree, int start, int end) {
    CoreLabel label = (CoreLabel) tree.label();
    boolean hasSpan = label != null
            && label.containsKey(CoreAnnotations.BeginIndexAnnotation.class)
            && label.containsKey(CoreAnnotations.EndIndexAnnotation.class);
    if (hasSpan) {
        int nodeStart = label.get(CoreAnnotations.BeginIndexAnnotation.class);
        int nodeEnd = label.get(CoreAnnotations.EndIndexAnnotation.class);
        if (nodeStart == start && nodeEnd == end) {
            // Exact span match.
            return tree;
        }
        if (end < nodeStart || start >= nodeEnd) {
            // Target lies outside this node, so no descendant can match.
            return null;
        }
    }
    // A match may still exist further down.
    for (Tree child : tree.children()) {
        if (child != null) {
            Tree found = findTreeWithSpan(child, start, end);
            if (found != null) {
                return found;
            }
        }
    }
    return null;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint)

Example 40 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class CorefMentionFinder method convertToCoreLabels.

/**
 * Recursively replaces every label in the tree that is not a {@link CoreLabel}
 * with a new CoreLabel carrying the old label's value.
 *
 * <p>This probably isn't needed now, since labels are expected to always be
 * CoreLabels already, in which case the method is a no-op.
 *
 * <p>A node with a {@code null} label receives an empty CoreLabel instead of
 * triggering a NullPointerException, and {@code null} children are skipped,
 * matching the null handling in {@code findTreeWithSpan}.
 *
 * @param tree root of the tree to convert in place
 */
private static void convertToCoreLabels(Tree tree) {
    Label l = tree.label();
    if (!(l instanceof CoreLabel)) {
        CoreLabel cl = new CoreLabel();
        // instanceof is false for null, so guard before dereferencing the old label.
        if (l != null) {
            cl.setValue(l.value());
        }
        tree.setLabel(cl);
    }
    for (Tree kid : tree.children()) {
        if (kid == null) {
            continue;
        }
        convertToCoreLabels(kid);
    }
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Label(edu.stanford.nlp.ling.Label) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Tree(edu.stanford.nlp.trees.Tree)

Aggregations

CoreLabel (edu.stanford.nlp.ling.CoreLabel)536 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)311 CoreMap (edu.stanford.nlp.util.CoreMap)103 ArrayList (java.util.ArrayList)102 Tree (edu.stanford.nlp.trees.Tree)98 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)96 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)63 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)53 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)41 IndexedWord (edu.stanford.nlp.ling.IndexedWord)38 List (java.util.List)33 Annotation (edu.stanford.nlp.pipeline.Annotation)32 Mention (edu.stanford.nlp.coref.data.Mention)29 Label (edu.stanford.nlp.ling.Label)28 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)26 Properties (java.util.Properties)25 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)21 StringReader (java.io.StringReader)20 CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation)19 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)18