Search in sources :

Example 56 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class CoNLL2011DocumentReader method getMention.

/**
 * Determines the token span of a mention that starts at {@code index}.
 *
 * <p>Tokens after {@code index} are scanned left to right. A token extends the mention when its
 * {@code CorefAnnotation} value, split on {@code '|'}, contains {@code corefG}. Scanning stops at the
 * first token that has no such annotation or whose annotation does not list {@code corefG}.
 *
 * @param index position of the first token of the mention within {@code sentenceAnno}
 * @param corefG coreference label that must be present on every following token of the mention
 * @param sentenceAnno tokens of the sentence
 * @return a pair {@code (index, end)} where {@code end} is the position of the last token that
 *     extended the mention, or {@code index} itself when no following token did
 */
public static Pair<Integer, Integer> getMention(Integer index, String corefG, List<CoreLabel> sentenceAnno) {
    Integer end = index;
    int position = -1;
    for (CoreLabel token : sentenceAnno) {
        position++;
        if (position <= index) {
            // Still at or before the starting token; nothing to examine yet.
            continue;
        }
        String corefLabels = token.get(CorefCoreAnnotations.CorefAnnotation.class);
        boolean continuesMention = corefLabels != null
                && Arrays.asList(corefLabels.split("\\|")).contains(corefG);
        if (!continuesMention) {
            break;
        }
        end = position;
    }
    return Pair.makePair(index, end);
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel)

Example 57 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class CoNLLMentionExtractor method recallErrors.

/**
 * Logs, at FINER level, the gold mentions that have no predicted mention with the same span.
 *
 * <p>For each sentence the gold and predicted mentions are converted to spans with
 * {@code extractSpans}. A gold span that also appears among the predicted spans of the same sentence
 * was found by the system and is skipped; every other gold span is reported together with the
 * sentence, the words inside the span and the sentence's tree.
 *
 * @param goldMentions gold mentions, one list per sentence
 * @param predictedMentions predicted mentions, one list per sentence; a sentence with no
 *     corresponding list (or a {@code null} argument) is treated as having no predictions
 * @param doc annotation supplying the sentences, their tokens and their trees
 * @throws IOException declared for callers; nothing in this body visibly throws it
 */
private static void recallErrors(List<List<Mention>> goldMentions, List<List<Mention>> predictedMentions, Annotation doc) throws IOException {
    List<CoreMap> coreMaps = doc.get(CoreAnnotations.SentencesAnnotation.class);
    int numSentences = goldMentions.size();
    for (int i = 0; i < numSentences; i++) {
        CoreMap coreMap = coreMaps.get(i);
        List<CoreLabel> words = coreMap.get(CoreAnnotations.TokensAnnotation.class);
        Tree tree = coreMap.get(TreeCoreAnnotations.TreeAnnotation.class);
        List<Mention> goldMentionsSent = goldMentions.get(i);
        List<Pair<Integer, Integer>> goldMentionsSpans = extractSpans(goldMentionsSent);
        // Spans the system predicted for this sentence; stays empty when there is no list for it.
        List<Pair<Integer, Integer>> predictedMentionsSpans = new ArrayList<>();
        if (predictedMentions != null && i < predictedMentions.size()) {
            predictedMentionsSpans = extractSpans(predictedMentions.get(i));
        }
        for (Pair<Integer, Integer> mentionSpan : goldMentionsSpans) {
            if (predictedMentionsSpans.contains(mentionSpan)) {
                // The gold mention was predicted with the same span, so it is not a recall error.
                continue;
            }
            logger.finer("RECALL ERROR\n");
            logger.finer(coreMap + "\n");
            // The span's second component is used as an exclusive upper bound.
            for (int x = mentionSpan.first; x < mentionSpan.second; x++) {
                logger.finer(words.get(x).value() + " ");
            }
            logger.finer("\n" + tree + "\n");
        }
    }
}
Also used : TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) CoreMap(edu.stanford.nlp.util.CoreMap) Pair(edu.stanford.nlp.util.Pair)

Example 58 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class CoNLLMentionExtractor method nextDoc.

/**
 * Reads the next CoNLL document from {@code reader} and turns it into a coreference {@code Document}.
 *
 * <p>Steps, in order: prepare each sentence's tree and dependency graphs, tag every token with a
 * speaker and an utterance index, run {@code stanfordProcessor} over the annotation, collect the
 * per-sentence words and trees, extract gold and predicted mentions, log recall errors, and finally
 * assemble the result with {@code arrange}.
 *
 * @return the assembled document with its {@code conllDoc} field set, or {@code null} when the reader
 *     has no further document
 * @throws Exception if any step fails; an {@code IOException} from {@code recallErrors} is rethrown
 *     wrapped in a {@code RuntimeException}
 */
@Override
public Document nextDoc() throws Exception {
    List<List<CoreLabel>> allWords = new ArrayList<>();
    List<Tree> allTrees = new ArrayList<>();
    CoNLL2011DocumentReader.Document conllDoc = reader.getNextDocument();
    if (conllDoc == null) {
        // No more documents to read.
        return null;
    }
    Annotation anno = conllDoc.getAnnotation();
    List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        if (!Constants.USE_GOLD_PARSES && !replicateCoNLL) {
            // Remove tree from annotation and replace with parse using stanford parser
            sentence.remove(TreeCoreAnnotations.TreeAnnotation.class);
        } else {
            // Keep the tree already present on the sentence, optionally lemmatizing it in place.
            Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            if (LEMMATIZE) {
                treeLemmatizer.transformTree(tree);
            }
            // generate the dependency graph
            try {
                SemanticGraph deps = SemanticGraphFactory.makeFromTree(tree, SemanticGraphFactory.Mode.ENHANCED, GrammaticalStructure.Extras.NONE);
                SemanticGraph basicDeps = SemanticGraphFactory.makeFromTree(tree, SemanticGraphFactory.Mode.BASIC, GrammaticalStructure.Extras.NONE);
                sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, basicDeps);
                sentence.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, deps);
            } catch (Exception e) {
                // Best effort: the sentence is left without dependency annotations and processing continues.
                logger.log(Level.WARNING, "Exception caught during extraction of Stanford dependencies. Will ignore and continue...", e);
            }
        }
    }
    // Number utterances: the index advances whenever the speaker differs from the previous token's.
    // preSpeaker starts as null, so the first token always opens utterance 0.
    String preSpeaker = null;
    int utterance = -1;
    for (CoreLabel token : anno.get(CoreAnnotations.TokensAnnotation.class)) {
        if (!token.containsKey(CoreAnnotations.SpeakerAnnotation.class)) {
            // Tokens without a speaker get the empty string so the comparison below never sees null.
            token.set(CoreAnnotations.SpeakerAnnotation.class, "");
        }
        String curSpeaker = token.get(CoreAnnotations.SpeakerAnnotation.class);
        if (!curSpeaker.equals(preSpeaker)) {
            utterance++;
            preSpeaker = curSpeaker;
        }
        token.set(CoreAnnotations.UtteranceAnnotation.class, utterance);
    }
    // Run pipeline
    stanfordProcessor.annotate(anno);
    // Collect tokens and trees per sentence after the pipeline has run.
    for (CoreMap sentence : anno.get(CoreAnnotations.SentencesAnnotation.class)) {
        allWords.add(sentence.get(CoreAnnotations.TokensAnnotation.class));
        allTrees.add(sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
    }
    // Initialize gold mentions
    List<List<Mention>> allGoldMentions = extractGoldMentions(conllDoc);
    List<List<Mention>> allPredictedMentions;
    if (Constants.USE_GOLD_MENTIONS) {
        //allPredictedMentions = allGoldMentions;
        // Make copy of gold mentions since mentions may be later merged, mentionID's changed and stuff
        allPredictedMentions = makeCopy(allGoldMentions);
    } else if (Constants.USE_GOLD_MENTION_BOUNDARIES) {
        // NOTE(review): this cast assumes mentionFinder is a RuleBasedCorefMentionFinder in this mode — confirm.
        allPredictedMentions = ((RuleBasedCorefMentionFinder) mentionFinder).filterPredictedMentions(allGoldMentions, anno, dictionaries);
    } else {
        allPredictedMentions = mentionFinder.extractPredictedMentions(anno, maxID, dictionaries);
    }
    try {
        recallErrors(allGoldMentions, allPredictedMentions, anno);
    } catch (IOException e) {
        // Surface the failure unchecked while preserving the original cause.
        throw new RuntimeException(e);
    }
    Document doc = arrange(anno, allWords, allTrees, allPredictedMentions, allGoldMentions, true);
    doc.conllDoc = conllDoc;
    return doc;
}
Also used : ArrayList(java.util.ArrayList) Tree(edu.stanford.nlp.trees.Tree) List(java.util.List) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) IOException(java.io.IOException) Annotation(edu.stanford.nlp.pipeline.Annotation) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 59 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class MentionDetectionClassifier method extractFeatures.

/**
 * Builds the feature counts describing mention {@code p} for mention-detection classification.
 *
 * <p>Features cover the NE type of the head word, whether the normalized span string is a known NE
 * string (and whether neither neighbouring token shares the head's NE type), the word and POS tag
 * of the tokens just outside the span, the word and POS tag of the first and last span tokens, and
 * whether {@code p} contains or is contained in another mention from {@code shares}.
 *
 * @param p mention to describe
 * @param shares mentions compared against {@code p} for containment; {@code p} itself is skipped
 * @param neStrings normalized span strings regarded as named entities
 * @param dict not used by this method
 * @param props not used by this method
 * @return counter holding one count per emitted feature
 */
public static Counter<String> extractFeatures(Mention p, Set<Mention> shares, Set<String> neStrings, Dictionaries dict, Properties props) {
    Counter<String> featureCounts = new ClassicCounter<>();
    String normalizedSpan = p.lowercaseNormalizedSpanString();
    String headNer = p.headWord.ner();
    int start = p.startIndex;
    int end = p.endIndex;
    List<CoreLabel> sentence = p.sentenceWords;

    // Tokens immediately outside the span; null at a sentence boundary.
    CoreLabel before = null;
    if (start != 0) {
        before = sentence.get(start - 1);
    }
    CoreLabel after = null;
    if (end != sentence.size()) {
        after = sentence.get(end);
    }
    CoreLabel spanFirst = p.originalSpan.get(0);
    CoreLabel spanLast = p.originalSpan.get(p.originalSpan.size() - 1);

    featureCounts.incrementCount("B-NETYPE-" + headNer);
    if (neStrings.contains(normalizedSpan)) {
        featureCounts.incrementCount("B-NE-STRING-EXIST");
        // The span is a "full" NE when no adjacent token carries the same NE type as the head.
        boolean neighbourSharesType = (before != null && before.ner().equals(headNer))
                || (after != null && after.ner().equals(headNer));
        if (!neighbourSharesType) {
            featureCounts.incrementCount("B-NE-FULLSPAN");
        }
    }

    if (before != null) {
        featureCounts.incrementCount("B-PRECEDINGWORD-" + before.word());
    }
    if (after != null) {
        featureCounts.incrementCount("B-FOLLOWINGWORD-" + after.word());
    }
    if (before != null) {
        featureCounts.incrementCount("B-PRECEDINGPOS-" + before.tag());
    }
    if (after != null) {
        featureCounts.incrementCount("B-FOLLOWINGPOS-" + after.tag());
    }

    featureCounts.incrementCount("B-FIRSTWORD-" + spanFirst.word());
    featureCounts.incrementCount("B-FIRSTPOS-" + spanFirst.tag());
    featureCounts.incrementCount("B-LASTWORD-" + spanLast.word());
    // NOTE(review): the last token's POS tag is emitted under the "B-LASTWORD-" prefix, where
    // "B-LASTPOS-" would mirror the first-token features. Kept as is because renaming a feature
    // would presumably invalidate already-trained models — confirm before changing.
    featureCounts.incrementCount("B-LASTWORD-" + spanLast.tag());

    // Does p contain some other mention?
    for (Mention other : shares) {
        if (other != p && other.insideIn(p)) {
            featureCounts.incrementCount("B-BIGGER-THAN-ANOTHER");
            break;
        }
    }
    // Is p contained in some other mention?
    for (Mention other : shares) {
        if (other != p && p.insideIn(other)) {
            featureCounts.incrementCount("B-SMALLER-THAN-ANOTHER");
            break;
        }
    }
    return featureCounts;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter)

Example 60 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class MentionDetectionClassifier method classifyMentions.

/**
 * Prunes predicted mentions so that, within each sentence, at most one mention per head position
 * remains.
 *
 * <p>First, the normalized span strings of mentions that look like named entities are collected:
 * the head word's NE label must not be {@code "O"} and every token of the span must carry that same
 * label. (Previously the per-token check had no effect, so any mention with a non-"O" head was
 * collected.) Then, for every group of mentions sharing a head index, each mention is scored with
 * {@code probabilityOf}; the mention chosen by {@code Counters.argmax} is kept and the others are
 * removed from the sentence's list in place.
 *
 * @param predictedMentions predicted mentions, one list per sentence; modified in place
 * @param dict passed through to {@code probabilityOf}
 * @param props passed through to {@code probabilityOf}
 */
public void classifyMentions(List<List<Mention>> predictedMentions, Dictionaries dict, Properties props) {
    Set<String> neStrings = Generics.newHashSet();
    for (List<Mention> predictedMention : predictedMentions) {
        for (Mention m : predictedMention) {
            String ne = m.headWord.ner();
            if (ne.equals("O"))
                continue;
            // Only spans whose tokens all share the head's NE label count as NE strings.
            boolean uniformNeLabel = true;
            for (CoreLabel cl : m.originalSpan) {
                if (!ne.equals(cl.ner())) {
                    uniformNeLabel = false;
                    break;
                }
            }
            if (!uniformNeLabel)
                continue;
            neStrings.add(m.lowercaseNormalizedSpanString());
        }
    }
    for (List<Mention> predicts : predictedMentions) {
        // Group this sentence's mentions by head index.
        Map<Integer, Set<Mention>> headPositions = Generics.newHashMap();
        for (Mention p : predicts) {
            headPositions.computeIfAbsent(p.headIndex, k -> Generics.<Mention>newHashSet()).add(p);
        }
        Set<Mention> remove = Generics.newHashSet();
        for (Set<Mention> shares : headPositions.values()) {
            if (shares.size() > 1) {
                Counter<Mention> probs = new ClassicCounter<>();
                for (Mention p : shares) {
                    double trueProb = probabilityOf(p, shares, neStrings, dict, props);
                    probs.incrementCount(p, trueProb);
                }
                // Keep the best-scoring mention; the span-string comparator is handed to argmax
                // (presumably as its tie-breaker — confirm against Counters.argmax).
                Mention keep = Counters.argmax(probs, (m1, m2) -> m1.spanToString().compareTo(m2.spanToString()));
                probs.remove(keep);
                remove.addAll(probs.keySet());
            }
        }
        // Removal is deferred until grouping and scoring are done.
        for (Mention r : remove) {
            predicts.remove(r);
        }
    }
}
Also used : Set(java.util.Set) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter)

Aggregations

CoreLabel (edu.stanford.nlp.ling.CoreLabel)536 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)311 CoreMap (edu.stanford.nlp.util.CoreMap)103 ArrayList (java.util.ArrayList)102 Tree (edu.stanford.nlp.trees.Tree)98 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)96 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)63 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)53 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)41 IndexedWord (edu.stanford.nlp.ling.IndexedWord)38 List (java.util.List)33 Annotation (edu.stanford.nlp.pipeline.Annotation)32 Mention (edu.stanford.nlp.coref.data.Mention)29 Label (edu.stanford.nlp.ling.Label)28 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)26 Properties (java.util.Properties)25 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)21 StringReader (java.io.StringReader)20 CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation)19 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)18