Search in sources :

Example 46 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

From the class DocumentPreprocessor, method findParagraphSpeaker.

/**
 * Records a speaker for the utterance {@code paragraphUtterIndex} in {@code doc.speakers}
 * when none is stored yet, then delegates to {@code findNextParagraphSpeaker}.
 *
 * <p>If {@code nextParagraphSpeaker} is non-empty it is stored as the speaker. Otherwise the
 * last sentence of {@code paragraph} is scanned: when it contains no token whose POS tag starts
 * with "V" and at least one token whose NER tag starts with "PER" sits on a known mention head
 * position, that mention's ID (as a string) is stored as the speaker. If several such tokens
 * exist, the last one scanned wins.
 *
 * <p>Tokens without a POS tag or without an NER tag are treated as neither verbs nor persons
 * instead of triggering a {@link NullPointerException}.
 *
 * @param doc                  document whose {@code speakers} map is read and possibly updated
 * @param paragraph            sentences of the paragraph being closed
 * @param paragraphUtterIndex  utterance index used as the key into {@code doc.speakers}
 * @param nextParagraphSpeaker speaker carried over from the previous call; empty if none
 * @param paragraphOffset      index of the paragraph's first sentence, used to build the
 *                             sentence coordinate of a mention head position
 * @param dict                 dictionaries forwarded to {@code findNextParagraphSpeaker}
 * @return the value of {@code findNextParagraphSpeaker(doc, paragraph, paragraphOffset, dict)},
 *         or the empty string when no speaker is known and {@code paragraph} is empty
 */
private static String findParagraphSpeaker(Document doc, List<CoreMap> paragraph, int paragraphUtterIndex, String nextParagraphSpeaker, int paragraphOffset, Dictionaries dict) {
    if (!doc.speakers.containsKey(paragraphUtterIndex)) {
        if (!nextParagraphSpeaker.isEmpty()) {
            doc.speakers.put(paragraphUtterIndex, nextParagraphSpeaker);
        } else {
            // cdm [Sept 2015] added this check to try to avoid crash
            if (paragraph.isEmpty()) {
                Redwood.log("debug-preprocessor", "Empty paragraph; skipping findParagraphSpeaker");
                return "";
            }
            int lastSentIndex = paragraph.size() - 1;
            CoreMap lastSent = paragraph.get(lastSentIndex);
            // Fetch the token list once instead of on every loop iteration.
            List<CoreLabel> tokens = lastSent.get(CoreAnnotations.TokensAnnotation.class);
            String speaker = "";
            boolean hasVerb = false;
            for (int i = 0; i < tokens.size(); i++) {
                CoreLabel w = tokens.get(i);
                String pos = w.get(CoreAnnotations.PartOfSpeechAnnotation.class);
                String ner = w.get(CoreAnnotations.NamedEntityTagAnnotation.class);
                // A verb anywhere in the sentence disqualifies it; nothing after it matters.
                if (pos != null && pos.startsWith("V")) {
                    hasVerb = true;
                    break;
                }
                if (ner != null && ner.startsWith("PER")) {
                    // Head positions are keyed by (document-level sentence index, token index).
                    IntTuple headPosition = new IntTuple(2);
                    headPosition.set(0, lastSentIndex + paragraphOffset);
                    headPosition.set(1, i);
                    if (doc.mentionheadPositions.containsKey(headPosition)) {
                        speaker = Integer.toString(doc.mentionheadPositions.get(headPosition).mentionID);
                    }
                }
            }
            if (!hasVerb && !speaker.isEmpty()) {
                doc.speakers.put(paragraphUtterIndex, speaker);
            }
        }
    }
    return findNextParagraphSpeaker(doc, paragraph, paragraphOffset, dict);
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) IntTuple(edu.stanford.nlp.util.IntTuple) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) TokensAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation)

Example 47 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

From the class DocumentPreprocessor, method findSpeakersInConversation.

/**
 * Fills {@code doc.speakers} for a conversational document.
 *
 * <p>First, every predicted mention that has a predicate nominative whose surface string is
 * "i" (case-insensitively) is registered as the speaker of the utterance its head word
 * belongs to. Then the sentences are walked in order and grouped into paragraphs by
 * utterance index; each closed paragraph is handed to {@code findParagraphSpeaker}, and the
 * speaker string that call returns is passed on to the next paragraph.
 *
 * @param doc  document providing {@code predictedMentions} and {@code annotation}; its
 *             {@code speakers} map is updated in place
 * @param dict dictionaries forwarded to {@code findParagraphSpeaker}
 */
private static void findSpeakersInConversation(Document doc, Dictionaries dict) {
    for (List<Mention> l : doc.predictedMentions) {
        for (Mention m : l) {
            if (m.predicateNominatives == null)
                continue;
            for (Mention a : m.predicateNominatives) {
                // Lower-case with a fixed locale: under a Turkish or Azeri default locale
                // "I".toLowerCase() yields dotless "ı" and would never equal "i".
                if (a.spanToString().toLowerCase(java.util.Locale.ROOT).equals("i")) {
                    doc.speakers.put(m.headWord.get(CoreAnnotations.UtteranceAnnotation.class), Integer.toString(m.mentionID));
                }
            }
        }
    }
    List<CoreMap> paragraph = new ArrayList<>();
    int paragraphUtterIndex = 0;
    String nextParagraphSpeaker = "";
    int paragraphOffset = 0;
    for (CoreMap sent : doc.annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        // The sentence is appended before the utterance comparison, so the sentence that
        // opens a new utterance is passed along as the last sentence of the paragraph
        // being closed and is counted in that paragraph's size.
        paragraph.add(sent);
        int currentUtter = sent.get(CoreAnnotations.TokensAnnotation.class).get(0).get(CoreAnnotations.UtteranceAnnotation.class);
        if (paragraphUtterIndex != currentUtter) {
            nextParagraphSpeaker = findParagraphSpeaker(doc, paragraph, paragraphUtterIndex, nextParagraphSpeaker, paragraphOffset, dict);
            paragraphUtterIndex = currentUtter;
            paragraphOffset += paragraph.size();
            paragraph = new ArrayList<>();
        }
    }
    // Flush whatever remains after the last utterance change (possibly an empty paragraph).
    findParagraphSpeaker(doc, paragraph, paragraphUtterIndex, nextParagraphSpeaker, paragraphOffset, dict);
}
Also used : ArrayList(java.util.ArrayList) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) UtteranceAnnotation(edu.stanford.nlp.ling.CoreAnnotations.UtteranceAnnotation) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 48 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

From the class ChineseHcorefDemo, method main.

/**
 * Demo entry point: runs the Chinese coreference pipeline on a fixed, pre-segmented sample
 * text and prints the resulting coref chains, the mentions of every sentence, and the
 * elapsed wall-clock time.
 *
 * @param args ignored; the pipeline is always configured from the bundled
 *             {@code zh-coref-default.properties} file
 * @throws Exception if pipeline construction or annotation fails
 */
public static void main(String[] args) throws Exception {
    final long begin = System.currentTimeMillis();

    final String sampleText = "俄罗斯 航空 公司 一 名 官员 在 9号 说 , "
            + "米洛舍维奇 的 儿子 马可·米洛舍维奇 9号 早上 持 外交 护照 从 俄国 首都 莫斯科 搭机 飞往 中国 大陆 北京 , "
            + "可是 就 在 稍后 就 返回 莫斯科 。 "
            + "这 名 俄国 航空 公司 官员 说 马可 是 因为 护照 问题 而 在 北京 机场 被 中共 遣返 莫斯科 。 "
            + "北京 机场 方面 的 这 项 举动 清楚 显示 中共 有意 放弃 在 总统 大选 落败 的 前 南斯拉夫 总统 米洛舍维奇 , "
            + "因此 他 在 南斯拉夫 受到 民众 厌恶 的 儿子 马可 才 会 在 北京 机场 被 中共 当局 送回 莫斯科 。 "
            + "马可 持 外交 护照 能够 顺利 搭机 离开 莫斯科 , 但是 却 在 北京 受阻 , 可 算是 踢到 了 铁板 。 "
            + "可是 这 项 消息 和 先前 外界 谣传 中共 当局 准备 提供 米洛舍维奇 和 他 的 家人 安全 庇护所 有 着 很 大 的 出入 ,"
            + " 一般 认为 在 去年 米洛舍维奇 挥兵 攻打 科索沃 境内 阿尔巴尼亚 一 分离主义 分子 的 时候 , "
            + "强力 反对 北约 组织 攻击 南斯拉夫 的 中共 , 会 全力 保护 米洛舍维奇 和 他 的 家人 及 亲信 。 "
            + "可是 从 9号 马可 被 送回 莫斯科 一 事 看 起来 , 中共 很 可能 会 放弃 米洛舍维奇 。";

    // Command-line arguments are deliberately replaced by the default Chinese coref settings.
    final String[] fixedArgs = { "-props", "edu/stanford/nlp/hcoref/properties/zh-coref-default.properties" };

    Annotation document = new Annotation(sampleText);
    Properties pipelineProps = StringUtils.argsToProperties(fixedArgs);
    StanfordCoreNLP corenlp = new StanfordCoreNLP(pipelineProps);
    corenlp.annotate(document);

    System.out.println("---");
    System.out.println("coref chains");
    for (CorefChain chain : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
        System.out.println("\t" + chain);
    }

    for (CoreMap sent : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        System.out.println("---");
        System.out.println("mentions");
        for (Mention mention : sent.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
            System.out.println("\t" + mention);
        }
    }

    // Report whole seconds only, split into minutes and remaining seconds.
    final long elapsedSeconds = (System.currentTimeMillis() - begin) / 1000;
    final long minutesPart = elapsedSeconds / 60;
    final long secondsPart = elapsedSeconds % 60;
    System.out.println("Running time " + minutesPart + "min " + secondsPart + "s");
}
Also used : CorefChain(edu.stanford.nlp.coref.data.CorefChain) Mention(edu.stanford.nlp.coref.data.Mention) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) Properties(java.util.Properties) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) Annotation(edu.stanford.nlp.pipeline.Annotation) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP)

Example 49 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

From the class CoNLLMentionExtractor, method recallErrors.

/**
 * Writes a FINER-level diagnostic entry for every gold mention span of every sentence: the
 * "RECALL ERROR" marker, the sentence, the words inside the span, and the sentence's tree.
 *
 * <p>Nothing is computed when FINER logging is disabled, since rendering the sentence and
 * the tree as strings for every mention is costly and the output would be discarded.
 *
 * @param goldMentions      gold mentions, one list per sentence; its size determines how
 *                          many sentences are visited
 * @param predictedMentions predicted mentions, one list per sentence (currently not read)
 * @param doc               annotation supplying the sentences, their tokens and trees
 * @throws IOException declared for callers; no statement visible in this method throws it
 */
private static void recallErrors(List<List<Mention>> goldMentions, List<List<Mention>> predictedMentions, Annotation doc) throws IOException {
    // NOTE(review): predictedMentions is never consulted, so every gold mention is reported
    // as a recall error regardless of whether it was predicted — confirm whether gold spans
    // that also occur among the predicted spans were meant to be skipped.
    if (!logger.isLoggable(Level.FINER)) {
        // Every statement below exists only to feed logger.finer(...).
        return;
    }
    List<CoreMap> coreMaps = doc.get(CoreAnnotations.SentencesAnnotation.class);
    int numSentences = goldMentions.size();
    for (int i = 0; i < numSentences; i++) {
        CoreMap coreMap = coreMaps.get(i);
        List<CoreLabel> words = coreMap.get(CoreAnnotations.TokensAnnotation.class);
        Tree tree = coreMap.get(TreeCoreAnnotations.TreeAnnotation.class);
        List<Mention> goldMentionsSent = goldMentions.get(i);
        List<Pair<Integer, Integer>> goldMentionsSpans = extractSpans(goldMentionsSent);
        for (Pair<Integer, Integer> mentionSpan : goldMentionsSpans) {
            logger.finer("RECALL ERROR\n");
            logger.finer(coreMap + "\n");
            // The span is read as half-open: first inclusive, second exclusive.
            for (int x = mentionSpan.first; x < mentionSpan.second; x++) {
                logger.finer(words.get(x).value() + " ");
            }
            logger.finer("\n" + tree + "\n");
        }
    }
}
Also used : TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) CoreMap(edu.stanford.nlp.util.CoreMap) Pair(edu.stanford.nlp.util.Pair)

Example 50 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

From the class CoNLLMentionExtractor, method nextDoc.

/**
 * Reads the next document from {@code reader} and assembles it into a coref
 * {@code Document}.
 *
 * <p>Steps, in order: (1) per sentence, either drop the tree annotation or (optionally
 * lemmatize and) derive basic and enhanced dependency graphs from it; (2) assign an
 * utterance index to every token, incrementing whenever the speaker annotation changes;
 * (3) run {@code stanfordProcessor} over the annotation; (4) collect tokens and trees per
 * sentence; (5) extract gold mentions and choose predicted mentions according to the
 * {@code Constants} flags; (6) log recall diagnostics and arrange everything into the
 * result.
 *
 * @return the assembled document with {@code conllDoc} attached, or {@code null} when the
 *         reader has no further document
 * @throws Exception propagated from the reader, the pipeline, or mention extraction; an
 *         {@link IOException} from {@code recallErrors} is rethrown wrapped in a
 *         {@link RuntimeException}
 */
@Override
public Document nextDoc() throws Exception {
    CoNLL2011DocumentReader.Document conllDoc = reader.getNextDocument();
    if (conllDoc == null) {
        return null;
    }

    Annotation anno = conllDoc.getAnnotation();
    List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sent : sentences) {
        if (Constants.USE_GOLD_PARSES || replicateCoNLL) {
            Tree goldTree = sent.get(TreeCoreAnnotations.TreeAnnotation.class);
            if (LEMMATIZE) {
                treeLemmatizer.transformTree(goldTree);
            }
            // Derive the dependency graphs; a failure here is logged and otherwise ignored.
            try {
                SemanticGraph enhanced = SemanticGraphFactory.makeFromTree(goldTree, SemanticGraphFactory.Mode.ENHANCED, GrammaticalStructure.Extras.NONE);
                SemanticGraph basic = SemanticGraphFactory.makeFromTree(goldTree, SemanticGraphFactory.Mode.BASIC, GrammaticalStructure.Extras.NONE);
                sent.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, basic);
                sent.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, enhanced);
            } catch (Exception e) {
                logger.log(Level.WARNING, "Exception caught during extraction of Stanford dependencies. Will ignore and continue...", e);
            }
        } else {
            // Discard the provided tree so that the Stanford parser supplies its own parse.
            sent.remove(TreeCoreAnnotations.TreeAnnotation.class);
        }
    }

    // Number the utterances: a new one starts each time the speaker differs from the
    // speaker of the preceding token. Tokens lacking a speaker get the empty string.
    String previousSpeaker = null;
    int utteranceIndex = -1;
    for (CoreLabel tok : anno.get(CoreAnnotations.TokensAnnotation.class)) {
        if (!tok.containsKey(CoreAnnotations.SpeakerAnnotation.class)) {
            tok.set(CoreAnnotations.SpeakerAnnotation.class, "");
        }
        String speakerOfToken = tok.get(CoreAnnotations.SpeakerAnnotation.class);
        if (!speakerOfToken.equals(previousSpeaker)) {
            previousSpeaker = speakerOfToken;
            utteranceIndex++;
        }
        tok.set(CoreAnnotations.UtteranceAnnotation.class, utteranceIndex);
    }

    // Run pipeline
    stanfordProcessor.annotate(anno);

    List<List<CoreLabel>> wordsPerSentence = new ArrayList<>();
    List<Tree> treePerSentence = new ArrayList<>();
    for (CoreMap sent : anno.get(CoreAnnotations.SentencesAnnotation.class)) {
        wordsPerSentence.add(sent.get(CoreAnnotations.TokensAnnotation.class));
        treePerSentence.add(sent.get(TreeCoreAnnotations.TreeAnnotation.class));
    }

    // Initialize gold mentions
    List<List<Mention>> gold = extractGoldMentions(conllDoc);

    List<List<Mention>> predicted;
    if (Constants.USE_GOLD_MENTIONS) {
        // Work on a copy: mentions may later be merged or have their IDs changed.
        predicted = makeCopy(gold);
    } else if (Constants.USE_GOLD_MENTION_BOUNDARIES) {
        predicted = ((RuleBasedCorefMentionFinder) mentionFinder).filterPredictedMentions(gold, anno, dictionaries);
    } else {
        predicted = mentionFinder.extractPredictedMentions(anno, maxID, dictionaries);
    }

    try {
        recallErrors(gold, predicted, anno);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }

    Document result = arrange(anno, wordsPerSentence, treePerSentence, predicted, gold, true);
    result.conllDoc = conllDoc;
    return result;
}
Also used : ArrayList(java.util.ArrayList) Tree(edu.stanford.nlp.trees.Tree) ArrayList(java.util.ArrayList) List(java.util.List) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) IOException(java.io.IOException) Annotation(edu.stanford.nlp.pipeline.Annotation) IOException(java.io.IOException) CoreLabel(edu.stanford.nlp.ling.CoreLabel) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) CoreMap(edu.stanford.nlp.util.CoreMap)

Aggregations

CoreMap (edu.stanford.nlp.util.CoreMap)253 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)172 CoreLabel (edu.stanford.nlp.ling.CoreLabel)102 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)61 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)53 ArrayList (java.util.ArrayList)53 Annotation (edu.stanford.nlp.pipeline.Annotation)49 Tree (edu.stanford.nlp.trees.Tree)28 Properties (java.util.Properties)23 StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)20 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)20 List (java.util.List)20 Mention (edu.stanford.nlp.coref.data.Mention)17 ArrayCoreMap (edu.stanford.nlp.util.ArrayCoreMap)17 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)13 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)12 SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation)11 MachineReadingAnnotations (edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations)9 IndexedWord (edu.stanford.nlp.ling.IndexedWord)9 IntPair (edu.stanford.nlp.util.IntPair)9