Search in sources :

Example 31 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class CoNLLMentionExtractor method nextDoc.

/**
 * Reads the next document from the CoNLL reader and turns it into a coref {@code Document}.
 *
 * <p>Steps, in order: fetch the CoNLL document; per sentence either drop the tree or derive
 * dependency graphs from it; assign speaker and utterance indices to every token; run the
 * Stanford pipeline; collect tokens and trees per sentence; build gold and predicted mentions;
 * log recall errors; and assemble the final document.
 *
 * @return the assembled document, or {@code null} when the reader returns no further document
 * @throws Exception propagated from the reader, the pipeline or mention extraction; an
 *         {@link IOException} from {@code recallErrors} is rethrown wrapped in a
 *         {@link RuntimeException}
 */
@Override
public Document nextDoc() throws Exception {
    CoNLL2011DocumentReader.Document conllDoc = reader.getNextDocument();
    if (conllDoc == null) {
        return null;
    }
    Annotation anno = conllDoc.getAnnotation();

    for (CoreMap sent : anno.get(CoreAnnotations.SentencesAnnotation.class)) {
        if (!(Constants.USE_GOLD_PARSES || replicateCoNLL)) {
            // Drop the tree from the annotation; it gets replaced by a parse from the stanford parser
            sent.remove(TreeCoreAnnotations.TreeAnnotation.class);
            continue;
        }

        Tree goldTree = sent.get(TreeCoreAnnotations.TreeAnnotation.class);
        if (LEMMATIZE) {
            treeLemmatizer.transformTree(goldTree);
        }
        // Derive both dependency graphs from the tree; a failure here is logged and skipped
        try {
            SemanticGraph enhanced = SemanticGraphFactory.makeFromTree(goldTree, SemanticGraphFactory.Mode.ENHANCED, GrammaticalStructure.Extras.NONE);
            SemanticGraph basic = SemanticGraphFactory.makeFromTree(goldTree, SemanticGraphFactory.Mode.BASIC, GrammaticalStructure.Extras.NONE);
            sent.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, basic);
            sent.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, enhanced);
        } catch (Exception e) {
            logger.log(Level.WARNING, "Exception caught during extraction of Stanford dependencies. Will ignore and continue...", e);
        }
    }

    // Number utterances: the index advances every time the speaker differs from the previous token's
    String lastSpeaker = null;
    int utteranceIndex = -1;
    for (CoreLabel tok : anno.get(CoreAnnotations.TokensAnnotation.class)) {
        if (!token_hasSpeaker(tok)) {
            tok.set(CoreAnnotations.SpeakerAnnotation.class, "");
        }
        String speaker = tok.get(CoreAnnotations.SpeakerAnnotation.class);
        boolean sameSpeaker = speaker.equals(lastSpeaker);
        if (!sameSpeaker) {
            utteranceIndex++;
            lastSpeaker = speaker;
        }
        tok.set(CoreAnnotations.UtteranceAnnotation.class, utteranceIndex);
    }

    // Run pipeline
    stanfordProcessor.annotate(anno);

    // Gather per-sentence tokens and trees after the pipeline has run
    List<List<CoreLabel>> allWords = new ArrayList<>();
    List<Tree> allTrees = new ArrayList<>();
    for (CoreMap sent : anno.get(CoreAnnotations.SentencesAnnotation.class)) {
        allWords.add(sent.get(CoreAnnotations.TokensAnnotation.class));
        allTrees.add(sent.get(TreeCoreAnnotations.TreeAnnotation.class));
    }

    // Initialize gold mentions
    List<List<Mention>> goldMentions = extractGoldMentions(conllDoc);

    List<List<Mention>> predictedMentions;
    if (Constants.USE_GOLD_MENTIONS) {
        // Work on a copy: mentions may later be merged or have their IDs changed
        predictedMentions = makeCopy(goldMentions);
    } else if (Constants.USE_GOLD_MENTION_BOUNDARIES) {
        RuleBasedCorefMentionFinder ruleBasedFinder = (RuleBasedCorefMentionFinder) mentionFinder;
        predictedMentions = ruleBasedFinder.filterPredictedMentions(goldMentions, anno, dictionaries);
    } else {
        predictedMentions = mentionFinder.extractPredictedMentions(anno, maxID, dictionaries);
    }

    try {
        recallErrors(goldMentions, predictedMentions, anno);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    Document result = arrange(anno, allWords, allTrees, predictedMentions, goldMentions, true);
    result.conllDoc = conllDoc;
    return result;
}

/** Tells whether the token already carries a speaker annotation. */
private static boolean token_hasSpeaker(CoreLabel tok) {
    return tok.containsKey(CoreAnnotations.SpeakerAnnotation.class);
}
Also used : ArrayList(java.util.ArrayList) Tree(edu.stanford.nlp.trees.Tree) ArrayList(java.util.ArrayList) List(java.util.List) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) IOException(java.io.IOException) Annotation(edu.stanford.nlp.pipeline.Annotation) IOException(java.io.IOException) CoreLabel(edu.stanford.nlp.ling.CoreLabel) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 32 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class ChineseHcorefDemo method main.

/**
 * Demo entry point: runs the Chinese coreference pipeline over a fixed, pre-segmented news text
 * and prints the coref chains, the mentions of every sentence and the elapsed wall-clock time.
 *
 * @param args ignored; the pipeline is always configured from the bundled
 *        {@code zh-coref-default.properties} file
 * @throws Exception propagated from pipeline construction or annotation
 */
public static void main(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();

    String text = "俄罗斯 航空 公司 一 名 官员 在 9号 说 , "
        + "米洛舍维奇 的 儿子 马可·米洛舍维奇 9号 早上 持 外交 护照 从 俄国 首都 莫斯科 搭机 飞往 中国 大陆 北京 , "
        + "可是 就 在 稍后 就 返回 莫斯科 。 "
        + "这 名 俄国 航空 公司 官员 说 马可 是 因为 护照 问题 而 在 北京 机场 被 中共 遣返 莫斯科 。 "
        + "北京 机场 方面 的 这 项 举动 清楚 显示 中共 有意 放弃 在 总统 大选 落败 的 前 南斯拉夫 总统 米洛舍维奇 , "
        + "因此 他 在 南斯拉夫 受到 民众 厌恶 的 儿子 马可 才 会 在 北京 机场 被 中共 当局 送回 莫斯科 。 "
        + "马可 持 外交 护照 能够 顺利 搭机 离开 莫斯科 , 但是 却 在 北京 受阻 , 可 算是 踢到 了 铁板 。 "
        + "可是 这 项 消息 和 先前 外界 谣传 中共 当局 准备 提供 米洛舍维奇 和 他 的 家人 安全 庇护所 有 着 很 大 的 出入 ,"
        + " 一般 认为 在 去年 米洛舍维奇 挥兵 攻打 科索沃 境内 阿尔巴尼亚 一 分离主义 分子 的 时候 , "
        + "强力 反对 北约 组织 攻击 南斯拉夫 的 中共 , 会 全力 保护 米洛舍维奇 和 他 的 家人 及 亲信 。 "
        + "可是 从 9号 马可 被 送回 莫斯科 一 事 看 起来 , 中共 很 可能 会 放弃 米洛舍维奇 。";

    // The incoming arguments are replaced wholesale by the demo's fixed configuration
    args = new String[] { "-props", "edu/stanford/nlp/hcoref/properties/zh-coref-default.properties" };

    Annotation document = new Annotation(text);
    Properties pipelineProps = StringUtils.argsToProperties(args);
    StanfordCoreNLP corenlp = new StanfordCoreNLP(pipelineProps);
    corenlp.annotate(document);

    System.out.println("---");
    System.out.println("coref chains");
    for (CorefChain chain : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
        System.out.println("\t" + chain);
    }

    // One "mentions" section per sentence
    for (CoreMap sent : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        System.out.println("---");
        System.out.println("mentions");
        for (Mention mention : sent.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
            System.out.println("\t" + mention);
        }
    }

    // Report elapsed time as whole minutes plus remaining seconds
    long elapsedSeconds = (System.currentTimeMillis() - startTime) / 1000;
    long minutes = elapsedSeconds / 60;
    long seconds = elapsedSeconds % 60;
    System.out.println("Running time " + minutes + "min " + seconds + "s");
}
Also used : CorefChain(edu.stanford.nlp.coref.data.CorefChain) Mention(edu.stanford.nlp.coref.data.Mention) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) Properties(java.util.Properties) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) Annotation(edu.stanford.nlp.pipeline.Annotation) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP)

Example 33 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class RunParagraphAnnotator method runTest.

/**
 * Tokenizes and sentence-splits {@code test}, runs a {@code ParagraphAnnotator} configured with
 * {@code num} as its {@code paragraphBreak} property, and prints every sentence followed by its
 * paragraph index.
 *
 * @param test the text to annotate
 * @param num  value passed as the {@code paragraphBreak} property
 */
public static void runTest(String test, String num) {
    System.out.println("Testing: " + test + " : num newline breaks: " + num);

    // Stage 1: tokenize and split into sentences
    Annotation annotation = new Annotation(test);
    Properties pipelineProps = new Properties();
    pipelineProps.setProperty("annotators", "tokenize,ssplit");
    StanfordCoreNLP corenlp = new StanfordCoreNLP(pipelineProps);
    corenlp.annotate(annotation);

    // Stage 2: paragraph annotation on top of the sentence split
    Properties paragraphProps = new Properties();
    paragraphProps.setProperty("paragraphBreak", num);
    ParagraphAnnotator paragraphAnnotator = new ParagraphAnnotator(paragraphProps, true);
    paragraphAnnotator.annotate(annotation);

    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        System.out.println(sentence);
        System.out.println(sentence.get(CoreAnnotations.ParagraphIndexAnnotation.class));
    }
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Properties(java.util.Properties) CoreMap(edu.stanford.nlp.util.CoreMap) Annotation(edu.stanford.nlp.pipeline.Annotation) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP)

Example 34 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class SemgrexPatternITest method testNERUniversalDependencies.

/**
 * Checks that a Semgrex pattern combining word, dependency-relation and NER constraints matches
 * the collapsed, CC-processed dependency graph produced for a simple English sentence.
 *
 * @throws Exception propagated from pipeline construction or annotation
 */
@Test
public void testNERUniversalDependencies() throws Exception {
    String sentence = "John lives in Washington.";

    // All configuration must be in place before the pipeline is constructed: a property set on
    // the Properties object afterwards is presumably not seen by the already-built annotators.
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse");
    props.setProperty("parse.originalDependencies", "false");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation doc = new Annotation(sentence);
    pipeline.annotate(doc);

    CoreMap sent = doc.get(CoreAnnotations.SentencesAnnotation.class).get(0);
    SemanticGraph graph = sent.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
    graph.prettyPrint();

    // "lives" must govern an obl:in dependent (California or Washington) and a PERSON subject
    String patStr = "({word:/lives/} >/obl:in/ {word:/\\QCalifornia\\E|\\QWashington\\E/} >nsubj {ner:PERSON})";
    SemgrexPattern pat = SemgrexPattern.compile(patStr);
    SemgrexMatcher mat = pat.matcher(graph, true);
    assertTrue(mat.find());
}
Also used : SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) Properties(java.util.Properties) CoreMap(edu.stanford.nlp.util.CoreMap) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) Annotation(edu.stanford.nlp.pipeline.Annotation) Test(org.junit.Test)

Example 35 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class HeidelTimeITest method runHeidelTimeSpanish.

/**
 * Runs the HeidelTime annotator on a Spanish sentence with a fixed document date and checks
 * that exactly one temporal expression ("El lunes") is found and resolved to 2017-07-03.
 *
 * <p>The HeidelTime location is taken from the {@code HEIDELTIME_PATH} environment variable,
 * falling back to {@code DEFAULT_HEIDELTIME_LOCATION} when it is unset.
 *
 * @throws Exception propagated from loading the properties file or from the pipeline
 */
@Test
public void runHeidelTimeSpanish() throws Exception {
    String text = "El lunes, algunas noticias cataclísmicas sobre un lanzamiento de la Navidad pasada fueron liberadas.";
    Annotation ann = new Annotation(text);
    String date = "2017-07-07";
    ann.set(CoreAnnotations.DocDateAnnotation.class, date);

    String heideltimeEnv = System.getenv("HEIDELTIME_PATH");
    if (heideltimeEnv == null) {
        heideltimeEnv = DEFAULT_HEIDELTIME_LOCATION;
    }

    Properties defaultProps = new Properties();
    // Properties.load leaves the stream open, so close it explicitly to avoid leaking it.
    try (java.io.InputStream in = IOUtils.getInputStreamFromURLOrClasspathOrFileSystem("edu/stanford/nlp/pipeline/StanfordCoreNLP-spanish.properties")) {
        defaultProps.load(in);
    }

    // Spanish defaults, overridden with the HeidelTime-specific settings below
    Properties props = new Properties(defaultProps);
    props.setProperty("customAnnotatorClass.heideltime", "edu.stanford.nlp.time.HeidelTimeAnnotator");
    props.setProperty(HeidelTimeAnnotator.HEIDELTIME_PATH_PROPERTY, heideltimeEnv);
    props.setProperty(HeidelTimeAnnotator.HEIDELTIME_LANGUAGE_PROPERTY, "spanish");
    props.setProperty("annotators", "tokenize,ssplit,heideltime");

    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(ann);

    List<CoreMap> outputs = ann.get(TimeAnnotations.TimexAnnotations.class);
    // Unfortunately, HeidelTime doesn't get Navidad :-(
    Assert.assertEquals(1, outputs.size());
    Assert.assertEquals("El lunes", outputs.get(0).get(TimeAnnotations.TimexAnnotation.class).text());
    Assert.assertEquals("2017-07-03", outputs.get(0).get(TimeAnnotations.TimexAnnotation.class).value());
    // Expected once HeidelTime recognizes "Navidad":
    // Assert.assertEquals("Navidad", outputs.get(1).get(TimeAnnotations.TimexAnnotation.class).text());
    // Assert.assertEquals("2016-12-25", outputs.get(1).get(TimeAnnotations.TimexAnnotation.class).value());
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Properties(java.util.Properties) CoreMap(edu.stanford.nlp.util.CoreMap) Annotation(edu.stanford.nlp.pipeline.Annotation) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) Test(org.junit.Test)

Aggregations

Annotation (edu.stanford.nlp.pipeline.Annotation): 138; CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 84; CoreMap (edu.stanford.nlp.util.CoreMap): 77; CoreLabel (edu.stanford.nlp.ling.CoreLabel): 48; StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP): 43; SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 31; ArrayList (java.util.ArrayList): 31; Properties (java.util.Properties): 28; SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 21; TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations): 18; Test (org.junit.Test): 18; SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation): 15; Tree (edu.stanford.nlp.trees.Tree): 14; TokensAnnotation (edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation): 12; TreeAnnotation (edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation): 12; List (java.util.List): 12; CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations): 11; IOException (java.io.IOException): 11; CorefChain (edu.stanford.nlp.coref.data.CorefChain): 10; RNNCoreAnnotations (edu.stanford.nlp.neural.rnn.RNNCoreAnnotations): 10