Example 46 with StanfordCoreNLP

Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.

From the class SemgrexPatternITest, method testNERStanfordDependencies.

@Test
public void testNERStanfordDependencies() throws Exception {
    String sentence = "John lives in Washington.";
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse");
    props.setProperty("parse.originalDependencies", "true");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation doc = new Annotation(sentence);
    pipeline.annotate(doc);
    CoreMap sent = doc.get(CoreAnnotations.SentencesAnnotation.class).get(0);
    SemanticGraph graph = sent.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
    graph.prettyPrint();
    String patStr = "({word:/lives/} >/prep_in/ {word:/\\QCalifornia\\E|\\QWashington\\E/} >nsubj {ner:PERSON})";
    SemgrexPattern pat = SemgrexPattern.compile(patStr);
    SemgrexMatcher mat = pat.matcher(graph, true);
    assertTrue(mat.find());
}
Also used: SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations), SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph), Properties (java.util.Properties), CoreMap (edu.stanford.nlp.util.CoreMap), StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP), Annotation (edu.stanford.nlp.pipeline.Annotation), Test (org.junit.Test)
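
For reference, a minimal sketch (not part of the test above) of how named nodes can be read out of a Semgrex match on the same graph. The pattern and the node names "pred" and "subject" are illustrative, and IndexedWord (edu.stanford.nlp.ling.IndexedWord) is assumed to be imported.

// Hedged sketch: iterating matches and retrieving named nodes.
SemgrexPattern namedPat = SemgrexPattern.compile("{}=pred >nsubj {}=subject");
SemgrexMatcher namedMat = namedPat.matcher(graph);
while (namedMat.find()) {
    IndexedWord pred = namedMat.getNode("pred");
    IndexedWord subject = namedMat.getNode("subject");
    System.out.println(subject.word() + " is the subject of " + pred.word());
}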

Example 47 with StanfordCoreNLP

Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.

From the class HeidelTimeITest, method runHeidelTimeEnglish.

@Test
public void runHeidelTimeEnglish() throws Exception {
    String text = "On Monday, some cataclysmic news about a release last Christmas was released.";
    Annotation ann = new Annotation(text);
    String date = "2017-07-07";
    ann.set(CoreAnnotations.DocDateAnnotation.class, date);
    String heideltimeEnv = System.getenv("HEIDELTIME_PATH");
    if (heideltimeEnv == null) {
        heideltimeEnv = DEFAULT_HEIDELTIME_LOCATION;
    }
    Properties defaultProps = new Properties();
    defaultProps.load(IOUtils.getInputStreamFromURLOrClasspathOrFileSystem("edu/stanford/nlp/pipeline/StanfordCoreNLP.properties"));
    Properties props = new Properties(defaultProps);
    props.setProperty("customAnnotatorClass.heideltime", "edu.stanford.nlp.time.HeidelTimeAnnotator");
    props.setProperty(HeidelTimeAnnotator.HEIDELTIME_PATH_PROPERTY, heideltimeEnv);
    props.setProperty(HeidelTimeAnnotator.HEIDELTIME_LANGUAGE_PROPERTY, "english");
    props.setProperty("annotators", "tokenize,ssplit,heideltime");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(ann);
    List<CoreMap> outputs = ann.get(TimeAnnotations.TimexAnnotations.class);
    Assert.assertEquals(2, outputs.size());
    Assert.assertEquals("Monday", outputs.get(0).get(TimeAnnotations.TimexAnnotation.class).text());
    Assert.assertEquals("2017-07-03", outputs.get(0).get(TimeAnnotations.TimexAnnotation.class).value());
    Assert.assertEquals("Christmas", outputs.get(1).get(TimeAnnotations.TimexAnnotation.class).text());
    Assert.assertEquals("2016-12-25", outputs.get(1).get(TimeAnnotations.TimexAnnotation.class).value());
}
Also used: CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations), Properties (java.util.Properties), CoreMap (edu.stanford.nlp.util.CoreMap), Annotation (edu.stanford.nlp.pipeline.Annotation), StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP), Test (org.junit.Test)
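
As a follow-up, a hedged sketch that prints the resolved temporal expressions from the same annotation instead of asserting on them. It reuses the TimeAnnotations keys from the test and additionally assumes an import of edu.stanford.nlp.time.Timex.

// Hedged sketch: listing each Timex mention with its anchored value.
for (CoreMap timexMention : ann.get(TimeAnnotations.TimexAnnotations.class)) {
    Timex timex = timexMention.get(TimeAnnotations.TimexAnnotation.class);
    System.out.println(timex.text() + " -> " + timex.value());
}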

Example 48 with StanfordCoreNLP

Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.

From the class AceReader, method main.

// simple testing code
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    AceReader r = new AceReader(new StanfordCoreNLP(props, false), false);
    r.setLoggerLevel(Level.INFO);
    r.parse("/u/scr/nlp/data/ACE2005/");
    // Annotation a = r.parse("/user/mengqiu/scr/twitter/nlp/corpus_prep/standalone/ar/data");
    // BasicEntityExtractor.saveCoNLLFiles("/tmp/conll", a, false, false);
    log.info("done");
}
Also used: Properties (java.util.Properties), StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)
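
As a side note, StringUtils.argsToProperties simply converts "-key value" command-line pairs into a Properties object, which is why the reader's pipeline can be configured entirely from the command line. A minimal sketch, with illustrative flag names:

// Hedged sketch: how argsToProperties maps command-line flags to pipeline properties.
String[] exampleArgs = {"-annotators", "tokenize,ssplit,pos,lemma,ner", "-parse.maxlen", "100"};
Properties exampleProps = StringUtils.argsToProperties(exampleArgs);
// prints: tokenize,ssplit,pos,lemma,ner
System.out.println(exampleProps.getProperty("annotators"));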

Example 49 with StanfordCoreNLP

Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.

From the class SentimentPipeline, method main.

/**
 * Runs the tree-based sentiment model on some text.
 */
public static void main(String[] args) throws IOException {
    String parserModel = null;
    String sentimentModel = null;
    String filename = null;
    String fileList = null;
    boolean stdin = false;
    boolean filterUnknown = false;
    List<Output> outputFormats = Collections.singletonList(Output.ROOT);
    Input inputFormat = Input.TEXT;
    String tlppClass = DEFAULT_TLPP_CLASS;
    for (int argIndex = 0; argIndex < args.length; ) {
        if (args[argIndex].equalsIgnoreCase("-sentimentModel")) {
            sentimentModel = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-parserModel")) {
            parserModel = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-file")) {
            filename = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-fileList")) {
            fileList = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-stdin")) {
            stdin = true;
            argIndex++;
        } else if (args[argIndex].equalsIgnoreCase("-input")) {
            inputFormat = Input.valueOf(args[argIndex + 1].toUpperCase(Locale.ROOT));
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-output")) {
            String[] formats = args[argIndex + 1].split(",");
            outputFormats = new ArrayList<>();
            for (String format : formats) {
                outputFormats.add(Output.valueOf(format.toUpperCase(Locale.ROOT)));
            }
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-filterUnknown")) {
            filterUnknown = true;
            argIndex++;
        } else if (args[argIndex].equalsIgnoreCase("-tlppClass")) {
            tlppClass = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-help")) {
            help();
            System.exit(0);
        } else {
            log.info("Unknown argument " + args[argIndex + 1]);
            help();
            throw new IllegalArgumentException("Unknown argument " + args[argIndex + 1]);
        }
    }
    // We construct two pipelines.  One handles tokenization, if
    // necessary.  The other takes tokenized sentences and converts
    // them to sentiment trees.
    Properties pipelineProps = new Properties();
    Properties tokenizerProps = null;
    if (sentimentModel != null) {
        pipelineProps.setProperty("sentiment.model", sentimentModel);
    }
    if (parserModel != null) {
        pipelineProps.setProperty("parse.model", parserModel);
    }
    if (inputFormat == Input.TREES) {
        pipelineProps.setProperty("annotators", "binarizer, sentiment");
        pipelineProps.setProperty("customAnnotatorClass.binarizer", "edu.stanford.nlp.pipeline.BinarizerAnnotator");
        pipelineProps.setProperty("binarizer.tlppClass", tlppClass);
        pipelineProps.setProperty("enforceRequirements", "false");
    } else {
        pipelineProps.setProperty("annotators", "parse, sentiment");
        pipelineProps.setProperty("parse.binaryTrees", "true");
        pipelineProps.setProperty("parse.buildgraphs", "false");
        pipelineProps.setProperty("enforceRequirements", "false");
        tokenizerProps = new Properties();
        tokenizerProps.setProperty("annotators", "tokenize, ssplit");
    }
    if (stdin && tokenizerProps != null) {
        tokenizerProps.setProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "true");
    }
    int count = 0;
    if (filename != null)
        count++;
    if (fileList != null)
        count++;
    if (stdin)
        count++;
    if (count > 1) {
        throw new IllegalArgumentException("Please only specify one of -file, -fileList or -stdin");
    }
    if (count == 0) {
        throw new IllegalArgumentException("Please specify either -file, -fileList or -stdin");
    }
    StanfordCoreNLP tokenizer = (tokenizerProps == null) ? null : new StanfordCoreNLP(tokenizerProps);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps);
    if (filename != null) {
        // Process a file.  The pipeline will do tokenization, which
        // means it will split it into sentences as best as possible
        // with the tokenizer.
        List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, filename, filterUnknown);
        for (Annotation annotation : annotations) {
            pipeline.annotate(annotation);
            for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                System.out.println(sentence);
                outputTree(System.out, sentence, outputFormats);
            }
        }
    } else if (fileList != null) {
        // for each file.
        for (String file : fileList.split(",")) {
            List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, file, filterUnknown);
            FileOutputStream fout = new FileOutputStream(file + ".out");
            PrintStream pout = new PrintStream(fout);
            for (Annotation annotation : annotations) {
                pipeline.annotate(annotation);
                for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                    pout.println(sentence);
                    outputTree(pout, sentence, outputFormats);
                }
            }
            pout.flush();
            fout.close();
        }
    } else {
        // Process stdin.  Each line will be treated as a single sentence.
        log.info("Reading in text from stdin.");
        log.info("Please enter one sentence per line.");
        log.info("Processing will end when EOF is reached.");
        BufferedReader reader = IOUtils.readerFromStdin("utf-8");
        for (String line; (line = reader.readLine()) != null; ) {
            line = line.trim();
            if (!line.isEmpty()) {
                Annotation annotation = tokenizer.process(line);
                pipeline.annotate(annotation);
                for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                    outputTree(System.out, sentence, outputFormats);
                }
            } else {
                // Output blank lines for blank lines so the tool can be
                // used for line-by-line text processing
                System.out.println();
            }
        }
    }
}
Also used: PrintStream (java.io.PrintStream), Properties (java.util.Properties), StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP), Annotation (edu.stanford.nlp.pipeline.Annotation), FileOutputStream (java.io.FileOutputStream), RNNCoreAnnotations (edu.stanford.nlp.neural.rnn.RNNCoreAnnotations), TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations), CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations), BufferedReader (java.io.BufferedReader), ArrayList (java.util.ArrayList), List (java.util.List), CoreMap (edu.stanford.nlp.util.CoreMap)
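
For context, a hedged sketch of how the per-sentence sentiment prediction could be read back once the sentiment annotator has run. It assumes imports of edu.stanford.nlp.sentiment.SentimentCoreAnnotations and edu.stanford.nlp.trees.Tree in addition to the RNNCoreAnnotations class listed above.

// Hedged sketch: reading the sentiment label and tree for each annotated sentence.
for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
    String label = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
    Tree sentimentTree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
    int predictedClass = RNNCoreAnnotations.getPredictedClass(sentimentTree);
    System.out.println(label + " (" + predictedClass + ")");
}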

Example 50 with StanfordCoreNLP

Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.

From the class SimplePronounResolution, method loadPipeline.

private void loadPipeline() {
    Properties props = new Properties();
    props.setProperty("annotators", "lemma,parse");
    props.setProperty("parse.model", SceneGraphImagePCFGParser.PCFG_MODEL);
    props.setProperty("enforceRequirements", "false");
    pipeline = new StanfordCoreNLP(props);
}
Also used: Properties (java.util.Properties), StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)
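
Because enforceRequirements is set to false, this lemma,parse pipeline expects its input Annotation to already carry tokens, sentences, and (for the lemmatizer) POS tags. A hedged usage sketch, in which the tokenization pipeline and example sentence are illustrative rather than taken from the class:

// Hedged sketch: preparing an Annotation before handing it to the pipeline built in loadPipeline().
Properties tokProps = new Properties();
tokProps.setProperty("annotators", "tokenize,ssplit,pos");
StanfordCoreNLP tokenizePipeline = new StanfordCoreNLP(tokProps);
Annotation ann = new Annotation("A man is riding a horse and it is brown.");
tokenizePipeline.annotate(ann);
// the lemma,parse pipeline can now add lemmas and parse trees
pipeline.annotate(ann);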

Aggregations

StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP): 71
Properties (java.util.Properties): 44
Annotation (edu.stanford.nlp.pipeline.Annotation): 40
CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 33
CoreMap (edu.stanford.nlp.util.CoreMap): 33
Test (org.junit.Test): 15
CoreLabel (edu.stanford.nlp.ling.CoreLabel): 12
SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 12
SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 10
CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations): 6
SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge): 6
StanfordTextProcessor (com.graphaware.nlp.processor.stanford.StanfordTextProcessor): 5
TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations): 5
PrintWriter (java.io.PrintWriter): 5
ArrayList (java.util.ArrayList): 5
AnnotatedText (com.graphaware.nlp.domain.AnnotatedText): 3
CorefChain (edu.stanford.nlp.coref.data.CorefChain): 3
GoldAnswerAnnotation (edu.stanford.nlp.ling.CoreAnnotations.GoldAnswerAnnotation): 3
SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation): 3
TokenSequencePattern (edu.stanford.nlp.ling.tokensregex.TokenSequencePattern): 3