Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
From the class SemgrexPatternITest, method testNERStanfordDependencies.
@Test
public void testNERStanfordDependencies() throws Exception {
    // Build a pipeline configured to emit the original (Stanford) dependency scheme.
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse");
    props.setProperty("parse.originalDependencies", "true");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation doc = new Annotation("John lives in Washington.");
    pipeline.annotate(doc);

    // Take the dependency graph of the first (only) sentence.
    CoreMap firstSentence = doc.get(CoreAnnotations.SentencesAnnotation.class).get(0);
    SemanticGraph dependencies = firstSentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
    dependencies.prettyPrint();

    // Pattern: "lives" governing a prep_in edge to California/Washington and an
    // nsubj edge to a token tagged PERSON by the NER annotator.
    SemgrexPattern pattern = SemgrexPattern.compile(
        "({word:/lives/} >/prep_in/ {word:/\\QCalifornia\\E|\\QWashington\\E/} >nsubj {ner:PERSON})");
    SemgrexMatcher matcher = pattern.matcher(dependencies, true);
    assertTrue(matcher.find());
}
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
From the class HeidelTimeITest, method runHeidelTimeEnglish.
@Test
public void runHeidelTimeEnglish() throws Exception {
    // Note: the doubled "a a" is intentionally part of the fixture text.
    Annotation annotation = new Annotation("On Monday, some cataclysmic news about a a release last Christmas was released.");
    annotation.set(CoreAnnotations.DocDateAnnotation.class, "2017-07-07");

    // Resolve the HeidelTime installation path, preferring the environment variable.
    String heideltimePath = System.getenv("HEIDELTIME_PATH");
    if (heideltimePath == null) {
        heideltimePath = DEFAULT_HEIDELTIME_LOCATION;
    }

    // Layer the test-specific settings on top of the shipped default properties.
    Properties defaults = new Properties();
    defaults.load(IOUtils.getInputStreamFromURLOrClasspathOrFileSystem("edu/stanford/nlp/pipeline/StanfordCoreNLP.properties"));
    Properties props = new Properties(defaults);
    props.setProperty("customAnnotatorClass.heideltime", "edu.stanford.nlp.time.HeidelTimeAnnotator");
    props.setProperty(HeidelTimeAnnotator.HEIDELTIME_PATH_PROPERTY, heideltimePath);
    props.setProperty(HeidelTimeAnnotator.HEIDELTIME_LANGUAGE_PROPERTY, "english");
    props.setProperty("annotators", "tokenize,ssplit,heideltime");

    new StanfordCoreNLP(props).annotate(annotation);

    // Expect exactly two TIMEX spans, both anchored relative to the doc date.
    List<CoreMap> timexes = annotation.get(TimeAnnotations.TimexAnnotations.class);
    Assert.assertEquals(2, timexes.size());
    Assert.assertEquals("Monday", timexes.get(0).get(TimeAnnotations.TimexAnnotation.class).text());
    Assert.assertEquals("2017-07-03", timexes.get(0).get(TimeAnnotations.TimexAnnotation.class).value());
    Assert.assertEquals("Christmas", timexes.get(1).get(TimeAnnotations.TimexAnnotation.class).text());
    Assert.assertEquals("2016-12-25", timexes.get(1).get(TimeAnnotations.TimexAnnotation.class).value());
}
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
From the class AceReader, method main.
// Simple manual smoke test: parse a fixed ACE 2005 corpus directory and log when done.
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    AceReader reader = new AceReader(new StanfordCoreNLP(props, false), false);
    reader.setLoggerLevel(Level.INFO);
    reader.parse("/u/scr/nlp/data/ACE2005/");
    // Alternative invocations kept for reference:
    // Annotation a = r.parse("/user/mengqiu/scr/twitter/nlp/corpus_prep/standalone/ar/data");
    // BasicEntityExtractor.saveCoNLLFiles("/tmp/conll", a, false, false);
    log.info("done");
}
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
From the class SentimentPipeline, method main.
/**
 * Runs the tree-based sentiment model on some text.
 *
 * <p>Exactly one input source must be given: {@code -file}, {@code -fileList}
 * (comma-separated, each file written to {@code <file>.out}), or {@code -stdin}
 * (one sentence per line). Other flags select models ({@code -parserModel},
 * {@code -sentimentModel}), formats ({@code -input}, {@code -output}),
 * binarization ({@code -tlppClass}), and tree filtering ({@code -filterUnknown}).
 *
 * @param args command-line flags as described above
 * @throws IOException if an input or output file cannot be read or written
 * @throws IllegalArgumentException on an unknown flag or a bad input-source combination
 */
public static void main(String[] args) throws IOException {
    String parserModel = null;
    String sentimentModel = null;
    String filename = null;
    String fileList = null;
    boolean stdin = false;
    boolean filterUnknown = false;
    List<Output> outputFormats = Collections.singletonList(Output.ROOT);
    Input inputFormat = Input.TEXT;
    String tlppClass = DEFAULT_TLPP_CLASS;
    for (int argIndex = 0; argIndex < args.length; ) {
        if (args[argIndex].equalsIgnoreCase("-sentimentModel")) {
            sentimentModel = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-parserModel")) {
            parserModel = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-file")) {
            filename = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-fileList")) {
            fileList = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-stdin")) {
            stdin = true;
            argIndex++;
        } else if (args[argIndex].equalsIgnoreCase("-input")) {
            inputFormat = Input.valueOf(args[argIndex + 1].toUpperCase(Locale.ROOT));
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-output")) {
            String[] formats = args[argIndex + 1].split(",");
            outputFormats = new ArrayList<>();
            for (String format : formats) {
                outputFormats.add(Output.valueOf(format.toUpperCase(Locale.ROOT)));
            }
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-filterUnknown")) {
            filterUnknown = true;
            argIndex++;
        } else if (args[argIndex].equalsIgnoreCase("-tlppClass")) {
            tlppClass = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-help")) {
            help();
            System.exit(0);
        } else {
            // Bug fix: the unknown flag itself is at args[argIndex], not argIndex + 1.
            // The old code misreported the flag and threw ArrayIndexOutOfBoundsException
            // when the unknown flag was the last argument on the command line.
            log.info("Unknown argument " + args[argIndex]);
            help();
            throw new IllegalArgumentException("Unknown argument " + args[argIndex]);
        }
    }
    // We construct two pipelines. One handles tokenization, if
    // necessary. The other takes tokenized sentences and converts
    // them to sentiment trees.
    Properties pipelineProps = new Properties();
    Properties tokenizerProps = null;
    if (sentimentModel != null) {
        pipelineProps.setProperty("sentiment.model", sentimentModel);
    }
    if (parserModel != null) {
        pipelineProps.setProperty("parse.model", parserModel);
    }
    if (inputFormat == Input.TREES) {
        // Pre-parsed trees only need binarizing before the sentiment annotator.
        pipelineProps.setProperty("annotators", "binarizer, sentiment");
        pipelineProps.setProperty("customAnnotatorClass.binarizer", "edu.stanford.nlp.pipeline.BinarizerAnnotator");
        pipelineProps.setProperty("binarizer.tlppClass", tlppClass);
        pipelineProps.setProperty("enforceRequirements", "false");
    } else {
        pipelineProps.setProperty("annotators", "parse, sentiment");
        pipelineProps.setProperty("parse.binaryTrees", "true");
        pipelineProps.setProperty("parse.buildgraphs", "false");
        pipelineProps.setProperty("enforceRequirements", "false");
        tokenizerProps = new Properties();
        tokenizerProps.setProperty("annotators", "tokenize, ssplit");
    }
    if (stdin && tokenizerProps != null) {
        // Each stdin line is treated as one sentence.
        tokenizerProps.setProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "true");
    }
    // Exactly one of -file, -fileList, -stdin must be selected.
    int count = 0;
    if (filename != null)
        count++;
    if (fileList != null)
        count++;
    if (stdin)
        count++;
    if (count > 1) {
        throw new IllegalArgumentException("Please only specify one of -file, -fileList or -stdin");
    }
    if (count == 0) {
        throw new IllegalArgumentException("Please specify either -file, -fileList or -stdin");
    }
    StanfordCoreNLP tokenizer = (tokenizerProps == null) ? null : new StanfordCoreNLP(tokenizerProps);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps);
    if (filename != null) {
        // Process a file. The pipeline will do tokenization, which
        // means it will split it into sentences as best as possible
        // with the tokenizer.
        List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, filename, filterUnknown);
        for (Annotation annotation : annotations) {
            pipeline.annotate(annotation);
            for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                System.out.println(sentence);
                outputTree(System.out, sentence, outputFormats);
            }
        }
    } else if (fileList != null) {
        // Process each file in the comma-separated list, writing results
        // to <file>.out.
        for (String file : fileList.split(",")) {
            List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, file, filterUnknown);
            // try-with-resources guarantees the output streams are closed even
            // if annotation throws; the old code leaked them on failure.
            try (FileOutputStream fout = new FileOutputStream(file + ".out");
                 PrintStream pout = new PrintStream(fout)) {
                for (Annotation annotation : annotations) {
                    pipeline.annotate(annotation);
                    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                        pout.println(sentence);
                        outputTree(pout, sentence, outputFormats);
                    }
                }
                pout.flush();
            }
        }
    } else {
        // Process stdin. Each line will be treated as a single sentence.
        log.info("Reading in text from stdin.");
        log.info("Please enter one sentence per line.");
        log.info("Processing will end when EOF is reached.");
        BufferedReader reader = IOUtils.readerFromStdin("utf-8");
        for (String line; (line = reader.readLine()) != null; ) {
            line = line.trim();
            if (!line.isEmpty()) {
                Annotation annotation = tokenizer.process(line);
                pipeline.annotate(annotation);
                for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                    outputTree(System.out, sentence, outputFormats);
                }
            } else {
                // Output blank lines for blank lines so the tool can be
                // used for line-by-line text processing
                System.out.println();
            }
        }
    }
}
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
From the class SimplePronounResolution, method loadPipeline.
// Initializes the pipeline field with a minimal lemma + parse pipeline using the
// scene-graph PCFG model. enforceRequirements is set to "false" — presumably so the
// truncated annotator list is accepted without its usual upstream annotators.
private void loadPipeline() {
    Properties properties = new Properties();
    properties.setProperty("annotators", "lemma,parse");
    properties.setProperty("parse.model", SceneGraphImagePCFGParser.PCFG_MODEL);
    properties.setProperty("enforceRequirements", "false");
    pipeline = new StanfordCoreNLP(properties);
}
Aggregations