Use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.
The class OpenIEServlet, method doGet.
/**
 * Actually perform the GET request, given all the relevant information (already sanity-checked).
 * This is the meat of the servlet code.
 *
 * @param out The writer to write the output to.
 * @param q The query string.
 */
private void doGet(PrintWriter out, String q) {
  // Clean the string a bit
  q = q.trim();
  if (q.isEmpty()) {
    return;
  }
  // Ensure the query ends with sentence-final punctuation
  char lastChar = q.charAt(q.length() - 1);
  if (lastChar != '.' && lastChar != '!' && lastChar != '?') {
    q = q + ".";
  }
  // Annotate
  Annotation ann = new Annotation(q);
  try {
    // Collect results
    Set<String> entailments = new HashSet<>();
    Set<String> triples = new LinkedHashSet<>();
    // Run the primary pipeline first
    runWithPipeline(pipeline, ann, triples, entailments);
    if (triples.isEmpty()) {
      // Fall back to the simpler pipeline only if the primary one found nothing
      runWithPipeline(backoff, ann, triples, entailments);
    }
    // Write results
    out.println("{ " +
        "\"ok\":true, " +
        "\"entailments\": [" + StringUtils.join(entailments, ",") + "], " +
        "\"triples\": [" + StringUtils.join(triples, ",") + "], " +
        "\"msg\": \"\"" +
        " }");
  } catch (Throwable t) {
    // Quote the keys so the error response is valid JSON, matching the success path
    out.println("{\"ok\":false, \"entailments\":[], \"triples\":[], \"msg\":" + quote(t.getMessage()) + "}");
  }
}
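The helpers runWithPipeline and quote are defined elsewhere on the servlet and are not shown above. A minimal sketch of what the pipeline helper could look like, assuming triples are serialized as JSON-escaped arrays via the servlet's quote helper (the real OpenIEServlet may collect them differently):

private void runWithPipeline(StanfordCoreNLP pipeline, Annotation ann, Set<String> triples, Set<String> entailments) {
  pipeline.annotate(ann);
  for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
    // The OpenIE annotator attaches RelationTriples under NaturalLogicAnnotations
    for (RelationTriple triple : sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class)) {
      triples.add("[" + quote(triple.subjectGloss()) + "," + quote(triple.relationGloss()) + "," + quote(triple.objectGloss()) + "]");
    }
  }
  // Collecting entailed clauses into the entailments set is omitted from this sketch.
}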
Use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.
The class TokensRegexDemo, method main.
public static void main(String[] args) throws IOException {
  String rules;
  if (args.length > 0) {
    rules = args[0];
  } else {
    rules = "edu/stanford/nlp/ling/tokensregex/demo/rules/expr.rules.txt";
  }
  PrintWriter out;
  if (args.length > 2) {
    out = new PrintWriter(args[2]);
  } else {
    out = new PrintWriter(System.out);
  }
  CoreMapExpressionExtractor<MatchedExpression> extractor =
      CoreMapExpressionExtractor.createExtractorFromFiles(TokenSequencePattern.getNewEnv(), rules);
  StanfordCoreNLP pipeline = new StanfordCoreNLP(
      PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
  Annotation annotation;
  if (args.length > 1) {
    annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[1]));
  } else {
    annotation = new Annotation("( ( five plus three plus four ) * 2 ) divided by three");
  }
  pipeline.annotate(annotation);
  // An Annotation is a Map, and you can get and use the various analyses individually.
  out.println();
  // The toString() method on an Annotation just prints the text of the Annotation,
  // but you can see what is in it with other methods like toShorterString().
  out.println("The top level annotation");
  out.println(annotation.toShorterString());
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    List<MatchedExpression> matchedExpressions = extractor.extractExpressions(sentence);
    for (MatchedExpression matched : matchedExpressions) {
      // Print out matched text and value
      out.println("Matched expression: " + matched.getText() + " with value " + matched.getValue());
      // Print out token information
      CoreMap cm = matched.getAnnotation();
      for (CoreLabel token : cm.get(CoreAnnotations.TokensAnnotation.class)) {
        String word = token.get(CoreAnnotations.TextAnnotation.class);
        String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
        String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
        out.println("  Matched token: word=" + word + ", lemma=" + lemma + ", pos=" + pos + ", ne=" + ne);
      }
    }
  }
  out.flush();
}
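The rules file drives the extractor above, but token patterns can also be compiled and matched directly. A small self-contained sketch using an inline TokenSequencePattern (the pattern string here is a made-up example, not taken from expr.rules.txt):

public static void inlinePatternDemo() {
  StanfordCoreNLP pipeline = new StanfordCoreNLP(
      PropertiesUtils.asProperties("annotators", "tokenize,ssplit"));
  Annotation annotation = new Annotation("five plus three plus four");
  pipeline.annotate(annotation);
  List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
  // Match "<number word> plus <number word>" over the token sequence
  TokenSequencePattern pattern = TokenSequencePattern.compile(
      "[{word:/five|three|four/}] [{word:\"plus\"}] [{word:/five|three|four/}]");
  TokenSequenceMatcher matcher = pattern.getMatcher(tokens);
  while (matcher.find()) {
    System.out.println("Matched: " + matcher.group());
  }
}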
Use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.
The class TokensRegexRetokenizeDemo, method runPipeline.
public static void runPipeline(StanfordCoreNLP pipeline, String text, PrintWriter out) {
  Annotation annotation = new Annotation(text);
  pipeline.annotate(annotation);
  // An Annotation is a Map, and you can get and use the various analyses individually.
  out.println();
  // The toString() method on an Annotation just prints the text of the Annotation,
  // but you can see what is in it with other methods like toShorterString().
  out.println("The top level annotation");
  out.println(annotation.toShorterString());
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    // Print out token annotations: word, lemma, pos, ne, and normalized ne
    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      String word = token.get(CoreAnnotations.TextAnnotation.class);
      String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
      String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
      String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
      String normalized = token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class);
      out.println("token: word=" + word + ", lemma=" + lemma + ", pos=" + pos + ", ne=" + ne + ", normalized=" + normalized);
    }
  }
  out.flush();
}
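A sketch of a caller that compares default tokenization against a pipeline that retokenizes via a TokensRegex rules file. The rules path below is a hypothetical placeholder; the wiring uses the standard customAnnotatorClass.* property convention for edu.stanford.nlp.pipeline.TokensRegexAnnotator:

public static void main(String[] args) throws IOException {
  PrintWriter out = new PrintWriter(System.out);
  String text = "Do we tokenize on hyphens? Dr. So-and-so says no.";
  // Baseline: default tokenization
  StanfordCoreNLP defaultPipeline = new StanfordCoreNLP(
      PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
  runPipeline(defaultPipeline, text, out);
  // With a retokenizing TokensRegex annotator inserted after tokenize
  StanfordCoreNLP retokenizePipeline = new StanfordCoreNLP(PropertiesUtils.asProperties(
      "annotators", "tokenize,retokenize,ssplit,pos,lemma,ner",
      "customAnnotatorClass.retokenize", "edu.stanford.nlp.pipeline.TokensRegexAnnotator",
      "retokenize.rules", "path/to/retokenize.rules.txt"));  // hypothetical path
  runPipeline(retokenizePipeline, text, out);
}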
Use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.
The class SentimentPipeline, method main.
/** Runs the tree-based sentiment model on some text. */
public static void main(String[] args) throws IOException {
  String parserModel = null;
  String sentimentModel = null;
  String filename = null;
  String fileList = null;
  boolean stdin = false;
  boolean filterUnknown = false;
  List<Output> outputFormats = Collections.singletonList(Output.ROOT);
  Input inputFormat = Input.TEXT;
  String tlppClass = DEFAULT_TLPP_CLASS;
  for (int argIndex = 0; argIndex < args.length; ) {
    if (args[argIndex].equalsIgnoreCase("-sentimentModel")) {
      sentimentModel = args[argIndex + 1];
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-parserModel")) {
      parserModel = args[argIndex + 1];
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-file")) {
      filename = args[argIndex + 1];
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-fileList")) {
      fileList = args[argIndex + 1];
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-stdin")) {
      stdin = true;
      argIndex++;
    } else if (args[argIndex].equalsIgnoreCase("-input")) {
      inputFormat = Input.valueOf(args[argIndex + 1].toUpperCase());
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-output")) {
      String[] formats = args[argIndex + 1].split(",");
      outputFormats = new ArrayList<>();
      for (String format : formats) {
        outputFormats.add(Output.valueOf(format.toUpperCase()));
      }
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-filterUnknown")) {
      filterUnknown = true;
      argIndex++;
    } else if (args[argIndex].equalsIgnoreCase("-tlppClass")) {
      tlppClass = args[argIndex + 1];
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-help")) {
      help();
      System.exit(0);
    } else {
      // Report the offending flag itself; indexing args[argIndex + 1] here
      // would name the wrong token and can run past the end of args.
      log.info("Unknown argument " + args[argIndex]);
      help();
      throw new IllegalArgumentException("Unknown argument " + args[argIndex]);
    }
  }
  // We construct two pipelines. One handles tokenization, if
  // necessary. The other takes tokenized sentences and converts
  // them to sentiment trees.
  Properties pipelineProps = new Properties();
  Properties tokenizerProps = null;
  if (sentimentModel != null) {
    pipelineProps.setProperty("sentiment.model", sentimentModel);
  }
  if (parserModel != null) {
    pipelineProps.setProperty("parse.model", parserModel);
  }
  if (inputFormat == Input.TREES) {
    pipelineProps.setProperty("annotators", "binarizer, sentiment");
    pipelineProps.setProperty("customAnnotatorClass.binarizer", "edu.stanford.nlp.pipeline.BinarizerAnnotator");
    pipelineProps.setProperty("binarizer.tlppClass", tlppClass);
    pipelineProps.setProperty("enforceRequirements", "false");
  } else {
    pipelineProps.setProperty("annotators", "parse, sentiment");
    pipelineProps.setProperty("enforceRequirements", "false");
    tokenizerProps = new Properties();
    tokenizerProps.setProperty("annotators", "tokenize, ssplit");
  }
  if (stdin && tokenizerProps != null) {
    tokenizerProps.setProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "true");
  }
  // Exactly one input source must be specified
  int count = 0;
  if (filename != null) {
    count++;
  }
  if (fileList != null) {
    count++;
  }
  if (stdin) {
    count++;
  }
  if (count > 1) {
    throw new IllegalArgumentException("Please only specify one of -file, -fileList or -stdin");
  }
  if (count == 0) {
    throw new IllegalArgumentException("Please specify either -file, -fileList or -stdin");
  }
  StanfordCoreNLP tokenizer = (tokenizerProps == null) ? null : new StanfordCoreNLP(tokenizerProps);
  StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps);
  if (filename != null) {
    // Process a file. The pipeline will do tokenization, which
    // means it will split it into sentences as best as possible
    // with the tokenizer.
    List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, filename, filterUnknown);
    for (Annotation annotation : annotations) {
      pipeline.annotate(annotation);
      for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        System.out.println(sentence);
        outputTree(System.out, sentence, outputFormats);
      }
    }
  } else if (fileList != null) {
    // Process each file in the comma-separated list, writing the
    // output to <file>.out.
    for (String file : fileList.split(",")) {
      List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, file, filterUnknown);
      FileOutputStream fout = new FileOutputStream(file + ".out");
      PrintStream pout = new PrintStream(fout);
      for (Annotation annotation : annotations) {
        pipeline.annotate(annotation);
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
          pout.println(sentence);
          outputTree(pout, sentence, outputFormats);
        }
      }
      pout.flush();
      fout.close();
    }
  } else {
    // Process stdin. Each line will be treated as a single sentence.
    log.info("Reading in text from stdin.");
    log.info("Please enter one sentence per line.");
    log.info("Processing will end when EOF is reached.");
    BufferedReader reader = IOUtils.readerFromStdin("utf-8");
    for (String line; (line = reader.readLine()) != null; ) {
      line = line.trim();
      if (!line.isEmpty()) {
        Annotation annotation = tokenizer.process(line);
        pipeline.annotate(annotation);
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
          outputTree(System.out, sentence, outputFormats);
        }
      } else {
        // Output blank lines for blank lines so the tool can be
        // used for line-by-line text processing
        System.out.println();
      }
    }
  }
}
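The command-line plumbing above obscures the core annotator usage. For comparison, a minimal programmatic sketch: the sentiment annotator stores its predicted label for each sentence under SentimentCoreAnnotations.SentimentClass:

Properties props = new Properties();
props.setProperty("annotators", "tokenize,ssplit,parse,sentiment");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
Annotation annotation = new Annotation("The movie was surprisingly good.");
pipeline.annotate(annotation);
for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
  // e.g. "Positive"; one of five classes from Very negative to Very positive
  System.out.println(sentence.get(SentimentCoreAnnotations.SentimentClass.class) + ": " + sentence);
}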
Use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.
The class Sentence, method openieTriples.
/**
 * Get the OpenIE triples associated with this sentence.
 * Note that this function may be slower than you would expect, as it has to
 * convert the underlying Protobuf representation back into {@link CoreLabel}s.
 *
 * @param props The properties to use for the OpenIE annotator.
 * @return A collection of {@link RelationTriple} objects representing the OpenIE triples in the sentence.
 */
public Collection<RelationTriple> openieTriples(Properties props) {
  document.runOpenie(props);
  synchronized (impl) {
    // Convert the underlying tokens back to CoreLabels (see the note in the Javadoc above)
    List<CoreLabel> tokens = asCoreLabels();
    Annotation doc = document.asAnnotation();
    return impl.getOpenieTripleList().stream()
        .map(x -> ProtobufAnnotationSerializer.fromProto(x, doc, this.sentenceIndex()))
        .collect(Collectors.toList());
  }
}
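A short usage sketch with the simple API, where Document and Sentence manage the pipeline internally and the no-argument openieTriples() overload runs OpenIE with default properties:

Document doc = new Document("Barack Obama was born in Hawaii.");
for (Sentence sent : doc.sentences()) {
  for (RelationTriple triple : sent.openieTriples()) {
    // RelationTriple exposes the extraction confidence and the subject/relation/object spans
    System.out.println(triple.confidence + "\t" + triple.subjectGloss() + "\t" + triple.relationGloss() + "\t" + triple.objectGloss());
  }
}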