Use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.
From the class SentimentPipeline, method main.
/**
 * Runs the tree-based sentiment model on some text.
 */
public static void main(String[] args) throws IOException {
  String parserModel = null;
  String sentimentModel = null;
  String filename = null;
  String fileList = null;
  boolean stdin = false;
  boolean filterUnknown = false;
  List<Output> outputFormats = Collections.singletonList(Output.ROOT);
  Input inputFormat = Input.TEXT;
  String tlppClass = DEFAULT_TLPP_CLASS;
  for (int argIndex = 0; argIndex < args.length; ) {
    if (args[argIndex].equalsIgnoreCase("-sentimentModel")) {
      sentimentModel = args[argIndex + 1];
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-parserModel")) {
      parserModel = args[argIndex + 1];
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-file")) {
      filename = args[argIndex + 1];
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-fileList")) {
      fileList = args[argIndex + 1];
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-stdin")) {
      stdin = true;
      argIndex++;
    } else if (args[argIndex].equalsIgnoreCase("-input")) {
      inputFormat = Input.valueOf(args[argIndex + 1].toUpperCase(Locale.ROOT));
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-output")) {
      String[] formats = args[argIndex + 1].split(",");
      outputFormats = new ArrayList<>();
      for (String format : formats) {
        outputFormats.add(Output.valueOf(format.toUpperCase(Locale.ROOT)));
      }
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-filterUnknown")) {
      filterUnknown = true;
      argIndex++;
    } else if (args[argIndex].equalsIgnoreCase("-tlppClass")) {
      tlppClass = args[argIndex + 1];
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-help")) {
      help();
      System.exit(0);
    } else {
      // The unknown flag is at argIndex itself; args[argIndex + 1] would
      // report the wrong token, or run past the end of the array
      log.info("Unknown argument " + args[argIndex]);
      help();
      throw new IllegalArgumentException("Unknown argument " + args[argIndex]);
    }
  }
  // We construct two pipelines. One handles tokenization, if
  // necessary. The other takes tokenized sentences and converts
  // them to sentiment trees.
  Properties pipelineProps = new Properties();
  Properties tokenizerProps = null;
  if (sentimentModel != null) {
    pipelineProps.setProperty("sentiment.model", sentimentModel);
  }
  if (parserModel != null) {
    pipelineProps.setProperty("parse.model", parserModel);
  }
  if (inputFormat == Input.TREES) {
    pipelineProps.setProperty("annotators", "binarizer, sentiment");
    pipelineProps.setProperty("customAnnotatorClass.binarizer", "edu.stanford.nlp.pipeline.BinarizerAnnotator");
    pipelineProps.setProperty("binarizer.tlppClass", tlppClass);
    pipelineProps.setProperty("enforceRequirements", "false");
  } else {
    pipelineProps.setProperty("annotators", "parse, sentiment");
    pipelineProps.setProperty("parse.binaryTrees", "true");
    pipelineProps.setProperty("parse.buildgraphs", "false");
    pipelineProps.setProperty("enforceRequirements", "false");
    tokenizerProps = new Properties();
    tokenizerProps.setProperty("annotators", "tokenize, ssplit");
  }
  if (stdin && tokenizerProps != null) {
    tokenizerProps.setProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "true");
  }
  int count = 0;
  if (filename != null) count++;
  if (fileList != null) count++;
  if (stdin) count++;
  if (count > 1) {
    throw new IllegalArgumentException("Please only specify one of -file, -fileList or -stdin");
  }
  if (count == 0) {
    throw new IllegalArgumentException("Please specify either -file, -fileList or -stdin");
  }
  StanfordCoreNLP tokenizer = (tokenizerProps == null) ? null : new StanfordCoreNLP(tokenizerProps);
  StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps);
  if (filename != null) {
    // Process a file. The pipeline will do tokenization, which
    // means it will split it into sentences as best as possible
    // with the tokenizer.
    List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, filename, filterUnknown);
    for (Annotation annotation : annotations) {
      pipeline.annotate(annotation);
      for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        System.out.println(sentence);
        outputTree(System.out, sentence, outputFormats);
      }
    }
  } else if (fileList != null) {
    // Process each file in the list, writing the results to <file>.out.
    for (String file : fileList.split(",")) {
      List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, file, filterUnknown);
      // try-with-resources closes the output stream even if annotation
      // fails partway through a file
      try (PrintStream pout = new PrintStream(new FileOutputStream(file + ".out"))) {
        for (Annotation annotation : annotations) {
          pipeline.annotate(annotation);
          for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            pout.println(sentence);
            outputTree(pout, sentence, outputFormats);
          }
        }
      }
    }
  } else {
    // Process stdin. Each line will be treated as a single sentence.
    log.info("Reading in text from stdin.");
    log.info("Please enter one sentence per line.");
    log.info("Processing will end when EOF is reached.");
    BufferedReader reader = IOUtils.readerFromStdin("utf-8");
    for (String line; (line = reader.readLine()) != null; ) {
      line = line.trim();
      if (!line.isEmpty()) {
        Annotation annotation = tokenizer.process(line);
        pipeline.annotate(annotation);
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
          outputTree(System.out, sentence, outputFormats);
        }
      } else {
        // Output blank lines for blank lines so the tool can be
        // used for line-by-line text processing
        System.out.println();
      }
    }
  }
}
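
For comparison, here is a minimal programmatic sketch of the same sentiment pipeline without the command-line handling. This class is not part of SentimentPipeline; it only relies on the standard tokenize/ssplit/parse/sentiment annotators and the SentimentCoreAnnotations.SentimentClass key, both of which ship with CoreNLP:

import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;

public class SentimentSketch {
  public static void main(String[] args) {
    // Minimal property set; the sentiment annotator runs on binarized parse trees
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
    props.setProperty("parse.binaryTrees", "true");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation annotation = pipeline.process("This movie was surprisingly good.");
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
      // SentimentClass holds a label such as "Positive" or "Negative"
      String sentiment = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
      System.out.println(sentiment + "\t" + sentence);
    }
  }
}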
Use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.
From the class ProcessTokensRegexRequestTest, method testTwoRequests.
/**
 * Test two patterns that get one result each.
 */
@Test
public void testTwoRequests() {
  Annotation ann = pipeline.process("This is a small test");
  CoreNLPProtos.TokensRegexRequest request = buildRequest(ann, "/small/", "/test/");
  CoreNLPProtos.TokensRegexResponse response = ProcessTokensRegexRequest.processRequest(request);
  // JUnit's assertEquals takes the expected value first, then the actual
  Assert.assertEquals(2, response.getMatchList().size());
  CoreNLPProtos.TokensRegexResponse.PatternMatch patternMatch = response.getMatchList().get(0);
  Assert.assertEquals(1, patternMatch.getMatchList().size());
  CoreNLPProtos.TokensRegexResponse.Match match = patternMatch.getMatchList().get(0);
  Assert.assertEquals(0, match.getSentence());
  Assert.assertEquals("small", match.getMatch().getText());
  Assert.assertEquals(3, match.getMatch().getBegin());
  Assert.assertEquals(4, match.getMatch().getEnd());
  patternMatch = response.getMatchList().get(1);
  Assert.assertEquals(1, patternMatch.getMatchList().size());
  match = patternMatch.getMatchList().get(0);
  Assert.assertEquals(0, match.getSentence());
  Assert.assertEquals("test", match.getMatch().getText());
  Assert.assertEquals(4, match.getMatch().getBegin());
  Assert.assertEquals(5, match.getMatch().getEnd());
}
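
The buildRequest helper is a private method of the test class and is not shown here. A plausible reconstruction, assuming the TokensRegexRequest proto exposes setDoc and addPattern (as defined in CoreNLP.proto) and using ProtobufAnnotationSerializer to convert the Annotation; treat this as a sketch rather than the actual helper:

// Hypothetical reconstruction of the test's buildRequest helper; the real
// one lives in ProcessTokensRegexRequestTest
private CoreNLPProtos.TokensRegexRequest buildRequest(Annotation ann, String... patterns) {
  ProtobufAnnotationSerializer serializer = new ProtobufAnnotationSerializer();
  CoreNLPProtos.TokensRegexRequest.Builder builder =
      CoreNLPProtos.TokensRegexRequest.newBuilder().setDoc(serializer.toProto(ann));
  for (String pattern : patterns) {
    builder.addPattern(pattern);
  }
  return builder.build();
}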
Use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.
From the class OpenIEServlet, method doGet.
/**
 * Actually perform the GET request, given all the relevant information (already sanity checked).
 * This is the meat of the servlet code.
 * @param out The writer to write the output to.
 * @param q The query string.
 */
private void doGet(PrintWriter out, String q) {
  // Clean the string a bit
  q = q.trim();
  if (q.isEmpty()) {
    return;
  }
  // Ensure the query ends with sentence-final punctuation
  char lastChar = q.charAt(q.length() - 1);
  if (lastChar != '.' && lastChar != '!' && lastChar != '?') {
    q = q + ".";
  }
  // Annotate
  Annotation ann = new Annotation(q);
  try {
    // Collect results
    Set<String> entailments = new HashSet<>();
    Set<String> triples = new LinkedHashSet<>();
    // pipeline must come before backoff
    runWithPipeline(pipeline, ann, triples, entailments);
    if (triples.isEmpty()) {
      // backoff must come after pipeline
      runWithPipeline(backoff, ann, triples, entailments);
    }
    // Write results
    out.println("{ \"ok\": true, " +
        "\"entailments\": [" + StringUtils.join(entailments, ",") + "], " +
        "\"triples\": [" + StringUtils.join(triples, ",") + "], " +
        "\"msg\": \"\" }");
  } catch (Throwable t) {
    // Quote the keys here too, so the error response is also valid JSON
    out.println("{ \"ok\": false, \"entailments\": [], \"triples\": [], \"msg\": " + quote(t.getMessage()) + " }");
  }
}
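
The quote helper is defined elsewhere in the servlet and is not shown. A minimal sketch of what it plausibly does, matching how the catch block uses it (hypothetical: escape characters that would break the surrounding JSON, and tolerate a null message):

// Hypothetical reconstruction of the quote helper; the real one is
// defined elsewhere in OpenIEServlet
private static String quote(String s) {
  if (s == null) {
    return "\"\"";
  }
  String escaped = s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n");
  return "\"" + escaped + "\"";
}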
Use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.
From the class DcorefPronounResolver, method resolvePronouns.
@Override
protected HashMap<Integer, Integer> resolvePronouns(List<CoreLabel> tokens) {
  HashMap<Integer, Integer> pronPairs = new HashMap<>(1);
  // CoreLabel implements CoreMap, so it serves here as a lightweight
  // container for the single synthetic sentence fed to the coref pipeline
  CoreMap sentence = new CoreLabel();
  sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
  sentence.set(CoreAnnotations.SentenceIndexAnnotation.class, 1);
  List<CoreMap> sentences = new ArrayList<>(1);
  sentences.add(sentence);
  Annotation annotation = new Annotation(sentences);
  pipeline.annotate(annotation);
  Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  // In each chain, map every pronominal mention (PRP/PRP$ head) to the
  // chain's first non-pronominal mention, i.e. its antecedent
  for (CorefChain chain : corefChains.values()) {
    CoreLabel firstRef = null;
    for (CorefMention m : chain.getMentionsInTextualOrder()) {
      CoreLabel lbl = tokens.get(m.headIndex - 1);
      if (lbl.tag().startsWith("PRP") && firstRef != null) {
        pronPairs.put(lbl.index(), firstRef.index());
      } else if (!lbl.tag().startsWith("PRP") && firstRef == null) {
        firstRef = lbl;
      }
    }
  }
  return pronPairs;
}
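
A hedged usage sketch of the returned map: keys and values are the 1-based CoreLabel.index() positions of the pronoun and its resolved antecedent. Here resolver and tokens stand in for whatever concrete instances the caller has set up:

// Hypothetical usage; 'resolver' and 'tokens' come from the caller's setup
Map<Integer, Integer> pairs = resolver.resolvePronouns(tokens);
for (Map.Entry<Integer, Integer> e : pairs.entrySet()) {
  CoreLabel pronoun = tokens.get(e.getKey() - 1);
  CoreLabel antecedent = tokens.get(e.getValue() - 1);
  System.out.println(pronoun.word() + " -> " + antecedent.word());
}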
Use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.
From the class GenericDataSetReader, method parse.
/**
 * Parses one file or directory with data from one domain.
 * @param path The file or directory to read
 * @throws IOException If the underlying reader fails
 */
public final Annotation parse(String path) throws IOException {
  // set by read() below, or an exception is thrown
  Annotation retVal;
  try {
    //
    // this must return a dataset Annotation. each sentence in this dataset must contain:
    // - TokensAnnotation
    // - EntityMentionAnnotation
    // - RelationMentionAnnotation
    // - EventMentionAnnotation
    // the other annotations (parse, NER) are generated in preProcessSentences
    //
    retVal = this.read(path);
  } catch (Exception ex) {
    throw new IOException(ex);
  }
  if (preProcessSentences) {
    preProcessSentences(retVal);
    if (MachineReadingProperties.trainUsePipelineNER) {
      logger.severe("Changing NER tags using the CoreNLP pipeline.");
      modifyUsingCoreNLPNER(retVal);
    }
  }
  return retVal;
}
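
A hedged sketch of how a caller might drive this API. MyDomainReader is a hypothetical stand-in for whatever concrete GenericDataSetReader subclass implements read(path), such as the ACE reader:

// Hypothetical usage; MyDomainReader is not a real CoreNLP class
GenericDataSetReader reader = new MyDomainReader();
Annotation dataset = reader.parse("/path/to/corpus");
// Each sentence now carries tokens plus entity/relation/event mentions,
// and parse/NER annotations if preProcessSentences was enabled
List<CoreMap> sentences = dataset.get(CoreAnnotations.SentencesAnnotation.class);
System.out.println("Read " + sentences.size() + " sentences from the dataset.");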