Use of edu.stanford.nlp.pipeline.Annotation in project neo4j-nlp-stanfordnlp by graphaware:
the class StanfordTextProcessor, method annotateTags.
/**
 * Runs the tokenizer pipeline over {@code text} and turns the tokens of the
 * first sentence into a list of valid tags.
 *
 * Only the first sentence is inspected; tokens whose tag is null or whose
 * lemma fails {@code checkLemmaIsValid} are dropped.
 */
@Override
public List<Tag> annotateTags(String text, String lang) {
    Annotation document = new Annotation(text);
    pipelines.get(TOKENIZER).annotate(document);
    List<Tag> result = new ArrayList<>();
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    Optional<CoreMap> firstSentence = sentences.stream().findFirst();
    if (!firstSentence.isPresent()) {
        return result;
    }
    firstSentence.get().get(CoreAnnotations.TokensAnnotation.class).stream()
            .map(token -> getTag(lang, token))
            .filter(tag -> tag != null && checkLemmaIsValid(tag.getLemma()))
            .forEach(result::add);
    return result;
}
Use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp:
the class CoNLLDocumentReader, method main.
/**
 * Reads CoNLL documents and dumps tab-separated output, mainly for debugging.
 *
 * Properties:
 *   -i      input path (required)
 *   -o      output file (required)
 *   -ext    optional file-extension filter for the reader
 *   -debug  enable verbose per-document logging (default: false)
 */
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    // Previously hard-coded to false; now settable via -debug while keeping
    // the same default, so existing invocations behave identically.
    boolean debug = Boolean.parseBoolean(props.getProperty("debug", "false"));
    String filepath = props.getProperty("i");
    String outfile = props.getProperty("o");
    if (filepath == null || outfile == null) {
        usage();
        System.exit(-1);
    }
    logger.info("Writing to " + outfile);
    String ext = props.getProperty("ext");
    Options options;
    if (ext != null) {
        options = new Options(".*" + ext + "$");
    } else {
        options = new Options();
    }
    options.annotateTreeCoref = true;
    options.annotateTreeNer = true;
    CorpusStats corpusStats = new CorpusStats();
    CoNLLDocumentReader reader = new CoNLLDocumentReader(filepath, options);
    int docCnt = 0;
    int sentCnt = 0;
    int tokenCnt = 0;
    // try-with-resources fixes a leak: the writer was never closed when an
    // exception escaped the processing loop.
    try (PrintWriter fout = new PrintWriter(outfile)) {
        for (CoNLLDocument doc; (doc = reader.getNextDocument()) != null; ) {
            corpusStats.process(doc);
            docCnt++;
            Annotation anno = doc.getAnnotation();
            if (debug) {
                logger.info("Document " + docCnt + ": " + anno.get(CoreAnnotations.DocIDAnnotation.class));
            }
            for (CoreMap sentence : anno.get(CoreAnnotations.SentencesAnnotation.class)) {
                if (debug) {
                    logger.info("Parse: " + sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
                    logger.info("Sentence Tokens: " + StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class), ","));
                }
                writeTabSep(fout, sentence, doc.corefChainMap);
                sentCnt++;
                tokenCnt += sentence.get(CoreAnnotations.TokensAnnotation.class).size();
            }
            if (debug) {
                for (CoreMap ner : doc.nerChunks) {
                    logger.info("NER Chunk: " + ner);
                }
                for (String id : doc.corefChainMap.keySet()) {
                    logger.info("Coref: " + id + " = " + StringUtils.join(doc.corefChainMap.get(id), ";"));
                }
            }
        }
    }
    logger.info("Total document count: " + docCnt);
    logger.info("Total sentence count: " + sentCnt);
    logger.info("Total token count: " + tokenCnt);
    logger.info(corpusStats);
}
Use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp:
the class RothCONLL04Reader, method main.
/**
 * Simple smoke test: parses the Roth CoNLL-04 corpus with a pipeline built
 * from the command-line properties and prints the resulting dataset.
 */
public static void main(String[] args) throws Exception {
    Properties properties = StringUtils.argsToProperties(args);
    RothCONLL04Reader corpusReader = new RothCONLL04Reader();
    corpusReader.setLoggerLevel(Level.INFO);
    corpusReader.setProcessor(new StanfordCoreNLP(properties));
    Annotation dataset = corpusReader.parse("/u/nlp/data/RothCONLL04/conll04.corp");
    System.out.println(AnnotationUtils.datasetToString(dataset));
}
Use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp:
the class AnnotationUtils, method createDataset.
/**
 * Wraps a list of sentences (as CoreMaps) in a fresh Annotation whose
 * document text is empty.
 *
 * @param sentences the sentences to attach to the new Annotation
 * @return a new Annotation containing the given sentences
 */
public static Annotation createDataset(List<CoreMap> sentences) {
    Annotation wrapped = new Annotation("");
    addSentences(wrapped, sentences);
    return wrapped;
}
Use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp:
the class CoreNLPServlet, method addResults.
/**
 * Annotates the "input" request parameter with the servlet's pipeline and
 * writes the result to the response in the requested output format.
 *
 * Does nothing when the input is missing or blank; rejects inputs longer
 * than {@code MAXIMUM_QUERY_LENGTH} with an explanatory message. Falls back
 * to {@code defaultFormat} when no "outputFormat" parameter is given, and to
 * the visualiser for unrecognized formats.
 */
public void addResults(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    String rawInput = request.getParameter("input");
    if (rawInput == null) {
        return;
    }
    String input = rawInput.trim();
    if (input.isEmpty()) {
        return;
    }
    PrintWriter writer = response.getWriter();
    if (input.length() > MAXIMUM_QUERY_LENGTH) {
        writer.print("<div>This query is too long. If you want to run very long queries, please download and use our <a href=\"http://nlp.stanford.edu/software/corenlp.html\">publicly released distribution</a>.</div>");
        return;
    }
    Annotation annotation = new Annotation(input);
    pipeline.annotate(annotation);
    String format = request.getParameter("outputFormat");
    if (format == null || format.trim().isEmpty()) {
        format = this.defaultFormat;
    }
    switch (format) {
        case "xml":
            outputXml(writer, annotation);
            break;
        case "json":
            outputJson(writer, annotation);
            break;
        case "conll":
            outputCoNLL(writer, annotation);
            break;
        case "pretty":
            outputPretty(writer, annotation);
            break;
        default:
            outputVisualise(writer, annotation);
            break;
    }
}
Aggregations