Example usage of edu.stanford.nlp.pipeline.Annotation in the CoreNLP project by stanfordnlp.
From the class SUTimePipeline, method main:
/**
 * Interactive demo: reads lines from stdin, annotates each line with the
 * SUTime annotator, and prints the extracted timex annotations.
 *
 * @param args unused
 * @throws IOException if reading from stdin fails
 */
public static void main(String[] args) throws IOException {
SUTimePipeline pipeline = new SUTimePipeline();
Annotator timeAnnotator = pipeline.getTimeAnnotator("sutime", new Properties());
// Specify UTF-8 explicitly: the no-charset InputStreamReader constructor uses
// the platform default charset (pre-Java 18), breaking non-ASCII input on some systems.
BufferedReader is = new BufferedReader(
    new InputStreamReader(System.in, java.nio.charset.StandardCharsets.UTF_8));
System.out.print("> ");
for (String line; (line = is.readLine()) != null; ) {
// No document date supplied (null), so relative time expressions are not anchored.
Annotation ann = pipeline.process(line, null, timeAnnotator);
System.out.println(ann.get(TimeAnnotations.TimexAnnotations.class));
System.out.print("> ");
}
}
Example usage of edu.stanford.nlp.pipeline.Annotation in the CoreNLP project by stanfordnlp.
From the class SUTimeServlet, method addResults:
/**
 * Annotates the "q" request parameter with the requested time annotator and
 * writes an HTML result fragment to the response.
 *
 * All request-derived strings echoed back into the page are HTML-escaped to
 * prevent reflected XSS.
 *
 * @param request  reads parameters "q" (query text), "d" (reference date),
 *                 "annotator" (defaults to "sutime"), and "includeOffsets"
 * @param response target the HTML fragment is written to
 * @throws IOException if obtaining or writing the response writer fails
 */
private void addResults(HttpServletRequest request, HttpServletResponse response) throws IOException {
// if we can't handle UTF-8, need to do something like this...
//String originalQuery = request.getParameter("q");
//String query = WebappUtil.convertString(originalQuery);
String query = request.getParameter("q");
String dateString = request.getParameter("d");
// TODO: this always returns true...
boolean dateError = !pipeline.isDateOkay(dateString);
boolean includeOffsets = parseBoolean(request.getParameter("includeOffsets"));
PrintWriter out = response.getWriter();
if (dateError) {
out.println("<br><br>Warning: unparseable date " + StringEscapeUtils.escapeHtml4(dateString));
}
if (!StringUtils.isNullOrEmpty(query)) {
Properties props = getTimeAnnotatorProperties(request);
String annotatorType = request.getParameter("annotator");
if (annotatorType == null) {
annotatorType = "sutime";
}
Annotator timeAnnotator = pipeline.getTimeAnnotator(annotatorType, props);
if (timeAnnotator != null) {
Annotation anno = pipeline.process(query, dateString, timeAnnotator);
// XSS fix: annotatorType comes from a request parameter and must be escaped
// before being echoed, exactly as the error branch below already does.
out.println("<h3>Annotated Text</h3> <em>(tagged using "
    + StringEscapeUtils.escapeHtml4(annotatorType) + "</em>)");
displayAnnotation(out, query, anno, includeOffsets);
} else {
out.println("<br><br>Error creating annotator for " + StringEscapeUtils.escapeHtml4(annotatorType));
}
}
}
Example usage of edu.stanford.nlp.pipeline.Annotation in the CoreNLP project by stanfordnlp.
From the class TimexTreeAnnotator, method annotate:
/**
 * For each timex annotation in each sentence, finds a parse-tree node whose
 * character span matches the timex span — either exactly or as the smallest
 * enclosing subtree, depending on {@code matchType} — and stores it on the
 * timex annotation under {@code TreeCoreAnnotations.TreeAnnotation}.
 *
 * @param document annotated document with sentences, tokens, parse trees,
 *                 and timex annotations already present
 */
public void annotate(Annotation document) {
for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
final List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
tree.indexSpans(0);
// add a tree to each timex annotation
for (CoreMap timexAnn : sentence.get(TimeAnnotations.TimexAnnotations.class)) {
Tree subtree;
final int timexBegin = beginOffset(timexAnn);
final int timexEnd = endOffset(timexAnn);
Iterable<Tree> possibleMatches;
switch(this.matchType) {
// only use trees that match exactly
case ExactMatch:
// BUG FIX: measure each candidate subtree (tree1), not the sentence root
// (tree) — the original predicate compared the root's span every time,
// making the filter result constant.
possibleMatches = Iterables.filter(tree, tree1 -> {
int treeBegin = beginOffset(tree1, tokens);
int treeEnd = endOffset(tree1, tokens);
return treeBegin == timexBegin && timexEnd == treeEnd;
});
Iterator<Tree> treeIter = possibleMatches.iterator();
subtree = treeIter.hasNext() ? treeIter.next() : null;
break;
// select the smallest enclosing tree
case SmallestEnclosing:
// BUG FIX: same root-vs-subtree mix-up as above.
possibleMatches = Iterables.filter(tree, tree1 -> {
int treeBegin = beginOffset(tree1, tokens);
int treeEnd = endOffset(tree1, tokens);
return treeBegin <= timexBegin && timexEnd <= treeEnd;
});
List<Tree> sortedMatches = CollectionUtils.toList(possibleMatches);
// BUG FIX: width2 previously subtracted endOffset(tree2) from itself (always
// zero), so the sort never picked the smallest enclosing tree. Also use
// Integer.compare instead of boxing into Integer.
Collections.sort(sortedMatches, (tree1, tree2) ->
    Integer.compare(
        endOffset(tree1, tokens) - beginOffset(tree1, tokens),
        endOffset(tree2, tokens) - beginOffset(tree2, tokens)));
// NOTE(review): assumes at least one enclosing subtree exists (the sentence
// root should enclose any in-sentence span) — get(0) would throw otherwise.
subtree = sortedMatches.get(0);
break;
// more cases could go here if they're added
default:
throw new RuntimeException("unexpected match type");
}
// add the subtree to the time annotation
if (subtree != null) {
timexAnn.set(TreeCoreAnnotations.TreeAnnotation.class, subtree);
}
}
}
}
Example usage of edu.stanford.nlp.pipeline.Annotation in the CoreNLP project by stanfordnlp.
From the class SUTimeSimpleParser, method parse:
/**
 * Parse a string with SUTime.
 *
 * The input is expected to contain exactly one time expression; zero or
 * multiple timexes are reported as a parsing error.
 *
 * @param str the text to parse
 * @return the Temporal for the single time expression found in {@code str}
 * @throws SUTimeParsingError if anything goes wrong (no timex, more than one
 *         timex, or any failure inside the annotation pipeline)
 */
public static Temporal parse(String str) throws SUTimeParsingError {
try {
Annotation doc = new Annotation(str);
pipeline.annotate(doc);
assert doc.get(CoreAnnotations.SentencesAnnotation.class) != null;
assert doc.get(CoreAnnotations.SentencesAnnotation.class).size() > 0;
List<CoreMap> timexAnnotations = doc.get(TimeAnnotations.TimexAnnotations.class);
if (timexAnnotations.size() > 1) {
throw new RuntimeException("Too many timexes for '" + str + "'");
}
if (timexAnnotations.isEmpty()) {
// Previously this fell through to get(0) and surfaced as an opaque
// IndexOutOfBoundsException; raise a descriptive message instead. Either
// way the catch below wraps it in SUTimeParsingError, so callers see the
// same exception type as before.
throw new RuntimeException("No timexes for '" + str + "'");
}
CoreMap timex = timexAnnotations.get(0);
return timex.get(TimeExpression.Annotation.class).getTemporal();
} catch (Exception e) {
SUTimeSimpleParser.SUTimeParsingError parsingError = new SUTimeSimpleParser.SUTimeParsingError(str);
parsingError.initCause(e);
throw parsingError;
}
}
Example usage of edu.stanford.nlp.pipeline.Annotation in the CoreNLP project by stanfordnlp.
From the class TSVUtils, method parseSentence:
/**
 * Create an Annotation object (with a single sentence) from the given specification.
 *
 * @param docid         optional document id stored on each token, the sentence, and the document
 * @param sentenceIndex optional index of this sentence within its document (-1 if absent)
 * @param gloss         sentence text; literal "\n" / "\t" escape sequences are expanded
 * @param tree          builds the primary SemanticGraph from the token list
 * @param maltTree      builds an alternative SemanticGraph from the token list
 * @param words         surface forms, one per token
 * @param lemmas        lemmas, parallel to {@code words}
 * @param pos           POS tags, parallel to {@code words}
 * @param ner           NER labels, parallel to {@code words}
 * @param sentenceid    optional sentence id, used only in error messages
 * @return a single-sentence Annotation carrying the tokens and dependency graphs
 * @throws IllegalArgumentException if any of lemmas/pos/ner differs in length from words
 */
private static Annotation parseSentence(Optional<String> docid, Optional<Integer> sentenceIndex, String gloss, Function<List<CoreLabel>, SemanticGraph> tree, Function<List<CoreLabel>, SemanticGraph> maltTree, List<String> words, List<String> lemmas, List<String> pos, List<String> ner, Optional<String> sentenceid) {
// Error checks
if (lemmas.size() != words.size()) {
throw new IllegalArgumentException("Array lengths don't match: " + words.size() + " vs " + lemmas.size() + " (sentence " + sentenceid.orElse("???") + ")");
}
if (pos.size() != words.size()) {
throw new IllegalArgumentException("Array lengths don't match: " + words.size() + " vs " + pos.size() + " (sentence " + sentenceid.orElse("???") + ")");
}
if (ner.size() != words.size()) {
throw new IllegalArgumentException("Array lengths don't match: " + words.size() + " vs " + ner.size() + " (sentence " + sentenceid.orElse("???") + ")");
}
// Create structure
List<CoreLabel> tokens = new ArrayList<>(words.size());
int beginChar = 0;
for (int i = 0; i < words.size(); ++i) {
CoreLabel token = new CoreLabel(12);
token.setWord(words.get(i));
token.setValue(words.get(i));
token.setBeginPosition(beginChar);
token.setEndPosition(beginChar + words.get(i).length());
// +1 assumes tokens are separated by a single character (presumably a space
// in the reconstructed gloss) — TODO confirm against how gloss is produced.
beginChar += words.get(i).length() + 1;
token.setLemma(lemmas.get(i));
token.setTag(pos.get(i));
token.setNER(ner.get(i));
token.set(CoreAnnotations.DocIDAnnotation.class, docid.orElse("???"));
token.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex.orElse(-1));
// Token indices are 1-based; token begin/end spans are 0-based half-open.
token.set(CoreAnnotations.IndexAnnotation.class, i + 1);
token.set(CoreAnnotations.TokenBeginAnnotation.class, i);
token.set(CoreAnnotations.TokenEndAnnotation.class, i + 1);
tokens.add(token);
}
// Expand escaped newlines/tabs that survived TSV serialization.
gloss = gloss.replace("\\n", "\n").replace("\\t", "\t");
CoreMap sentence = new ArrayCoreMap(16);
sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
SemanticGraph graph = tree.apply(tokens);
// The same graph instance is registered under all three dependency keys —
// collapsed/CC-processed variants are not computed separately here.
sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, graph);
sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, graph);
sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, graph);
SemanticGraph maltGraph = maltTree.apply(tokens);
sentence.set(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class, maltGraph);
sentence.set(CoreAnnotations.DocIDAnnotation.class, docid.orElse("???"));
sentence.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex.orElse(-1));
sentence.set(CoreAnnotations.TextAnnotation.class, gloss);
sentence.set(CoreAnnotations.TokenBeginAnnotation.class, 0);
sentence.set(CoreAnnotations.TokenEndAnnotation.class, tokens.size());
// Wrap the single sentence in a document-level Annotation.
Annotation doc = new Annotation(gloss);
doc.set(CoreAnnotations.TokensAnnotation.class, tokens);
doc.set(CoreAnnotations.SentencesAnnotation.class, Collections.singletonList(sentence));
doc.set(CoreAnnotations.DocIDAnnotation.class, docid.orElse("???"));
doc.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex.orElse(-1));
return doc;
}
End of aggregated examples.