Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
The class CoreNLPServlet, method init.
public void init() throws ServletException {
  pipeline = new StanfordCoreNLP();
  // Load the XSL stylesheet used to render the pipeline's XML output as HTML.
  String xslPath = getServletContext().getRealPath("/WEB-INF/data/CoreNLP-to-HTML.xsl");
  try {
    Builder builder = new Builder();
    Document stylesheet = builder.build(new File(xslPath));
    corenlpTransformer = new XSLTransform(stylesheet);
  } catch (Exception e) {
    throw new ServletException(e);
  }
}
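For context, here is a minimal sketch (not taken from CoreNLPServlet itself) of how the two fields initialized above could be used together: annotate request text, serialize it as CoreNLP XML, and run it through the CoreNLP-to-HTML.xsl transform. The helper name renderAsHtml and the idea of returning the HTML as a String are assumptions made for illustration.

import java.io.StringReader;
import java.io.StringWriter;
import nu.xom.Builder;
import nu.xom.Document;
import nu.xom.Nodes;
import nu.xom.xslt.XSLTransform;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

// Hypothetical helper, not part of the servlet above: turns raw text into HTML
// by piping the pipeline's XML output through the stylesheet loaded in init().
String renderAsHtml(StanfordCoreNLP pipeline, XSLTransform corenlpTransformer, String text) throws Exception {
  Annotation annotation = new Annotation(text);
  pipeline.annotate(annotation);
  StringWriter xml = new StringWriter();
  pipeline.xmlPrint(annotation, xml);                      // serialize annotations as CoreNLP XML
  Document xmlDoc = new Builder().build(new StringReader(xml.toString()));
  Nodes htmlNodes = corenlpTransformer.transform(xmlDoc);  // apply CoreNLP-to-HTML.xsl
  return XSLTransform.toDocument(htmlNodes).toXML();
}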
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
The class SentenceTest, method testFromCoreMapCorrectnessCheck.
@Test
public void testFromCoreMapCorrectnessCheck() {
  StanfordCoreNLP pipeline = new StanfordCoreNLP(new Properties() {
    {
      setProperty("annotators", "tokenize,ssplit");
    }
  });
  Annotation ann = new Annotation("This is a sentence.");
  pipeline.annotate(ann);
  CoreMap map = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0);
  Sentence s = new Sentence(map);
  assertEquals(ann.get(CoreAnnotations.TextAnnotation.class), s.text());
  assertEquals("This", s.word(0));
  assertEquals(5, s.length());
}
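For comparison (not part of the test above), the simple API can also build a Sentence straight from raw text, tokenizing it lazily without constructing a StanfordCoreNLP pipeline by hand. A minimal sketch:

import edu.stanford.nlp.simple.Sentence;

Sentence s = new Sentence("This is a sentence.");
System.out.println(s.word(0));   // "This"
System.out.println(s.length());  // 5 tokens: "This", "is", "a", "sentence", "."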
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
The class SentenceTest, method testFromCoreMapCrashCheck.
@Test
public void testFromCoreMapCrashCheck() {
  StanfordCoreNLP pipeline = new StanfordCoreNLP(new Properties() {
    {
      setProperty("annotators", "tokenize,ssplit");
    }
  });
  Annotation ann = new Annotation("This is a sentence.");
  pipeline.annotate(ann);
  CoreMap map = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0);
  new Sentence(map);
}
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project Info-Evaluation by TechnionYP5777.
The class AnalyzeParagraph, method AnalyzeArrestsQuery.
public void AnalyzeArrestsQuery() {
  /*
   * The first step is initializing the Stanford CoreNLP pipeline (the
   * pipeline will later be used to evaluate the text and annotate it). The
   * pipeline is initialized with a Properties object, which sets the
   * annotators, models, training data and so on that are needed, so that
   * the pipeline loads only the models you need.
   */
  final Properties props = new Properties();
  /*
   * The "annotators" property tells the pipeline which annotators should be
   * loaded into our pipeline object. See
   * http://nlp.stanford.edu/software/corenlp.shtml for a complete reference
   * to the "annotators" values you can set here and what each contributes
   * to the analysis.
   */
  props.put("annotators", "tokenize, ssplit, pos, regexner, parse, lemma, natlog, openie");
  final StanfordCoreNLP pipeLine = new StanfordCoreNLP(props);
  // inputText will be the text to evaluate in this example.
  int index = 0;
  for (final Element paragraph : this.Paragraphs) {
    final String inputText = paragraph.text() + "";
    final Annotation document = new Annotation(inputText);
    System.out.println(document);
    String reason = "";
    // More details about the reason, e.g. where it happened.
    String details = "";
    String aux = "";
    String prefixDetails = "";
    // This string tells us what the penalty for the arrest is.
    String penalty = "";
    // Finally, we use the pipeline to annotate the document we created.
    pipeLine.annotate(document);
    for (final CoreMap sentence : document.get(SentencesAnnotation.class)) {
      Sentence sent = new Sentence(sentence);
      if (sent.text().contains("sentenced") || sent.text().contains("juried")
          || sent.text().contains("sent to jail") || sent.text().contains("charged")) {
        penalty = ArrestPenalty(sent);
        System.out.println("Sentenced for:" + penalty);
      }
      final SemanticGraph dependencies = sentence.get(CollapsedDependenciesAnnotation.class);
      for (final IndexedWord root : dependencies.getRoots())
        for (final SemanticGraphEdge edge : dependencies.getOutEdgesSorted(root)) {
          final IndexedWord dep = edge.getDependent();
          final String rel = edge.getRelation() + "";
          if (!"arrested".equals(edge.getGovernor().word()))
            switch (rel) {
              case "nmod:in":
                details += "in" + " " + dep.word() + " ";
                break;
              case "nmod:during":
                details += "during" + " " + dep.word() + " ";
                break;
              case "nmod:at":
                details += "at" + " " + dep.word() + " ";
                break;
            }
          else {
            // Find the reason for the arrest in the paragraph.
            if ("advcl".equals(rel) || "advcl:for".equals(rel) || "nmod:for".equals(rel)) {
              for (final SemanticGraphEdge keshet : dependencies.getOutEdgesSorted(dep)) {
                final String rel2 = keshet.getRelation() + "";
                final IndexedWord dep2 = keshet.getDependent();
                if ("amod".equals(rel2) || "dobj".equals(rel2)) {
                  reason += dep2.word() + " ";
                  try {
                    prefixDetails = (sentence + "").substring(dep.beginPosition(), dep2.endPosition());
                  } catch (IndexOutOfBoundsException e) {
                    prefixDetails = sentence + "";
                  }
                }
                if ("xcomp".equals(rel2))
                  aux += " " + dep2.word();
                switch (rel2) {
                  case "nmod:in":
                    final String longLocation = dep2.word();
                    details += "in ";
                    for (final SemanticGraphEdge keshet2 : dependencies.getOutEdgesSorted(dep2))
                      if ("compound".equals(keshet2.getRelation() + ""))
                        details += keshet2.getDependent().word() + " ";
                    details += longLocation;
                    break;
                  case "nmod:during":
                    details += "during" + " " + dep2.word() + " ";
                    break;
                  case "nmod:under":
                    details += "under " + dep2.word() + " ";
                    break;
                  case "nmod:of":
                    details += "of " + dep2.word();
                    break;
                  case "nmod:at":
                    details += "at" + " " + dep2.word() + " ";
                    break;
                }
                if ("suspicion".equals(keshet.getSource().word()) && "acl:of".equals(rel2))
                  details += dep2.word();
              }
              reason += dep.word();
              reason += aux;
            }
          }
        }
      if (!"".equals(prefixDetails.trim())) {
        this.Information.add(prefixDetails.trim());
        System.out.println(this.Information.get(index) + "");
        ++index;
      }
      this.Information.add((reason + " " + details).trim());
      System.out.println(this.Information.get(index) + "");
      ++index;
    }
  }
}
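To make the traversal above easier to follow, here is a minimal, self-contained sketch that prints every out-edge of each sentence root, i.e. the governor, relation and dependent that the switch statements above match against. The sample sentence and the relation labels in the comment are assumptions; the exact labels produced depend on the parser models and CoreNLP version.

import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.util.CoreMap;

public class DependencyEdgeDump {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document = new Annotation("He was arrested in Boston for stealing cars.");
    pipeline.annotate(document);
    for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
      SemanticGraph dependencies = sentence.get(CollapsedDependenciesAnnotation.class);
      for (IndexedWord root : dependencies.getRoots()) {
        for (SemanticGraphEdge edge : dependencies.getOutEdgesSorted(root)) {
          // Expected output lines look roughly like "arrested -nmod:in-> Boston"
          // and "arrested -advcl:for-> stealing" (labels may vary by model/version).
          System.out.println(edge.getGovernor().word() + " -" + edge.getRelation() + "-> " + edge.getDependent().word());
        }
      }
    }
  }
}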
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project textdb by TextDB.
The class NlpEntityOperator, method extractNlpSpans.
/**
 * @param iField the field (a TextField) whose text is processed
 * @param attributeName the name of that field
 * @return a list of spans covering the tokens that match the input token type
 * @about This function takes an IField (TextField) and a String (the field's
 *        name) as input and uses the Stanford NLP package to process the
 *        field based on the input token type and nlpTypeIndicator. In the
 *        resulting spans, the value represents the word itself and the key
 *        represents the recognized token type.
 * @overview First, set up a pipeline of annotators based on the
 *           nlpTypeIndicator. If the nlpTypeIndicator is "NE_ALL", we set up
 *           the NamedEntityTagAnnotator; if it is "POS", only the
 *           PartOfSpeechAnnotator is needed.
 *           <p>
 *           The pipeline has to be in this order: TokenizerAnnotator,
 *           SentencesAnnotator, PartOfSpeechAnnotator, LemmaAnnotator and
 *           NamedEntityTagAnnotator.
 *           <p>
 *           In the pipeline, each token is wrapped as a CoreLabel and each
 *           sentence is wrapped as a CoreMap. Each annotator adds its
 *           annotation to the CoreMap (sentence) or CoreLabel (token) object.
 *           <p>
 *           After the pipeline runs, scan each CoreLabel (token) for its
 *           NamedEntityTagAnnotation or PartOfSpeechAnnotation, depending on
 *           the nlpTypeIndicator.
 *           <p>
 *           For each Stanford NLP annotation, get its corresponding
 *           NlpEntityType used in this package, then check whether it equals
 *           the input token type. If it does, make it a span and add it to
 *           the returned list.
 *           <p>
 *           The NLP package has annotations for the start and end position
 *           of a token, which match the span design exactly, so we use them
 *           directly.
 *           <p>
 *           For example: with TextField value "Microsoft, Google and
 *           Facebook are organizations while Donald Trump and Barack Obama
 *           are persons", attributeName "Sentence1" and inputTokenType
 *           Organization: since the inputTokenType requires the NamedEntity
 *           annotator of the Stanford NLP package, the nlpTypeIndicator is
 *           set to "NE_ALL" and the pipeline is set up to cover the Named
 *           Entity Recognizer. Then the value of NamedEntityTagAnnotation is
 *           read for each CoreLabel (token). If the value is the token type
 *           Organization, it meets the requirement; here "Microsoft",
 *           "Google" and "Facebook" satisfy it, while "Donald Trump" and
 *           "Barack Obama" have token type Person and do not. For each
 *           qualifying token, a span is created and added to the returned
 *           list. In this case the token "Microsoft" becomes the span
 *           ["Sentence1", 0, 9, Organization, "Microsoft"].
 */
private List<Span> extractNlpSpans(IField iField, String attributeName) {
  List<Span> spanList = new ArrayList<>();
  String text = (String) iField.getValue();
  Properties props = new Properties();
  // Set up the Stanford NLP pipeline based on the nlpTypeIndicator.
  StanfordCoreNLP pipeline = null;
  if (getNlpTypeIndicator(predicate.getNlpEntityType()).equals("POS")) {
    props.setProperty("annotators", "tokenize, ssplit, pos");
    if (posPipeline == null) {
      posPipeline = new StanfordCoreNLP(props);
    }
    pipeline = posPipeline;
  } else {
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");
    if (nerPipeline == null) {
      nerPipeline = new StanfordCoreNLP(props);
    }
    pipeline = nerPipeline;
  }
  Annotation documentAnnotation = new Annotation(text);
  pipeline.annotate(documentAnnotation);
  List<CoreMap> sentences = documentAnnotation.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      String stanfordNlpConstant;
      // Extract annotations based on the nlpTypeIndicator.
      if (getNlpTypeIndicator(predicate.getNlpEntityType()).equals("POS")) {
        stanfordNlpConstant = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
      } else {
        stanfordNlpConstant = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
      }
      NlpEntityType nlpEntityType = mapNlpEntityType(stanfordNlpConstant);
      if (nlpEntityType == null) {
        continue;
      }
      if (predicate.getNlpEntityType().equals(NlpEntityType.NE_ALL) || predicate.getNlpEntityType().equals(nlpEntityType)) {
        int start = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
        int end = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
        String word = token.get(CoreAnnotations.TextAnnotation.class);
        Span span = new Span(attributeName, start, end, nlpEntityType.toString(), word);
        // Merge adjacent tokens of the same entity type (e.g. "Donald" + "Trump") into one span.
        if (spanList.size() >= 1 && getNlpTypeIndicator(predicate.getNlpEntityType()).equals("NE_ALL")) {
          Span previousSpan = spanList.get(spanList.size() - 1);
          if (previousSpan.getAttributeName().equals(span.getAttributeName())
              && span.getStart() - previousSpan.getEnd() <= 1
              && previousSpan.getKey().equals(span.getKey())) {
            Span newSpan = mergeTwoSpans(previousSpan, span);
            span = newSpan;
            spanList.remove(spanList.size() - 1);
          }
        }
        spanList.add(span);
      }
    }
  }
  return spanList;
}
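As a standalone illustration of the example described in the comment above, the following sketch (independent of the textdb Span and predicate classes) prints the NER tag and character offsets that extractNlpSpans reads for each token. The sample text is an assumption; with the default English models, "Microsoft" would typically come out as ORGANIZATION with offsets [0, 9).

import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class NerOffsetDemo {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation doc = new Annotation("Microsoft, Google and Facebook are organizations.");
    pipeline.annotate(doc);
    for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
        String ner = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
        if (!"O".equals(ner)) {
          int start = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
          int end = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
          // e.g. ORGANIZATION [0, 9): Microsoft
          System.out.println(ner + " [" + start + ", " + end + "): " + token.word());
        }
      }
    }
  }
}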