Use of com.graphaware.nlp.domain.Phrase in the neo4j-nlp project by graphaware.
Class StubTextProcessor, method annotateText.
/**
 * Builds a deterministic stub annotation of {@code text}.
 *
 * <p>The name of the given pipeline is stored in {@code lastPipelineUsed}. The text is
 * split into sentences on literal {@code '.'} characters (the dots themselves are not
 * part of the sentence text), and every sentence is split into tokens on single spaces.
 * Each token becomes a {@link Tag} created with the pipeline's language, with the
 * part-of-speech {@code "TESTVB"} and, unless {@code "test"} is listed in the pipeline's
 * excluded NER types, the named entity {@code "test"}. When the pipeline has a
 * {@code "phrase"} processing step, one {@link Phrase} spanning the whole sentence is
 * attached as well.
 *
 * <p>Tag occurrence offsets are relative to the start of the sentence text; the end
 * offset is exclusive ({@code begin + token.length()}), matching the convention used
 * for the phrase occurrence ({@code 0} to {@code stext.length()}).
 *
 * @param text the raw text to annotate
 * @param pipelineSpecification the pipeline whose name, language, excluded NER types and
 *     processing steps drive the stub output
 * @return the annotated text, one {@link Sentence} per dot-separated segment
 * @throws NullPointerException if {@code text} or {@code pipelineSpecification} is null
 */
@Override
public AnnotatedText annotateText(String text, PipelineSpecification pipelineSpecification) {
    this.lastPipelineUsed = pipelineSpecification.getName();
    AnnotatedText annotatedText = new AnnotatedText();
    String[] sentencesSplit = text.split("\\.");
    int sentenceNumber = 0;
    for (String stext : sentencesSplit) {
        // A segment that starts with a space (e.g. the text after ". ") yields an empty
        // first token; it is kept so that the offsets of later tokens stay aligned.
        String[] parts = stext.split(" ");
        int pos = 0;
        final Sentence sentence = new Sentence(stext, sentenceNumber);
        for (String token : parts) {
            Tag tag = new Tag(token, pipelineSpecification.getLanguage());
            if (!pipelineSpecification.getExcludedNER().contains("test")) {
                tag.setNe(Collections.singletonList("test"));
            }
            tag.setPos(Collections.singletonList("TESTVB"));
            int begin = pos;
            // Exclusive end of the token itself; the separator is not part of the
            // occurrence (previously the end offset included the following space and
            // overshot the sentence length for the last token).
            int end = begin + token.length();
            sentence.addTagOccurrence(begin, end, token, sentence.addTag(tag));
            // Skip the single-space separator before the next token.
            pos = end + 1;
        }
        if (pipelineSpecification.hasProcessingStep("phrase")) {
            Phrase phrase = new Phrase(stext);
            sentence.addPhraseOccurrence(0, stext.length(), phrase);
        }
        annotatedText.addSentence(sentence);
        sentenceNumber++;
    }
    return annotatedText;
}
Aggregations