Use of com.graphaware.nlp.domain.Sentence in the project neo4j-nlp by graphaware.
From the class AnnotatedTextPersistenceTest, method createAnnotatedTextFor.
/**
 * Builds an {@link AnnotatedText} fixture from plain text.
 *
 * <p>The text is split on literal dots into sentences and each sentence is split on
 * single spaces into tokens. Every token becomes a {@link Tag} with language
 * {@code "en"}; tokens equal to {@code expectedTokenForPOS} additionally receive
 * {@code expectedPOS} as their only part-of-speech value.
 *
 * @param text                the raw text; also stored on the result via {@code setText}
 * @param expectedTokenForPOS the token that should carry a part-of-speech value
 * @param expectedPOS         the part-of-speech value given to that token
 * @return the populated annotated text, with sentences numbered from 0 in split order
 */
private AnnotatedText createAnnotatedTextFor(String text, String expectedTokenForPOS, String expectedPOS) {
    AnnotatedText result = new AnnotatedText();
    result.setText(text);

    String[] rawSentences = text.split("\\.");
    for (int sentenceIndex = 0; sentenceIndex < rawSentences.length; sentenceIndex++) {
        String rawSentence = rawSentences[sentenceIndex];
        Sentence current = new Sentence(rawSentence, sentenceIndex);

        for (String word : rawSentence.split(" ")) {
            Tag wordTag = new Tag(word, "en");
            boolean carriesExpectedPos = word.equals(expectedTokenForPOS);
            if (carriesExpectedPos) {
                wordTag.setPos(Collections.singletonList(expectedPOS));
            }
            // Every occurrence is registered with the same fixed first two arguments (0 and 20);
            // presumably begin/end offsets that are irrelevant to the tests using this fixture.
            current.addTagOccurrence(0, 20, word, current.addTag(wordTag));
        }

        result.addSentence(current);
    }
    return result;
}
Use of com.graphaware.nlp.domain.Sentence in the project neo4j-nlp by graphaware.
From the class AnnotatedTextTest, method testFilter.
/**
 * Checks that {@code filter} returns {@code true} both for a plain tag value
 * ({@code "BBC"}) and for a {@code value/type} expression ({@code "China/LOCATION"})
 * when the matching tags are present in the text's only sentence.
 */
@Test
public void testFilter() {
    // One sentence holding two tags: the first created with a null second argument,
    // the second with "LOCATION".
    Sentence onlySentence = new Sentence(SHORT_TEXT_1, 0);
    onlySentence.addTag(getTag("BBC", null));
    onlySentence.addTag(getTag("China", "LOCATION"));

    AnnotatedText underTest = new AnnotatedText();
    underTest.addSentence(onlySentence);

    boolean matchesPlainValue = underTest.filter("BBC");
    assertTrue(matchesPlainValue);

    boolean matchesValueWithType = underTest.filter("China/LOCATION");
    assertTrue(matchesValueWithType);
}
Use of com.graphaware.nlp.domain.Sentence in the project neo4j-nlp by graphaware.
From the class StubTextProcessor, method annotateText.
/**
 * Stub annotation: splits the text on literal dots into sentences and each sentence
 * on single spaces into tokens, without any real linguistic processing.
 *
 * <p>The pipeline name is recorded in {@code lastPipelineUsed}. Each token becomes a
 * {@link Tag} in the pipeline's language with part-of-speech {@code "TESTVB"}; it also
 * gets the named entity {@code "test"} unless the pipeline's excluded NER collection
 * contains {@code "test"}. When the pipeline has the {@code "phrase"} processing step,
 * one phrase spanning the whole sentence text is added to each sentence.
 *
 * @param text                  the raw text to annotate
 * @param pipelineSpecification the pipeline supplying name, language, excluded NER
 *                              values and processing steps
 * @return the annotated text, with sentences numbered from 0 in split order
 */
@Override
public AnnotatedText annotateText(String text, PipelineSpecification pipelineSpecification) {
    this.lastPipelineUsed = pipelineSpecification.getName();

    AnnotatedText result = new AnnotatedText();
    String[] rawSentences = text.split("\\.");
    for (int index = 0; index < rawSentences.length; index++) {
        String raw = rawSentences[index];
        String[] words = raw.split(" ");
        final Sentence sentence = new Sentence(raw, index);

        // Running offset inside the sentence; restarts at 0 for every sentence.
        int cursor = 0;
        for (String word : words) {
            Tag tag = new Tag(word, pipelineSpecification.getLanguage());
            boolean testNerExcluded = pipelineSpecification.getExcludedNER().contains("test");
            if (!testNerExcluded) {
                tag.setNe(Collections.singletonList("test"));
            }
            tag.setPos(Collections.singletonList("TESTVB"));

            int start = cursor;
            // NOTE(review): the value passed as the end is start + length + 1, i.e. it also
            // covers the separator after the token - confirm this is what callers expect.
            cursor = start + word.length() + 1;
            sentence.addTagOccurrence(start, cursor, word, sentence.addTag(tag));
        }

        if (pipelineSpecification.hasProcessingStep("phrase")) {
            sentence.addPhraseOccurrence(0, raw.length(), new Phrase(raw));
        }

        result.addSentence(sentence);
    }
    return result;
}
Aggregations