use of com.graphaware.nlp.domain.AnnotatedText in project neo4j-nlp by graphaware.
the class AnnotatedTextPersistenceTest method createAnnotatedTextFor.
/**
 * Builds an {@link AnnotatedText} fixture directly from raw text.
 *
 * <p>The text is split into sentences on {@code "."} and every sentence is split into
 * tokens on a single space. Each token becomes a {@code Tag} with language {@code "en"};
 * tokens equal to {@code expectedTokenForPOS} additionally receive {@code expectedPOS}
 * as their only part-of-speech value.
 *
 * @param text                the raw text to turn into sentences and tags
 * @param expectedTokenForPOS the token whose tag should carry a part-of-speech value
 * @param expectedPOS         the part-of-speech value assigned to the matching token
 * @return the populated annotated text, with sentences numbered from 0 in split order
 */
private AnnotatedText createAnnotatedTextFor(String text, String expectedTokenForPOS, String expectedPOS) {
    AnnotatedText result = new AnnotatedText();
    result.setText(text);

    String[] sentenceTexts = text.split("\\.");
    for (int sentenceNumber = 0; sentenceNumber < sentenceTexts.length; sentenceNumber++) {
        String sentenceText = sentenceTexts[sentenceNumber];
        Sentence current = new Sentence(sentenceText, sentenceNumber);

        for (String word : sentenceText.split(" ")) {
            Tag wordTag = new Tag(word, "en");
            if (word.equals(expectedTokenForPOS)) {
                wordTag.setPos(Collections.singletonList(expectedPOS));
            }
            // The occurrence offsets are fixed placeholders (0 and 20), not the
            // token's real position inside the sentence.
            current.addTagOccurrence(0, 20, word, current.addTag(wordTag));
        }

        result.addSentence(current);
    }
    return result;
}
use of com.graphaware.nlp.domain.AnnotatedText in project neo4j-nlp by graphaware.
the class AnnotatedTextPersistenceTest method testTagsHavingTwoDifferentPOSInDifferentSentencesShouldReflectBothPOS.
/**
 * Persists two annotated texts in which the token "reports" carries a different
 * part-of-speech value each time, and asserts that the tag exposes the first value
 * after the first persist and both values after the second persist.
 */
@Test
public void testTagsHavingTwoDifferentPOSInDifferentSentencesShouldReflectBothPOS() {
    String firstText = "The discipline of preparing and peer reviewing formal engineering reports leads to a high degree of accuracy and technical rigor.";
    String secondText = "During this effort to establish accurate crack information, it was discovered that several cracks were kinked rather than extending in a self-similar crack growth direction as was implied by the sketches and analyses reports in the briefing charts.";
    TestNLPGraph graph = new TestNLPGraph(getDatabase());

    // First document: "reports" is tagged with "VGB".
    AnnotatedText firstAnnotated = createAnnotatedTextFor(firstText, "reports", "VGB");
    try (Transaction firstTx = getDatabase().beginTx()) {
        getNLPManager().getPersister(AnnotatedText.class).persist(firstAnnotated, "test-a", "1");
        firstTx.success();
    }
    graph.assertTagWithValueHasPos("reports", "VGB");

    // Second document: the same token is tagged with "NNS"; the third persist
    // argument is derived from the current time in milliseconds.
    AnnotatedText secondAnnotated = createAnnotatedTextFor(secondText, "reports", "NNS");
    try (Transaction secondTx = getDatabase().beginTx()) {
        getNLPManager().getPersister(AnnotatedText.class).persist(secondAnnotated, "test-b", String.valueOf(System.currentTimeMillis()));
        secondTx.success();
    }

    // Both part-of-speech values must now be present on the tag.
    graph.assertTagWithValueHasPos("reports", "VGB");
    graph.assertTagWithValueHasPos("reports", "NNS");
}
use of com.graphaware.nlp.domain.AnnotatedText in project neo4j-nlp by graphaware.
the class AnnotatedTextPersistenceTest method testTagOccurrenceGetANERProperty.
/**
 * Persists an annotated text and asserts that a {@code TagOccurrence} node whose
 * {@code value} is "cool" has an {@code ne} property containing "NER_Cool0".
 */
@Test
public void testTagOccurrenceGetANERProperty() {
    clearDb();
    AnnotatedText fixture = createAnnotatedTextWithSameTagInSameTextWithDifferentPos();
    try (Transaction transaction = getDatabase().beginTx()) {
        getNLPManager().getPersister(AnnotatedText.class).persist(fixture, "test", "1");
        transaction.success();
    }

    executeInTransaction("MATCH (n:TagOccurrence) WHERE n.value = 'cool' RETURN n", (rows -> {
        // At least one matching occurrence must exist; only the first row is inspected.
        assertTrue(rows.hasNext());
        Node occurrence = (Node) rows.next().get("n");
        String[] namedEntities = (String[]) occurrence.getProperty("ne");
        List<String> namedEntityList = Arrays.asList(namedEntities);
        assertTrue(namedEntityList.contains("NER_Cool0"));
    }));
}
use of com.graphaware.nlp.domain.AnnotatedText in project neo4j-nlp by graphaware.
the class AnnotatedTextTest method testFilter.
/**
 * Asserts that {@code AnnotatedText.filter} returns true both for the plain value
 * "BBC" and for the "China/LOCATION" form, given a sentence that holds a "BBC" tag
 * created with a null second argument and a "China" tag created with "LOCATION".
 */
@Test
public void testFilter() {
    Sentence onlySentence = new Sentence(SHORT_TEXT_1, 0);
    onlySentence.addTag(getTag("BBC", null));
    onlySentence.addTag(getTag("China", "LOCATION"));

    AnnotatedText underTest = new AnnotatedText();
    underTest.addSentence(onlySentence);

    assertTrue(underTest.filter("BBC"));
    assertTrue(underTest.filter("China/LOCATION"));
}
use of com.graphaware.nlp.domain.AnnotatedText in project neo4j-nlp by graphaware.
the class StubTextProcessor method annotateText.
/**
 * Produces a stub annotation of {@code text} without running any real NLP.
 *
 * <p>Records the pipeline name in {@code lastPipelineUsed}, splits the text into
 * sentences on {@code "."} and each sentence into tokens on a single space. Every token
 * becomes a {@code Tag} in the pipeline's language with part of speech {@code "TESTVB"};
 * the named entity {@code "test"} is added unless the pipeline's excluded-NER collection
 * contains {@code "test"}. When the pipeline has the {@code "phrase"} processing step,
 * one phrase spanning the whole sentence text is added per sentence.
 *
 * @param text                  the raw text to annotate
 * @param pipelineSpecification the pipeline whose name, language, excluded NER values and
 *                              processing steps drive the stub output
 * @return the annotated text, with sentences numbered from 0 in split order
 */
@Override
public AnnotatedText annotateText(String text, PipelineSpecification pipelineSpecification) {
    this.lastPipelineUsed = pipelineSpecification.getName();
    AnnotatedText result = new AnnotatedText();

    String[] sentenceTexts = text.split("\\.");
    for (int sentenceNumber = 0; sentenceNumber < sentenceTexts.length; sentenceNumber++) {
        String sentenceText = sentenceTexts[sentenceNumber];
        String[] tokens = sentenceText.split(" ");
        final Sentence sentence = new Sentence(sentenceText, sentenceNumber);

        // Offsets restart at 0 for every sentence.
        int cursor = 0;
        for (String token : tokens) {
            Tag tag = new Tag(token, pipelineSpecification.getLanguage());
            boolean testNerExcluded = pipelineSpecification.getExcludedNER().contains("test");
            if (!testNerExcluded) {
                tag.setNe(Collections.singletonList("test"));
            }
            tag.setPos(Collections.singletonList("TESTVB"));

            int start = cursor;
            // NOTE(review): the end offset is start + token length + 1, so it also
            // counts the separator after the token — confirm this is intended.
            int end = start + token.length() + 1;
            cursor = end;
            sentence.addTagOccurrence(start, end, token, sentence.addTag(tag));
        }

        if (pipelineSpecification.hasProcessingStep("phrase")) {
            Phrase wholeSentence = new Phrase(sentenceText);
            sentence.addPhraseOccurrence(0, sentenceText.length(), wholeSentence);
        }

        result.addSentence(sentence);
    }
    return result;
}
Aggregations