Use of com.graphaware.nlp.dsl.request.PipelineSpecification in project neo4j-nlp-stanfordnlp by graphaware.
Class TextProcessorTest, method testLemmaLowerCasing.
/**
 * Annotates the same sentence twice -- once with the default pipeline and once with a
 * freshly created pipeline that adds the "truecase" processing step -- and checks that the
 * tag at position (0, 16) has the lower-cased lemma "governance" in both cases, while the
 * tag occurrence produced by the second pipeline keeps the value "Governance".
 */
@Test
public void testLemmaLowerCasing() {
    final String input = "Collibra’s Data Governance Innovation: Enabling Data as a Strategic Asset";

    // Pass 1: default pipeline.
    AnnotatedText defaultAnnotation = textProcessor.annotateText(input, "en", PIPELINE_DEFAULT);
    TestAnnotatedText defaultResult = new TestAnnotatedText(defaultAnnotation);
    defaultResult.assertSentencesCount(1);
    assertEquals("governance", defaultResult.getTagAtPosition(0, 16).getLemma());

    // Pass 2: custom pipeline with the "truecase" step, which must be created before use.
    PipelineSpecification trueCaseSpec =
            new PipelineSpecification("tokenizeWithTrueCase", StanfordTextProcessor.class.getName());
    trueCaseSpec.addProcessingStep("truecase");
    textProcessor.createPipeline(trueCaseSpec);

    AnnotatedText trueCaseAnnotation = textProcessor.annotateText(input, "en", trueCaseSpec);
    TestAnnotatedText trueCaseResult = new TestAnnotatedText(trueCaseAnnotation);
    trueCaseResult.assertSentencesCount(1);
    assertEquals("governance", trueCaseResult.getTagAtPosition(0, 16).getLemma());
    assertEquals("Governance", trueCaseResult.getTagOccurrenceAtPosition(0, 16).getValue());
}
Use of com.graphaware.nlp.dsl.request.PipelineSpecification in project neo4j-nlp-stanfordnlp by graphaware.
Class DependencyParserTest, method testEnhancedDependencyParsingWithQuestion.
/**
 * Smoke test that prints the enhanced dependency graph of a question-like sentence.
 *
 * <p>A custom pipeline (with the "dependency" step enabled) is created and used to annotate
 * the text through the text processor; its result is discarded. The graph that is printed
 * comes from the "default" Stanford pipeline, which is fetched up front and run directly on
 * a new {@code Annotation}. The test makes no assertions: it only fails if a call throws.
 */
@Test
public void testEnhancedDependencyParsingWithQuestion() throws Exception {
    final String text = "In what area was Frederic born in";
    final StanfordTextProcessor stanfordProcessor = (StanfordTextProcessor) textProcessor;

    // Fetched before the custom pipeline is registered, as in the original ordering.
    StanfordCoreNLP pipeline = stanfordProcessor.getPipeline("default");

    Map<String, Object> customPipeline = new HashMap<>();
    customPipeline.put("textProcessor", "com.graphaware.nlp.processor.stanford.StanfordTextProcessor");
    customPipeline.put("name", "custom");
    customPipeline.put("stopWords", "start,starts");
    customPipeline.put("processingSteps", Collections.singletonMap("dependency", true));

    PipelineSpecification pipelineSpecification = PipelineSpecification.fromMap(customPipeline);
    stanfordProcessor.createPipeline(pipelineSpecification);
    textProcessor.annotateText(text, "en", pipelineSpecification);

    Annotation document = new Annotation(text);
    pipeline.annotate(document);

    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        System.out.println(sentence.toString());

        SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
        graph.getRoots().forEach(System.out::println);
        System.out.println(graph);

        for (SemanticGraphEdge edge : graph.edgeListSorted()) {
            String description = String.format(
                    "Source is : %s - Target is : %s - Relation is : %s",
                    edge.getSource(), edge.getTarget(), edge.getRelation());
            System.out.println(description);
        }
    }
}
Use of com.graphaware.nlp.dsl.request.PipelineSpecification in project neo4j-nlp-stanfordnlp by graphaware.
Class StanfordTextProcessor, method annotateSentence.
/**
 * Annotates {@code text} with the pipeline registered under the specification's name and
 * returns a single tag taken from the first sentence.
 *
 * @param text                  the text to annotate
 * @param lang                  language passed through to {@code getTag} for each token
 * @param pipelineSpecification specification whose name selects the pipeline to run
 * @return the first non-null tag of the first sentence whose lemma passes
 *         {@code checkLemmaIsValid}, or {@code null} when there is no sentence or no such tag
 */
@Override
public Tag annotateSentence(String text, String lang, PipelineSpecification pipelineSpecification) {
    Annotation document = new Annotation(text);
    pipelines.get(pipelineSpecification.getName()).annotate(document);

    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    return sentences.stream()
            .findFirst()
            // Only the first sentence is inspected; later sentences are ignored.
            .flatMap(firstSentence -> firstSentence.get(CoreAnnotations.TokensAnnotation.class).stream()
                    .map(token -> getTag(lang, token))
                    .filter(tag -> tag != null && checkLemmaIsValid(tag.getLemma()))
                    .findFirst())
            .orElse(null);
}
Use of com.graphaware.nlp.dsl.request.PipelineSpecification in project neo4j-nlp by graphaware.
Class TextRank, method getPipelineWithoutNEs.
/**
 * Returns the name of the pipeline without named-entity recognition for {@code language},
 * registering that pipeline with the text processors manager if it does not exist yet.
 *
 * <p>The pipeline is built for the manager's default text processor, with the "tokenize"
 * step enabled and the "ner" step disabled.
 *
 * @param language language used to derive the pipeline key
 * @return the pipeline name, i.e. the value of {@code getPipelineWithoutNerKey(language)}
 */
private String getPipelineWithoutNEs(String language) {
    String name = getPipelineWithoutNerKey(language);
    if (NLPManager.getInstance().getTextProcessorsManager().hasPipeline(name)) {
        return name;
    }

    Map<String, Object> steps = new HashMap<>();
    steps.put("tokenize", true);
    steps.put("ner", false);

    String processor = NLPManager.getInstance().getTextProcessorsManager().getDefaultProcessor().getClass().getName();
    // Reuse the name that was just checked so the registered pipeline is exactly the one
    // looked up above (and returned below), instead of recomputing the key.
    PipelineSpecification spec = new PipelineSpecification(name, processor);
    spec.setProcessingSteps(steps);
    NLPManager.getInstance().getTextProcessorsManager().addPipeline(spec);
    return name;
}
Use of com.graphaware.nlp.dsl.request.PipelineSpecification in project neo4j-nlp by graphaware.
Class SingleLanguageSupportTest, method testErrorOnMultipleLanguage.
/**
 * Verifies that creating a second pipeline in a different language is rejected.
 *
 * <p>An English pipeline is created first; creating a German one afterwards is expected to
 * throw an exception whose message contains
 * "Multiple languages not supported in this version". Any other outcome (no exception, or
 * an exception with a different or missing message) fails the final assertion.
 */
@Test
public void testErrorOnMultipleLanguage() throws Exception {
    PipelineSpecification pipelineSpecificationEn = new PipelineSpecification("english", StubTextProcessor.class.getName());
    pipelineSpecificationEn.setLanguage("en");
    createPipeline(pipelineSpecificationEn.getTextProcessor(), pipelineSpecificationEn.getName(), pipelineSpecificationEn.getLanguage());

    PipelineSpecification pipelineSpecificationDe = new PipelineSpecification("german", StubTextProcessor.class.getName());
    pipelineSpecificationDe.setLanguage("de");

    boolean multiLanguageRejected = false;
    try {
        createPipeline(pipelineSpecificationDe.getTextProcessor(), pipelineSpecificationDe.getName(), pipelineSpecificationDe.getLanguage());
    } catch (Exception ex) {
        // getMessage() may be null; guard so an unrelated message-less exception produces a
        // plain assertion failure instead of a NullPointerException inside this handler.
        String message = ex.getMessage();
        multiLanguageRejected = message != null
                && message.contains("Multiple languages not supported in this version");
    }
    assertTrue(multiLanguageRejected);
}
Aggregations