use of com.graphaware.nlp.dsl.request.PipelineSpecification in project neo4j-nlp-stanfordnlp by graphaware.
the class TextProcessorTest method testAddPipelineTakesStopwordsIntoAccountAfterNormalAnnotation.
/**
 * Annotates the same text with two differently named pipelines, each carrying its own
 * custom stop-word list, and asserts that no tag lemma produced by either annotation
 * appears in the corresponding list.
 */
@Test
public void testAddPipelineTakesStopwordsIntoAccountAfterNormalAnnotation() {
    // The commas are stripped so the input is a plain whitespace-separated word sequence.
    String text = "det, vad, eller, sin, efter, i, varje, sådan, de, ditt, han, dessa, vi, med, då, den, mig, denna, ingen, under, henne, sådant, du, hade, vilken,".replaceAll(",", "");

    // First pipeline.
    PipelineSpecification specification = new PipelineSpecification("customsw2", StanfordTextProcessor.class.getName());
    specification.getProcessingSteps().put("tokenize", true);
    String stopwords = "sådan, själv, dig, från, vilkas, dem, ett, varit, varför, att, era, som";
    specification.setStopWords(stopwords);
    AnnotatedText annotatedText = textProcessor.annotateText(text, "en", specification);
    // NOTE(review): split(",") keeps the leading space of every entry but the first, so only
    // entries without surrounding whitespace can ever match a lemma here — confirm whether
    // the entries should be trimmed before the comparison.
    List<String> blacklist = Arrays.asList(stopwords.split(","));
    annotatedText.getTags().forEach(tag -> assertFalse(blacklist.contains(tag.getLemma())));

    // Second pipeline: the tokenize step is configured on specification2 itself
    // (it was previously put on the first specification a second time).
    PipelineSpecification specification2 = new PipelineSpecification("customsw3", StanfordTextProcessor.class.getName());
    specification2.getProcessingSteps().put("tokenize", true);
    String stopwords2 = "eller, sådan,själv, dig, från, vilkas, dem, ett, varit, varför, att, era, som";
    specification2.setStopWords(stopwords2);
    AnnotatedText annotatedText2 = textProcessor.annotateText(text, "en", specification2);
    List<String> blacklist2 = Arrays.asList(stopwords2.split(","));
    annotatedText2.getTags().forEach(tag -> assertFalse(blacklist2.contains(tag.getLemma())));
}
use of com.graphaware.nlp.dsl.request.PipelineSpecification in project neo4j-nlp-stanfordnlp by graphaware.
the class TextProcessorTest method testAnnotateTextWithSpecification.
/**
 * Annotates a single English sentence with a pipeline that enables the "tokenize" and
 * "ner" steps and lists "LOCATION" as an excluded NER type, then asserts that exactly one
 * sentence is produced and that no tag occurrence carries "LOCATION" in its NE list.
 */
@Test
public void testAnnotateTextWithSpecification() {
    String text = "My name is John Doe and I work in Switzerland";

    PipelineSpecification customSpec = new PipelineSpecification("custom", StanfordTextProcessor.class.getName());
    customSpec.getProcessingSteps().put("tokenize", true);
    customSpec.getProcessingSteps().put("ner", true);
    customSpec.getExcludedNER().add("LOCATION");

    AnnotatedText result = textProcessor.annotateText(text, "en", customSpec);
    assertEquals(1, result.getSentences().size());

    // Flatten sentences -> occurrence lists -> occurrences and count those tagged LOCATION.
    long locationOccurrences = result.getSentences().stream()
            .flatMap(sentence -> sentence.getTagOccurrences().values().stream())
            .flatMap(List::stream)
            .filter(occurrence -> occurrence.getElement().getNeAsList().contains("LOCATION"))
            .count();
    assertEquals(0, locationOccurrences);
}
use of com.graphaware.nlp.dsl.request.PipelineSpecification in project neo4j-nlp-stanfordnlp by graphaware.
the class DependencyParserTest method setUp.
/**
 * One-time fixture: loads and initialises the Stanford text processor, registers a
 * pipeline named "default" with the tokenize, NER and dependency steps enabled, and
 * stores that specification in {@code PIPELINE_DEFAULT}.
 */
@BeforeClass
public static void setUp() {
    // Steps enabled for the pipeline created below.
    Map<String, Object> steps = new HashMap<>();
    steps.put(AbstractTextProcessor.STEP_TOKENIZE, true);
    steps.put(AbstractTextProcessor.STEP_NER, true);
    steps.put(AbstractTextProcessor.STEP_DEPENDENCY, true);

    textProcessor = ServiceLoader.loadTextProcessor("com.graphaware.nlp.processor.stanford.StanfordTextProcessor");
    textProcessor.init();

    PipelineSpecification defaultSpec = new PipelineSpecification(
            "default",
            StanfordTextProcessor.class.getName(),
            steps,
            null,
            1L,
            Collections.emptyList(),
            Collections.emptyList());
    textProcessor.createPipeline(defaultSpec);
    PIPELINE_DEFAULT = defaultSpec;
}
use of com.graphaware.nlp.dsl.request.PipelineSpecification in project neo4j-nlp by graphaware.
the class TextProcessorsManager method annotateTag.
/**
 * Annotates {@code text} as a tag using the default pipeline for {@code language}.
 *
 * @param text     the text to annotate
 * @param language the language whose default pipeline is looked up
 * @return the tag returned by the pipeline's text processor, or {@code null} (after
 *         logging a warning) when no default pipeline exists for the language
 */
public Tag annotateTag(String text, String language) {
    final PipelineSpecification defaultSpec = getDefaultPipeline(language);
    if (defaultSpec != null) {
        // Delegate to the processor named by the pipeline specification.
        return getTextProcessor(defaultSpec.getTextProcessor()).annotateTag(text, defaultSpec);
    }
    LOG.warn("No default annotator for language: " + language);
    return null;
}
use of com.graphaware.nlp.dsl.request.PipelineSpecification in project neo4j-nlp by graphaware.
the class TextProcessorsManager method getDefaultPipeline.
/**
 * Returns the default pipeline specification for {@code language}.
 *
 * <p>An entry already present in {@code defaultPipelineByLanguage} is returned as is.
 * Otherwise the specification is obtained from
 * {@code getDefaultPipelineSpecificationFromConfig} and, when non-null, stored in the
 * map before being returned.
 *
 * @param language the language key to look up
 * @return the specification, or {@code null} when none is stored or configured
 */
public PipelineSpecification getDefaultPipeline(String language) {
    if (!defaultPipelineByLanguage.containsKey(language)) {
        final PipelineSpecification fromConfig = getDefaultPipelineSpecificationFromConfig(language);
        if (fromConfig != null) {
            // Remember the configured specification for subsequent lookups.
            defaultPipelineByLanguage.put(language, fromConfig);
        }
        return fromConfig;
    }
    return defaultPipelineByLanguage.get(language);
}
Aggregations