Search in sources:

Example 31 with PipelineSpecification

use of com.graphaware.nlp.dsl.request.PipelineSpecification in project neo4j-nlp-stanfordnlp by graphaware.

In the class TextProcessorTest, method testLemmaLowerCasing.

/**
 * Annotates the same sentence twice, first with the default pipeline and then with a
 * freshly created pipeline that adds the "truecase" processing step. Both runs must
 * produce exactly one sentence and the lemma "governance" for the tag at position
 * (0, 16); for the second run the tag occurrence value must be "Governance".
 */
@Test
public void testLemmaLowerCasing() {
    final String input = "Collibra’s Data Governance Innovation: Enabling Data as a Strategic Asset";

    // First run: default pipeline.
    AnnotatedText defaultAnnotation = textProcessor.annotateText(input, "en", PIPELINE_DEFAULT);
    TestAnnotatedText defaultResult = new TestAnnotatedText(defaultAnnotation);
    defaultResult.assertSentencesCount(1);
    assertEquals("governance", defaultResult.getTagAtPosition(0, 16).getLemma());

    // Second run: custom pipeline with the "truecase" step, registered before use.
    PipelineSpecification trueCaseSpec = new PipelineSpecification("tokenizeWithTrueCase", StanfordTextProcessor.class.getName());
    trueCaseSpec.addProcessingStep("truecase");
    textProcessor.createPipeline(trueCaseSpec);

    AnnotatedText trueCaseAnnotation = textProcessor.annotateText(input, "en", trueCaseSpec);
    TestAnnotatedText trueCaseResult = new TestAnnotatedText(trueCaseAnnotation);
    trueCaseResult.assertSentencesCount(1);
    assertEquals("governance", trueCaseResult.getTagAtPosition(0, 16).getLemma());
    assertEquals("Governance", trueCaseResult.getTagOccurrenceAtPosition(0, 16).getValue());
}
Also used : PipelineSpecification(com.graphaware.nlp.dsl.request.PipelineSpecification) TestAnnotatedText(com.graphaware.nlp.util.TestAnnotatedText) TestAnnotatedText(com.graphaware.nlp.util.TestAnnotatedText) StanfordTextProcessor(com.graphaware.nlp.processor.stanford.StanfordTextProcessor) Test(org.junit.Test)

Example 32 with PipelineSpecification

use of com.graphaware.nlp.dsl.request.PipelineSpecification in project neo4j-nlp-stanfordnlp by graphaware.

In the class DependencyParserTest, method testEnhancedDependencyParsingWithQuestion.

/**
 * Exploratory test without assertions: registers a custom pipeline that enables the
 * "dependency" processing step and annotates a question with it, then runs the
 * "default" Stanford pipeline over the same text and prints, for every sentence, the
 * sentence itself, the roots of its enhanced dependency graph, the graph, and each
 * edge of the graph in sorted order.
 */
@Test
public void testEnhancedDependencyParsingWithQuestion() throws Exception {
    String text = "In what area was Frederic born in";
    StanfordTextProcessor stanfordProcessor = (StanfordTextProcessor) textProcessor;
    StanfordCoreNLP defaultPipeline = stanfordProcessor.getPipeline("default");

    // Describe and register the custom pipeline, then annotate through the processor.
    Map<String, Object> specMap = new HashMap<>();
    specMap.put("textProcessor", "com.graphaware.nlp.processor.stanford.StanfordTextProcessor");
    specMap.put("name", "custom");
    specMap.put("stopWords", "start,starts");
    specMap.put("processingSteps", Collections.singletonMap("dependency", true));
    PipelineSpecification customSpec = PipelineSpecification.fromMap(specMap);
    stanfordProcessor.createPipeline(customSpec);
    textProcessor.annotateText(text, "en", customSpec);

    // Run the default pipeline directly and dump the enhanced dependencies.
    Annotation annotation = new Annotation(text);
    defaultPipeline.annotate(annotation);
    List<CoreMap> annotatedSentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    final String edgeFormat = "Source is : %s - Target is : %s - Relation is : %s";
    for (CoreMap current : annotatedSentences) {
        System.out.println(current.toString());
        SemanticGraph dependencies = current.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
        dependencies.getRoots().forEach(System.out::println);
        System.out.println(dependencies);
        for (SemanticGraphEdge dependency : dependencies.edgeListSorted()) {
            String line = String.format(edgeFormat, dependency.getSource(), dependency.getTarget(), dependency.getRelation());
            System.out.println(line);
        }
    }
}
Also used : SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) StanfordTextProcessor(com.graphaware.nlp.processor.stanford.StanfordTextProcessor) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) Annotation(edu.stanford.nlp.pipeline.Annotation) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) PipelineSpecification(com.graphaware.nlp.dsl.request.PipelineSpecification) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) CoreMap(edu.stanford.nlp.util.CoreMap) Test(org.junit.Test)

Example 33 with PipelineSpecification

use of com.graphaware.nlp.dsl.request.PipelineSpecification in project neo4j-nlp-stanfordnlp by graphaware.

In the class StanfordTextProcessor, method annotateSentence.

/**
 * Annotates {@code text} with the pipeline stored under the specification's name and
 * returns the first tag of the first sentence that is non-null and whose lemma is
 * accepted by {@code checkLemmaIsValid}.
 *
 * @param text                  the text to annotate
 * @param lang                  language passed through to {@code getTag} for each token
 * @param pipelineSpecification specification whose name selects the pipeline to run
 * @return the first accepted tag of the first sentence, or {@code null} when there is
 *         no sentence or no token of the first sentence yields an accepted tag
 */
@Override
public Tag annotateSentence(String text, String lang, PipelineSpecification pipelineSpecification) {
    Annotation annotation = new Annotation(text);
    pipelines.get(pipelineSpecification.getName()).annotate(annotation);
    // Only the first sentence is inspected; any further sentences are ignored.
    return annotation.get(CoreAnnotations.SentencesAnnotation.class).stream()
            .findFirst()
            .flatMap(first -> first.get(CoreAnnotations.TokensAnnotation.class).stream()
                    .map(token -> getTag(lang, token))
                    .filter(candidate -> candidate != null && checkLemmaIsValid(candidate.getLemma()))
                    .findFirst())
            .orElse(null);
}
Also used : java.util(java.util) CorefChain(edu.stanford.nlp.coref.data.CorefChain) Log(org.neo4j.logging.Log) Tree(edu.stanford.nlp.trees.Tree) Matcher(java.util.regex.Matcher) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) RNNCoreAnnotations(edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) Pair(edu.stanford.nlp.util.Pair) CoreMap(edu.stanford.nlp.util.CoreMap) FileUtils(com.graphaware.nlp.util.FileUtils) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) com.graphaware.nlp.domain(com.graphaware.nlp.domain) Stream(java.util.stream.Stream) NERModelTool(com.graphaware.nlp.processor.stanford.model.NERModelTool) Word(edu.stanford.nlp.ling.Word) AbstractTextProcessor(com.graphaware.nlp.processor.AbstractTextProcessor) Annotation(edu.stanford.nlp.pipeline.Annotation) LoggerFactory(com.graphaware.common.log.LoggerFactory) StringUtils(edu.stanford.nlp.util.StringUtils) NLPTextProcessor(com.graphaware.nlp.annotation.NLPTextProcessor) PipelineSpecification(com.graphaware.nlp.dsl.request.PipelineSpecification) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) DEFAULT_BACKGROUND_SYMBOL(edu.stanford.nlp.sequences.SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) RNNCoreAnnotations(edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) 
CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 34 with PipelineSpecification

use of com.graphaware.nlp.dsl.request.PipelineSpecification in project neo4j-nlp by graphaware.

In the class TextRank, method getPipelineWithoutNEs.

/**
 * Returns the name of the pipeline used for the given language with the "ner" step
 * disabled, registering that pipeline with the text processors manager first when
 * the manager does not have it yet.
 *
 * @param language language used to derive the pipeline key via
 *                 {@code getPipelineWithoutNerKey}
 * @return the pipeline name that was checked and, if necessary, registered
 */
private String getPipelineWithoutNEs(String language) {
    String name = getPipelineWithoutNerKey(language);
    if (NLPManager.getInstance().getTextProcessorsManager().hasPipeline(name)) {
        return name;
    }

    Map<String, Object> params = new HashMap<>();
    params.put("tokenize", true);
    params.put("ner", false);
    String processor = NLPManager.getInstance().getTextProcessorsManager().getDefaultProcessor().getClass().getName();
    // Register under the exact key that was checked above instead of recomputing it,
    // so the checked, registered and returned names cannot diverge.
    PipelineSpecification ps = new PipelineSpecification(name, processor);
    ps.setProcessingSteps(params);
    NLPManager.getInstance().getTextProcessorsManager().addPipeline(ps);
    return name;
}
Also used : PipelineSpecification(com.graphaware.nlp.dsl.request.PipelineSpecification) TfIdfObject(com.graphaware.nlp.domain.TfIdfObject)

Example 35 with PipelineSpecification

use of com.graphaware.nlp.dsl.request.PipelineSpecification in project neo4j-nlp by graphaware.

In the class SingleLanguageSupportTest, method testErrorOnMultipleLanguage.

/**
 * Creates a pipeline for "en" and then attempts to create a second one for "de",
 * expecting the second attempt to throw an exception whose message contains
 * "Multiple languages not supported in this version".
 */
@Test
public void testErrorOnMultipleLanguage() throws Exception {
    PipelineSpecification pipelineSpecificationEn = new PipelineSpecification("english", StubTextProcessor.class.getName());
    pipelineSpecificationEn.setLanguage("en");
    createPipeline(pipelineSpecificationEn.getTextProcessor(), pipelineSpecificationEn.getName(), pipelineSpecificationEn.getLanguage());

    PipelineSpecification pipelineSpecificationDe = new PipelineSpecification("german", StubTextProcessor.class.getName());
    pipelineSpecificationDe.setLanguage("de");
    boolean raiseAnError = false;
    try {
        createPipeline(pipelineSpecificationDe.getTextProcessor(), pipelineSpecificationDe.getName(), pipelineSpecificationDe.getLanguage());
    } catch (Exception ex) {
        // getMessage() may return null; guard so an unrelated, message-less exception
        // fails the assertion below instead of raising a NullPointerException here.
        String message = ex.getMessage();
        raiseAnError = message != null && message.contains("Multiple languages not supported in this version");
    }
    assertTrue(raiseAnError);
}
Also used : PipelineSpecification(com.graphaware.nlp.dsl.request.PipelineSpecification) StubTextProcessor(com.graphaware.nlp.stub.StubTextProcessor) Test(org.junit.Test) NLPIntegrationTest(com.graphaware.nlp.NLPIntegrationTest)

Aggregations

PipelineSpecification (com.graphaware.nlp.dsl.request.PipelineSpecification)49 Test (org.junit.Test)22 StubTextProcessor (com.graphaware.nlp.stub.StubTextProcessor)13 StanfordTextProcessor (com.graphaware.nlp.processor.stanford.StanfordTextProcessor)11 NLPIntegrationTest (com.graphaware.nlp.NLPIntegrationTest)8 TestAnnotatedText (com.graphaware.nlp.util.TestAnnotatedText)8 Transaction (org.neo4j.graphdb.Transaction)8 AbstractEmbeddedTest (com.graphaware.nlp.AbstractEmbeddedTest)5 NLPTextProcessor (com.graphaware.nlp.annotation.NLPTextProcessor)4 TextProcessor (com.graphaware.nlp.processor.TextProcessor)3 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)3 Annotation (edu.stanford.nlp.pipeline.Annotation)3 StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)3 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)3 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)3 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)3 CoreMap (edu.stanford.nlp.util.CoreMap)3 Description (org.neo4j.procedure.Description)3 LoggerFactory (com.graphaware.common.log.LoggerFactory)2 NLPManager (com.graphaware.nlp.NLPManager)2