Search in sources :

Example 6 with TextProcessor

use of com.graphaware.nlp.processor.TextProcessor in project neo4j-nlp by graphaware.

the class NLPManager method annotateTextAndPersist.

/**
 * Annotates the given text with the processor configured for the named pipeline and persists the result.
 *
 * <p>The text language is obtained from {@code checkTextLanguage}, the pipeline name is resolved
 * through {@code getPipeline}, and the matching specification is loaded from the configuration.
 * The processor named by that specification produces the annotation, which is then handed to
 * {@code processAnnotationPersist}.
 *
 * @param text             the text to annotate
 * @param id               identifier passed on to {@code processAnnotationPersist}
 * @param textProcessor    not read by this method; the processor comes from the loaded specification
 * @param pipelineName     pipeline name, resolved through {@code getPipeline} before loading
 * @param force            not read by this method
 * @param checkForLanguage forwarded to {@code checkTextLanguage}
 * @return the node returned by {@code processAnnotationPersist}
 * @throws RuntimeException if the configuration has no specification for the resolved pipeline
 */
public Node annotateTextAndPersist(String text, String id, String textProcessor, String pipelineName, boolean force, boolean checkForLanguage) {
    final String language = checkTextLanguage(text, checkForLanguage);
    final String resolvedName = getPipeline(pipelineName);
    final PipelineSpecification specification = getConfiguration().loadPipeline(resolvedName);
    if (specification == null) {
        throw new RuntimeException("No pipeline " + pipelineName + " found.");
    }
    final AnnotatedText annotated = textProcessorsManager
            .getTextProcessor(specification.getTextProcessor())
            .annotateText(text, language, specification);
    return processAnnotationPersist(id, text, annotated, specification);
}
Also used : PipelineSpecification(com.graphaware.nlp.dsl.request.PipelineSpecification) TextProcessor(com.graphaware.nlp.processor.TextProcessor) AnnotatedText(com.graphaware.nlp.domain.AnnotatedText)

Example 7 with TextProcessor

use of com.graphaware.nlp.processor.TextProcessor in project neo4j-nlp by graphaware.

the class NLPManager method annotateTextAndPersist.

/**
 * Annotates the given text using an already-loaded pipeline specification and persists the result.
 *
 * @param text                  the text to annotate
 * @param id                    identifier passed on to {@code processAnnotationPersist}
 * @param checkForLanguage      forwarded to {@code checkTextLanguage}
 * @param pipelineSpecification specification naming the text processor to use; also passed to the
 *                              processor and to {@code processAnnotationPersist}
 * @return the node returned by {@code processAnnotationPersist}
 */
public Node annotateTextAndPersist(String text, String id, boolean checkForLanguage, PipelineSpecification pipelineSpecification) {
    final String language = checkTextLanguage(text, checkForLanguage);
    final String processorName = pipelineSpecification.getTextProcessor();
    final AnnotatedText annotation = textProcessorsManager
            .getTextProcessor(processorName)
            .annotateText(text, language, pipelineSpecification);
    return processAnnotationPersist(id, text, annotation, pipelineSpecification);
}
Also used : TextProcessor(com.graphaware.nlp.processor.TextProcessor) AnnotatedText(com.graphaware.nlp.domain.AnnotatedText)

Example 8 with TextProcessor

use of com.graphaware.nlp.processor.TextProcessor in project neo4j-nlp by graphaware.

the class AnnotateFunction method getAnnotation.

/**
 * Annotates {@code text} with the pipeline named in {@code specificationInput} and returns the
 * annotation converted to a map.
 *
 * <p>Only the {@code "name"} entry of {@code specificationInput} is read; it selects the pipeline
 * specification, which in turn names the text processor to run. The resulting
 * {@code AnnotatedText} is converted with a Jackson {@code ObjectMapper} configured not to fail
 * on empty beans.
 *
 * @param text               the text to annotate
 * @param specificationInput map that must contain the pipeline name under the key {@code "name"}
 * @return the annotation as a map
 * @throws RuntimeException if {@code specificationInput} is null or has no {@code "name"} key, or
 *                          if no pipeline specification is returned for that name
 */
@UserFunction("ga.nlp.processor.annotate")
@Description("Perform the annotation on the given text, returns the produced annotation domain")
public Map<String, Object> getAnnotation(@Name("text") String text, @Name("pipelineSpecification") Map<String, Object> specificationInput) {
    // A null map is rejected with the same message as a missing key instead of a bare NPE.
    if (specificationInput == null || !specificationInput.containsKey("name")) {
        throw new RuntimeException("You must specify the name of the pipeline");
    }
    String pipelineName = (String) specificationInput.get("name");
    PipelineSpecification spec = getNLPManager().getTextProcessorsManager().getPipelineSpecification(pipelineName);
    // Fail with a descriptive message rather than an NPE on spec.getTextProcessor() below.
    if (spec == null) {
        throw new RuntimeException("No pipeline " + pipelineName + " found.");
    }
    TextProcessor processor = getNLPManager().getTextProcessorsManager().getTextProcessor(spec.getTextProcessor());
    AnnotatedText annotatedText = processor.annotateText(text, spec);
    ObjectMapper mapper = new ObjectMapper();
    mapper.configure(SerializationConfig.Feature.FAIL_ON_EMPTY_BEANS, false);
    // convertValue(..., Map.class) can only hand back a raw Map; the object-to-map conversion
    // is expected to produce String field names as keys, so the narrowing is confined to this local.
    @SuppressWarnings("unchecked")
    Map<String, Object> result = mapper.convertValue(annotatedText, Map.class);
    return result;
}
Also used : PipelineSpecification(com.graphaware.nlp.dsl.request.PipelineSpecification) TextProcessor(com.graphaware.nlp.processor.TextProcessor) AnnotatedText(com.graphaware.nlp.domain.AnnotatedText) Map(java.util.Map) ObjectMapper(org.codehaus.jackson.map.ObjectMapper) Description(org.neo4j.procedure.Description) UserFunction(org.neo4j.procedure.UserFunction)

Example 9 with TextProcessor

use of com.graphaware.nlp.processor.TextProcessor in project neo4j-nlp by graphaware.

the class Word2VecProcessor method attach.

/**
 * Looks up a word2vec vector for each selected tag, stores it as a tag property and persists the tag.
 *
 * <p>Tags are taken from the first available source on the request: the tags of the annotated
 * node, the single tag node, or the result of the query. When the request asks for tag splitting,
 * each tag's lemma is re-annotated; if that yields anything other than one tag with the same lemma
 * (ignoring case), the resulting tags are used instead and each is registered as a parent of the
 * original tag. For every candidate tag the vector of its lower-cased lemma is requested from the
 * model named in the request; tags without a vector are skipped.
 *
 * @param request describes the tag source, processor, language, model name and target property name
 * @return the number of tags that received a vector and were persisted
 * @throws RuntimeException wrapping any failure, including the case where the request specifies
 *                          neither an annotated node, a tag node nor a query
 */
public int attach(Word2VecRequest request) {
    try {
        final Iterator<Node> tagNodes;
        if (request.getAnnotatedNode() != null) {
            tagNodes = getAnnotatedTextTags(request.getAnnotatedNode());
        } else if (request.getTagNode() != null) {
            List<Node> singleNode = new ArrayList<>();
            singleNode.add(request.getTagNode());
            tagNodes = singleNode.iterator();
        } else if (request.getQuery() != null) {
            tagNodes = getByQuery(request.getQuery());
        } else {
            throw new RuntimeException("You need to specify or an annotated text " + "or a tag " + "or a query");
        }

        final TextProcessor textProcessor = getProcessor(request.getProcessor());

        // Phase 1: materialise the candidate tags, optionally replacing a tag by its sub-tags.
        final List<Tag> candidates = new ArrayList<>();
        while (tagNodes.hasNext()) {
            final Tag current = (Tag) getPersister(Tag.class).fromNode(tagNodes.next());
            if (!request.getSplitTags()) {
                candidates.add(current);
                continue;
            }
            final List<Tag> parts = textProcessor.annotateTags(current.getLemma(), request.getLang());
            final boolean sameSingleTag = parts.size() == 1
                    && parts.get(0).getLemma().equalsIgnoreCase(current.getLemma());
            if (sameSingleTag) {
                candidates.add(current);
                continue;
            }
            for (Tag part : parts) {
                candidates.add(part);
                current.addParent(RELATIONSHIP_IS_RELATED_TO_SUB_TAG, part, 0.0f);
            }
        }

        // Phase 2: attach a vector to every candidate the model knows about.
        final List<Tag> withVector = new ArrayList<>();
        for (Tag candidate : candidates) {
            LOG.info("Searching for: " + candidate.getLemma().toLowerCase());
            final float[] embedding = word2VecModel.getWordToVec(candidate.getLemma().toLowerCase(), request.getModelName());
            if (embedding == null) {
                continue;
            }
            candidate.addProperties(request.getPropertyName(), new VectorHandler(new DenseVector(embedding)));
            withVector.add(candidate);
        }

        // Phase 3: persist the enriched tags and count them.
        int persisted = 0;
        for (Tag enriched : withVector) {
            if (enriched == null) {
                continue;
            }
            getPersister(Tag.class).getOrCreate(enriched, enriched.getId(), String.valueOf(System.currentTimeMillis()));
            persisted++;
        }
        return persisted;
    } catch (Exception ex) {
        LOG.error("Error!!!! ", ex);
        throw new RuntimeException("Error", ex);
    }
}
Also used : TextProcessor(com.graphaware.nlp.processor.TextProcessor) Node(org.neo4j.graphdb.Node) IOException(java.io.IOException) QueryExecutionException(org.neo4j.graphdb.QueryExecutionException) VectorHandler(com.graphaware.nlp.vector.VectorHandler) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Tag(com.graphaware.nlp.domain.Tag) DenseVector(com.graphaware.nlp.vector.DenseVector)

Example 10 with TextProcessor

use of com.graphaware.nlp.processor.TextProcessor in project neo4j-nlp by graphaware.

the class ServiceLoader method loadTextProcessor.

/**
 * Loads the class with the given name and instantiates it as a {@link TextProcessor} through its
 * no-argument constructor.
 *
 * <p>The type check is done on the loaded {@code Class} before instantiation, so a class that does
 * not implement {@code TextProcessor} is reported through the same logged {@code RuntimeException}
 * path as every other loading failure, and is never instantiated.
 *
 * @param processorClazz fully qualified name of the {@code TextProcessor} implementation
 * @return a new instance of the named class
 * @throws RuntimeException if the class cannot be found, is not a {@code TextProcessor}, has no
 *                          accessible no-argument constructor, or its constructor fails; the
 *                          underlying exception is attached as the cause
 */
public static TextProcessor loadTextProcessor(String processorClazz) {
    try {
        Class<?> clazz = Class.forName(processorClazz);
        if (!TextProcessor.class.isAssignableFrom(clazz)) {
            throw new IllegalArgumentException(processorClazz + " is not a TextProcessor");
        }
        // getDeclaredConstructor().newInstance() replaces the deprecated Class.newInstance(),
        // which rethrows constructor exceptions (even checked ones) without declaring them.
        return clazz.asSubclass(TextProcessor.class).getDeclaredConstructor().newInstance();
    } catch (ReflectiveOperationException | IllegalArgumentException e) {
        String message = "Could not instantiate text processor " + processorClazz;
        LOG.error(message, e);
        throw new RuntimeException(message, e);
    }
}
Also used : TextProcessor(com.graphaware.nlp.processor.TextProcessor)

Aggregations

TextProcessor (com.graphaware.nlp.processor.TextProcessor)10 AnnotatedText (com.graphaware.nlp.domain.AnnotatedText)6 PipelineSpecification (com.graphaware.nlp.dsl.request.PipelineSpecification)3 Tag (com.graphaware.nlp.domain.Tag)2 Cache (com.google.common.cache.Cache)1 CacheBuilder (com.google.common.cache.CacheBuilder)1 LoggerFactory (com.graphaware.common.log.LoggerFactory)1 NLPManager (com.graphaware.nlp.NLPManager)1 NLPExtension (com.graphaware.nlp.extension.NLPExtension)1 LanguageManager (com.graphaware.nlp.language.LanguageManager)1 TextUtils.removeApices (com.graphaware.nlp.util.TextUtils.removeApices)1 TextUtils.removeParenthesis (com.graphaware.nlp.util.TextUtils.removeParenthesis)1 DenseVector (com.graphaware.nlp.vector.DenseVector)1 VectorHandler (com.graphaware.nlp.vector.VectorHandler)1 IOException (java.io.IOException)1 List (java.util.List)1 Map (java.util.Map)1 CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)1 TimeUnit (java.util.concurrent.TimeUnit)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1