Search in sources :

Example 1 with TextProcessor

use of com.graphaware.nlp.processor.TextProcessor in project neo4j-nlp by graphaware.

the class ServiceLoader method loadNLPExtension.

/**
 * Loads and instantiates an NLP extension from its fully qualified class name.
 *
 * <p>The class is created through its no-argument constructor. The resulting object must be
 * both an {@code NLPExtension} and a {@code TextProcessor}; anything else is rejected.
 *
 * @param extensionClazz fully qualified name of the extension class to load
 * @return a new instance of the requested extension
 * @throws RuntimeException if the class cannot be found, cannot be instantiated, or is not of
 *         the expected type; the underlying failure is attached as the cause
 */
public static NLPExtension loadNLPExtension(String extensionClazz) {
    try {
        // Class.newInstance() is deprecated and rethrows constructor exceptions unwrapped;
        // going through the Constructor reports them as InvocationTargetException instead.
        Object instance = Class.forName(extensionClazz).getDeclaredConstructor().newInstance();
        // NOTE(review): only TextProcessor implementations are accepted here although the
        // error message speaks of NLP extensions in general - confirm the intended check.
        if (instance instanceof NLPExtension && instance instanceof TextProcessor) {
            return (NLPExtension) instance;
        }
        throw new IllegalArgumentException(extensionClazz + " is not an NLP Extension");
    } catch (ReflectiveOperationException | IllegalArgumentException e) {
        String message = "Could not instantiate NLP extension " + extensionClazz + ".";
        LOG.error(message, e);
        throw new RuntimeException(message, e);
    }
}
Also used : NLPExtension(com.graphaware.nlp.extension.NLPExtension) TextProcessor(com.graphaware.nlp.processor.TextProcessor)

Example 2 with TextProcessor

use of com.graphaware.nlp.processor.TextProcessor in project neo4j-nlp by graphaware.

the class NLPManager method applySentiment.

/**
 * Runs sentiment analysis on the annotated text stored at the given node and persists the
 * result.
 *
 * <p>The annotated text is rebuilt from {@code node}, passed to the processor's
 * {@code sentiment} method and then persisted under the id read from the node property
 * whose key is configured for {@code Properties.PROPERTY_ID}; the current time in
 * milliseconds (as a string) is passed as the last persist argument.
 *
 * @param node          node holding the annotated text to analyse
 * @param textProcessor name of the text processor to use; {@code null} or the empty string
 *                      selects the default processor
 */
public void applySentiment(Node node, String textProcessor) {
    // A null name is treated like the empty string instead of failing with a
    // NullPointerException: both mean "use the default processor".
    boolean useDefault = textProcessor == null || textProcessor.isEmpty();
    TextProcessor processor = useDefault
            ? getTextProcessorsManager().getDefaultProcessor()
            : getTextProcessorsManager().getTextProcessor(textProcessor);
    AnnotatedText annotatedText = (AnnotatedText) getPersister(AnnotatedText.class).fromNode(node);
    processor.sentiment(annotatedText);
    getPersister(AnnotatedText.class).persist(
            annotatedText,
            node.getProperty(configuration.getPropertyKeyFor(Properties.PROPERTY_ID)).toString(),
            String.valueOf(System.currentTimeMillis()));
}
Also used : TextProcessor(com.graphaware.nlp.processor.TextProcessor) AnnotatedText(com.graphaware.nlp.domain.AnnotatedText)

Example 3 with TextProcessor

use of com.graphaware.nlp.processor.TextProcessor in project neo4j-nlp by graphaware.

the class NLPManager method filter.

/**
 * Annotates the text of the request and evaluates the request's filter against the result.
 *
 * <p>The text is first passed to {@code checkTextLanguage}, its language is then detected,
 * and the pipeline named in the request (resolved through {@code getPipeline}) determines
 * which text processor performs the annotation.
 *
 * @param filterRequest request carrying the text, the filter and the pipeline name
 * @return the outcome of applying the filter to the annotated text
 */
public Boolean filter(FilterRequest filterRequest) {
    final String content = filterRequest.getText();
    checkTextLanguage(content, false);
    final String language = LanguageManager.getInstance().detectLanguage(content);
    final String filterExpression = filterRequest.getFilter();

    // Resolve the pipeline specification, then the processor it names.
    final PipelineSpecification specification =
            configuration.loadPipeline(getPipeline(filterRequest.getPipeline()));
    final String processorName = specification.getTextProcessor();
    final TextProcessor annotator = textProcessorsManager.getTextProcessor(processorName);

    final AnnotatedText annotated = annotator.annotateText(content, language, specification);
    return annotated.filter(filterExpression);
}
Also used : PipelineSpecification(com.graphaware.nlp.dsl.request.PipelineSpecification) TextProcessor(com.graphaware.nlp.processor.TextProcessor) AnnotatedText(com.graphaware.nlp.domain.AnnotatedText)

Example 4 with TextProcessor

use of com.graphaware.nlp.processor.TextProcessor in project neo4j-nlp by graphaware.

the class MicrosoftConceptEnricher method importConcept.

/**
 * Enriches the tags selected by the request with concept tags and persists all of them.
 *
 * <p>For every source tag, up to the limit passed to {@code getConcepts} (20) concepts are
 * fetched; each concept lemma is annotated as English ("en") with the requested processor,
 * inherits the parents of the concept tag, and is queued for persistence followed by the
 * source tag itself.
 *
 * @param request concept request naming the processor and the tags to enrich
 * @return the second element of the pair returned by {@code getTagsIteratorFromRequest}
 */
@Override
public Node importConcept(ConceptRequest request) {
    final List<Tag> toPersist = new ArrayList<>();
    final TextProcessor processor = getProcessor(request.getProcessor());

    final Pair<Iterator<Node>, Node> iteratorAndNode = getTagsIteratorFromRequest(request);
    final Iterator<Node> nodes = iteratorAndNode.first();
    final Node tagToBeAnnotated = iteratorAndNode.second();

    // Materialise every source tag before any enrichment starts.
    final List<Tag> sourceTags = new ArrayList<>();
    nodes.forEachRemaining(node -> sourceTags.add((Tag) getPersister(Tag.class).fromNode(node)));

    for (Tag sourceTag : sourceTags) {
        getConcepts(sourceTag, 20).forEach(concept -> {
            Tag annotated = tryToAnnotate(concept.getLemma(), "en", processor);
            concept.getParents().forEach(parent -> annotated.addParent(parent));
            toPersist.add(annotated);
        });
        // The source tag is queued after its concepts.
        toPersist.add(sourceTag);
    }

    for (Tag candidate : toPersist) {
        if (candidate == null) {
            continue;
        }
        getPersister(Tag.class).getOrCreate(candidate, candidate.getId(), String.valueOf(System.currentTimeMillis()));
    }
    return tagToBeAnnotated;
}
Also used : TextProcessor(com.graphaware.nlp.processor.TextProcessor) Node(org.neo4j.graphdb.Node) ArrayList(java.util.ArrayList) Iterator(java.util.Iterator) Tag(com.graphaware.nlp.domain.Tag)

Example 5 with TextProcessor

use of com.graphaware.nlp.processor.TextProcessor in project neo4j-nlp by graphaware.

the class Word2VecProcessor method attach.

/**
 * Attaches word2vec vectors to the tags selected by the request and persists them.
 *
 * <p>Tags are taken, in order of precedence, from the request's annotated node, its single
 * tag node, or its query. When tag splitting is requested, each lemma is re-annotated; if
 * that yields anything other than the same single lemma (ignoring case), the resulting
 * sub-tags are used instead and linked to the original tag. Only tags for which the model
 * returns a vector are stored.
 *
 * @param request word2vec request describing the tag source, processor, language, model
 *                name and target property name
 * @return the number of tags that received a vector and were persisted
 * @throws RuntimeException if no tag source is specified or any step fails; the original
 *         failure is attached as the cause
 */
public int attach(Word2VecRequest request) {
    try {
        final Iterator<Node> nodes;
        if (request.getAnnotatedNode() != null) {
            nodes = getAnnotatedTextTags(request.getAnnotatedNode());
        } else if (request.getTagNode() != null) {
            List<Node> single = new ArrayList<>();
            single.add(request.getTagNode());
            nodes = single.iterator();
        } else if (request.getQuery() != null) {
            nodes = getByQuery(request.getQuery());
        } else {
            throw new RuntimeException("You need to specify or an annotated text " + "or a tag " + "or a query");
        }

        final TextProcessor processor = getProcessor(request.getProcessor());

        // Step 1: collect the candidate tags, optionally split into sub-tags.
        final List<Tag> candidates = new ArrayList<>();
        while (nodes.hasNext()) {
            final Tag current = (Tag) getPersister(Tag.class).fromNode(nodes.next());
            if (!request.getSplitTags()) {
                candidates.add(current);
                continue;
            }
            final List<Tag> parts = processor.annotateTags(current.getLemma(), request.getLang());
            final boolean sameSingleLemma = parts.size() == 1
                    && parts.get(0).getLemma().equalsIgnoreCase(current.getLemma());
            if (sameSingleLemma) {
                candidates.add(current);
                continue;
            }
            for (Tag part : parts) {
                candidates.add(part);
                current.addParent(RELATIONSHIP_IS_RELATED_TO_SUB_TAG, part, 0.0f);
            }
        }

        // Step 2: keep only the tags the model knows a vector for.
        final List<Tag> withVectors = new ArrayList<>();
        for (Tag candidate : candidates) {
            final String word = candidate.getLemma().toLowerCase();
            LOG.info("Searching for: " + word);
            final double[] vector = word2VecModel.getWordToVec(word, request.getModelName());
            if (vector == null) {
                continue;
            }
            candidate.addProperties(request.getPropertyName(), vector);
            withVectors.add(candidate);
        }

        // Step 3: persist the enriched tags and count them.
        int persisted = 0;
        for (Tag enriched : withVectors) {
            if (enriched == null) {
                continue;
            }
            getPersister(Tag.class).getOrCreate(enriched, enriched.getId(), String.valueOf(System.currentTimeMillis()));
            persisted++;
        }
        return persisted;
    } catch (Exception ex) {
        LOG.error("Error!!!! ", ex);
        throw new RuntimeException("Error", ex);
    }
}
Also used : TextProcessor(com.graphaware.nlp.processor.TextProcessor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Node(org.neo4j.graphdb.Node) Tag(com.graphaware.nlp.domain.Tag) QueryExecutionException(org.neo4j.graphdb.QueryExecutionException)

Aggregations

TextProcessor (com.graphaware.nlp.processor.TextProcessor)11 AnnotatedText (com.graphaware.nlp.domain.AnnotatedText)5 Tag (com.graphaware.nlp.domain.Tag)4 PipelineSpecification (com.graphaware.nlp.dsl.request.PipelineSpecification)4 Node (org.neo4j.graphdb.Node)2 Cache (com.google.common.cache.Cache)1 CacheBuilder (com.google.common.cache.CacheBuilder)1 LoggerFactory (com.graphaware.common.log.LoggerFactory)1 NLPManager (com.graphaware.nlp.NLPManager)1 NLPExtension (com.graphaware.nlp.extension.NLPExtension)1 LanguageManager (com.graphaware.nlp.language.LanguageManager)1 TextUtils.removeApices (com.graphaware.nlp.util.TextUtils.removeApices)1 TextUtils.removeParenthesis (com.graphaware.nlp.util.TextUtils.removeParenthesis)1 ArrayList (java.util.ArrayList)1 Iterator (java.util.Iterator)1 List (java.util.List)1 CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)1 TimeUnit (java.util.concurrent.TimeUnit)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 ObjectMapper (org.codehaus.jackson.map.ObjectMapper)1