Search in sources :

Example 11 with Tag

use of com.graphaware.nlp.domain.Tag in project neo4j-nlp by graphaware.

the class TestAnnotatedTesterUnitTest method testTagIsFound.

/**
 * Builds an annotated text holding one sentence ("hello") that carries a
 * single tag ("hello", "en"), then asserts through {@link TestAnnotatedText}
 * that the sentence count is 1 and that a tag with lemma "hello" is present.
 */
@Test
public void testTagIsFound() {
    // The tester wraps the (still empty) annotated text before it is populated.
    final AnnotatedText text = new AnnotatedText();
    final TestAnnotatedText tester = new TestAnnotatedText(text);

    final Sentence helloSentence = new Sentence("hello");
    final Tag helloTag = new Tag("hello", "en");
    helloSentence.addTag(helloTag);
    text.addSentence(helloSentence);

    tester.assertSentencesCount(1);
    tester.assertTagWithLemma("hello");
}
Also used : TestAnnotatedText(com.graphaware.nlp.util.TestAnnotatedText) AnnotatedText(com.graphaware.nlp.domain.AnnotatedText) TestAnnotatedText(com.graphaware.nlp.util.TestAnnotatedText) TagUtils.newTag(com.graphaware.nlp.util.TagUtils.newTag) Tag(com.graphaware.nlp.domain.Tag) Sentence(com.graphaware.nlp.domain.Sentence) Test(org.junit.Test)

Example 12 with Tag

use of com.graphaware.nlp.domain.Tag in project neo4j-nlp by graphaware.

the class TestAnnotatedTesterUnitTest method testSentenceWithNoText.

/**
 * Negative case: the annotated text contains the sentence "hello it is me",
 * so asserting a sentence with the text "hello it is not me" is expected to
 * fail with an {@link AssertionError}.
 */
@Test(expected = AssertionError.class)
public void testSentenceWithNoText() {
    // The tester wraps the (still empty) annotated text before it is populated.
    final AnnotatedText text = new AnnotatedText();
    final TestAnnotatedText tester = new TestAnnotatedText(text);

    final Sentence storedSentence = new Sentence("hello it is me");
    final Tag helloTag = new Tag("hello", "en");
    storedSentence.addTag(helloTag);
    text.addSentence(storedSentence);

    // Text differs from the stored sentence, so this call must throw.
    tester.assertSentenceWithText("hello it is not me");
}
Also used : TestAnnotatedText(com.graphaware.nlp.util.TestAnnotatedText) AnnotatedText(com.graphaware.nlp.domain.AnnotatedText) TestAnnotatedText(com.graphaware.nlp.util.TestAnnotatedText) TagUtils.newTag(com.graphaware.nlp.util.TagUtils.newTag) Tag(com.graphaware.nlp.domain.Tag) Sentence(com.graphaware.nlp.domain.Sentence) Test(org.junit.Test)

Example 13 with Tag

use of com.graphaware.nlp.domain.Tag in project neo4j-nlp by graphaware.

the class TestAnnotatedTesterUnitTest method testSentenceWithText.

/**
 * Positive case: the annotated text contains the sentence "hello it is me"
 * and the tester is asked to assert a sentence with exactly that text.
 */
@Test
public void testSentenceWithText() {
    // The tester wraps the (still empty) annotated text before it is populated.
    final AnnotatedText text = new AnnotatedText();
    final TestAnnotatedText tester = new TestAnnotatedText(text);

    final Sentence storedSentence = new Sentence("hello it is me");
    final Tag helloTag = new Tag("hello", "en");
    storedSentence.addTag(helloTag);
    text.addSentence(storedSentence);

    tester.assertSentenceWithText("hello it is me");
}
Also used : TestAnnotatedText(com.graphaware.nlp.util.TestAnnotatedText) AnnotatedText(com.graphaware.nlp.domain.AnnotatedText) TestAnnotatedText(com.graphaware.nlp.util.TestAnnotatedText) TagUtils.newTag(com.graphaware.nlp.util.TagUtils.newTag) Tag(com.graphaware.nlp.domain.Tag) Sentence(com.graphaware.nlp.domain.Sentence) Test(org.junit.Test)

Example 14 with Tag

use of com.graphaware.nlp.domain.Tag in project neo4j-nlp by graphaware.

the class ConceptNet5Importer method importHierarchy.

/**
 * Queries ConceptNet5 for edges starting at the lemma of {@code source}, once per
 * admitted relation, and turns the accepted edges into {@link Tag}s linked to
 * {@code source} through parent relationships.
 *
 * <p>An edge is accepted when it passes {@code checkAdmittedRelations}, its weight is
 * strictly greater than {@code minWeight}, its start or end equals the source lemma
 * (case-insensitive) and, if {@code filterLang} is set, both its start and end
 * languages equal {@code lang} (case-insensitive).
 *
 * <p>For an accepted edge whose start is the source lemma (and whose end differs from
 * its start), the end concept is annotated, optionally filtered by part of speech,
 * added as a parent of {@code source} and returned. For any other accepted edge the
 * start concept is annotated and {@code source} is added as its parent.
 *
 * <p>Any exception raised while querying or processing edges is logged and swallowed;
 * the tags collected up to that point are returned.
 *
 * @param source            tag whose lemma is looked up; receives the parent links
 * @param lang              language passed to the ConceptNet5 query and used by the language filter
 * @param filterLang        when {@code true}, only edges whose start and end language equal {@code lang} are kept
 * @param depth             recursion budget; values greater than 1 trigger a recursive import for each accepted end concept
 * @param nlpProcessor      processor handed to {@code tryToAnnotate}
 * @param admittedRelations relations to query; must not be {@code null} or empty
 * @param admittedPOS       admitted part-of-speech values; {@code null} or empty disables the POS filter
 * @param limit             limit passed to the ConceptNet5 query
 * @param minWeight         exclusive lower bound for the edge weight
 * @return the tags created from accepted edges of this level (results of recursive calls are not included)
 * @throws RuntimeException if {@code admittedRelations} is {@code null} or empty
 */
public List<Tag> importHierarchy(Tag source, String lang, boolean filterLang, int depth, TextProcessor nlpProcessor, List<String> admittedRelations, List<String> admittedPOS, int limit, double minWeight) {
    if (null == admittedRelations || admittedRelations.isEmpty()) {
        throw new RuntimeException("Admitted Relationships is empty");
    }
    List<Tag> res = new CopyOnWriteArrayList<>();
    // Normalise the lemma into the form used for the query: lower-case, underscores for spaces.
    String word = source.getLemma().toLowerCase().replace(" ", "_");
    word = removeParenthesis(word);
    word = removeApices(word);
    // Lambdas below need an effectively final reference.
    final String finalWord = word;
    try {
        admittedRelations.forEach(rel -> {
            ConceptNet5EdgeResult values = client.queryByStart(finalWord, rel, lang, limit);
            values.getEdges().stream().forEach((concept) -> {
                // Guard clauses, evaluated in the same order as the original compound condition.
                if (!checkAdmittedRelations(concept, admittedRelations)) {
                    return;
                }
                if (!(concept.getWeight() > minWeight)) {
                    return;
                }
                boolean startIsSource = concept.getStart().equalsIgnoreCase(source.getLemma());
                if (!startIsSource && !concept.getEnd().equalsIgnoreCase(source.getLemma())) {
                    return;
                }
                if (filterLang && !(concept.getEndLanguage().equalsIgnoreCase(lang) && concept.getStartLanguage().equalsIgnoreCase(lang))) {
                    return;
                }

                if (startIsSource && !concept.getStart().equalsIgnoreCase(concept.getEnd())) {
                    // Outgoing edge: the end concept becomes a parent of the source tag.
                    String value = concept.getEnd();
                    value = removeApices(value);
                    value = removeParenthesis(value);
                    Tag annotateTag = tryToAnnotate(value, concept.getEndLanguage(), nlpProcessor);
                    List<String> posList = annotateTag.getPosAsList();
                    // The POS filter only applies when both sides actually provide POS values.
                    boolean posAdmitted = admittedPOS == null || admittedPOS.isEmpty()
                            || posList == null || posList.isEmpty()
                            || posList.stream().anyMatch(admittedPOS::contains);
                    if (posAdmitted) {
                        if (depth > 1) {
                            // Return value intentionally unused: the recursion attaches
                            // parents to annotateTag as a side effect.
                            importHierarchy(annotateTag, lang, filterLang, depth - 1, nlpProcessor, admittedRelations, admittedPOS, limit, minWeight);
                        }
                        source.addParent(concept.getRel(), annotateTag, concept.getWeight(), ConceptNet5Enricher.ENRICHER_NAME);
                        res.add(annotateTag);
                    }
                } else {
                    // Any other accepted edge: the source tag becomes a parent of the start concept.
                    Tag annotateTag = tryToAnnotate(concept.getStart(), concept.getStartLanguage(), nlpProcessor);
                    annotateTag.addParent(concept.getRel(), source, concept.getWeight(), ConceptNet5Enricher.ENRICHER_NAME);
                    res.add(annotateTag);
                }
            });
        });
    } catch (Exception ex) {
        // Best effort: a failing lookup must not break the caller; partial results are returned.
        LOG.error("Error while importing hierarchy for " + finalWord + " (" + lang + "). Ignored!", ex);
    }
    return res;
}
Also used : NLPManager(com.graphaware.nlp.NLPManager) Log(org.neo4j.logging.Log) TextUtils.removeApices(com.graphaware.nlp.util.TextUtils.removeApices) TextProcessor(com.graphaware.nlp.processor.TextProcessor) TextUtils.removeParenthesis(com.graphaware.nlp.util.TextUtils.removeParenthesis) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) LoggerFactory(com.graphaware.common.log.LoggerFactory) CacheBuilder(com.google.common.cache.CacheBuilder) LanguageManager(com.graphaware.nlp.language.LanguageManager) Cache(com.google.common.cache.Cache) PipelineSpecification(com.graphaware.nlp.dsl.request.PipelineSpecification) Tag(com.graphaware.nlp.domain.Tag) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) Tag(com.graphaware.nlp.domain.Tag) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList)

Example 15 with Tag

use of com.graphaware.nlp.domain.Tag in project neo4j-nlp by graphaware.

the class Word2VecProcessor method attach.

/**
 * Looks up a word2vec vector for each tag selected by the request, stores it on
 * the tag under the requested property name and persists every tag for which a
 * vector was found.
 *
 * <p>Tags are selected, in this order of precedence, from the request's annotated
 * node, its single tag node, or its query. When the request asks for tag splitting,
 * each tag's lemma is re-annotated; if that yields anything other than a single tag
 * with the same lemma (case-insensitive), the resulting tags are processed instead
 * of the original, and each is added as a parent of the original tag.
 *
 * @param request describes which tags to process, the processor, language, model and property name to use
 * @return the number of tags that received a vector and were persisted
 * @throws RuntimeException wrapping any failure, including the case where the request
 *                          specifies neither an annotated node, a tag node nor a query
 */
public int attach(Word2VecRequest request) {
    try {
        // 1. Resolve the source of tag nodes.
        Iterator<Node> nodes;
        if (request.getAnnotatedNode() != null) {
            nodes = getAnnotatedTextTags(request.getAnnotatedNode());
        } else if (request.getTagNode() != null) {
            List<Node> single = new ArrayList<>();
            single.add(request.getTagNode());
            nodes = single.iterator();
        } else if (request.getQuery() != null) {
            nodes = getByQuery(request.getQuery());
        } else {
            throw new RuntimeException("You need to specify or an annotated text or a tag or a query");
        }

        // 2. Load the tags, optionally splitting them into sub-tags.
        TextProcessor processor = getProcessor(request.getProcessor());
        List<Tag> candidates = new ArrayList<>();
        while (nodes.hasNext()) {
            Tag original = (Tag) getPersister(Tag.class).fromNode(nodes.next());
            if (!request.getSplitTags()) {
                candidates.add(original);
                continue;
            }
            List<Tag> parts = processor.annotateTags(original.getLemma(), request.getLang());
            boolean unchanged = parts.size() == 1
                    && parts.get(0).getLemma().equalsIgnoreCase(original.getLemma());
            if (unchanged) {
                candidates.add(original);
            } else {
                for (Tag part : parts) {
                    candidates.add(part);
                    original.addParent(RELATIONSHIP_IS_RELATED_TO_SUB_TAG, part, 0.0f);
                }
            }
        }

        // 3. Attach a vector to every tag the model knows about.
        List<Tag> enriched = new ArrayList<>();
        for (Tag candidate : candidates) {
            LOG.info("Searching for: " + candidate.getLemma().toLowerCase());
            float[] vector = word2VecModel.getWordToVec(candidate.getLemma().toLowerCase(), request.getModelName());
            if (vector == null) {
                continue;
            }
            VectorHandler handler = new VectorHandler(new DenseVector(vector));
            candidate.addProperties(request.getPropertyName(), handler);
            enriched.add(candidate);
        }

        // 4. Persist the enriched tags and count them.
        int persisted = 0;
        for (Tag tagToStore : enriched) {
            if (tagToStore != null) {
                getPersister(Tag.class).getOrCreate(tagToStore, tagToStore.getId(), String.valueOf(System.currentTimeMillis()));
                persisted++;
            }
        }
        return persisted;
    } catch (Exception ex) {
        LOG.error("Error!!!! ", ex);
        throw new RuntimeException("Error", ex);
    }
}
Also used : TextProcessor(com.graphaware.nlp.processor.TextProcessor) Node(org.neo4j.graphdb.Node) IOException(java.io.IOException) QueryExecutionException(org.neo4j.graphdb.QueryExecutionException) VectorHandler(com.graphaware.nlp.vector.VectorHandler) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Tag(com.graphaware.nlp.domain.Tag) DenseVector(com.graphaware.nlp.vector.DenseVector)

Aggregations

Tag (com.graphaware.nlp.domain.Tag)20 Sentence (com.graphaware.nlp.domain.Sentence)9 AnnotatedText (com.graphaware.nlp.domain.AnnotatedText)8 TagUtils.newTag (com.graphaware.nlp.util.TagUtils.newTag)5 TestAnnotatedText (com.graphaware.nlp.util.TestAnnotatedText)5 Test (org.junit.Test)5 ArrayList (java.util.ArrayList)4 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)3 Node (org.neo4j.graphdb.Node)3 Cache (com.google.common.cache.Cache)2 CacheBuilder (com.google.common.cache.CacheBuilder)2 LoggerFactory (com.graphaware.common.log.LoggerFactory)2 TextProcessor (com.graphaware.nlp.processor.TextProcessor)2 TextUtils.removeApices (com.graphaware.nlp.util.TextUtils.removeApices)2 TextUtils.removeParenthesis (com.graphaware.nlp.util.TextUtils.removeParenthesis)2 ClientResponse (com.sun.jersey.api.client.ClientResponse)2 WebResource (com.sun.jersey.api.client.WebResource)2 List (java.util.List)2 CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)2 TimeUnit (java.util.concurrent.TimeUnit)2