Search in sources :

Example 16 with Tag

use of com.graphaware.nlp.domain.Tag in project neo4j-nlp by graphaware.

the class ConceptNet5Enricher method importConcept.

/**
 * Imports the concept hierarchy for the tags selected by the request and persists
 * every resulting tag.
 *
 * <p>Each source tag is optionally split into sub-tags (when the request asks for it),
 * then expanded through {@code getImporter().importHierarchy(...)} for every requested
 * relationship direction. The expanded tags and the source tags are stored through the
 * {@code Tag} persister.
 *
 * <p>The output languages supplied by the request are lower-cased into a private copy;
 * the request's own list is never modified.
 *
 * @param request the concept request describing the tags to enrich and the import options
 * @return the annotated node of the request when it is not {@code null}, otherwise the
 *         second element of the pair returned by {@code getTagsIteratorFromRequest}
 */
public Node importConcept(ConceptRequest request) {
    Node annotatedNode = request.getAnnotatedNode();
    Pair<Iterator<Node>, Node> pair = getTagsIteratorFromRequest(request);
    Iterator<Node> tagsIterator = pair.first();
    Node tagToBeAnnotated = pair.second();
    int depth = request.getDepth();
    // Boolean.TRUE.equals(...) treats an unset (null) flag as "do not split"
    // instead of failing with a NullPointerException on auto-unboxing.
    boolean splitTags = Boolean.TRUE.equals(request.isSplitTag());
    Boolean filterByLang = request.isFilterByLanguage();
    List<String> admittedRelationships = request.getAdmittedRelationships();
    List<String> admittedPos = request.getAdmittedPos();

    // Normalise into a copy: the request's list may be unmodifiable, and mutating the
    // caller's input is a hidden side effect. Locale.ROOT keeps language codes stable
    // regardless of the JVM default locale (e.g. "IT" must not become "ıt").
    List<String> requestedLanguages = request.getOutputLanguages();
    List<String> outputLanguages = null;
    if (requestedLanguages != null) {
        outputLanguages = new ArrayList<>(requestedLanguages.size());
        for (String language : requestedLanguages) {
            outputLanguages.add(language.toLowerCase(java.util.Locale.ROOT));
        }
    }

    // The set of directions does not depend on the tag, so it is computed once.
    RelDirection relDirection = RelDirection.getRelDirection(request.getRelDirection());
    List<RelDirection> relDirections = relDirection == RelDirection.BOTH
            ? Arrays.asList(RelDirection.IN, RelDirection.OUT)
            : Arrays.asList(relDirection);

    // Step 1: collect the tags to expand, splitting them into sub-tags when requested.
    List<Tag> tags = new ArrayList<>();
    while (tagsIterator.hasNext()) {
        Tag tag = (Tag) getPersister(Tag.class).fromNode(tagsIterator.next());
        if (!splitTags) {
            tags.add(tag);
            continue;
        }
        List<Tag> annotateTags = NLPManager.getInstance().getTextProcessorsManager().annotateTags(tag.getLemma(), tag.getLanguage());
        boolean sameSingleTag = annotateTags.size() == 1
                && annotateTags.get(0).getLemma().equalsIgnoreCase(tag.getLemma());
        if (sameSingleTag) {
            // Splitting produced the tag itself: keep the original.
            tags.add(tag);
        } else {
            // Expand the sub-tags instead and link each of them to the original tag.
            for (Tag newTag : annotateTags) {
                tags.add(newTag);
                tag.addParent(RELATIONSHIP_IS_RELATED_TO_SUB_TAG, newTag, 0.0f);
            }
        }
    }

    // Step 2: import the hierarchy of every tag in every direction, then the tag itself.
    List<Tag> conceptTags = new ArrayList<>();
    for (Tag tag : tags) {
        for (RelDirection direction : relDirections) {
            conceptTags.addAll(getImporter().importHierarchy(tag, direction, filterByLang, outputLanguages, depth, admittedRelationships, admittedPos, request.getResultsLimit(), request.getMinWeight()));
        }
        conceptTags.add(tag);
    }

    // Step 3: persist everything that was collected, skipping null entries.
    for (Tag newTag : conceptTags) {
        if (newTag != null) {
            getPersister(Tag.class).getOrCreate(newTag, newTag.getId(), String.valueOf(System.currentTimeMillis()));
        }
    }

    return annotatedNode != null ? annotatedNode : tagToBeAnnotated;
}
Also used : Tag(com.graphaware.nlp.domain.Tag)

Example 17 with Tag

use of com.graphaware.nlp.domain.Tag in project neo4j-nlp by graphaware.

the class AnnotatedTextPersistenceTest method createAnnotatedTextWithSameTagInSameTextWithDifferentPos.

/**
 * Builds an {@link AnnotatedText} fixture from the fixed text
 * "Hello my name is cool. And I am cool.".
 *
 * <p>The text is split on '.' into sentences numbered from 0, and each sentence is split
 * on single spaces into tokens. Every token becomes an "en" tag; tokens equal to "cool"
 * additionally receive a POS of {@code "cool" + sentenceNumber} and a named entity of
 * {@code "NER_Cool" + sentenceNumber}, so the same token carries different values in
 * each sentence. All tag occurrences are registered with the fixed offsets 0 and 20.
 *
 * @return the populated annotated text
 */
private AnnotatedText createAnnotatedTextWithSameTagInSameTextWithDifferentPos() {
    final AnnotatedText result = new AnnotatedText();
    final String[] rawSentences = "Hello my name is cool. And I am cool.".split("\\.");
    for (int sentenceNumber = 0; sentenceNumber < rawSentences.length; sentenceNumber++) {
        final String rawSentence = rawSentences[sentenceNumber];
        final Sentence sentence = new Sentence(rawSentence, sentenceNumber);
        for (String word : rawSentence.split(" ")) {
            final Tag tag = new Tag(word, "en");
            if ("cool".equals(word)) {
                // The sentence number makes POS/NE differ between the two sentences.
                tag.setPos(Collections.singletonList("cool" + sentenceNumber));
                tag.setNe(Collections.singletonList("NER_Cool" + sentenceNumber));
            }
            sentence.addTagOccurrence(0, 20, word, sentence.addTag(tag));
        }
        result.addSentence(sentence);
    }
    return result;
}
Also used : AnnotatedText(com.graphaware.nlp.domain.AnnotatedText) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Tag(com.graphaware.nlp.domain.Tag) Sentence(com.graphaware.nlp.domain.Sentence)

Example 18 with Tag

use of com.graphaware.nlp.domain.Tag in project neo4j-nlp by graphaware.

the class AnnotatedTextPersistenceTest method createAnnotatedTextFor.

/**
 * Builds an {@link AnnotatedText} fixture from the given text.
 *
 * <p>The text is split on '.' into sentences numbered from 0, and each sentence is split
 * on single spaces into tokens. Every token becomes an "en" tag; tokens equal to
 * {@code expectedTokenForPOS} additionally receive {@code expectedPOS} as their only POS.
 * All tag occurrences are registered with the fixed offsets 0 and 20.
 *
 * @param text                the raw text to wrap
 * @param expectedTokenForPOS the token that should be given a POS value
 * @param expectedPOS         the POS value assigned to the matching token
 * @return the populated annotated text
 */
private AnnotatedText createAnnotatedTextFor(String text, String expectedTokenForPOS, String expectedPOS) {
    final AnnotatedText result = new AnnotatedText();
    result.setText(text);
    final String[] rawSentences = text.split("\\.");
    for (int sentenceNumber = 0; sentenceNumber < rawSentences.length; sentenceNumber++) {
        final String rawSentence = rawSentences[sentenceNumber];
        final Sentence sentence = new Sentence(rawSentence, sentenceNumber);
        for (String word : rawSentence.split(" ")) {
            final Tag tag = new Tag(word, "en");
            final boolean needsPos = word.equals(expectedTokenForPOS);
            if (needsPos) {
                tag.setPos(Collections.singletonList(expectedPOS));
            }
            sentence.addTagOccurrence(0, 20, word, sentence.addTag(tag));
        }
        result.addSentence(sentence);
    }
    return result;
}
Also used : AnnotatedText(com.graphaware.nlp.domain.AnnotatedText) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Tag(com.graphaware.nlp.domain.Tag) Sentence(com.graphaware.nlp.domain.Sentence)

Example 19 with Tag

use of com.graphaware.nlp.domain.Tag in project neo4j-nlp by graphaware.

the class StubTextProcessor method annotateText.

/**
 * Produces a stub annotation of the text and records the name of the pipeline used.
 *
 * <p>The text is split on '.' into sentences numbered from 0, and each sentence is split
 * on single spaces into tokens. Every token becomes a tag in the pipeline's language
 * with POS "TESTVB"; it also gets the named entity "test" unless "test" is listed in the
 * pipeline's excluded NER values. Token offsets restart at 0 for each sentence and the
 * end offset is the begin offset plus the token length plus one. When the pipeline has
 * the "phrase" processing step, the whole sentence is added as a single phrase occurrence.
 *
 * @param text                  the raw text to annotate
 * @param pipelineSpecification the pipeline whose name, language, excluded NER values and
 *                              processing steps drive the annotation
 * @return the annotated text containing one sentence per split segment
 */
@Override
public AnnotatedText annotateText(String text, PipelineSpecification pipelineSpecification) {
    this.lastPipelineUsed = pipelineSpecification.getName();
    final AnnotatedText result = new AnnotatedText();
    final String[] rawSentences = text.split("\\.");
    for (int sentenceIndex = 0; sentenceIndex < rawSentences.length; sentenceIndex++) {
        final String rawSentence = rawSentences[sentenceIndex];
        final String[] words = rawSentence.split(" ");
        final Sentence sentence = new Sentence(rawSentence, sentenceIndex);
        int offset = 0;
        for (String word : words) {
            final Tag tag = new Tag(word, pipelineSpecification.getLanguage());
            final boolean nerExcluded = pipelineSpecification.getExcludedNER().contains("test");
            if (!nerExcluded) {
                tag.setNe(Collections.singletonList("test"));
            }
            tag.setPos(Collections.singletonList("TESTVB"));
            final int begin = offset;
            // The end offset also accounts for the separator that follows the token.
            offset = begin + word.length() + 1;
            sentence.addTagOccurrence(begin, offset, word, sentence.addTag(tag));
        }
        if (pipelineSpecification.hasProcessingStep("phrase")) {
            sentence.addPhraseOccurrence(0, rawSentence.length(), new Phrase(rawSentence));
        }
        result.addSentence(sentence);
    }
    return result;
}
Also used : AnnotatedText(com.graphaware.nlp.domain.AnnotatedText) Tag(com.graphaware.nlp.domain.Tag) Phrase(com.graphaware.nlp.domain.Phrase) Sentence(com.graphaware.nlp.domain.Sentence)

Example 20 with Tag

use of com.graphaware.nlp.domain.Tag in project neo4j-nlp by graphaware.

the class TagPersister method storeTagParent.

/**
 * Stores the parent relationships of a tag.
 *
 * <p>For every parent relationship of {@code tag}, the parent tag is fetched or created
 * through {@code getOrCreate}, and a Cypher {@code MERGE} is executed that links
 * {@code tagNode} to the parent node with a relationship keyed by the relation type.
 * The weight and source properties are only set when the relationship is created.
 * Nothing happens when the tag has no parents collection.
 *
 * @param tagNode the node of the child tag
 * @param tag     the tag whose parents are stored
 * @param txId    the transaction identifier forwarded to {@code getOrCreate}
 */
private void storeTagParent(Node tagNode, Tag tag, String txId) {
    if (tag.getParents() == null) {
        return;
    }
    tag.getParents().stream().forEach(relationship -> {
        final Tag parentTag = relationship.getParent();
        final Node parentNode = getOrCreate(parentTag, parentTag.getId(), txId);

        final Map<String, Object> queryParameters = new HashMap<>();
        queryParameters.put("source", tagNode.getId());
        queryParameters.put("target", parentNode.getId());
        queryParameters.put("type", relationship.getRelation());
        queryParameters.put("weight", relationship.getWeight());
        queryParameters.put("sourceId", relationship.getSource());

        // TODO: move the "type" and "weight" property names to config constants
        final String template = "MATCH (source:`%s`), (target:`%s`) "
                + "WHERE id(source) = {source} AND id(target) = {target} "
                + "MERGE (source)-[r:`%s` {%s: {type} }]->(target) "
                + "ON CREATE SET r.%s = {weight}, r.source = {sourceId} ";
        final String cypher = String.format(
                template,
                configuration().getLabelFor(Labels.Tag),
                configuration().getLabelFor(Labels.Tag),
                Relationships.IS_RELATED_TO,
                "type",
                "weight");

        getDatabase().execute(cypher, queryParameters);
    });
}
Also used : Node(org.neo4j.graphdb.Node) Tag(com.graphaware.nlp.domain.Tag)

Aggregations

Tag (com.graphaware.nlp.domain.Tag)20 Sentence (com.graphaware.nlp.domain.Sentence)9 AnnotatedText (com.graphaware.nlp.domain.AnnotatedText)8 TagUtils.newTag (com.graphaware.nlp.util.TagUtils.newTag)5 TestAnnotatedText (com.graphaware.nlp.util.TestAnnotatedText)5 Test (org.junit.Test)5 ArrayList (java.util.ArrayList)4 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)3 Node (org.neo4j.graphdb.Node)3 Cache (com.google.common.cache.Cache)2 CacheBuilder (com.google.common.cache.CacheBuilder)2 LoggerFactory (com.graphaware.common.log.LoggerFactory)2 TextProcessor (com.graphaware.nlp.processor.TextProcessor)2 TextUtils.removeApices (com.graphaware.nlp.util.TextUtils.removeApices)2 TextUtils.removeParenthesis (com.graphaware.nlp.util.TextUtils.removeParenthesis)2 ClientResponse (com.sun.jersey.api.client.ClientResponse)2 WebResource (com.sun.jersey.api.client.WebResource)2 List (java.util.List)2 CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)2 TimeUnit (java.util.concurrent.TimeUnit)2