Use of com.graphaware.nlp.domain.Tag in project neo4j-nlp by graphaware.
Class TestAnnotatedTesterUnitTest, method testTagIsFound.
/**
 * Checks that a tag added to the only sentence of an annotated text is
 * reported by the tester: one sentence is counted and the lemma "hello"
 * is found.
 */
@Test
public void testTagIsFound() {
    // Build the sentence and its single tag up front.
    final Sentence helloSentence = new Sentence("hello");
    helloSentence.addTag(new Tag("hello", "en"));

    // The tester wraps the text before the sentence is attached, as in the fixture setup.
    final AnnotatedText text = new AnnotatedText();
    final TestAnnotatedText tester = new TestAnnotatedText(text);
    text.addSentence(helloSentence);

    tester.assertSentencesCount(1);
    tester.assertTagWithLemma("hello");
}
Use of com.graphaware.nlp.domain.Tag in project neo4j-nlp by graphaware.
Class TestAnnotatedTesterUnitTest, method testSentenceWithNoText.
/**
 * Checks that asserting a sentence text that was never added fails:
 * the text holds "hello it is me", the assertion asks for
 * "hello it is not me", and an {@link AssertionError} is expected.
 */
@Test(expected = AssertionError.class)
public void testSentenceWithNoText() {
    // Build the sentence and its single tag up front.
    final Sentence storedSentence = new Sentence("hello it is me");
    storedSentence.addTag(new Tag("hello", "en"));

    // The tester wraps the text before the sentence is attached.
    final AnnotatedText text = new AnnotatedText();
    final TestAnnotatedText tester = new TestAnnotatedText(text);
    text.addSentence(storedSentence);

    // Different wording from the stored sentence, so this must throw.
    tester.assertSentenceWithText("hello it is not me");
}
Use of com.graphaware.nlp.domain.Tag in project neo4j-nlp by graphaware.
Class TestAnnotatedTesterUnitTest, method testSentenceWithText.
/**
 * Checks that a sentence added to the annotated text is found by the
 * tester when asserting on its exact text, "hello it is me".
 */
@Test
public void testSentenceWithText() {
    // Build the sentence and its single tag up front.
    final Sentence storedSentence = new Sentence("hello it is me");
    storedSentence.addTag(new Tag("hello", "en"));

    // The tester wraps the text before the sentence is attached.
    final AnnotatedText text = new AnnotatedText();
    final TestAnnotatedText tester = new TestAnnotatedText(text);
    text.addSentence(storedSentence);

    tester.assertSentenceWithText("hello it is me");
}
Use of com.graphaware.nlp.domain.Tag in project neo4j-nlp by graphaware.
Class ConceptNet5Importer, method importHierarchy.
/**
 * Queries ConceptNet5 for edges related to {@code source} and links the
 * resulting tags to it.
 *
 * <p>The source lemma is lower-cased, spaces are replaced with underscores,
 * and it is passed through {@code removeParenthesis} and {@code removeApices}
 * before being used as the query word. For every admitted relation the client
 * is queried, and each returned edge is kept only if it passes
 * {@code checkAdmittedRelations}, its weight is strictly greater than
 * {@code minWeight}, its start or end equals the source lemma (ignoring case)
 * and, when {@code filterLang} is set, both of its languages equal {@code lang}.
 *
 * <p>For an edge that starts at the source (and is not a self-loop) the end
 * concept is annotated, optionally filtered by part of speech, added as a
 * parent of {@code source}, and, while {@code depth > 1}, expanded recursively.
 * The list returned by the recursive call is discarded; only the parents it
 * attaches to the annotated tag are retained. For any other accepted edge the
 * start concept is annotated and {@code source} is added as its parent.
 *
 * <p>Any exception raised while querying or processing edges is logged and
 * swallowed; the tags collected up to that point are still returned.
 *
 * @param source            tag whose lemma is looked up
 * @param lang              language passed to the client and used by the language filter
 * @param filterLang        when {@code true}, only edges whose start and end languages equal {@code lang} are kept
 * @param depth             recursion budget; the end concept is expanded only while this is greater than 1
 * @param nlpProcessor      processor handed to {@code tryToAnnotate}
 * @param admittedRelations relations to query; must not be {@code null} or empty
 * @param admittedPOS       admitted parts of speech; {@code null} or empty disables the POS filter
 * @param limit             limit passed to the client query
 * @param minWeight         exclusive lower bound on the edge weight
 * @return the tags created from the accepted edges (never {@code null})
 * @throws RuntimeException if {@code admittedRelations} is {@code null} or empty
 */
public List<Tag> importHierarchy(Tag source, String lang, boolean filterLang, int depth, TextProcessor nlpProcessor, List<String> admittedRelations, List<String> admittedPOS, int limit, double minWeight) {
    if (null == admittedRelations || admittedRelations.isEmpty()) {
        throw new RuntimeException("Admitted Relationships is empty");
    }
    final List<Tag> res = new CopyOnWriteArrayList<>();
    final String lemma = source.getLemma();
    final String word = removeApices(removeParenthesis(lemma.toLowerCase().replace(" ", "_")));
    try {
        admittedRelations.forEach(rel -> {
            ConceptNet5EdgeResult values = client.queryByStart(word, rel, lang, limit);
            values.getEdges().forEach(concept -> {
                // Guard clauses, evaluated in the same order as the original single condition.
                if (!checkAdmittedRelations(concept, admittedRelations) || !(concept.getWeight() > minWeight)) {
                    return;
                }
                final boolean startsAtSource = concept.getStart().equalsIgnoreCase(lemma);
                if (!startsAtSource && !concept.getEnd().equalsIgnoreCase(lemma)) {
                    return;
                }
                if (filterLang && !(concept.getEndLanguage().equalsIgnoreCase(lang) && concept.getStartLanguage().equalsIgnoreCase(lang))) {
                    return;
                }

                if (startsAtSource && !concept.getStart().equalsIgnoreCase(concept.getEnd())) {
                    // Edge leaves the source: the end concept becomes a parent of the source.
                    String value = removeParenthesis(removeApices(concept.getEnd()));
                    Tag annotateTag = tryToAnnotate(value, concept.getEndLanguage(), nlpProcessor);
                    List<String> posList = annotateTag.getPosAsList();
                    boolean posAdmitted = admittedPOS == null || admittedPOS.isEmpty()
                            || posList == null || posList.isEmpty()
                            || posList.stream().anyMatch(admittedPOS::contains);
                    if (posAdmitted) {
                        if (depth > 1) {
                            // Result list intentionally unused: the call enriches annotateTag in place.
                            importHierarchy(annotateTag, lang, filterLang, depth - 1, nlpProcessor, admittedRelations, admittedPOS, limit, minWeight);
                        }
                        source.addParent(concept.getRel(), annotateTag, concept.getWeight(), ConceptNet5Enricher.ENRICHER_NAME);
                        res.add(annotateTag);
                    }
                } else {
                    // Edge arrives at the source (or is a self-loop): the source becomes the parent.
                    // NOTE(review): unlike the branch above, the start concept is not cleaned with
                    // removeApices/removeParenthesis and no POS filter is applied - confirm this is intended.
                    Tag annotateTag = tryToAnnotate(concept.getStart(), concept.getStartLanguage(), nlpProcessor);
                    annotateTag.addParent(concept.getRel(), source, concept.getWeight(), ConceptNet5Enricher.ENRICHER_NAME);
                    res.add(annotateTag);
                }
            });
        });
    } catch (Exception ex) {
        // Best effort: enrichment failures must not break the caller.
        LOG.error("Error while importing hierarchy for " + word + " (" + lang + "). Ignored!", ex);
    }
    return res;
}
Use of com.graphaware.nlp.domain.Tag in project neo4j-nlp by graphaware.
Class Word2VecProcessor, method attach.
/**
 * Attaches word2vec vectors to tags and persists every tag for which a
 * vector was found.
 *
 * <p>The tag nodes are taken from the first source present on the request,
 * checked in this order: the annotated node, a single tag node, or a query.
 * When {@code request.getSplitTags()} is set, each tag's lemma is re-annotated
 * with the request's processor; if that yields anything other than a single
 * tag with the same lemma (ignoring case), the resulting sub-tags are processed
 * instead of the original tag and are added to it as parents.
 *
 * <p>For each tag to process, the lower-cased lemma is looked up in the
 * word2vec model named by the request. Tags with a vector get it stored under
 * {@code request.getPropertyName()} and are then persisted via the tag persister.
 *
 * @param request describes where the tags come from and which processor,
 *                language, model and property name to use
 * @return the number of tags that received a vector and were persisted
 * @throws RuntimeException if no tag source is specified on the request, or
 *                          wrapping any exception raised during processing
 */
public int attach(Word2VecRequest request) {
    try {
        Iterator<Node> tagsIterator;
        if (request.getAnnotatedNode() != null) {
            tagsIterator = getAnnotatedTextTags(request.getAnnotatedNode());
        } else if (request.getTagNode() != null) {
            List<Node> proc = new ArrayList<>();
            proc.add(request.getTagNode());
            tagsIterator = proc.iterator();
        } else if (request.getQuery() != null) {
            tagsIterator = getByQuery(request.getQuery());
        } else {
            throw new RuntimeException("You need to specify or an annotated text or a tag or a query");
        }

        TextProcessor processor = getProcessor(request.getProcessor());
        List<Tag> tags = new ArrayList<>();
        while (tagsIterator.hasNext()) {
            Tag tag = (Tag) getPersister(Tag.class).fromNode(tagsIterator.next());
            if (!request.getSplitTags()) {
                tags.add(tag);
                continue;
            }
            List<Tag> annotateTags = processor.annotateTags(tag.getLemma(), request.getLang());
            if (annotateTags.size() == 1 && annotateTags.get(0).getLemma().equalsIgnoreCase(tag.getLemma())) {
                // Re-annotation produced the same single lemma: nothing to split.
                tags.add(tag);
                continue;
            }
            // NOTE(review): the original tag receives the sub-tags as parents here but is not
            // itself added to the list, so it is never passed to getOrCreate below - confirm
            // this is intended.
            for (Tag newTag : annotateTags) {
                tags.add(newTag);
                tag.addParent(RELATIONSHIP_IS_RELATED_TO_SUB_TAG, newTag, 0.0f);
            }
        }

        // First pass: look up every vector before anything is persisted.
        List<Tag> extendedTags = new ArrayList<>();
        for (Tag tag : tags) {
            String lemma = tag.getLemma().toLowerCase();
            LOG.info("Searching for: " + lemma);
            float[] vector = word2VecModel.getWordToVec(lemma, request.getModelName());
            if (vector != null) {
                VectorHandler vectorHandler = new VectorHandler(new DenseVector(vector));
                tag.addProperties(request.getPropertyName(), vectorHandler);
                extendedTags.add(tag);
            }
        }

        // Second pass: persist the enriched tags.
        for (Tag newTag : extendedTags) {
            getPersister(Tag.class).getOrCreate(newTag, newTag.getId(), String.valueOf(System.currentTimeMillis()));
        }
        return extendedTags.size();
    } catch (Exception ex) {
        LOG.error("Error!!!! ", ex);
        throw new RuntimeException("Error", ex);
    }
}
Aggregations