use of com.graphaware.nlp.processor.TextProcessor in project neo4j-nlp by graphaware.
the class NLPManager method annotateTextAndPersist.
/**
 * Annotates {@code text} using the pipeline resolved from {@code pipelineName} and hands the
 * annotation to {@code processAnnotationPersist}.
 *
 * <p>The language is determined first via {@code checkTextLanguage}; the pipeline name is then
 * resolved through {@code getPipeline} and its specification loaded from the configuration.
 *
 * @param text the text to annotate
 * @param id the identifier passed on to {@code processAnnotationPersist}
 * @param textProcessor not read by this method; the processor is taken from the loaded
 *     pipeline specification instead
 * @param pipelineName the pipeline name to resolve; also the name reported when no pipeline is found
 * @param force not read by this method
 * @param checkForLanguage forwarded to {@code checkTextLanguage}
 * @return the node returned by {@code processAnnotationPersist}
 * @throws RuntimeException if no pipeline specification could be loaded
 */
public Node annotateTextAndPersist(String text, String id, String textProcessor, String pipelineName, boolean force, boolean checkForLanguage) {
    final String language = checkTextLanguage(text, checkForLanguage);
    final String resolvedPipeline = getPipeline(pipelineName);
    final PipelineSpecification specification = getConfiguration().loadPipeline(resolvedPipeline);
    if (specification == null) {
        throw new RuntimeException("No pipeline " + pipelineName + " found.");
    }
    // The processor is looked up by the name stored in the specification and used immediately.
    final AnnotatedText annotated = textProcessorsManager
            .getTextProcessor(specification.getTextProcessor())
            .annotateText(text, language, specification);
    return processAnnotationPersist(id, text, annotated, specification);
}
use of com.graphaware.nlp.processor.TextProcessor in project neo4j-nlp by graphaware.
the class NLPManager method annotateTextAndPersist.
/**
 * Annotates {@code text} with the processor named in {@code pipelineSpecification} and hands
 * the annotation to {@code processAnnotationPersist}.
 *
 * @param text the text to annotate
 * @param id the identifier passed on to {@code processAnnotationPersist}
 * @param checkForLanguage forwarded to {@code checkTextLanguage}
 * @param pipelineSpecification the pipeline to run; supplies the text processor name
 * @return the node returned by {@code processAnnotationPersist}
 */
public Node annotateTextAndPersist(String text, String id, boolean checkForLanguage, PipelineSpecification pipelineSpecification) {
    final String language = checkTextLanguage(text, checkForLanguage);
    final AnnotatedText annotation = textProcessorsManager
            .getTextProcessor(pipelineSpecification.getTextProcessor())
            .annotateText(text, language, pipelineSpecification);
    return processAnnotationPersist(id, text, annotation, pipelineSpecification);
}
use of com.graphaware.nlp.processor.TextProcessor in project neo4j-nlp by graphaware.
the class AnnotateFunction method getAnnotation.
/**
 * Annotates {@code text} with the pipeline named in {@code specificationInput} and returns the
 * resulting annotation converted to a map by a Jackson {@code ObjectMapper}.
 *
 * <p>Nothing is persisted by this method itself; it only looks up the pipeline specification and
 * its text processor, runs the annotation and converts the result.
 *
 * @param text the text to annotate
 * @param specificationInput pipeline parameters; only the {@code "name"} entry is read
 * @return the annotation as a map
 * @throws RuntimeException if {@code specificationInput} is null or has no {@code "name"} key,
 *     if no pipeline specification is found for that name, or if its text processor cannot be
 *     resolved
 */
@UserFunction("ga.nlp.processor.annotate")
@Description("Perform the annotation on the given text, returns the produced annotation domain")
public Map<String, Object> getAnnotation(@Name("text") String text, @Name("pipelineSpecification") Map<String, Object> specificationInput) {
    if (specificationInput == null || !specificationInput.containsKey("name")) {
        throw new RuntimeException("You must specify the name of the pipeline");
    }
    String pipelineName = (String) specificationInput.get("name");
    PipelineSpecification spec = getNLPManager().getTextProcessorsManager().getPipelineSpecification(pipelineName);
    if (spec == null) {
        // Fail with a readable message instead of a NullPointerException on the next line.
        throw new RuntimeException("No pipeline " + pipelineName + " found.");
    }
    TextProcessor processor = getNLPManager().getTextProcessorsManager().getTextProcessor(spec.getTextProcessor());
    if (processor == null) {
        throw new RuntimeException("No text processor " + spec.getTextProcessor() + " found for pipeline " + pipelineName + ".");
    }
    AnnotatedText annotatedText = processor.annotateText(text, spec);
    ObjectMapper mapper = new ObjectMapper();
    // Beans without serializable properties must not abort the conversion.
    mapper.configure(SerializationConfig.Feature.FAIL_ON_EMPTY_BEANS, false);
    // convertValue(..., Map.class) yields a raw Map; Jackson produces String keys for bean properties.
    @SuppressWarnings("unchecked")
    Map<String, Object> result = mapper.convertValue(annotatedText, Map.class);
    return result;
}
use of com.graphaware.nlp.processor.TextProcessor in project neo4j-nlp by graphaware.
the class Word2VecProcessor method attach.
/**
 * Attaches word2vec vectors to tags selected by the request and persists the enriched tags.
 *
 * <p>The tag nodes come from, in order of precedence, the request's annotated node, its single
 * tag node, or its query. When the request asks for tag splitting, each tag's lemma is
 * re-annotated; if that yields anything other than a single tag with the same lemma (ignoring
 * case), the resulting tags are used instead and each is linked to the original tag through
 * {@code addParent}. Only tags for which the model returns a vector are persisted.
 *
 * @param request the selection (annotated node, tag node or query), the processor, language,
 *     model name, split flag and the property name under which the vector is stored
 * @return the number of tags persisted with a vector
 * @throws RuntimeException wrapping any exception raised while processing, including the one
 *     raised when the request specifies no annotated node, tag node or query
 */
public int attach(Word2VecRequest request) {
    try {
        final Iterator<Node> nodes;
        if (request.getAnnotatedNode() != null) {
            nodes = getAnnotatedTextTags(request.getAnnotatedNode());
        } else if (request.getTagNode() != null) {
            List<Node> single = new ArrayList<>();
            single.add(request.getTagNode());
            nodes = single.iterator();
        } else if (request.getQuery() != null) {
            nodes = getByQuery(request.getQuery());
        } else {
            throw new RuntimeException("You need to specify or an annotated text " + "or a tag " + "or a query");
        }

        final TextProcessor textProcessor = getProcessor(request.getProcessor());

        // Step 1: collect the tags to look up, optionally replacing a tag by its sub-tags.
        final List<Tag> candidates = new ArrayList<>();
        while (nodes.hasNext()) {
            final Tag current = (Tag) getPersister(Tag.class).fromNode(nodes.next());
            if (!request.getSplitTags()) {
                candidates.add(current);
                continue;
            }
            final List<Tag> parts = textProcessor.annotateTags(current.getLemma(), request.getLang());
            final boolean sameSingleTag = parts.size() == 1
                    && parts.get(0).getLemma().equalsIgnoreCase(current.getLemma());
            if (sameSingleTag) {
                candidates.add(current);
                continue;
            }
            for (Tag part : parts) {
                candidates.add(part);
                current.addParent(RELATIONSHIP_IS_RELATED_TO_SUB_TAG, part, 0.0f);
            }
        }

        // Step 2: keep only the tags the model knows, storing the vector as a tag property.
        final List<Tag> withVectors = new ArrayList<>();
        for (Tag candidate : candidates) {
            LOG.info("Searching for: " + candidate.getLemma().toLowerCase());
            final float[] vector = word2VecModel.getWordToVec(candidate.getLemma().toLowerCase(), request.getModelName());
            if (vector == null) {
                continue;
            }
            final VectorHandler handler = new VectorHandler(new DenseVector(vector));
            candidate.addProperties(request.getPropertyName(), handler);
            withVectors.add(candidate);
        }

        // Step 3: persist the enriched tags and count them.
        int persisted = 0;
        for (Tag enriched : withVectors) {
            if (enriched == null) {
                continue;
            }
            getPersister(Tag.class).getOrCreate(enriched, enriched.getId(), String.valueOf(System.currentTimeMillis()));
            persisted++;
        }
        return persisted;
    } catch (Exception ex) {
        LOG.error("Error!!!! ", ex);
        throw new RuntimeException("Error", ex);
    }
}
use of com.graphaware.nlp.processor.TextProcessor in project neo4j-nlp by graphaware.
the class ServiceLoader method loadTextProcessor.
/**
 * Loads the class named {@code processorClazz} and instantiates it through its no-argument
 * constructor.
 *
 * <p>The class is verified to implement {@link TextProcessor} before it is instantiated, so a
 * wrong class name is reported through the same wrapped {@code RuntimeException} as every other
 * loading failure rather than as a stray {@code ClassCastException}.
 *
 * @param processorClazz fully qualified name of a {@code TextProcessor} implementation
 * @return a new instance of the named class
 * @throws RuntimeException if the class cannot be found, is not a {@code TextProcessor}, has no
 *     accessible no-argument constructor, or its constructor fails; the cause is preserved
 */
public static TextProcessor loadTextProcessor(String processorClazz) {
    try {
        Class<?> clazz = Class.forName(processorClazz);
        if (!TextProcessor.class.isAssignableFrom(clazz)) {
            throw new IllegalArgumentException(processorClazz + " is not a TextProcessor");
        }
        // Class.newInstance() is deprecated and rethrows constructor exceptions undeclared;
        // going through the Constructor wraps them in InvocationTargetException instead.
        return clazz.asSubclass(TextProcessor.class).getDeclaredConstructor().newInstance();
    } catch (ReflectiveOperationException | IllegalArgumentException e) {
        String message = "Could not instantiate text processor " + processorClazz;
        LOG.error(message, e);
        throw new RuntimeException(message, e);
    }
}
Aggregations