use of org.elasticsearch.service.opennlp.models.TextAnnotation in project elasticsearch-opennlp-plugin by spinscale.
Usage in class OpenNlpService, method convertTextAnnotationsToNamedEntities:
/**
 * Groups annotated token spans into a map from entity type (e.g. "person")
 * to the set of distinct entity strings found for that type.
 *
 * @param tokens          the tokenized input text that the annotation spans index into
 * @param textAnnotations annotations to convert; each span selects a token range
 * @param namedEntities   out-parameter: type -> set of entity strings, appended to in place
 */
public void convertTextAnnotationsToNamedEntities(String[] tokens, List<TextAnnotation> textAnnotations, Map<String, Set<String>> namedEntities) {
    for (TextAnnotation annotation : textAnnotations) {
        int start = annotation.getSpan().getStart();
        int end = annotation.getSpan().getEnd();
        // Span end is exclusive, matching Arrays.copyOfRange semantics.
        String[] entityTokens = Arrays.copyOfRange(tokens, start, end);
        String content = Joiner.on(" ").join(entityTokens);
        // computeIfAbsent replaces the containsKey/put dance; also renamed the
        // loop variable, which previously shadowed the TextAnnotation class name.
        namedEntities.computeIfAbsent(annotation.getType(), type -> Sets.newHashSet()).add(content);
    }
}
use of org.elasticsearch.service.opennlp.models.TextAnnotation in project elasticsearch-opennlp-plugin by spinscale.
Usage in class OpenNlpService, method tokenize:
/**
 * Runs every configured name finder over the tokenized content, resolves
 * overlapping matches, and returns the surviving named entities grouped by type.
 *
 * @param content raw text to analyze
 * @return map of entity type to the set of entity strings detected in {@code content}
 */
public Map<String, Set<String>> tokenize(String content) {
    String[] tokens = SimpleTokenizer.INSTANCE.tokenize(content);
    List<TextAnnotation> annotations = new ArrayList<TextAnnotation>();
    // One pass per registered finder model; each match becomes an annotation
    // tagged with the finder's entity type and the model's probability.
    for (Map.Entry<String, TokenNameFinderModel> entry : finders.entrySet()) {
        NameFinderME nameFinder = new NameFinderME(entry.getValue());
        Span[] spans = nameFinder.find(tokens);
        double[] probabilities = nameFinder.probs(spans);
        for (int i = 0; i < spans.length; i++) {
            annotations.add(new TextAnnotation(entry.getKey(), spans[i], probabilities[i]));
        }
    }
    // Only resolve conflicts when something was found at all.
    if (!annotations.isEmpty()) {
        removeConflicts(annotations);
    }
    Map<String, Set<String>> namedEntities = Maps.newHashMap();
    convertTextAnnotationsToNamedEntities(tokens, annotations, namedEntities);
    return namedEntities;
}
use of org.elasticsearch.service.opennlp.models.TextAnnotation in project elasticsearch-opennlp-plugin by spinscale.
Usage in class OpenNlpService, method removeConflicts:
/* Copied from https://github.com/tamingtext/book/blob/master/src/test/java/com/tamingtext/opennlp/NameFinderTest.java */
/* Copied from https://github.com/tamingtext/book/blob/master/src/test/java/com/tamingtext/opennlp/NameFinderTest.java */
/**
 * Removes overlapping annotations in place, keeping the annotation with the
 * higher probability whenever two spans are equal or intersect.
 * Relies on {@link TextAnnotation}'s natural ordering (Collections.sort) so
 * that candidates are compared against a stack of still-open predecessors.
 *
 * @param allTextAnnotations mutable list of annotations; losers are removed in place
 */
private void removeConflicts(List<TextAnnotation> allTextAnnotations) {
    // Guard: get(0) below throws IndexOutOfBoundsException on an empty list,
    // and not every caller checks for emptiness first.
    if (allTextAnnotations.isEmpty()) {
        return;
    }
    java.util.Collections.sort(allTextAnnotations);
    List<TextAnnotation> stack = new ArrayList<TextAnnotation>();
    stack.add(allTextAnnotations.get(0));
    for (int ai = 1; ai < allTextAnnotations.size(); ai++) {
        TextAnnotation curr = allTextAnnotations.get(ai);
        boolean deleteCurr = false;
        // Walk the stack from the most recent predecessor backwards.
        for (int ki = stack.size() - 1; ki >= 0; ki--) {
            TextAnnotation prev = stack.get(ki);
            // NOTE(review): the equals and intersects branches have identical
            // bodies and could be merged; kept separate to mirror the upstream
            // tamingtext source this was copied from.
            if (prev.getSpan().equals(curr.getSpan())) {
                if (prev.getProb() > curr.getProb()) {
                    deleteCurr = true;
                    break;
                } else {
                    // curr wins: drop prev from both the stack and the result list,
                    // and step ai back to compensate for the removal.
                    allTextAnnotations.remove(stack.remove(ki));
                    ai--;
                }
            } else if (prev.getSpan().intersects(curr.getSpan())) {
                if (prev.getProb() > curr.getProb()) {
                    deleteCurr = true;
                    break;
                } else {
                    allTextAnnotations.remove(stack.remove(ki));
                    ai--;
                }
            } else if (prev.getSpan().contains(curr.getSpan())) {
                // NOTE(review): likely unreachable — a containing span also
                // intersects, so the branch above handles it first. Kept for
                // fidelity with the upstream source; confirm before deleting.
                break;
            } else {
                // prev no longer overlaps curr (and never will again, since the
                // list is sorted): pop it off the stack.
                stack.remove(ki);
            }
        }
        if (deleteCurr) {
            allTextAnnotations.remove(ai);
            ai--;
            deleteCurr = false;
        } else {
            stack.add(curr);
        }
    }
}
use of org.elasticsearch.service.opennlp.models.TextAnnotation in project elasticsearch-opennlp-plugin by spinscale.
Usage in class SimpleNlpTest, method convertTextAnnotationsToNamedEntities:
/**
 * Groups annotated token spans into a map from entity type (e.g. "person")
 * to the set of distinct entity strings found for that type.
 *
 * @param tokens          the tokenized input text that the annotation spans index into
 * @param textAnnotations annotations to convert; each span selects a token range
 * @param namedEntities   out-parameter: type -> set of entity strings, appended to in place
 */
public void convertTextAnnotationsToNamedEntities(String[] tokens, List<TextAnnotation> textAnnotations, Map<String, Set<String>> namedEntities) {
    for (TextAnnotation annotation : textAnnotations) {
        int start = annotation.getSpan().getStart();
        int end = annotation.getSpan().getEnd();
        // Span end is exclusive, matching Arrays.copyOfRange semantics.
        String[] entityTokens = Arrays.copyOfRange(tokens, start, end);
        String content = Joiner.on(" ").join(entityTokens);
        // computeIfAbsent replaces the containsKey/put dance; also renamed the
        // loop variable, which previously shadowed the TextAnnotation class name.
        namedEntities.computeIfAbsent(annotation.getType(), type -> Sets.newHashSet()).add(content);
    }
}
use of org.elasticsearch.service.opennlp.models.TextAnnotation in project elasticsearch-opennlp-plugin by spinscale.
Usage in class SimpleNlpTest, method testThatMultipleFindersWork:
/**
 * Verifies that running several name finders over the sample sentences yields
 * the expected person, location, and date entities after conflict removal.
 */
@Test
public void testThatMultipleFindersWork() throws Exception {
    loadFinders();
    Map<String, Set<String>> namedEntities = Maps.newHashMap();
    for (String sentence : sentences) {
        String[] tokens = tokenizer.tokenize(sentence);
        List<TextAnnotation> annotations = new ArrayList<TextAnnotation>();
        // finders and names are parallel arrays, so an index loop is required here.
        for (int fi = 0; fi < finders.length; fi++) {
            Span[] spans = finders[fi].find(tokens);
            double[] probabilities = finders[fi].probs(spans);
            for (int si = 0; si < spans.length; si++) {
                annotations.add(new TextAnnotation(names[fi], spans[si], probabilities[si]));
            }
        }
        removeConflicts(annotations);
        convertTextAnnotationsToNamedEntities(tokens, annotations, namedEntities);
    }
    assertThat(namedEntities.get("person"), hasSize(3));
    assertThat(namedEntities.get("person"), containsInAnyOrder("Nancy Reagan", "Reagan", "Joanne Drake"));
    assertThat(namedEntities.get("location"), hasSize(3));
    assertThat(namedEntities.get("location"), containsInAnyOrder("Los Angeles", "Santa Monica", "California"));
    assertThat(namedEntities.get("date"), hasSize(1));
    assertThat(namedEntities.get("date"), containsInAnyOrder("Sunday"));
}
Aggregations