Search in sources:

Example 1 with TextAnnotation

use of org.elasticsearch.service.opennlp.models.TextAnnotation in project elasticsearch-opennlp-plugin by spinscale.

the class OpenNlpService method convertTextAnnotationsToNamedEntities.

/**
 * Collects the token text covered by each annotation into {@code namedEntities}, keyed by the
 * annotation's type.
 *
 * <p>For every annotation, the tokens in the index range {@code [start, end)} of its span are
 * joined with single spaces and added to the set stored under the annotation's type. A new set
 * is registered the first time a type is encountered.
 *
 * @param tokens the token array that the annotation spans index into
 * @param TextAnnotations the annotations to convert
 * @param namedEntities output map from type to entity strings; modified in place
 */
public void convertTextAnnotationsToNamedEntities(String[] tokens, List<TextAnnotation> TextAnnotations, Map<String, Set<String>> namedEntities) {
    for (TextAnnotation annotation : TextAnnotations) {
        int from = annotation.getSpan().getStart();
        int to = annotation.getSpan().getEnd();
        // Build the entity text before touching the map so a bad span leaves the map unchanged.
        String entityText = Joiner.on(" ").join(Arrays.copyOfRange(tokens, from, to));
        String entityType = annotation.getType();
        if (namedEntities.containsKey(entityType)) {
            namedEntities.get(entityType).add(entityText);
        } else {
            Set<String> entities = Sets.newHashSet();
            entities.add(entityText);
            namedEntities.put(entityType, entities);
        }
    }
}
Also used : TextAnnotation(org.elasticsearch.service.opennlp.models.TextAnnotation)

Example 2 with TextAnnotation

use of org.elasticsearch.service.opennlp.models.TextAnnotation in project elasticsearch-opennlp-plugin by spinscale.

the class OpenNlpService method tokenize.

/**
 * Tokenizes {@code content}, runs every entry of {@code finders} over the tokens and returns the
 * detected entities.
 *
 * <p>Each span found is wrapped in a {@code TextAnnotation} together with the key of the finder
 * that produced it and its probability. Conflicting annotations are then removed, and the
 * survivors are converted into the returned map.
 *
 * @param content the text to analyse
 * @return the map filled by {@code convertTextAnnotationsToNamedEntities}; empty if nothing was found
 */
public Map<String, Set<String>> tokenize(String content) {
    String[] tokens = SimpleTokenizer.INSTANCE.tokenize(content);
    List<TextAnnotation> annotations = new ArrayList<TextAnnotation>();
    for (Map.Entry<String, TokenNameFinderModel> entry : finders.entrySet()) {
        String finderKey = entry.getKey();
        NameFinderME nameFinder = new NameFinderME(entry.getValue());
        Span[] found = nameFinder.find(tokens);
        double[] confidences = nameFinder.probs(found);
        // found and confidences are parallel arrays, so walk them by index.
        for (int i = 0; i < found.length; i++) {
            annotations.add(new TextAnnotation(finderKey, found[i], confidences[i]));
        }
    }
    // removeConflicts is only invoked when there is at least one annotation.
    if (!annotations.isEmpty()) {
        removeConflicts(annotations);
    }
    Map<String, Set<String>> result = Maps.newHashMap();
    convertTextAnnotationsToNamedEntities(tokens, annotations, result);
    return result;
}
Also used : TokenNameFinderModel(opennlp.tools.namefind.TokenNameFinderModel) PooledTokenNameFinderModel(org.elasticsearch.service.opennlp.models.PooledTokenNameFinderModel) Span(opennlp.tools.util.Span) NameFinderME(opennlp.tools.namefind.NameFinderME) TextAnnotation(org.elasticsearch.service.opennlp.models.TextAnnotation)

Example 3 with TextAnnotation

use of org.elasticsearch.service.opennlp.models.TextAnnotation in project elasticsearch-opennlp-plugin by spinscale.

the class OpenNlpService method removeConflicts.

/**
 * Removes conflicting annotations from {@code allTextAnnotations} in place.
 *
 * <p>The list is first sorted by the natural ordering of {@code TextAnnotation}. Each annotation
 * is then compared, newest first, against a stack of previously kept annotations. When two spans
 * are equal or intersect, the previously kept annotation wins only if its probability is strictly
 * higher; otherwise (including on a tie) the previously kept annotation is removed from both the
 * stack and the list and the comparison continues down the stack.
 *
 * <p>An empty list is left untouched.
 *
 * <p>Copied from https://github.com/tamingtext/book/blob/master/src/test/java/com/tamingtext/opennlp/NameFinderTest.java
 *
 * @param allTextAnnotations the annotations to prune; sorted and modified in place
 */
private void removeConflicts(List<TextAnnotation> allTextAnnotations) {
    if (allTextAnnotations.isEmpty()) {
        // Nothing to resolve; without this guard get(0) below would throw on an empty list.
        return;
    }
    java.util.Collections.sort(allTextAnnotations);
    List<TextAnnotation> stack = new ArrayList<TextAnnotation>();
    stack.add(allTextAnnotations.get(0));
    for (int ai = 1; ai < allTextAnnotations.size(); ai++) {
        TextAnnotation curr = allTextAnnotations.get(ai);
        boolean deleteCurr = false;
        for (int ki = stack.size() - 1; ki >= 0; ki--) {
            TextAnnotation prev = stack.get(ki);
            // Equal and intersecting spans are resolved identically: the higher probability wins.
            if (prev.getSpan().equals(curr.getSpan()) || prev.getSpan().intersects(curr.getSpan())) {
                if (prev.getProb() > curr.getProb()) {
                    deleteCurr = true;
                    break;
                } else {
                    // prev loses: drop it from the stack and the list; an element before curr
                    // was removed, so curr's index shifts down by one.
                    allTextAnnotations.remove(stack.remove(ki));
                    ai--;
                }
            } else if (prev.getSpan().contains(curr.getSpan())) {
                // NOTE(review): if Span.intersects already reports containment this branch is
                // never reached - confirm against the OpenNLP Span implementation.
                break;
            } else {
                // prev does not overlap curr; it is no longer needed for later comparisons.
                stack.remove(ki);
            }
        }
        if (deleteCurr) {
            // Remove curr and step back so the element that slides into its slot is visited next.
            allTextAnnotations.remove(ai);
            ai--;
        } else {
            stack.add(curr);
        }
    }
}
Also used : TextAnnotation(org.elasticsearch.service.opennlp.models.TextAnnotation)

Example 4 with TextAnnotation

use of org.elasticsearch.service.opennlp.models.TextAnnotation in project elasticsearch-opennlp-plugin by spinscale.

the class SimpleNlpTest method convertTextAnnotationsToNamedEntities.

/**
 * Collects the token text covered by each annotation into {@code namedEntities}, keyed by the
 * annotation's type.
 *
 * <p>For every annotation, the tokens in the index range {@code [start, end)} of its span are
 * joined with single spaces and added to the set stored under the annotation's type. A new set
 * is registered the first time a type is encountered.
 *
 * @param tokens the token array that the annotation spans index into
 * @param TextAnnotations the annotations to convert
 * @param namedEntities output map from type to entity strings; modified in place
 */
public void convertTextAnnotationsToNamedEntities(String[] tokens, List<TextAnnotation> TextAnnotations, Map<String, Set<String>> namedEntities) {
    for (TextAnnotation annotation : TextAnnotations) {
        int from = annotation.getSpan().getStart();
        int to = annotation.getSpan().getEnd();
        // Build the entity text before touching the map so a bad span leaves the map unchanged.
        String entityText = Joiner.on(" ").join(Arrays.copyOfRange(tokens, from, to));
        String entityType = annotation.getType();
        if (namedEntities.containsKey(entityType)) {
            namedEntities.get(entityType).add(entityText);
        } else {
            Set<String> entities = Sets.newHashSet();
            entities.add(entityText);
            namedEntities.put(entityType, entities);
        }
    }
}
Also used : TextAnnotation(org.elasticsearch.service.opennlp.models.TextAnnotation)

Example 5 with TextAnnotation

use of org.elasticsearch.service.opennlp.models.TextAnnotation in project elasticsearch-opennlp-plugin by spinscale.

the class SimpleNlpTest method testThatMultipleFindersWork.

/**
 * Runs every finder over every sentence, resolves conflicting annotations per sentence and
 * checks the accumulated "person", "location" and "date" entities.
 */
@Test
public void testThatMultipleFindersWork() throws Exception {
    loadFinders();
    Map<String, Set<String>> entitiesByType = Maps.newHashMap();
    for (int s = 0; s < sentences.length; s++) {
        String[] tokens = tokenizer.tokenize(sentences[s]);
        List<TextAnnotation> annotations = new ArrayList<TextAnnotation>();
        // finders and names are parallel arrays: names[f] labels the spans found by finders[f].
        for (int f = 0; f < finders.length; f++) {
            Span[] found = finders[f].find(tokens);
            double[] confidences = finders[f].probs(found);
            for (int n = 0; n < found.length; n++) {
                annotations.add(new TextAnnotation(names[f], found[n], confidences[n]));
            }
        }
        removeConflicts(annotations);
        // Results from all sentences accumulate in the same map.
        convertTextAnnotationsToNamedEntities(tokens, annotations, entitiesByType);
    }

    Set<String> persons = entitiesByType.get("person");
    assertThat(persons, hasSize(3));
    assertThat(persons, containsInAnyOrder("Nancy Reagan", "Reagan", "Joanne Drake"));

    Set<String> locations = entitiesByType.get("location");
    assertThat(locations, hasSize(3));
    assertThat(locations, containsInAnyOrder("Los Angeles", "Santa Monica", "California"));

    Set<String> dates = entitiesByType.get("date");
    assertThat(dates, hasSize(1));
    assertThat(dates, containsInAnyOrder("Sunday"));
}
Also used : TextAnnotation(org.elasticsearch.service.opennlp.models.TextAnnotation) Span(opennlp.tools.util.Span) Test(org.junit.Test)

Aggregations

TextAnnotation (org.elasticsearch.service.opennlp.models.TextAnnotation): 6, Span (opennlp.tools.util.Span): 2, NameFinderME (opennlp.tools.namefind.NameFinderME): 1, TokenNameFinderModel (opennlp.tools.namefind.TokenNameFinderModel): 1, PooledTokenNameFinderModel (org.elasticsearch.service.opennlp.models.PooledTokenNameFinderModel): 1, Test (org.junit.Test): 1