Use of opennlp.tools.namefind.TokenNameFinderModel in the project elasticsearch-opennlp-plugin by spinscale.
Class OpenNlpService, method tokenize:
/**
 * Tokenizes the given text, runs every registered name finder model over the
 * tokens and collects the detected spans into a map of named entities.
 *
 * <p>Each finder in {@code finders} produces spans with probabilities; these
 * are wrapped into {@code TextAnnotation}s tagged with the finder's key.
 * Conflicting annotations are removed via {@code removeConflicts} before the
 * remaining ones are converted by
 * {@code convertTextAnnotationsToNamedEntities}.
 *
 * @param content the text to analyse
 * @return the map filled by {@code convertTextAnnotationsToNamedEntities};
 *         empty when no annotation was added to it
 */
public Map<String, Set<String>> tokenize(String content) {
    String[] tokens = SimpleTokenizer.INSTANCE.tokenize(content);
    List<TextAnnotation> annotations = new ArrayList<TextAnnotation>();

    for (Map.Entry<String, TokenNameFinderModel> entry : finders.entrySet()) {
        String type = entry.getKey();
        // a fresh finder per model and per call
        NameFinderME nameFinder = new NameFinderME(entry.getValue());
        Span[] found = nameFinder.find(tokens);
        double[] probabilities = nameFinder.probs(found);
        int idx = 0;
        for (Span span : found) {
            annotations.add(new TextAnnotation(type, span, probabilities[idx]));
            idx++;
        }
    }

    // conflict resolution is only needed when something was found
    if (!annotations.isEmpty()) {
        removeConflicts(annotations);
    }

    Map<String, Set<String>> namedEntities = Maps.newHashMap();
    convertTextAnnotationsToNamedEntities(tokens, annotations, namedEntities);
    return namedEntities;
}
Use of opennlp.tools.namefind.TokenNameFinderModel in the project stanbol by apache.
Class NEREngineCore, method computeEnhancements:
/**
 * Runs named entity recognition over the parsed ContentItem.
 *
 * <p>The language of the item is extracted first. If an {@code AnalysedText}
 * with tokens is present it is used as input; otherwise the plain text of a
 * supported content part is read as a fallback. The default NER models
 * configured for the language and any additional language specific models
 * are then passed one by one to {@code findNamedEntities}.
 *
 * @param ci the ContentItem to process
 * @throws IllegalStateException if no language can be extracted, no NER model
 *         is configured for the language, or no supported content part exists
 * @throws InvalidContentException if the text of the content part cannot be read
 * @throws EngineException wrapping any non-runtime exception raised while
 *         the models are applied
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    //first check the language before processing the content (text)
    String language = extractLanguage(ci);
    if (language == null) {
        throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    if (!isNerModel(language)) {
        throw new IllegalStateException("For the language '" + language + "' of ContentItem " + ci.getUri() + " no NER model is configured: This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    final AnalysedText at = AnalysedTextUtils.getAnalysedText(ci);
    //validate data in the AnalysedText
    final String text;
    if (at != null && at.getTokens().hasNext()) {
        //if the AnalysedText is present and tokens are present
        if (log.isDebugEnabled()) {
            log.debug("computeEnhancements from AnalysedText ContentPart of ContentItem {}: text={}", ci.getUri().getUnicodeString(), StringUtils.abbreviate(at.getSpan(), 100));
        }
        // the AnalysedText is the input; no plain text is needed
        text = null;
    } else {
        //no AnalysedText with tokens ...
        //fallback to processing the plain text is still supported
        Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
        if (contentPart == null) {
            throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
        }
        try {
            text = ContentItemHelper.getText(contentPart.getValue());
        } catch (IOException e) {
            throw new InvalidContentException(this, ci, e);
        }
        if (text.trim().length() == 0) {
            // TODO: make the length of the data a field of the ContentItem
            // interface to be able to filter out empty items in the canEnhance
            // method
            // two placeholders for two arguments (the original had a third,
            // unfilled placeholder and a missing space between the fragments)
            log.warn("ContentPart {} of ContentItem {} does not contain any text "
                    + "to extract knowledge from", contentPart.getKey(), ci);
            return;
        }
        if (log.isDebugEnabled()) {
            log.debug("computeEnhancements from ContentPart {} of ContentItem {}: text={}", new Object[] { contentPart.getKey(), ci.getUri().getUnicodeString(), StringUtils.abbreviate(text, 100) });
        }
    }
    try {
        if (config.isProcessedLangage(language)) {
            for (String defaultModelType : config.getDefaultModelTypes()) {
                TokenNameFinderModel nameFinderModel = openNLP.getNameModel(defaultModelType, language);
                if (nameFinderModel == null) {
                    log.info("No NER Model for {} and language {} available!", defaultModelType, language);
                } else {
                    findNamedEntities(ci, at, text, language, nameFinderModel);
                }
            }
        }
        //process for additional models
        for (String additionalModel : config.getSpecificNerModles(language)) {
            try {
                TokenNameFinderModel nameFinderModel = openNLP.getModel(TokenNameFinderModel.class, additionalModel, null);
                if (nameFinderModel == null) {
                    // getModel may return null for an unavailable model; skip it
                    // like the default models above instead of passing null on
                    log.warn("No TokenNameFinderModel '{}' available for language '{}'", additionalModel, language);
                } else {
                    findNamedEntities(ci, at, text, language, nameFinderModel);
                }
            } catch (IOException e) {
                log.warn("Unable to load TokenNameFinderModel model for language '" + language + "' (model: " + additionalModel + ")", e);
            } catch (RuntimeException e) {
                // a failing additional model must not abort the remaining ones
                log.warn("Error while creating TokenNameFinderModel for language '" + language + "' (model: " + additionalModel + ")", e);
            }
        }
    } catch (Exception e) {
        // runtime exceptions pass through unchanged; anything else is wrapped
        if (e instanceof RuntimeException) {
            throw (RuntimeException) e;
        } else {
            throw new EngineException(this, ci, e);
        }
    }
}
Use of opennlp.tools.namefind.TokenNameFinderModel in the project stanbol by apache.
Class OpenNLPTest, method testLoadModelByName:
/**
 * Checks that models can be loaded by file name: the English tokenizer,
 * sentence, POS, chunker and person NER models must be found, while the
 * Russian tokenizer model must be reported as missing ({@code null}).
 */
@Test
public void testLoadModelByName() throws IOException {
    // models expected to be available
    Assert.assertNotNull(openNLP.getModel(TokenizerModel.class, "en-token.bin", null));
    Assert.assertNotNull(openNLP.getModel(SentenceModel.class, "en-sent.bin", null));
    Assert.assertNotNull(openNLP.getModel(POSModel.class, "en-pos-maxent.bin", null));
    Assert.assertNotNull(openNLP.getModel(ChunkerModel.class, "en-chunker.bin", null));
    Assert.assertNotNull(openNLP.getModel(TokenNameFinderModel.class, "en-ner-person.bin", null));

    // a model that is not available yields null
    Assert.assertNull(openNLP.getModel(TokenizerModel.class, "ru-token.bin", null));
}
Use of opennlp.tools.namefind.TokenNameFinderModel in the project stanbol by apache.
Class OpenNLPTest, method testLoadMissingNER:
/**
 * Checks that neither a name model nor a name finder is returned when the
 * entity type or the language is unknown.
 */
@Test
public void testLoadMissingNER() throws IOException {
    // unknown entity type for a known language
    Assert.assertNull(openNLP.getNameModel("person2", "en"));
    Assert.assertNull(openNLP.getNameFinder("person2", "en"));

    // known entity type for an unknown language
    Assert.assertNull(openNLP.getNameModel("person", "ru"));
    Assert.assertNull(openNLP.getNameFinder("person", "ru"));
}
Use of opennlp.tools.namefind.TokenNameFinderModel in the project textdb by TextDB.
Class NameFinderExample, method main:
/**
 * Reads the sample data file line by line, runs the location name finder on
 * each tokenized line and prints the tokens, the detected spans and the
 * tokens covered by each span. Finally prints the performance monitor result
 * and the total number of tokens that were part of a detected span.
 *
 * @param args unused
 * @throws IOException if the data file or the model file cannot be read
 */
public static void main(String[] args) throws IOException {
    String dataFile = "./src/main/resources/abstract_100.txt";
    // try-with-resources closes the scanner even if loading the model or
    // processing a line fails
    try (Scanner scan = new Scanner(new File(dataFile))) {
        TokenNameFinderModel model;
        try (InputStream is = new FileInputStream("./src/main/java/edu/uci/ics/textdb/sandbox/OpenNLPexample/en-ner-location.bin")) {
            model = new TokenNameFinderModel(is);
        }
        NameFinderME nameFinder = new NameFinderME(model);
        int counter = 0;
        PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
        perfMon.start();
        while (scan.hasNextLine()) {
            String[] sentence = Tokenize(scan.nextLine());
            Span[] spans = nameFinder.find(sentence);
            perfMon.incrementCounter();
            //Print out the tokens of the sentence
            if (spans.length != 0) {
                for (String s : sentence) {
                    System.out.print("[" + s + "] ");
                }
                // terminate the token line; the original printed the literal
                // text "/n" instead of a line break
                System.out.println("\n");
            }
            //Print out the offset of each
            for (Span s : spans) {
                System.out.println(s.toString());
                for (int i = s.getStart(); i < s.getEnd(); i++) {
                    System.out.println(sentence[i]);
                    counter++;
                }
            }
            if (spans.length != 0) {
                System.out.println();
            }
        }
        perfMon.stopAndPrintFinalResult();
        System.out.println("Number of Results: " + counter);
    }
}
Aggregations