Use of opennlp.tools.util.InvalidFormatException in the Apache Stanbol project.
Class OpenNLP, method getTokenizer.
/**
 * Provides a {@link Tokenizer} for the given language. A {@link TokenizerME}
 * is created when a {@link TokenizerModel} can be obtained for the language.
 * In every other case (<code>null</code> language, no model available, or a
 * failure while loading the model) the shared {@link SimpleTokenizer}
 * instance is returned instead.
 * @param language the language, or <code>null</code> to directly get the
 * {@link SimpleTokenizer}
 * @return the {@link Tokenizer} to use for the parsed language.
 */
public Tokenizer getTokenizer(String language) {
    Tokenizer modelBased = null;
    if (language != null) {
        try {
            TokenizerModel tokenizerModel = getTokenizerModel(language);
            modelBased = tokenizerModel == null ? null : new TokenizerME(tokenizerModel);
        } catch (InvalidFormatException e) {
            // a broken model must not prevent tokenizing: fall through to the simple tokenizer
            log.warn("Unable to load Tokenizer Model for " + language + ": Will use Simple Tokenizer instead", e);
        } catch (IOException e) {
            // same best-effort handling for read failures
            log.warn("Unable to load Tokenizer Model for " + language + ": Will use Simple Tokenizer instead", e);
        }
    }
    if (modelBased != null) {
        log.debug("Use ME Tokenizer for language {}", language);
        return modelBased;
    }
    log.debug("Use Simple Tokenizer for language {}", language);
    return SimpleTokenizer.INSTANCE;
}
Use of opennlp.tools.util.InvalidFormatException in the Apache Stanbol project.
Class OpenNLP, method loadModel.
/**
 * Loads a model of the given type from the resource with the given name.
 * <p>
 * The resource is requested via {@code lookupModelStream(..)} together with
 * a copy of the parsed properties. The keys "Description", "Model Type" and
 * "Download Location" are added with default values if not already present.
 * The model itself is created reflectively by calling the public constructor
 * of <code>modelType</code> that takes a single {@link InputStream}. The
 * stream is always closed before this method returns.
 * @param <T> the type of the model
 * @param name the name of the resource holding the model data
 * @param modelType the class of the model to create. It needs to provide a
 * public constructor taking an {@link InputStream}
 * @param modelProperties additional properties passed on when looking up
 * the resource, or <code>null</code> if none. The parsed map is not modified.
 * @return the loaded model, or <code>null</code> if the resource lookup
 * failed with an {@link IOException} or returned no stream
 * @throws InvalidFormatException if thrown by the constructor of the model
 * @throws IOException if thrown by the constructor of the model
 * @throws IllegalStateException if the constructor is missing or not
 * accessible, or if it failed with any other exception
 */
private <T> T loadModel(String name, Class<T> modelType, Map<String, String> modelProperties) throws InvalidFormatException, IOException {
    // copy the data to avoid external modifications
    Map<String, String> properties = modelProperties == null
            ? new HashMap<String, String>()
            : new HashMap<String, String>(modelProperties);
    if (!properties.containsKey("Description")) {
        properties.put("Description", "Statistical model for OpenNLP");
    }
    if (!properties.containsKey("Model Type")) {
        properties.put("Model Type", modelType.getSimpleName());
    }
    if (!properties.containsKey("Download Location")) {
        properties.put("Download Location", DOWNLOAD_ROOT + name);
    }
    InputStream modelDataStream;
    try {
        modelDataStream = lookupModelStream(name, properties);
    } catch (IOException e) {
        // a failed lookup is reported as "no model" (null); the exception is
        // handed to the logger so the actual cause is not lost
        log.debug("Unable to load Resource {} via the DataFileProvider", name, e);
        return null;
    }
    if (modelDataStream == null) {
        log.debug("Unable to load Resource {} via the DataFileProvider", name);
        return null;
    }
    T built;
    try {
        Constructor<T> constructor = modelType.getConstructor(InputStream.class);
        built = constructor.newInstance(modelDataStream);
    } catch (SecurityException e) {
        throw modelCreationFailure(modelType, name, e);
    } catch (NoSuchMethodException e) {
        throw modelCreationFailure(modelType, name, e);
    } catch (IllegalArgumentException e) {
        throw modelCreationFailure(modelType, name, e);
    } catch (InstantiationException e) {
        throw modelCreationFailure(modelType, name, e);
    } catch (IllegalAccessException e) {
        throw modelCreationFailure(modelType, name, e);
    } catch (InvocationTargetException e) {
        // the constructor itself failed: InvalidFormatException and
        // IOException are part of this method's contract and are rethrown
        // as-is; everything else is wrapped in an IllegalStateException
        Throwable checked = e.getCause();
        if (checked instanceof InvalidFormatException) {
            throw (InvalidFormatException) checked;
        } else if (checked instanceof IOException) {
            throw (IOException) checked;
        } else {
            throw modelCreationFailure(modelType, name, e);
        }
    } finally {
        IOUtils.closeQuietly(modelDataStream);
    }
    return built;
}

/**
 * Creates the exception reported when a model could not be instantiated.
 * @param modelType the type of the model that was to be created
 * @param name the name of the resource the model was to be loaded from
 * @param cause the exception that prevented the creation
 * @return the {@link IllegalStateException} to throw
 */
private IllegalStateException modelCreationFailure(Class<?> modelType, String name, Throwable cause) {
    return new IllegalStateException(String.format("Unable to create %s for %s!", modelType.getSimpleName(), name), cause);
}
Use of opennlp.tools.util.InvalidFormatException in the Apache Stanbol project.
Class NEREngineCore, method getSentenceModel.
/**
 * Loads the {@link SentenceModel} for the parsed language or
 * English as fallback if one for the language is not available
 * @param language the language to get the sentence model for
 * @return the model for the parsed language or, if none is available,
 * the one for English. Never <code>null</code>.
 * @throws IllegalStateException if neither a model for the parsed language
 * nor the English fallback is available, or if loading a model fails with
 * an {@link InvalidFormatException} or {@link IOException}
 */
private SentenceModel getSentenceModel(String language) {
    try {
        SentenceModel model = openNLP.getSentenceModel(language);
        if (model != null) {
            return model;
        }
        // fallback to english
        log.info("No sentence detection model for {}. fallback to English", language);
        model = openNLP.getSentenceModel("en");
        if (model == null) {
            throw new IllegalStateException(String.format("Unable to built Model for extracting sentences neither for '%s' " + "nor the fallback language 'en'.", language));
        }
        return model;
    } catch (InvalidFormatException e) {
        throw new IllegalStateException(String.format("Unable to built Model for extracting sentences from '%s' language texts.", language), e);
    } catch (IOException e) {
        throw new IllegalStateException(String.format("Unable to built Model for extracting sentences from '%s' language texts.", language), e);
    }
}
Aggregations