Search in sources :

Example 1 with InvalidFormatException

use of opennlp.tools.util.InvalidFormatException in project stanbol by apache.

The method getTokenizer of the class OpenNLP.

/**
 * Provides the {@link Tokenizer} to use for a given language.
 * <p>
 * When a {@link TokenizerModel} can be obtained for the language, a
 * {@link TokenizerME} backed by that model is returned. In all other cases
 * (<code>null</code> language, no model available, or a failure while
 * loading the model) the shared {@link SimpleTokenizer} instance is used.
 * @param language the language or <code>null</code> to directly get the
 * {@link SimpleTokenizer}
 * @return the {@link Tokenizer} for the parsed language.
 */
public Tokenizer getTokenizer(String language) {
    // Step 1: try to obtain a model; any load failure is logged and
    // treated the same way as "no model available".
    TokenizerModel tokenizerModel = null;
    if (language != null) {
        try {
            tokenizerModel = getTokenizerModel(language);
        } catch (InvalidFormatException e) {
            log.warn("Unable to load Tokenizer Model for " + language + ": " + "Will use Simple Tokenizer instead", e);
        } catch (IOException e) {
            log.warn("Unable to load Tokenizer Model for " + language + ": " + "Will use Simple Tokenizer instead", e);
        }
    }
    // Step 2: without a model only the rule based tokenizer can be used.
    if (tokenizerModel == null) {
        log.debug("Use Simple Tokenizer for language {}", language);
        return SimpleTokenizer.INSTANCE;
    }
    // Step 3: a model is present, so build the model based tokenizer.
    Tokenizer modelBasedTokenizer = new TokenizerME(tokenizerModel);
    log.debug("Use ME Tokenizer for language {}", language);
    return modelBasedTokenizer;
}
Also used : TokenizerME(opennlp.tools.tokenize.TokenizerME) IOException(java.io.IOException) Tokenizer(opennlp.tools.tokenize.Tokenizer) SimpleTokenizer(opennlp.tools.tokenize.SimpleTokenizer) TokenizerModel(opennlp.tools.tokenize.TokenizerModel) InvalidFormatException(opennlp.tools.util.InvalidFormatException)

Example 2 with InvalidFormatException

use of opennlp.tools.util.InvalidFormatException in project stanbol by apache.

The method loadModel of the class OpenNLP.

/**
 * Looks up the data of the model with the given name and creates an
 * instance of the requested model type from it.
 * <p>
 * The model type is instantiated reflectively through its public
 * constructor taking a single {@link InputStream}. The stream is always
 * closed before this method returns or throws.
 * @param name the name of the model resource to look up
 * @param modelType the class of the model to create
 * @param modelProperties additional properties passed along with the
 * lookup, or <code>null</code> if none. The parsed map is copied and never
 * modified. Defaults for "Description", "Model Type" and
 * "Download Location" are added to the copy when those keys are missing.
 * @return the created model or <code>null</code> if the lookup of the
 * model data failed with an {@link IOException} or returned no stream
 * @throws InvalidFormatException if thrown by the model constructor
 * @throws IOException if thrown by the model constructor
 * @throws IllegalStateException if the constructor can not be found or
 * invoked, or if it fails with any other exception
 */
private <T> T loadModel(String name, Class<T> modelType, Map<String, String> modelProperties) throws InvalidFormatException, IOException {
    // always work on a private copy to avoid external modifications
    Map<String, String> properties = modelProperties == null
            ? new HashMap<String, String>()
            : new HashMap<String, String>(modelProperties);
    if (!properties.containsKey("Description")) {
        properties.put("Description", "Statistical model for OpenNLP");
    }
    if (!properties.containsKey("Model Type")) {
        properties.put("Model Type", modelType.getSimpleName());
    }
    if (!properties.containsKey("Download Location")) {
        properties.put("Download Location", DOWNLOAD_ROOT + name);
    }
    InputStream modelData;
    try {
        modelData = lookupModelStream(name, properties);
    } catch (IOException e) {
        log.debug("Unable to load Resource {} via the DataFileProvider", name);
        return null;
    }
    if (modelData == null) {
        log.debug("Unable to load Resource {} via the DataFileProvider", name);
        return null;
    }
    // Collects the exception that needs to be wrapped in an
    // IllegalStateException once the stream was closed.
    Exception failure;
    try {
        return modelType.getConstructor(InputStream.class).newInstance(modelData);
    } catch (SecurityException e) {
        failure = e;
    } catch (NoSuchMethodException e) {
        failure = e;
    } catch (IllegalArgumentException e) {
        failure = e;
    } catch (InstantiationException e) {
        failure = e;
    } catch (IllegalAccessException e) {
        failure = e;
    } catch (InvocationTargetException e) {
        // The constructor itself failed: InvalidFormatException and
        // IOException are re-thrown directly, everything else is wrapped
        // (together with the InvocationTargetException) below.
        Throwable cause = e.getCause();
        if (cause instanceof InvalidFormatException) {
            throw (InvalidFormatException) cause;
        }
        if (cause instanceof IOException) {
            throw (IOException) cause;
        }
        failure = e;
    } finally {
        IOUtils.closeQuietly(modelData);
    }
    throw new IllegalStateException(String.format("Unable to create %s for %s!", modelType.getSimpleName(), name), failure);
}
Also used : InputStream(java.io.InputStream) IOException(java.io.IOException) InvalidFormatException(opennlp.tools.util.InvalidFormatException) InvocationTargetException(java.lang.reflect.InvocationTargetException)

Example 3 with InvalidFormatException

use of opennlp.tools.util.InvalidFormatException in project stanbol by apache.

The method getSentenceModel of the class NEREngineCore.

/**
 * Loads the {@link SentenceModel} for the parsed language or
 * English as fallback if one for the language is not available.
 * @param language the language to get the sentence detection model for
 * @return the model for the parsed language or, if none is available,
 * the model for English. Never <code>null</code>.
 * @throws IllegalStateException if no model is available for the parsed
 * language nor for the fallback language 'en', or if loading a model
 * fails with an {@link InvalidFormatException} or {@link IOException}
 */
private SentenceModel getSentenceModel(String language) {
    try {
        SentenceModel model = openNLP.getSentenceModel(language);
        if (model != null) {
            return model;
        } else {
            // fallback to english
            // NOTE: the language must be passed as argument, otherwise the
            // '{}' placeholder is written literally to the log
            log.info("No sentence detection model for {}. fallback to English", language);
            model = openNLP.getSentenceModel("en");
            if (model == null) {
                throw new IllegalStateException(String.format("Unable to built Model for extracting sentences neither for '%s' " + "nor the fallback language 'en'.", language));
            } else {
                return model;
            }
        }
    } catch (InvalidFormatException e) {
        throw new IllegalStateException(String.format("Unable to built Model for extracting sentences from '%s' language texts.", language), e);
    } catch (IOException e) {
        throw new IllegalStateException(String.format("Unable to built Model for extracting sentences from '%s' language texts.", language), e);
    }
}
Also used : SentenceModel(opennlp.tools.sentdetect.SentenceModel) IOException(java.io.IOException) InvalidFormatException(opennlp.tools.util.InvalidFormatException)

Aggregations

IOException (java.io.IOException)3 InvalidFormatException (opennlp.tools.util.InvalidFormatException)3 InputStream (java.io.InputStream)1 InvocationTargetException (java.lang.reflect.InvocationTargetException)1 SentenceModel (opennlp.tools.sentdetect.SentenceModel)1 SimpleTokenizer (opennlp.tools.tokenize.SimpleTokenizer)1 Tokenizer (opennlp.tools.tokenize.Tokenizer)1 TokenizerME (opennlp.tools.tokenize.TokenizerME)1 TokenizerModel (opennlp.tools.tokenize.TokenizerModel)1