Use of opennlp.tools.tokenize.TokenizerModel in project textdb by TextDB.
From the class NameFinderExample, method Tokenize.
/**
 * Splits a sentence into tokens with an OpenNLP {@link TokenizerME} built from
 * the {@code en-token.bin} model file.
 *
 * <p>The model file is located through a relative path, so it is resolved
 * against the working directory of the running process. The model is loaded
 * from disk on every call.
 *
 * @param sentence the text to tokenize
 * @return the tokens produced by the tokenizer for {@code sentence}
 * @throws InvalidFormatException propagated from the {@link TokenizerModel} constructor
 * @throws IOException if the model file cannot be opened, read or closed
 */
public static String[] Tokenize(String sentence) throws InvalidFormatException, IOException {
    // try-with-resources closes the model stream on every path, including when
    // loading the model or tokenizing throws (the original leaked it then).
    try (InputStream is = new FileInputStream("./src/main/java/edu/uci/ics/textdb/sandbox/OpenNLPexample/en-token.bin")) {
        TokenizerModel model = new TokenizerModel(is);
        Tokenizer tokenizer = new TokenizerME(model);
        return tokenizer.tokenize(sentence);
    }
}
Use of opennlp.tools.tokenize.TokenizerModel in project stanbol by apache.
From the class OpenNLP, method getTokenizer.
/**
 * Returns the {@link Tokenizer} to use for the given language.
 * <p>
 * A {@link TokenizerME} is created when {@code getTokenizerModel(language)}
 * supplies a {@link TokenizerModel}. In every other case - the language is
 * <code>null</code>, no model is supplied, or loading the model fails with an
 * {@link InvalidFormatException} or {@link IOException} (logged as a warning) -
 * the shared {@link SimpleTokenizer#INSTANCE} is returned instead.
 * @param language the language, or <code>null</code> to get the
 * {@link SimpleTokenizer}
 * @return the {@link Tokenizer} for the passed language; never <code>null</code>
 */
public Tokenizer getTokenizer(String language) {
    Tokenizer meTokenizer = null;
    if (language != null) {
        try {
            TokenizerModel loaded = getTokenizerModel(language);
            meTokenizer = (loaded == null) ? null : new TokenizerME(loaded);
        } catch (InvalidFormatException invalidFormat) {
            log.warn("Unable to load Tokenizer Model for " + language + ": " + "Will use Simple Tokenizer instead", invalidFormat);
        } catch (IOException ioFailure) {
            log.warn("Unable to load Tokenizer Model for " + language + ": " + "Will use Simple Tokenizer instead", ioFailure);
        }
    }
    // A model-backed tokenizer could be built: use it.
    if (meTokenizer != null) {
        log.debug("Use ME Tokenizer for language {}", language);
        return meTokenizer;
    }
    // No language, no model, or a load failure: fall back to the simple tokenizer.
    log.debug("Use Simple Tokenizer for language {}", language);
    return SimpleTokenizer.INSTANCE;
}
Use of opennlp.tools.tokenize.TokenizerModel in project stanbol by apache.
From the class OpenNLPTest, method testLoadEnTokenizer.
/**
 * Checks that both the tokenizer model and the tokenizer obtained for the
 * language "en" are non-null.
 */
@Test
public void testLoadEnTokenizer() throws IOException {
    // The model lookup is asserted first, then the tokenizer lookup.
    Assert.assertNotNull(openNLP.getTokenizerModel("en"));
    Assert.assertNotNull(openNLP.getTokenizer("en"));
}
Use of opennlp.tools.tokenize.TokenizerModel in project textdb by TextDB.
From the class POSTagexample, method Tokenize.
/**
 * Splits a sentence into tokens with an OpenNLP {@link TokenizerME} built from
 * the {@code en-token.bin} model file.
 *
 * <p>The model file is located through a relative path, so it is resolved
 * against the working directory of the running process. The model is loaded
 * from disk on every call.
 *
 * @param sentence the text to tokenize
 * @return the tokens produced by the tokenizer for {@code sentence}
 * @throws InvalidFormatException propagated from the {@link TokenizerModel} constructor
 * @throws IOException if the model file cannot be opened, read or closed
 */
public static String[] Tokenize(String sentence) throws InvalidFormatException, IOException {
    // try-with-resources closes the model stream on every path, including when
    // loading the model or tokenizing throws (the original leaked it then).
    try (InputStream is = new FileInputStream("./src/main/java/edu/uci/ics/texera/sandbox/OpenNLPexample/en-token.bin")) {
        TokenizerModel model = new TokenizerModel(is);
        Tokenizer tokenizer = new TokenizerME(model);
        return tokenizer.tokenize(sentence);
    }
}
Aggregations