use of opennlp.tools.tokenize.TokenizerME in project languagetool by languagetool-org.
the class EnglishChunker method tokenize.
/**
 * Splits the given sentence into tokens with an OpenNLP {@link TokenizerME}
 * built from {@code tokenModel}.
 *
 * Kept non-private so that test cases can call it directly.
 *
 * @param sentence the text to tokenize
 * @return the tokens produced by the tokenizer
 */
String[] tokenize(String sentence) {
  TokenizerME maxEntTokenizer = new TokenizerME(tokenModel);
  // OpenNLP expects the plain apostrophe, so the typographic one is mapped to it first
  return maxEntTokenizer.tokenize(sentence.replace('’', '\''));
}
use of opennlp.tools.tokenize.TokenizerME in project textdb by TextDB.
the class POSTagexample method Tokenize.
/**
 * Tokenizes a sentence with an OpenNLP {@link TokenizerME} whose model is read
 * from the bundled {@code en-token.bin} file.
 *
 * The model file is opened with a relative path and is re-read on every call.
 * The stream is managed by try-with-resources so that it is closed even when
 * loading the model or tokenizing throws.
 *
 * @param sentence the text to tokenize
 * @return the tokens produced by the tokenizer
 * @throws InvalidFormatException declared by the original signature; presumably raised
 *         when the model file content is not a valid tokenizer model (confirm against OpenNLP)
 * @throws IOException if the model file cannot be opened, read or closed
 */
public static String[] Tokenize(String sentence) throws InvalidFormatException, IOException {
  try (InputStream is = new FileInputStream("./src/main/java/edu/uci/ics/textdb/sandbox/OpenNLPexample/en-token.bin")) {
    TokenizerModel model = new TokenizerModel(is);
    Tokenizer tokenizer = new TokenizerME(model);
    return tokenizer.tokenize(sentence);
  }
}
use of opennlp.tools.tokenize.TokenizerME in project deeplearning4j by deeplearning4j.
the class ConcurrentTokenizer method initialize.
/**
 * Sets up this instance from the supplied UIMA context.
 *
 * After delegating to the superclass, the resource registered under
 * {@link UimaUtil#MODEL_PARAMETER} is fetched from the context, its tokenizer
 * model is obtained, and the {@code tokenizer} field is assigned a new
 * {@link TokenizerME} built from that model.
 *
 * Note: all initialization belongs in this method, not in the constructor.
 *
 * @param context the UIMA context to read the model resource from
 * @throws ResourceInitializationException if the model resource cannot be accessed
 */
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  final TokenizerModel loadedModel;
  try {
    Object resource = context.getResourceObject(UimaUtil.MODEL_PARAMETER);
    loadedModel = ((TokenizerModelResource) resource).getModel();
  } catch (ResourceAccessException cause) {
    // surface resource lookup problems as an initialization failure, keeping the cause
    throw new ResourceInitializationException(cause);
  }

  tokenizer = new TokenizerME(loadedModel);
}
use of opennlp.tools.tokenize.TokenizerME in project textdb by TextDB.
the class NameFinderExample method Tokenize.
/**
 * Tokenizes a sentence with an OpenNLP {@link TokenizerME} whose model is read
 * from the bundled {@code en-token.bin} file.
 *
 * The model file is opened with a relative path and is re-read on every call.
 * The stream is managed by try-with-resources so that it is closed even when
 * loading the model or tokenizing throws.
 *
 * @param sentence the text to tokenize
 * @return the tokens produced by the tokenizer
 * @throws InvalidFormatException declared by the original signature; presumably raised
 *         when the model file content is not a valid tokenizer model (confirm against OpenNLP)
 * @throws IOException if the model file cannot be opened, read or closed
 */
public static String[] Tokenize(String sentence) throws InvalidFormatException, IOException {
  try (InputStream is = new FileInputStream("./src/main/java/edu/uci/ics/textdb/sandbox/OpenNLPexample/en-token.bin")) {
    TokenizerModel model = new TokenizerModel(is);
    Tokenizer tokenizer = new TokenizerME(model);
    return tokenizer.tokenize(sentence);
  }
}
use of opennlp.tools.tokenize.TokenizerME in project stanbol by apache.
the class OpenNLP method getTokenizer.
/**
 * Returns a {@link Tokenizer} for the given language.
 *
 * When a language is given and a {@link TokenizerModel} can be obtained for
 * it, a new {@link TokenizerME} backed by that model is returned. In every
 * other case the shared {@link SimpleTokenizer} instance is returned: when
 * the language is <code>null</code>, when no model is available, or when
 * loading the model fails (a warning is logged for the failure).
 *
 * @param language the language or <code>null</code> to get the
 * {@link SimpleTokenizer}
 * @return the {@link Tokenizer} for the given language, never <code>null</code>
 */
public Tokenizer getTokenizer(String language) {
  if (language != null) {
    try {
      TokenizerModel model = getTokenizerModel(language);
      if (model != null) {
        Tokenizer maxEntTokenizer = new TokenizerME(model);
        log.debug("Use ME Tokenizer for language {}", language);
        return maxEntTokenizer;
      }
    } catch (InvalidFormatException e) {
      log.warn("Unable to load Tokenizer Model for " + language + ": " + "Will use Simple Tokenizer instead", e);
    } catch (IOException e) {
      log.warn("Unable to load Tokenizer Model for " + language + ": " + "Will use Simple Tokenizer instead", e);
    }
  }
  // no language, no model, or the model failed to load: fall back to the simple tokenizer
  log.debug("Use Simple Tokenizer for language {}", language);
  return SimpleTokenizer.INSTANCE;
}
Aggregations