Use of org.deeplearning4j.text.tokenization.tokenizer.TokenPreProcess in the deeplearning4j project by deeplearning4j.
Class EndingPreProcessorTest, method testPreProcessor.
/**
 * Checks that an {@link EndingPreProcessor}, used through the {@code TokenPreProcess} interface,
 * turns the token {@code "ending"} into {@code "end"}.
 */
@Test
public void testPreProcessor() {
    final String token = "ending";
    final TokenPreProcess endingStripper = new EndingPreProcessor();

    assertEquals("end", endingStripper.preProcess(token));
}
Use of org.deeplearning4j.text.tokenization.tokenizer.TokenPreProcess in the deeplearning4j project by deeplearning4j.
Class WordVectorSerializer, method getTokenizerFactory.
/**
 * Builds the tokenizer factory named by the given configuration.
 *
 * <p>The class named by {@code configuration.getTokenizerFactory()} is loaded with
 * {@link Class#forName(String)} and instantiated through its no-argument constructor. When
 * {@code configuration.getTokenPreProcessor()} also names a class, an instance of it is created
 * the same way and attached to the factory via {@code setTokenPreProcessor}.
 *
 * @param configuration source of the class names; may be {@code null}
 * @return the instantiated factory, or {@code null} when {@code configuration} is {@code null}
 *         or its tokenizer factory class name is {@code null} or empty
 * @throws RuntimeException if either class cannot be found or instantiated; the original
 *         reflective exception is preserved as the cause
 */
protected static TokenizerFactory getTokenizerFactory(VectorsConfiguration configuration) {
    if (configuration == null) {
        return null;
    }

    String factoryClassName = configuration.getTokenizerFactory();
    if (factoryClassName == null || factoryClassName.isEmpty()) {
        return null;
    }

    // Read up front so the error message below can name both configured classes.
    String preProcessorClassName = configuration.getTokenPreProcessor();

    try {
        TokenizerFactory factory = (TokenizerFactory) Class.forName(factoryClassName).newInstance();

        // The pre-processor is optional: only instantiate it when a class name is configured.
        if (preProcessorClassName != null && !preProcessorClassName.isEmpty()) {
            TokenPreProcess preProcessor = (TokenPreProcess) Class.forName(preProcessorClassName).newInstance();
            factory.setTokenPreProcessor(preProcessor);
        }
        return factory;
    } catch (ReflectiveOperationException e) {
        // Covers ClassNotFoundException, InstantiationException and IllegalAccessException.
        throw new RuntimeException("Unable to create tokenizer factory from configuration (tokenizerFactory="
                        + factoryClassName + ", tokenPreProcessor=" + preProcessorClassName + ")", e);
    }
}
Use of org.deeplearning4j.text.tokenization.tokenizer.TokenPreProcess in the deeplearning4j project by deeplearning4j.
Class TokenizerFunction, method getTokenizerFactory.
/**
 * Instantiates the tokenizer factory from the configured class names, stores it in
 * {@code tokenizerFactory} and returns that field.
 *
 * <p>Steps, in order: optionally instantiate the pre-processor named by
 * {@code tokenizerPreprocessorClazz}; instantiate the factory named by
 * {@code tokenizerFactoryClazz}; attach the pre-processor if one was created; and, when
 * {@code nGrams > 1}, wrap the factory in an {@code NGramTokenizerFactory} using {@code nGrams}
 * for both of its numeric arguments.
 *
 * <p>Any exception raised along the way is printed with {@code printStackTrace()} and not
 * rethrown; the method then returns whatever {@code tokenizerFactory} holds at that point.
 * NOTE(review): callers presumably need a non-null factory, so swallowing the failure here may
 * only defer the error; confirm before changing.
 *
 * @return the current value of {@code tokenizerFactory}
 */
private TokenizerFactory getTokenizerFactory() {
    try {
        // The pre-processor is optional: a null or empty class name means none is configured.
        final boolean preProcessorConfigured =
                        tokenizerPreprocessorClazz != null && !tokenizerPreprocessorClazz.isEmpty();

        TokenPreProcess preProcessor = null;
        if (preProcessorConfigured) {
            Class<? extends TokenPreProcess> preProcessorType =
                            (Class<? extends TokenPreProcess>) Class.forName(tokenizerPreprocessorClazz);
            preProcessor = preProcessorType.newInstance();
        }

        Class<? extends TokenizerFactory> factoryType =
                        (Class<? extends TokenizerFactory>) Class.forName(tokenizerFactoryClazz);
        tokenizerFactory = factoryType.newInstance();

        if (preProcessor != null) {
            tokenizerFactory.setTokenPreProcessor(preProcessor);
        }

        if (nGrams > 1) {
            // Wrap the plain factory so that it is replaced by the n-gram variant.
            tokenizerFactory = new NGramTokenizerFactory(tokenizerFactory, nGrams, nGrams);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return tokenizerFactory;
}
Aggregations