use of org.apache.stanbol.commons.solr.utils.StanbolResourceLoader in project stanbol by apache.
the class KuromojiNlpEngine method activate.
/**
 * Activates this engine and builds the Kuromoji (Japanese) analysis chain.
 * <p>
 * After delegating to {@code super.activate(ce)} this method
 * <ol>
 * <li>creates the {@link StanbolResourceLoader} used to initialise the analysis components,</li>
 * <li>creates the {@code JapaneseTokenizerFactory} and informs it about the resource loader,</li>
 * <li>appends, in this order, the base form filter, the part-of-speech stop filter (also
 * informed about the resource loader) and the Katakana stem filter to
 * {@code filterFactories}.</li>
 * </ol>
 *
 * @param ce the {@link org.osgi.service.component.ComponentContext}
 * @throws ConfigurationException declared; propagated from the calls made here
 *         (e.g. the superclass activation)
 * @throws IOException declared; propagated from the calls made here
 *         (e.g. informing a factory about the resource loader)
 */
@Activate
protected void activate(ComponentContext ce) throws ConfigurationException, IOException {
    // fixed: the message previously announced the "smartcn" engine (copy/paste error)
    log.info("activating kuromoji NLP engine");
    super.activate(ce);

    // Init the Solr ResourceLoader used for initialising the components: a loader
    // for the class loader of this engine that is chained to a second
    // StanbolResourceLoader wrapping the parentResourceLoader.
    // NOTE(review): how the inner loader behaves when parentResourceLoader is
    // null is decided by StanbolResourceLoader and is not visible here.
    resourceLoader = new StanbolResourceLoader(
            KuromojiNlpEngine.class.getClassLoader(),
            new StanbolResourceLoader(parentResourceLoader));

    // Tokenizer: needs the resource loader before it can be used
    tokenizerFactory = new JapaneseTokenizerFactory(TOKENIZER_FACTORY_CONFIG);
    ((ResourceLoaderAware) tokenizerFactory).inform(resourceLoader);

    // Base form filter (is not informed about the resource loader)
    TokenFilterFactory baseFormFilterFactory =
            new JapaneseBaseFormFilterFactory(BASE_FORM_FILTER_CONFIG);
    filterFactories.add(baseFormFilterFactory);

    // POS stop filter: is informed about the resource loader
    TokenFilterFactory posFilterFactory =
            new JapanesePartOfSpeechStopFilterFactory(POS_FILTER_CONFIG);
    ((ResourceLoaderAware) posFilterFactory).inform(resourceLoader);
    filterFactories.add(posFilterFactory);

    // Katakana stemming
    TokenFilterFactory stemmFilterFactory =
            new JapaneseKatakanaStemFilterFactory(STEMM_FILTER_CONFIG);
    filterFactories.add(stemmFilterFactory);
}
use of org.apache.stanbol.commons.solr.utils.StanbolResourceLoader in project stanbol by apache.
the class LuceneLabelTokenizer method activate.
/**
 * Activates this label tokenizer by reading the Lucene analyzer chain from the
 * component properties.
 * <p>
 * The following properties are processed in this order:
 * <ul>
 * <li>{@code PROPERTY_CHAR_FILTER_FACTORY} (optional): a missing, empty or default
 * value results in {@code charFilterFactory == null}.</li>
 * <li>{@code PROPERTY_TOKENIZER_FACTORY} (required).</li>
 * <li>{@code PROPERTY_TOKEN_FILTER_FACTORY} (optional): accepted as {@link Collection},
 * {@code String[]} or {@link String}; null, empty and default entries are skipped and
 * every other entry is added to {@code filterFactories}.</li>
 * <li>{@code LabelTokenizer.SUPPORTED_LANUAGES} (required): if present, the complete
 * property dictionary is handed to {@code langConf}.</li>
 * </ul>
 *
 * @param ctx the component context providing the configuration properties
 * @throws ConfigurationException if the tokenizer factory or the language
 *         configuration is missing, if the token filter property has an unsupported
 *         type, or if propagated from one of the helpers called here
 */
@Activate
protected void activate(ComponentContext ctx) throws ConfigurationException {
    // init the Solr ResourceLoader used for initialising the components
    resourceLoader = new StanbolResourceLoader(parentResourceLoader);

    // init the Solr CharFilterFactory (optional)
    Object value = ctx.getProperties().get(PROPERTY_CHAR_FILTER_FACTORY);
    if (value != null && !value.toString().isEmpty()
            && !DEFAULT_CLASS_NAME_CONFIG.equals(value)) {
        Entry<String, Map<String, String>> charFilterConfig =
                parseConfigLine(PROPERTY_CHAR_FILTER_FACTORY, value.toString());
        charFilterFactory = initAnalyzer(PROPERTY_CHAR_FILTER_FACTORY,
                charFilterConfig.getKey(), CharFilterFactory.class,
                charFilterConfig.getValue());
    } else {
        charFilterFactory = null;
    }

    // now initialise the TokenizerFactory (required)
    value = ctx.getProperties().get(PROPERTY_TOKENIZER_FACTORY);
    if (value == null || value.toString().isEmpty()
            || DEFAULT_CLASS_NAME_CONFIG.equals(value)) {
        throw new ConfigurationException(PROPERTY_TOKENIZER_FACTORY,
                "The class name of the Lucene Tokenizer MUST BE configured");
    }
    // fixed: pass the property that is actually parsed (was PROPERTY_CHAR_FILTER_FACTORY),
    // so that parseConfigLine refers to the same property as initAnalyzer below
    Entry<String, Map<String, String>> tokenizerConfig =
            parseConfigLine(PROPERTY_TOKENIZER_FACTORY, value.toString());
    tokenizerFactory = initAnalyzer(PROPERTY_TOKENIZER_FACTORY,
            tokenizerConfig.getKey(), TokenizerFactory.class,
            tokenizerConfig.getValue());

    // initialise the list of Token Filters
    Collection<String> values;
    value = ctx.getProperties().get(PROPERTY_TOKEN_FILTER_FACTORY);
    if (value == null) {
        values = Collections.emptyList();
    } else if (value instanceof Collection<?>) {
        Collection<?> rawValues = (Collection<?>) value;
        values = new ArrayList<String>(rawValues.size());
        for (Object v : rawValues) {
            // keep null entries as null: they are skipped by the loop below
            // instead of causing a NullPointerException here
            values.add(v == null ? null : v.toString());
        }
    } else if (value instanceof String[]) {
        values = Arrays.asList((String[]) value);
    } else if (value instanceof String) {
        values = Collections.singleton((String) value);
    } else {
        throw new ConfigurationException(PROPERTY_TOKEN_FILTER_FACTORY,
                "The type '" + value.getClass() + "' of the parsed value is not supported (supported are "
                        + "Collections, String[] and String values)!");
    }
    for (String filterConfigLine : values) {
        if (filterConfigLine == null || filterConfigLine.isEmpty()
                || DEFAULT_CLASS_NAME_CONFIG.equals(filterConfigLine)) {
            // ignore null, empty and the default value
            continue;
        }
        // fixed: pass the token filter property (was PROPERTY_CHAR_FILTER_FACTORY)
        Entry<String, Map<String, String>> filterConfig =
                parseConfigLine(PROPERTY_TOKEN_FILTER_FACTORY, filterConfigLine);
        TokenFilterFactory tff = initAnalyzer(PROPERTY_TOKEN_FILTER_FACTORY,
                filterConfig.getKey(), TokenFilterFactory.class,
                filterConfig.getValue());
        filterFactories.add(tff);
    }

    // init the language configuration (the property MUST be present)
    value = ctx.getProperties().get(LabelTokenizer.SUPPORTED_LANUAGES);
    if (value == null) {
        throw new ConfigurationException(LabelTokenizer.SUPPORTED_LANUAGES,
                "The language " + "configuration MUST BE present!");
    }
    langConf.setConfiguration(ctx.getProperties());
}
Aggregations