use of org.apache.lucene.analysis.util.ResourceLoaderAware in project stanbol by apache.
the class KuromojiNlpEngine method activate.
/**
 * Activates the engine and sets up the Kuromoji (Japanese) analysis chain:
 * the resource loader, the {@code JapaneseTokenizerFactory} and the token
 * filter factories (base form, part-of-speech stop filter, katakana stemmer),
 * which are appended to {@code filterFactories} in that order.
 *
 * @param ce the {@link org.osgi.service.component.ComponentContext}
 * @throws ConfigurationException declared for activation; presumably raised by
 *         {@code super.activate(ce)} on invalid configuration (confirm against the superclass)
 * @throws IOException if a factory fails while loading its resources via
 *         {@code inform(resourceLoader)}
 */
@Activate
protected void activate(ComponentContext ce) throws ConfigurationException, IOException {
    // The message previously said "smartcn" (copy-paste from another engine).
    log.info("activating kuromoji tokenizing engine");
    super.activate(ce);

    // Resource loader chain used to initialise the analysis components:
    // this bundle's class loader first, then a StanbolResourceLoader that wraps
    // the parentResourceLoader.
    // NOTE(review): the original comment states the inner loader resolves through the
    // commons.solr.core class loader before the (optional) parent loader - confirm
    // against StanbolResourceLoader.
    resourceLoader = new StanbolResourceLoader(
            KuromojiNlpEngine.class.getClassLoader(),
            new StanbolResourceLoader(parentResourceLoader));

    // Tokenizer: needs the resource loader before it can be used.
    tokenizerFactory = new JapaneseTokenizerFactory(TOKENIZER_FACTORY_CONFIG);
    ((ResourceLoaderAware) tokenizerFactory).inform(resourceLoader);

    // Base form filter
    TokenFilterFactory baseFormFilterFactory = new JapaneseBaseFormFilterFactory(BASE_FORM_FILTER_CONFIG);
    filterFactories.add(baseFormFilterFactory);

    // POS filter: must be informed of the resource loader before being registered.
    TokenFilterFactory posFilterFactory = new JapanesePartOfSpeechStopFilterFactory(POS_FILTER_CONFIG);
    ((ResourceLoaderAware) posFilterFactory).inform(resourceLoader);
    filterFactories.add(posFilterFactory);

    // Stemming
    TokenFilterFactory stemmFilterFactory = new JapaneseKatakanaStemFilterFactory(STEMM_FILTER_CONFIG);
    filterFactories.add(stemmFilterFactory);
}
use of org.apache.lucene.analysis.util.ResourceLoaderAware in project stanbol by apache.
the class LuceneLabelTokenizer method initAnalyzer.
/**
 * Looks up the class named {@code analyzerName} through the resource loader,
 * instantiates it via its {@code (Map)} constructor with {@code config}, and,
 * if the instance is {@link ResourceLoaderAware}, informs it of the resource loader.
 * <p>
 * {@code addLuceneMatchVersionIfNotPresent(config)} is invoked on the passed map
 * before instantiation, so the caller's map may be modified.
 *
 * @param property     the configuration property this component was configured with;
 *                     every {@link ConfigurationException} is reported against it
 * @param analyzerName the name of the class to load
 * @param type         the expected (super)type of the component
 * @param config       the configuration passed to the component's constructor
 * @return the initialised component
 * @throws ConfigurationException if the class cannot be found, has no
 *         {@code (Map)} constructor, cannot be instantiated, or fails to load
 *         its resources
 */
private <T> T initAnalyzer(String property, String analyzerName, Class<T> type, Map<String, String> config) throws ConfigurationException {
    Class<? extends T> analyzerClass;
    try {
        analyzerClass = resourceLoader.findClass(analyzerName, type);
    } catch (SolrException e) {
        // Report against the caller's property, not always the char filter property.
        throw new ConfigurationException(property, "Unable find " + type.getSimpleName() + " '" + analyzerName + "'!", e);
    }

    Constructor<? extends T> constructor;
    try {
        constructor = analyzerClass.getConstructor(Map.class);
    } catch (NoSuchMethodException e) {
        throw new ConfigurationException(property, "Unable find " + type.getSimpleName()
                + " constructor with parameter Map<String,String> for class " + analyzerClass
                + " (analyzer: '" + analyzerName + "') !", e);
    }

    addLuceneMatchVersionIfNotPresent(config);

    // Shared by all reflective instantiation failures below.
    final String instantiationError = "Unable to instantiate " + type.getSimpleName() + ' ' + analyzerClass
            + " (analyzer: '" + analyzerName + "') !";
    T analyzer;
    try {
        analyzer = constructor.newInstance(config);
    } catch (IllegalArgumentException e) {
        throw new ConfigurationException(property, instantiationError, e);
    } catch (InstantiationException e) {
        throw new ConfigurationException(property, instantiationError, e);
    } catch (IllegalAccessException e) {
        throw new ConfigurationException(property, instantiationError, e);
    } catch (InvocationTargetException e) {
        throw new ConfigurationException(property, instantiationError, e);
    }

    if (analyzer instanceof ResourceLoaderAware) {
        try {
            ((ResourceLoaderAware) analyzer).inform(resourceLoader);
        } catch (IOException e) {
            // Keep the cause so the failing resource can be diagnosed.
            throw new ConfigurationException(property, "Could not load configuration", e);
        }
    }
    return analyzer;
}
use of org.apache.lucene.analysis.util.ResourceLoaderAware in project tika by apache.
the class AnalyzerDeserializer method buildCharFilters.
/**
 * Builds the char filter factories described by a JSON array. Each array entry
 * must be a JSON object holding a {@code FACTORY} class name and an optional
 * {@code PARAMS} map. A leading {@code "oala."} in the class name is expanded to
 * {@code "org.apache.lucene.analysis."}. The class name is then resolved to its
 * SPI name, and the factory is created through {@code CharFilterFactory.forName}.
 * Factories that are {@link ResourceLoaderAware} are informed with a
 * classpath resource loader.
 *
 * @param el           the JSON element holding the char filter definitions; may be {@code null}
 * @param analyzerName the name of the analyzer being built, used in error messages
 * @return {@code null} if {@code el} is {@code null} or JSON null; otherwise the
 *         factories in declaration order (possibly an empty array)
 * @throws IllegalArgumentException if the JSON is not shaped as described, the factory
 *         class is not a registered SPI char filter factory, or the factory rejects its parameters
 * @throws IOException if a factory fails while loading its resources
 */
private static CharFilterFactory[] buildCharFilters(JsonElement el, String analyzerName) throws IOException {
    if (el == null || el.isJsonNull()) {
        return null;
    }
    if (!el.isJsonArray()) {
        throw new IllegalArgumentException("Expecting array for charfilters, but got:" + el.toString() + " for " + analyzerName);
    }
    JsonArray jsonArray = (JsonArray) el;
    List<CharFilterFactory> ret = new LinkedList<CharFilterFactory>();
    for (JsonElement filterMap : jsonArray) {
        if (!(filterMap instanceof JsonObject)) {
            throw new IllegalArgumentException("Expecting a map with \"factory\" string and \"params\" map in char filter factory;" + " not: " + filterMap.toString() + " in " + analyzerName);
        }
        JsonElement factoryEl = ((JsonObject) filterMap).get(FACTORY);
        if (factoryEl == null || !factoryEl.isJsonPrimitive()) {
            throw new IllegalArgumentException("Expecting value for factory in char filter factory builder in:" + analyzerName);
        }
        String factoryName = factoryEl.getAsString();
        // Expand the "oala." shorthand only as a literal prefix. The former
        // replaceAll("oala.", ...) was an unanchored regex that rewrote every match
        // anywhere in the name (e.g. inside "koala") and treated '.' as a wildcard.
        if (factoryName.startsWith("oala.")) {
            factoryName = "org.apache.lucene.analysis." + factoryName.substring("oala.".length());
        }
        JsonElement paramsEl = ((JsonObject) filterMap).get(PARAMS);
        Map<String, String> params = mapify(paramsEl);

        // forName() expects the SPI name, so map the class name back to it.
        String spiName = "";
        for (String s : CharFilterFactory.availableCharFilters()) {
            Class<?> clazz = CharFilterFactory.lookupClass(s);
            if (clazz.getName().equals(factoryName)) {
                spiName = s;
                break;
            }
        }
        if (spiName.equals("")) {
            throw new IllegalArgumentException("A SPI class of type org.apache.lucene.analysis.util.CharFilterFactory with name" + "'" + factoryName + "' does not exist.");
        }
        try {
            CharFilterFactory charFilterFactory = CharFilterFactory.forName(spiName, params);
            if (charFilterFactory instanceof ResourceLoaderAware) {
                ((ResourceLoaderAware) charFilterFactory).inform(new ClasspathResourceLoader(AnalyzerDeserializer.class));
            }
            ret.add(charFilterFactory);
        } catch (IllegalArgumentException e) {
            throw new IllegalArgumentException("While trying to load " + analyzerName + ": " + e.getMessage(), e);
        }
    }
    // toArray yields an empty array for an empty list, so no special case is needed.
    return ret.toArray(new CharFilterFactory[ret.size()]);
}
use of org.apache.lucene.analysis.util.ResourceLoaderAware in project tika by apache.
the class AnalyzerDeserializer method buildTokenFilterFactories.
/**
 * Builds the token filter factories described by a JSON array. Each entry must
 * be a JSON object with a {@code FACTORY} class name and an optional
 * {@code PARAMS} map; a leading {@code "oala."} in the class name stands for
 * {@code "org.apache.lucene.analysis."}. When {@code maxTokens} is greater than
 * -1, a {@code LimitTokenCountFilterFactory} with that count is appended last.
 *
 * @param el           the JSON element holding the token filter definitions; may be {@code null}
 * @param analyzerName the name of the analyzer being built, used in error messages
 * @param maxTokens    maximum token count, or a value of -1 or below for no limit
 * @return {@code null} if {@code el} is {@code null} or JSON null (no limit filter is
 *         added in that case); otherwise the factories in declaration order
 * @throws IllegalArgumentException if the JSON is malformed, the factory class is not a
 *         registered SPI token filter factory, or the factory rejects its parameters
 * @throws IOException if a factory fails while loading its resources
 */
private static TokenFilterFactory[] buildTokenFilterFactories(JsonElement el, String analyzerName, int maxTokens) throws IOException {
    if (el == null || el.isJsonNull()) {
        return null;
    }
    if (!el.isJsonArray()) {
        throw new IllegalArgumentException("Expecting array for tokenfilters, but got:" + el.toString() + " in " + analyzerName);
    }

    List<TokenFilterFactory> factories = new LinkedList<>();
    for (JsonElement entry : (JsonArray) el) {
        if (!(entry instanceof JsonObject)) {
            throw new IllegalArgumentException("Expecting a map with \"factory\" string and \"params\" map in token filter factory;" + " not: " + entry.toString() + " in " + analyzerName);
        }
        JsonObject definition = (JsonObject) entry;

        JsonElement factoryEl = definition.get(FACTORY);
        if (factoryEl == null || !factoryEl.isJsonPrimitive()) {
            throw new IllegalArgumentException("Expecting value for factory in token filter factory builder in " + analyzerName);
        }

        // Expand the "oala." shorthand prefix into the full Lucene analysis package.
        String className = factoryEl.getAsString();
        if (className.startsWith("oala.")) {
            className = "org.apache.lucene.analysis." + className.substring("oala.".length());
        }

        Map<String, String> params = mapify(definition.get(PARAMS));

        // forName() needs the SPI name: find the one whose class matches className.
        String spiName = "";
        for (String candidate : TokenFilterFactory.availableTokenFilters()) {
            Class<?> candidateClass = TokenFilterFactory.lookupClass(candidate);
            if (candidateClass.getName().equals(className)) {
                spiName = candidate;
                break;
            }
        }
        if (spiName.equals("")) {
            throw new IllegalArgumentException("A SPI class of type org.apache.lucene.analysis.util.TokenFilterFactory with name" + "'" + className + "' does not exist.");
        }

        try {
            TokenFilterFactory factory = TokenFilterFactory.forName(spiName, params);
            if (factory instanceof ResourceLoaderAware) {
                ((ResourceLoaderAware) factory).inform(new ClasspathResourceLoader(AnalyzerDeserializer.class));
            }
            factories.add(factory);
        } catch (IllegalArgumentException e) {
            throw new IllegalArgumentException("While loading " + analyzerName, e);
        }
    }

    if (maxTokens > -1) {
        Map<String, String> limitParams = new HashMap<>();
        limitParams.put("maxTokenCount", Integer.toString(maxTokens));
        factories.add(new LimitTokenCountFilterFactory(limitParams));
    }

    // An empty list converts to an empty array, matching the explicit empty case.
    return factories.toArray(new TokenFilterFactory[factories.size()]);
}
use of org.apache.lucene.analysis.util.ResourceLoaderAware in project lucene-solr by apache.
the class ResourceLoaderTest method testAwareCompatibility.
/**
 * Checks {@code assertAwareCompatibility} for both "aware" interfaces: objects of
 * an allowed type must be accepted, and any other object must be rejected with a
 * {@link SolrException}.
 */
public void testAwareCompatibility() throws Exception {
    Class<?> clazz = ResourceLoaderAware.class;
    // Check ResourceLoaderAware valid objects
    assertAwareCompatibility(clazz, new NGramFilterFactory(new HashMap<>()));
    assertAwareCompatibility(clazz, new KeywordTokenizerFactory(new HashMap<>()));
    // Make sure it throws an error for invalid objects
    Object[] invalid = new Object[] { // new NGramTokenFilter( null ),
        "hello", Float.valueOf(12.3f), new LukeRequestHandler(), new JSONResponseWriter() };
    for (Object obj : invalid) {
        try {
            assertAwareCompatibility(clazz, obj);
            Assert.fail("Should be invalid class: " + obj + " FOR " + clazz);
        } catch (SolrException expected) {
            // OK: rejection of an incompatible object is the behaviour under test
        }
    }

    clazz = SolrCoreAware.class;
    // Check SolrCoreAware valid objects
    assertAwareCompatibility(clazz, new LukeRequestHandler());
    assertAwareCompatibility(clazz, new FacetComponent());
    assertAwareCompatibility(clazz, new JSONResponseWriter());
    // Make sure it throws an error for invalid objects
    invalid = new Object[] { new NGramFilterFactory(new HashMap<>()), "hello", Float.valueOf(12.3f), new KeywordTokenizerFactory(new HashMap<>()) };
    for (Object obj : invalid) {
        try {
            assertAwareCompatibility(clazz, obj);
            Assert.fail("Should be invalid class: " + obj + " FOR " + clazz);
        } catch (SolrException expected) {
            // OK: rejection of an incompatible object is the behaviour under test
        }
    }
}
Aggregations