use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project lucene-solr by apache.
the class TestPhoneticFilterFactory method testFactoryReflectionCaverphone2.
/**
 * Verifies that an encoder can be resolved from a bare class name.
 * "Caverphone2" is chosen because the REGISTRY knows this encoder under the
 * name Caverphone, so the lookup has to go through reflection without a
 * package name.
 */
public void testFactoryReflectionCaverphone2() throws IOException {
  final Map<String, String> encoderArgs = new HashMap<>();
  encoderArgs.put(PhoneticFilterFactory.ENCODER, "Caverphone2");

  final PhoneticFilterFactory phonetic = new PhoneticFilterFactory(encoderArgs);
  final ClasspathResourceLoader loader = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(loader);

  final Object resolvedEncoder = phonetic.getEncoder();
  assertTrue(resolvedEncoder instanceof Caverphone2);
  // no inject argument was supplied, so this checks the default value
  assertTrue(phonetic.inject);
}
use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project lucene-solr by apache.
the class TestMorfologikFilterFactory method testMissingDictionary.
/**
 * Pointing the factory at a dictionary resource that does not exist must fail
 * with an IOException whose message contains "Resource not found".
 */
public void testMissingDictionary() throws Exception {
  final ResourceLoader classpathLoader = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);
  final Map<String, String> factoryArgs = new HashMap<>();
  factoryArgs.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "missing-dictionary-resource.dict");

  // Construction and inform() both stay inside the lambda so that either step
  // is evaluated under expectThrows.
  IOException thrown = expectThrows(
      IOException.class,
      () -> new MorfologikFilterFactory(factoryArgs).inform(classpathLoader));

  final String message = thrown.getMessage();
  assertTrue(message.contains("Resource not found"));
}
use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project lucene-solr by apache.
the class TestICUTokenizerFactory method testTokenizeLatinDontBreakOnHyphens.
/**
 * Tokenizes Latin-script text using the "Latin-dont-break-on-hyphens.rbbi"
 * rule file. The expected output keeps "One-two" and "brung-it" as single
 * tokens, while hyphens that do not join two word parts are not emitted.
 */
public void testTokenizeLatinDontBreakOnHyphens() throws Exception {
  final String[] expectedTokens = {
      "One-two", "punch", "Brang", "not", "brung-it", "This", "one", "not",
      "that", "one", "is", "the", "right", "one", "ish"
  };

  final Map<String, String> ruleArgs = new HashMap<>();
  ruleArgs.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-dont-break-on-hyphens.rbbi");

  final ICUTokenizerFactory tokenizerFactory = new ICUTokenizerFactory(ruleArgs);
  tokenizerFactory.inform(new ClasspathResourceLoader(getClass()));

  final Tokenizer tokenizer = tokenizerFactory.create(newAttributeFactory());
  final Reader input = new StringReader("One-two punch. Brang-, not brung-it. This one--not that one--is the right one, -ish.");
  tokenizer.setReader(input);

  assertTokenStreamContents(tokenizer, expectedTokens);
}
use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project lucene-solr by apache.
the class TestICUTokenizerFactory method testKeywordTokenizeCyrillicAndThai.
/**
 * Configures two script/rule-file pairs at once, mapping both Cyrillic and
 * Thai to the same KeywordTokenizer.rbbi rule file. This overrides the default
 * DefaultICUTokenizerConfig tokenization for Thai; each Cyrillic or Thai run
 * is expected back as one token, while the English text is still split into
 * words.
 */
public void testKeywordTokenizeCyrillicAndThai() throws Exception {
  final String[] expectedTokens = {
      "Some", "English", "Немного русский. ", "ข้อความภาษาไทยเล็ก ๆ น้อย ๆ ", "More", "English"
  };

  final Map<String, String> ruleArgs = new HashMap<>();
  ruleArgs.put(ICUTokenizerFactory.RULEFILES, "Cyrl:KeywordTokenizer.rbbi,Thai:KeywordTokenizer.rbbi");

  final ICUTokenizerFactory tokenizerFactory = new ICUTokenizerFactory(ruleArgs);
  tokenizerFactory.inform(new ClasspathResourceLoader(getClass()));

  final Tokenizer tokenizer = tokenizerFactory.create(newAttributeFactory());
  final Reader input = new StringReader("Some English. Немного русский. ข้อความภาษาไทยเล็ก ๆ น้อย ๆ More English.");
  tokenizer.setReader(input);

  assertTokenStreamContents(tokenizer, expectedTokens);
}
use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project tika by apache.
the class AnalyzerDeserializer method buildTokenizerFactory.
/**
 * Builds a {@code TokenizerFactory} from its JSON description.
 *
 * <p>The element must be a JSON object holding a primitive value under the
 * {@code FACTORY} key (the factory class name) and, under the {@code PARAMS}
 * key, whatever {@code mapify} accepts as the factory arguments. A class name
 * beginning with the shorthand {@code "oala."} is expanded to
 * {@code "org.apache.lucene.analysis."}. The class name is then matched
 * against the classes of all tokenizers reported by
 * {@code TokenizerFactory.availableTokenizers()} to find the SPI name used to
 * instantiate the factory.
 *
 * @param map          JSON element describing the tokenizer factory
 * @param analyzerName name of the analyzer being built; used in error messages only
 * @return the instantiated factory, already informed with a classpath resource
 *         loader when it implements {@code ResourceLoaderAware}
 * @throws IllegalArgumentException if {@code map} is not a JSON object (including
 *         {@code null}), if the factory name is missing or not a primitive, if no
 *         available tokenizer has the requested class, or if instantiating or
 *         informing the factory raises an {@code IllegalArgumentException}
 * @throws IOException declared for the {@code inform(...)} call on resource-loader-aware factories
 */
private static TokenizerFactory buildTokenizerFactory(JsonElement map, String analyzerName) throws IOException {
    if (!(map instanceof JsonObject)) {
        // Concatenate the element itself instead of calling map.toString(): a null
        // element must end up in this IllegalArgumentException, not in an NPE.
        throw new IllegalArgumentException("Expecting a map with \"factory\" string and " + "\"params\" map in tokenizer factory;" + " not: " + map + " in " + analyzerName);
    }
    JsonObject factoryObject = (JsonObject) map;

    JsonElement factoryEl = factoryObject.get(FACTORY);
    if (factoryEl == null || !factoryEl.isJsonPrimitive()) {
        throw new IllegalArgumentException("Expecting value for factory in tokenizer factory builder in:" + analyzerName);
    }

    String factoryName = factoryEl.getAsString();
    if (factoryName.startsWith("oala.")) {
        // Literal prefix swap; avoids treating "oala." as a regular expression.
        factoryName = "org.apache.lucene.analysis." + factoryName.substring("oala.".length());
    }

    Map<String, String> params = mapify(factoryObject.get(PARAMS));

    // Factories are instantiated by SPI name, so translate the class name back to it.
    String spiName = null;
    for (String candidate : TokenizerFactory.availableTokenizers()) {
        Class<?> clazz = TokenizerFactory.lookupClass(candidate);
        if (clazz.getName().equals(factoryName)) {
            spiName = candidate;
            break;
        }
    }
    if (spiName == null) {
        throw new IllegalArgumentException("A SPI class of type org.apache.lucene.analysis.util.TokenizerFactory with name" + "'" + factoryName + "' does not exist.");
    }

    try {
        TokenizerFactory tokenizerFactory = TokenizerFactory.forName(spiName, params);
        if (tokenizerFactory instanceof ResourceLoaderAware) {
            ((ResourceLoaderAware) tokenizerFactory).inform(new ClasspathResourceLoader(AnalyzerDeserializer.class));
        }
        return tokenizerFactory;
    } catch (IllegalArgumentException e) {
        // Re-throw with the analyzer name so the failing configuration can be located.
        throw new IllegalArgumentException("While working on " + analyzerName, e);
    }
}
Aggregations