Search in sources :

Example 21 with ClasspathResourceLoader

use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project lucene-solr by apache.

the class TestPhoneticFilterFactory method testFactoryReflectionCaverphone2.

/**
 * Requests the encoder by the name "Caverphone2", which is registered in the
 * REGISTRY as Caverphone, so the lookup effectively exercises reflection
 * without a package name.
 */
public void testFactoryReflectionCaverphone2() throws IOException {
    final Map<String, String> params = new HashMap<>();
    params.put(PhoneticFilterFactory.ENCODER, "Caverphone2");

    final PhoneticFilterFactory phoneticFactory = new PhoneticFilterFactory(params);
    final ClasspathResourceLoader loader = new ClasspathResourceLoader(phoneticFactory.getClass());
    phoneticFactory.inform(loader);

    assertTrue(phoneticFactory.getEncoder() instanceof Caverphone2);
    // "inject" was never put into params, so this checks the default value
    assertTrue(phoneticFactory.inject);
}
Also used : HashMap(java.util.HashMap) ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) Caverphone2(org.apache.commons.codec.language.Caverphone2)

Example 22 with ClasspathResourceLoader

use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project lucene-solr by apache.

the class TestMorfologikFilterFactory method testMissingDictionary.

/**
 * Pointing the factory at a dictionary resource that does not exist must
 * surface an IOException whose message contains "Resource not found".
 */
public void testMissingDictionary() throws Exception {
    final ResourceLoader resourceLoader = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);

    // Plain map setup cannot fail, so it lives outside the expectThrows body.
    final Map<String, String> factoryArgs = new HashMap<>();
    factoryArgs.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "missing-dictionary-resource.dict");

    // Both construction and inform() stay inside the lambda, as either may be the failing step.
    final IOException thrown = expectThrows(IOException.class,
        () -> new MorfologikFilterFactory(factoryArgs).inform(resourceLoader));

    final String message = thrown.getMessage();
    assertTrue(message.contains("Resource not found"));
}
Also used : ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) HashMap(java.util.HashMap) IOException(java.io.IOException)

Example 23 with ClasspathResourceLoader

use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project lucene-solr by apache.

the class TestICUTokenizerFactory method testTokenizeLatinDontBreakOnHyphens.

/**
 * Tokenizes Latin-script text with the "Latin-dont-break-on-hyphens.rbbi" rule
 * file and checks the resulting tokens, e.g. "One-two" and "brung-it" are each
 * expected as a single token.
 */
public void testTokenizeLatinDontBreakOnHyphens() throws Exception {
    final Map<String, String> factoryArgs = new HashMap<>();
    factoryArgs.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-dont-break-on-hyphens.rbbi");

    final ICUTokenizerFactory tokenizerFactory = new ICUTokenizerFactory(factoryArgs);
    tokenizerFactory.inform(new ClasspathResourceLoader(getClass()));

    final Reader input = new StringReader("One-two punch.  Brang-, not brung-it.  This one--not that one--is the right one, -ish.");
    final Tokenizer tokenizer = tokenizerFactory.create(newAttributeFactory());
    tokenizer.setReader(input);

    final String[] expectedTokens = new String[] {
        "One-two", "punch", "Brang", "not", "brung-it",
        "This", "one", "not", "that", "one",
        "is", "the", "right", "one", "ish"
    };
    assertTokenStreamContents(tokenizer, expectedTokens);
}
Also used : HashMap(java.util.HashMap) StringReader(java.io.StringReader) Reader(java.io.Reader) ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) Tokenizer(org.apache.lucene.analysis.Tokenizer)

Example 24 with ClasspathResourceLoader

use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project lucene-solr by apache.

the class TestICUTokenizerFactory method testKeywordTokenizeCyrillicAndThai.

/**
 * Supplies more than one script/rule-file pair, reusing the same rule file
 * for both the Cyrillic and the Thai script. This overrides the default
 * DefaultICUTokenizerConfig tokenization of Thai.
 */
public void testKeywordTokenizeCyrillicAndThai() throws Exception {
    final Map<String, String> factoryArgs = new HashMap<>();
    factoryArgs.put(ICUTokenizerFactory.RULEFILES, "Cyrl:KeywordTokenizer.rbbi,Thai:KeywordTokenizer.rbbi");

    final ICUTokenizerFactory tokenizerFactory = new ICUTokenizerFactory(factoryArgs);
    tokenizerFactory.inform(new ClasspathResourceLoader(getClass()));

    final Reader input = new StringReader("Some English.  Немного русский.  ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  More English.");
    final Tokenizer tokenizer = tokenizerFactory.create(newAttributeFactory());
    tokenizer.setReader(input);

    // The Cyrillic and Thai runs are each expected as one token, trailing whitespace included.
    final String[] expectedTokens = new String[] {
        "Some",
        "English",
        "Немного русский.  ",
        "ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  ",
        "More",
        "English"
    };
    assertTokenStreamContents(tokenizer, expectedTokens);
}
Also used : HashMap(java.util.HashMap) StringReader(java.io.StringReader) Reader(java.io.Reader) ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) Tokenizer(org.apache.lucene.analysis.Tokenizer)

Example 25 with ClasspathResourceLoader

use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project tika by apache.

the class AnalyzerDeserializer method buildTokenizerFactory.

/**
 * Builds a {@link TokenizerFactory} from the JSON description of a tokenizer.
 *
 * <p>The element must be a JSON object whose {@code FACTORY} entry is a JSON
 * primitive holding the factory class name. A leading {@code "oala."} in that
 * name is expanded to {@code "org.apache.lucene.analysis."}. The
 * {@code PARAMS} entry is converted by {@code mapify} and handed to the
 * factory. The class name is matched against the classes of all tokenizers
 * reported by {@link TokenizerFactory#availableTokenizers()} to find the SPI
 * name used for instantiation.
 *
 * @param map          JSON element describing the tokenizer factory
 * @param analyzerName name of the analyzer being built; only used in error messages
 * @return the instantiated factory; if it is {@link ResourceLoaderAware} it has
 *         already been informed with a classpath resource loader
 * @throws IllegalArgumentException if {@code map} is not a JSON object, the factory
 *         name is missing or not a primitive, no registered tokenizer has the
 *         requested class name, or instantiation rejects the arguments
 * @throws IOException declared for the {@code inform} call on resource-loader-aware factories
 */
private static TokenizerFactory buildTokenizerFactory(JsonElement map, String analyzerName) throws IOException {
    if (!(map instanceof JsonObject)) {
        // Plain concatenation (not map.toString()) so a null element is reported
        // as an IllegalArgumentException rather than a NullPointerException.
        throw new IllegalArgumentException("Expecting a map with \"factory\" string and " + "\"params\" map in tokenizer factory;" + " not: " + map + " in " + analyzerName);
    }
    JsonObject factoryConfig = (JsonObject) map;
    JsonElement factoryEl = factoryConfig.get(FACTORY);
    if (factoryEl == null || !factoryEl.isJsonPrimitive()) {
        throw new IllegalArgumentException("Expecting value for factory in tokenizer factory builder in:" + analyzerName);
    }
    // Expand the "oala." shorthand literally; no regex is involved.
    final String shortPrefix = "oala.";
    String factoryName = factoryEl.getAsString();
    if (factoryName.startsWith(shortPrefix)) {
        factoryName = "org.apache.lucene.analysis." + factoryName.substring(shortPrefix.length());
    }
    JsonElement paramsEl = factoryConfig.get(PARAMS);
    Map<String, String> params = mapify(paramsEl);
    // Find the SPI name whose registered class matches the requested class name.
    String spiName = null;
    for (String candidate : TokenizerFactory.availableTokenizers()) {
        Class<?> clazz = TokenizerFactory.lookupClass(candidate);
        if (clazz.getName().equals(factoryName)) {
            spiName = candidate;
            break;
        }
    }
    if (spiName == null) {
        throw new IllegalArgumentException("A SPI class of type org.apache.lucene.analysis.util.TokenizerFactory with name" + "'" + factoryName + "' does not exist.");
    }
    try {
        TokenizerFactory tokenizerFactory = TokenizerFactory.forName(spiName, params);
        if (tokenizerFactory instanceof ResourceLoaderAware) {
            ((ResourceLoaderAware) tokenizerFactory).inform(new ClasspathResourceLoader(AnalyzerDeserializer.class));
        }
        return tokenizerFactory;
    } catch (IllegalArgumentException e) {
        // Re-wrap with the analyzer name for context, keeping the original cause.
        throw new IllegalArgumentException("While working on " + analyzerName, e);
    }
}
Also used : TokenizerFactory(org.apache.lucene.analysis.util.TokenizerFactory) JsonElement(com.google.gson.JsonElement) JsonObject(com.google.gson.JsonObject) ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) ResourceLoaderAware(org.apache.lucene.analysis.util.ResourceLoaderAware)

Aggregations

ClasspathResourceLoader (org.apache.lucene.analysis.util.ClasspathResourceLoader)25 HashMap (java.util.HashMap)16 ResourceLoader (org.apache.lucene.analysis.util.ResourceLoader)7 StringReader (java.io.StringReader)5 CharArraySet (org.apache.lucene.analysis.CharArraySet)5 Tokenizer (org.apache.lucene.analysis.Tokenizer)5 Reader (java.io.Reader)4 JsonElement (com.google.gson.JsonElement)3 JsonObject (com.google.gson.JsonObject)3 ResourceLoaderAware (org.apache.lucene.analysis.util.ResourceLoaderAware)3 JsonArray (com.google.gson.JsonArray)2 LinkedList (java.util.LinkedList)2 Caverphone2 (org.apache.commons.codec.language.Caverphone2)2 Metaphone (org.apache.commons.codec.language.Metaphone)2 TokenStream (org.apache.lucene.analysis.TokenStream)2 IOException (java.io.IOException)1 NodeStateResourceLoader (org.apache.jackrabbit.oak.plugins.index.lucene.NodeStateAnalyzerFactory.NodeStateResourceLoader)1 NodeBuilder (org.apache.jackrabbit.oak.spi.state.NodeBuilder)1 LimitTokenCountFilterFactory (org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory)1 CharFilterFactory (org.apache.lucene.analysis.util.CharFilterFactory)1