Search in sources :

Example 11 with ClasspathResourceLoader

use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project lucene-solr by apache.

the class TestSuggestStopFilterFactory method createFactory.

/**
 * Builds a {@code SuggestStopFilterFactory} from a flat list of alternating keys and values,
 * adds the {@code luceneMatchVersion} argument, and informs the factory with a
 * classpath resource loader rooted at this test class.
 *
 * @param params alternating key, value, key, value, ... entries
 * @return the factory after {@code inform} has been called on it
 * @throws IllegalArgumentException if {@code params} holds an odd number of entries
 * @throws IOException declared by the signature; presumably raised while loading resources
 */
private SuggestStopFilterFactory createFactory(String... params) throws IOException {
    final int count = params.length;
    if ((count & 1) != 0) {
        throw new IllegalArgumentException("invalid keysAndValues map");
    }
    Map<String, String> settings = new HashMap<>(count / 2);
    int idx = 0;
    while (idx < count) {
        String key = params[idx];
        // put() returns the value it replaced, so a non-null result means the key was repeated
        String displaced = settings.put(key, params[idx + 1]);
        assertNull("duplicate values for key: " + key, displaced);
        idx += 2;
    }
    settings.put("luceneMatchVersion", Version.LATEST.toString());
    SuggestStopFilterFactory created = new SuggestStopFilterFactory(settings);
    created.inform(new ClasspathResourceLoader(getClass()));
    return created;
}
Also used : HashMap(java.util.HashMap) ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader)

Example 12 with ClasspathResourceLoader

use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project lucene-solr by apache.

the class TestSuggestStopFilterFactory method testInform.

/**
 * Checks the stop-word set and ignoreCase flag of factories created from a single word file,
 * from a comma-separated pair of word files, from a snowball-format file, and from no
 * arguments at all (defaults).
 */
public void testInform() throws Exception {
    ResourceLoader resourceLoader = new ClasspathResourceLoader(getClass());
    assertTrue("loader is null and it shouldn't be", resourceLoader != null);

    // one word file
    SuggestStopFilterFactory stopFactory = createFactory("words", "stop-1.txt", "ignoreCase", "true");
    CharArraySet stopSet = stopFactory.getStopWords();
    assertTrue("words is null and it shouldn't be", stopSet != null);
    int stopCount = stopSet.size();
    assertTrue("words Size: " + stopCount + " is not: " + 2, stopCount == 2);
    boolean ignoresCase = stopFactory.isIgnoreCase();
    assertTrue(ignoresCase + " does not equal: " + true, ignoresCase);

    // two word files in one comma-separated argument
    stopFactory = createFactory("words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
    stopSet = stopFactory.getStopWords();
    assertTrue("words is null and it shouldn't be", stopSet != null);
    stopCount = stopSet.size();
    assertTrue("words Size: " + stopCount + " is not: " + 4, stopCount == 4);
    ignoresCase = stopFactory.isIgnoreCase();
    assertTrue(ignoresCase + " does not equal: " + true, ignoresCase);

    // snowball-format word file
    stopFactory = createFactory("words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
    stopSet = stopFactory.getStopWords();
    assertEquals(8, stopSet.size());
    String[] expectedWords = {"he", "him", "his", "himself", "she", "her", "hers", "herself"};
    for (String expected : expectedWords) {
        assertTrue(stopSet.contains(expected));
    }

    // defaults
    stopFactory = createFactory();
    assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, stopFactory.getStopWords());
    assertEquals(false, stopFactory.isIgnoreCase());
}
Also used : ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) CharArraySet(org.apache.lucene.analysis.CharArraySet)

Example 13 with ClasspathResourceLoader

use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project tika by apache.

the class AnalyzerDeserializer method buildCharFilters.

/**
 * Builds the char filter factories described by a JSON array.
 *
 * <p>Each array entry must be a JSON object whose {@code FACTORY} member is a primitive holding
 * the factory class name; the {@code PARAMS} member is passed through {@code mapify} to obtain
 * the factory arguments. A leading {@code "oala."} in the class name is shorthand for
 * {@code "org.apache.lucene.analysis."}. The class name is resolved to an SPI name by scanning
 * {@code CharFilterFactory.availableCharFilters()}, and the factory is created with
 * {@code CharFilterFactory.forName}. Factories implementing {@code ResourceLoaderAware} are
 * informed with a classpath resource loader rooted at {@code AnalyzerDeserializer}.
 *
 * @param el the JSON element holding the char filter definitions; may be {@code null}
 * @param analyzerName analyzer name, used only in error messages
 * @return {@code null} when {@code el} is {@code null} or JSON null; otherwise an array
 *         (possibly empty) with one factory per entry, in document order
 * @throws IllegalArgumentException if {@code el} is not an array, an entry is malformed,
 *         no SPI class matches the factory name, or factory creation rejects its arguments
 * @throws IOException declared by the signature; presumably raised while informing a factory
 */
private static CharFilterFactory[] buildCharFilters(JsonElement el, String analyzerName) throws IOException {
    if (el == null || el.isJsonNull()) {
        return null;
    }
    if (!el.isJsonArray()) {
        throw new IllegalArgumentException("Expecting array for charfilters, but got:" + el.toString() + " for " + analyzerName);
    }
    JsonArray jsonArray = (JsonArray) el;
    List<CharFilterFactory> ret = new LinkedList<CharFilterFactory>();
    for (JsonElement filterMap : jsonArray) {
        if (!(filterMap instanceof JsonObject)) {
            throw new IllegalArgumentException("Expecting a map with \"factory\" string and \"params\" map in char filter factory;" + " not: " + filterMap.toString() + " in " + analyzerName);
        }
        JsonElement factoryEl = ((JsonObject) filterMap).get(FACTORY);
        if (factoryEl == null || !factoryEl.isJsonPrimitive()) {
            throw new IllegalArgumentException("Expecting value for factory in char filter factory builder in:" + analyzerName);
        }
        String factoryName = factoryEl.getAsString();
        // Expand the "oala." shorthand only when it is a literal prefix. The previous
        // replaceAll("oala.", ...) treated the dot as a regex wildcard and rewrote every
        // occurrence anywhere in the name, unlike buildTokenFilterFactories.
        if (factoryName.startsWith("oala.")) {
            factoryName = "org.apache.lucene.analysis." + factoryName.substring("oala.".length());
        }
        JsonElement paramsEl = ((JsonObject) filterMap).get(PARAMS);
        Map<String, String> params = mapify(paramsEl);
        // forName() takes the SPI name, so map the fully qualified class name back to it
        String spiName = "";
        for (String s : CharFilterFactory.availableCharFilters()) {
            Class<?> clazz = CharFilterFactory.lookupClass(s);
            if (clazz.getName().equals(factoryName)) {
                spiName = s;
                break;
            }
        }
        if (spiName.isEmpty()) {
            throw new IllegalArgumentException("A SPI class of type org.apache.lucene.analysis.util.CharFilterFactory with name" + "'" + factoryName + "' does not exist.");
        }
        try {
            CharFilterFactory charFilterFactory = CharFilterFactory.forName(spiName, params);
            if (charFilterFactory instanceof ResourceLoaderAware) {
                ((ResourceLoaderAware) charFilterFactory).inform(new ClasspathResourceLoader(AnalyzerDeserializer.class));
            }
            ret.add(charFilterFactory);
        } catch (IllegalArgumentException e) {
            // add the analyzer name for context while keeping the original exception as the cause
            throw new IllegalArgumentException("While trying to load " + analyzerName + ": " + e.getMessage(), e);
        }
    }
    // toArray already yields a zero-length array for an empty list
    return ret.toArray(new CharFilterFactory[ret.size()]);
}
Also used : CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory) JsonObject(com.google.gson.JsonObject) LinkedList(java.util.LinkedList) JsonArray(com.google.gson.JsonArray) JsonElement(com.google.gson.JsonElement) ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) ResourceLoaderAware(org.apache.lucene.analysis.util.ResourceLoaderAware)

Example 14 with ClasspathResourceLoader

use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project tika by apache.

the class AnalyzerDeserializer method buildTokenFilterFactories.

/**
 * Builds the token filter factories described by a JSON array, optionally followed by a
 * {@code LimitTokenCountFilterFactory}.
 *
 * <p>Each array entry must be a JSON object whose {@code FACTORY} member is a primitive holding
 * the factory class name (a leading {@code "oala."} is expanded to
 * {@code "org.apache.lucene.analysis."}); the {@code PARAMS} member is passed through
 * {@code mapify} to obtain the factory arguments. Factories implementing
 * {@code ResourceLoaderAware} are informed with a classpath resource loader rooted at
 * {@code AnalyzerDeserializer}.
 *
 * @param el the JSON element holding the token filter definitions; may be {@code null}
 * @param analyzerName analyzer name, used only in error messages
 * @param maxTokens when greater than -1, a token-count limit filter factory with this
 *        {@code maxTokenCount} is appended after the configured factories
 * @return {@code null} when {@code el} is {@code null} or JSON null; otherwise the factories
 *         in document order (possibly an empty array)
 * @throws IllegalArgumentException if {@code el} is not an array, an entry is malformed,
 *         no SPI class matches the factory name, or factory creation rejects its arguments
 * @throws IOException declared by the signature; presumably raised while informing a factory
 */
private static TokenFilterFactory[] buildTokenFilterFactories(JsonElement el, String analyzerName, int maxTokens) throws IOException {
    if (el == null || el.isJsonNull()) {
        return null;
    }
    if (!el.isJsonArray()) {
        throw new IllegalArgumentException("Expecting array for tokenfilters, but got:" + el + " in " + analyzerName);
    }
    List<TokenFilterFactory> factories = new LinkedList<>();
    for (JsonElement entry : (JsonArray) el) {
        if (!(entry instanceof JsonObject)) {
            throw new IllegalArgumentException("Expecting a map with \"factory\" string and \"params\" map in token filter factory;" + " not: " + entry + " in " + analyzerName);
        }
        JsonObject spec = (JsonObject) entry;
        JsonElement nameEl = spec.get(FACTORY);
        if (nameEl == null || !nameEl.isJsonPrimitive()) {
            throw new IllegalArgumentException("Expecting value for factory in token filter factory builder in " + analyzerName);
        }
        String className = nameEl.getAsString();
        // "oala." is shorthand for the Lucene analysis package; expand it only as a prefix
        if (className.startsWith("oala.")) {
            className = className.replaceFirst("oala.", "org.apache.lucene.analysis.");
        }
        Map<String, String> settings = mapify(spec.get(PARAMS));

        // forName() takes the SPI name, so map the fully qualified class name back to it
        String spiName = "";
        for (String candidate : TokenFilterFactory.availableTokenFilters()) {
            Class<?> candidateClass = TokenFilterFactory.lookupClass(candidate);
            if (candidateClass.getName().equals(className)) {
                spiName = candidate;
                break;
            }
        }
        if (spiName.isEmpty()) {
            throw new IllegalArgumentException("A SPI class of type org.apache.lucene.analysis.util.TokenFilterFactory with name" + "'" + className + "' does not exist.");
        }

        try {
            TokenFilterFactory created = TokenFilterFactory.forName(spiName, settings);
            if (created instanceof ResourceLoaderAware) {
                ((ResourceLoaderAware) created).inform(new ClasspathResourceLoader(AnalyzerDeserializer.class));
            }
            factories.add(created);
        } catch (IllegalArgumentException e) {
            throw new IllegalArgumentException("While loading " + analyzerName, e);
        }
    }
    if (maxTokens >= 0) {
        Map<String, String> limitArgs = new HashMap<>();
        limitArgs.put("maxTokenCount", Integer.toString(maxTokens));
        factories.add(new LimitTokenCountFilterFactory(limitArgs));
    }
    // toArray already yields a zero-length array for an empty list
    return factories.toArray(new TokenFilterFactory[factories.size()]);
}
Also used : HashMap(java.util.HashMap) JsonObject(com.google.gson.JsonObject) LinkedList(java.util.LinkedList) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory) JsonArray(com.google.gson.JsonArray) JsonElement(com.google.gson.JsonElement) ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) ResourceLoaderAware(org.apache.lucene.analysis.util.ResourceLoaderAware) LimitTokenCountFilterFactory(org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory)

Example 15 with ClasspathResourceLoader

use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project jackrabbit-oak by apache.

the class NodeStateAnalyzerFactoryTest method nodeStateResourceLoader.

/**
 * Creates a file node named "foo" holding known bytes and checks that a
 * {@code NodeStateResourceLoader} built over that node state returns the same bytes
 * when the resource "foo" is opened.
 */
@Test
public void nodeStateResourceLoader() throws Exception {
    byte[] expected = "hello".getBytes();
    NodeBuilder builder = EMPTY_NODE.builder();
    createFileNode(builder, "foo", expected);

    // the classpath loader is supplied as the second constructor argument
    NodeStateResourceLoader resourceLoader =
            new NodeStateResourceLoader(builder.getNodeState(), new ClasspathResourceLoader());

    byte[] actual = IOUtils.toByteArray(resourceLoader.openResource("foo"));
    assertArrayEquals(expected, actual);
}
Also used : NodeStateResourceLoader(org.apache.jackrabbit.oak.plugins.index.lucene.NodeStateAnalyzerFactory.NodeStateResourceLoader) ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) NodeBuilder(org.apache.jackrabbit.oak.spi.state.NodeBuilder) Test(org.junit.Test)

Aggregations

ClasspathResourceLoader (org.apache.lucene.analysis.util.ClasspathResourceLoader): 25; HashMap (java.util.HashMap): 16; ResourceLoader (org.apache.lucene.analysis.util.ResourceLoader): 7; StringReader (java.io.StringReader): 5; CharArraySet (org.apache.lucene.analysis.CharArraySet): 5; Tokenizer (org.apache.lucene.analysis.Tokenizer): 5; Reader (java.io.Reader): 4; JsonElement (com.google.gson.JsonElement): 3; JsonObject (com.google.gson.JsonObject): 3; ResourceLoaderAware (org.apache.lucene.analysis.util.ResourceLoaderAware): 3; JsonArray (com.google.gson.JsonArray): 2; LinkedList (java.util.LinkedList): 2; Caverphone2 (org.apache.commons.codec.language.Caverphone2): 2; Metaphone (org.apache.commons.codec.language.Metaphone): 2; TokenStream (org.apache.lucene.analysis.TokenStream): 2; IOException (java.io.IOException): 1; NodeStateResourceLoader (org.apache.jackrabbit.oak.plugins.index.lucene.NodeStateAnalyzerFactory.NodeStateResourceLoader): 1; NodeBuilder (org.apache.jackrabbit.oak.spi.state.NodeBuilder): 1; LimitTokenCountFilterFactory (org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory): 1; CharFilterFactory (org.apache.lucene.analysis.util.CharFilterFactory): 1