Search in sources :

Example 1 with LimitTokenCountFilterFactory

Use of org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory in project tika by apache.

In the class AnalyzerDeserializer, the method buildTokenFilterFactories.

/**
 * Builds the token filter factories described by a JSON array and optionally
 * appends a token-count limit.
 *
 * <p>Every array element must be a JSON object whose {@code FACTORY} entry is a
 * JSON primitive naming the factory class. Its {@code PARAMS} entry is converted
 * with {@code mapify} and handed to the factory. A factory name that starts with
 * {@code "oala."} is expanded to {@code "org.apache.lucene.analysis."}.
 *
 * @param el           JSON element holding the filter definitions; may be {@code null} or JSON null
 * @param analyzerName name of the analyzer being built; used only in error messages
 * @param maxTokens    when greater than -1, a {@code LimitTokenCountFilterFactory} configured with
 *                     this value as {@code maxTokenCount} is appended after the configured filters
 * @return the factories in declaration order (possibly an empty array), or {@code null} when
 *         {@code el} is {@code null} or JSON null
 * @throws IllegalArgumentException if {@code el} is not an array, an element is not an object,
 *                                  the factory entry is missing or not a primitive, no registered
 *                                  token filter has the requested class name, or the factory
 *                                  rejects its parameters
 * @throws IOException              propagated from initializing a {@code ResourceLoaderAware} factory
 */
private static TokenFilterFactory[] buildTokenFilterFactories(JsonElement el, String analyzerName, int maxTokens) throws IOException {
    if (el == null || el.isJsonNull()) {
        // Existing contract kept for callers: no definition yields null, and
        // maxTokens is not consulted on this path.
        return null;
    }
    if (!el.isJsonArray()) {
        throw new IllegalArgumentException("Expecting array for tokenfilters, but got:" + el.toString() + " in " + analyzerName);
    }
    JsonArray jsonArray = (JsonArray) el;
    List<TokenFilterFactory> ret = new LinkedList<>();
    for (JsonElement filterMap : jsonArray) {
        if (!(filterMap instanceof JsonObject)) {
            throw new IllegalArgumentException("Expecting a map with \"factory\" string and \"params\" map in token filter factory;" + " not: " + filterMap.toString() + " in " + analyzerName);
        }
        JsonObject filterObject = (JsonObject) filterMap;
        JsonElement factoryEl = filterObject.get(FACTORY);
        if (factoryEl == null || !factoryEl.isJsonPrimitive()) {
            throw new IllegalArgumentException("Expecting value for factory in token filter factory builder in " + analyzerName);
        }
        String factoryName = factoryEl.getAsString();
        if (factoryName.startsWith("oala.")) {
            // Literal prefix swap; a regex-based replace would treat '.' as a wildcard.
            factoryName = "org.apache.lucene.analysis." + factoryName.substring("oala.".length());
        }
        Map<String, String> params = mapify(filterObject.get(PARAMS));
        String spiName = findTokenFilterSpiName(factoryName);
        try {
            TokenFilterFactory tokenFilterFactory = TokenFilterFactory.forName(spiName, params);
            if (tokenFilterFactory instanceof ResourceLoaderAware) {
                ((ResourceLoaderAware) tokenFilterFactory).inform(new ClasspathResourceLoader(AnalyzerDeserializer.class));
            }
            ret.add(tokenFilterFactory);
        } catch (IllegalArgumentException e) {
            // Re-thrown with the analyzer name so the failing definition can be located.
            throw new IllegalArgumentException("While loading " + analyzerName, e);
        }
    }
    if (maxTokens > -1) {
        Map<String, String> m = new HashMap<>();
        m.put("maxTokenCount", Integer.toString(maxTokens));
        ret.add(new LimitTokenCountFilterFactory(m));
    }
    // A zero-length seed array covers the empty list as well.
    return ret.toArray(new TokenFilterFactory[0]);
}

/**
 * Returns the SPI name of the registered token filter whose implementing
 * class has the given fully qualified name.
 *
 * @param factoryName fully qualified class name of the wanted factory
 * @return the SPI name under which that class is registered
 * @throws IllegalArgumentException if no registered token filter has that class name
 */
private static String findTokenFilterSpiName(String factoryName) {
    for (String candidate : TokenFilterFactory.availableTokenFilters()) {
        Class<?> clazz = TokenFilterFactory.lookupClass(candidate);
        if (clazz.getName().equals(factoryName)) {
            return candidate;
        }
    }
    throw new IllegalArgumentException("A SPI class of type org.apache.lucene.analysis.util.TokenFilterFactory with name '" + factoryName + "' does not exist.");
}
Also used : HashMap(java.util.HashMap) JsonObject(com.google.gson.JsonObject) LinkedList(java.util.LinkedList) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory) JsonArray(com.google.gson.JsonArray) JsonElement(com.google.gson.JsonElement) ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) ResourceLoaderAware(org.apache.lucene.analysis.util.ResourceLoaderAware) LimitTokenCountFilterFactory(org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory)

Aggregations

JsonArray (com.google.gson.JsonArray)1 JsonElement (com.google.gson.JsonElement)1 JsonObject (com.google.gson.JsonObject)1 HashMap (java.util.HashMap)1 LinkedList (java.util.LinkedList)1 LimitTokenCountFilterFactory (org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory)1 ClasspathResourceLoader (org.apache.lucene.analysis.util.ClasspathResourceLoader)1 ResourceLoaderAware (org.apache.lucene.analysis.util.ResourceLoaderAware)1 TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory)1