Search in sources :

Example 26 with CharFilterFactory

Use of org.apache.lucene.analysis.util.CharFilterFactory in the Apache Tika project.

From the class AnalyzerDeserializer, method buildAnalyzer:

/**
 * Assembles an {@link Analyzer} from a JSON object describing its components.
 *
 * <p>Each entry of the object is dispatched on its key: {@code CHAR_FILTERS},
 * {@code TOKEN_FILTERS} and {@code TOKENIZER} entries are handed to their
 * respective builder helpers, entries keyed {@code COMMENT} are skipped, and
 * any other key is rejected. Char filters and token filters default to empty
 * arrays when absent; a tokenizer is mandatory.
 *
 * @param analyzerName name of the analyzer, forwarded to the builder helpers
 * @param value        JSON element expected to be a JSON object
 * @param maxTokens    forwarded to the token-filter builder
 * @return a {@code MyTokenizerChain} wrapping the char filters, tokenizer and token filters
 * @throws IllegalArgumentException if {@code value} is not a JSON object, if an
 *         unrecognized key is present, or if no tokenizer entry was supplied
 * @throws IOException declared by this method; presumably raised by the builder
 *         helpers (not visible here)
 */
public static Analyzer buildAnalyzer(String analyzerName, JsonElement value, int maxTokens) throws IOException {
    if (!value.isJsonObject()) {
        throw new IllegalArgumentException("Expecting map of charfilter, tokenizer, tokenfilters");
    }
    JsonObject spec = (JsonObject) value;

    // Optional components start out empty; the tokenizer has no default.
    CharFilterFactory[] charFilterFactories = new CharFilterFactory[0];
    TokenFilterFactory[] filterFactories = new TokenFilterFactory[0];
    TokenizerFactory tokenizer = null;

    for (Map.Entry<String, JsonElement> entry : spec.entrySet()) {
        String key = entry.getKey();
        if (key.equals(CHAR_FILTERS)) {
            charFilterFactories = buildCharFilters(entry.getValue(), analyzerName);
            continue;
        }
        if (key.equals(TOKEN_FILTERS)) {
            filterFactories = buildTokenFilterFactories(entry.getValue(), analyzerName, maxTokens);
            continue;
        }
        if (key.equals(TOKENIZER)) {
            tokenizer = buildTokenizerFactory(entry.getValue(), analyzerName);
            continue;
        }
        if (key.equals(COMMENT)) {
            // Comment entries carry no configuration.
            continue;
        }
        throw new IllegalArgumentException("Should have one of three values here:" + CHAR_FILTERS + ", " + TOKENIZER + ", " + TOKEN_FILTERS + ". I don't recognize: " + key);
    }

    if (tokenizer == null) {
        throw new IllegalArgumentException("Must specify at least a tokenizer factory for an analyzer!");
    }
    return new MyTokenizerChain(charFilterFactories, tokenizer, filterFactories);
}
Also used : TokenizerFactory(org.apache.lucene.analysis.util.TokenizerFactory) JsonElement(com.google.gson.JsonElement) CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory) JsonObject(com.google.gson.JsonObject) HashMap(java.util.HashMap) Map(java.util.Map) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory)

Aggregations

CharFilterFactory (org.apache.lucene.analysis.util.CharFilterFactory)26 TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory)16 TokenizerFactory (org.apache.lucene.analysis.util.TokenizerFactory)12 Analyzer (org.apache.lucene.analysis.Analyzer)7 MultiTermAwareComponent (org.apache.lucene.analysis.util.MultiTermAwareComponent)6 TokenizerChain (org.apache.solr.analysis.TokenizerChain)5 Reader (java.io.Reader)4 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 AbstractAnalysisFactory (org.apache.lucene.analysis.util.AbstractAnalysisFactory)4 StringReader (java.io.StringReader)3 Map (java.util.Map)3 TokenStream (org.apache.lucene.analysis.TokenStream)3 Tokenizer (org.apache.lucene.analysis.Tokenizer)3 ResourceLoaderAware (org.apache.lucene.analysis.util.ResourceLoaderAware)3 SolrException (org.apache.solr.common.SolrException)3 JsonElement (com.google.gson.JsonElement)2 JsonObject (com.google.gson.JsonObject)2 IOException (java.io.IOException)2 KeywordAnalyzer (org.apache.lucene.analysis.core.KeywordAnalyzer)2