Use of org.apache.lucene.analysis.util.CharFilterFactory in the Apache Tika project.
Class AnalyzerDeserializer, method buildAnalyzer.
/**
 * Assembles an {@link Analyzer} from the JSON definition of a single analyzer.
 *
 * <p>The definition must be a JSON object. Each entry is dispatched on its key:
 * {@code CHAR_FILTERS}, {@code TOKEN_FILTERS} and {@code TOKENIZER} are handed to
 * the corresponding builder helper, {@code COMMENT} entries are skipped, and any
 * other key is rejected. Char filters and token filters default to empty arrays
 * when their sections are absent; a tokenizer section is mandatory.
 *
 * @param analyzerName name of the analyzer, forwarded to the builder helpers
 * @param value        JSON element holding the analyzer definition
 * @param maxTokens    forwarded to the token filter builder
 * @return a {@code MyTokenizerChain} wrapping the configured factories
 * @throws IllegalArgumentException if {@code value} is not a JSON object, if an
 *                                  unrecognized key is present, or if no
 *                                  tokenizer was configured
 * @throws IOException              declared by this method; presumably raised by
 *                                  the builder helpers (not visible here)
 */
public static Analyzer buildAnalyzer(String analyzerName, JsonElement value, int maxTokens) throws IOException {
    if (!value.isJsonObject()) {
        throw new IllegalArgumentException("Expecting map of charfilter, tokenizer, tokenfilters");
    }

    // Optional sections start out empty; the tokenizer has no default.
    CharFilterFactory[] charFilterFactories = new CharFilterFactory[0];
    TokenFilterFactory[] filterFactories = new TokenFilterFactory[0];
    TokenizerFactory tokenizer = null;

    JsonObject definition = (JsonObject) value;
    for (Map.Entry<String, JsonElement> entry : definition.entrySet()) {
        String key = entry.getKey();
        JsonElement section = entry.getValue();

        if (key.equals(CHAR_FILTERS)) {
            charFilterFactories = buildCharFilters(section, analyzerName);
            continue;
        }
        if (key.equals(TOKEN_FILTERS)) {
            filterFactories = buildTokenFilterFactories(section, analyzerName, maxTokens);
            continue;
        }
        if (key.equals(TOKENIZER)) {
            tokenizer = buildTokenizerFactory(section, analyzerName);
            continue;
        }
        if (key.equals(COMMENT)) {
            // Comment entries carry no configuration.
            continue;
        }
        throw new IllegalArgumentException("Should have one of three values here:" + CHAR_FILTERS + ", " + TOKENIZER + ", " + TOKEN_FILTERS + ". I don't recognize: " + key);
    }

    if (tokenizer != null) {
        return new MyTokenizerChain(charFilterFactories, tokenizer, filterFactories);
    }
    throw new IllegalArgumentException("Must specify at least a tokenizer factory for an analyzer!");
}
Aggregations