use of org.elasticsearch.index.analysis.StopTokenFilterFactory in project elasticsearch by elastic.
the class AnalysisModuleTests method testSimpleConfiguration.
private void testSimpleConfiguration(Settings settings) throws IOException {
IndexAnalyzers indexAnalyzers = getIndexAnalyzers(settings);
Analyzer analyzer = indexAnalyzers.get("custom1").analyzer();
assertThat(analyzer, instanceOf(CustomAnalyzer.class));
CustomAnalyzer custom1 = (CustomAnalyzer) analyzer;
assertThat(custom1.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
assertThat(custom1.tokenFilters().length, equalTo(2));
StopTokenFilterFactory stop1 = (StopTokenFilterFactory) custom1.tokenFilters()[0];
assertThat(stop1.stopWords().size(), equalTo(1));
analyzer = indexAnalyzers.get("custom2").analyzer();
assertThat(analyzer, instanceOf(CustomAnalyzer.class));
// verify position increment gap
analyzer = indexAnalyzers.get("custom6").analyzer();
assertThat(analyzer, instanceOf(CustomAnalyzer.class));
CustomAnalyzer custom6 = (CustomAnalyzer) analyzer;
assertThat(custom6.getPositionIncrementGap("any_string"), equalTo(256));
// verify characters mapping
analyzer = indexAnalyzers.get("custom5").analyzer();
assertThat(analyzer, instanceOf(CustomAnalyzer.class));
CustomAnalyzer custom5 = (CustomAnalyzer) analyzer;
assertThat(custom5.charFilters()[0], instanceOf(MappingCharFilterFactory.class));
// check custom pattern replace filter
analyzer = indexAnalyzers.get("custom3").analyzer();
assertThat(analyzer, instanceOf(CustomAnalyzer.class));
CustomAnalyzer custom3 = (CustomAnalyzer) analyzer;
PatternReplaceCharFilterFactory patternReplaceCharFilterFactory = (PatternReplaceCharFilterFactory) custom3.charFilters()[0];
assertThat(patternReplaceCharFilterFactory.getPattern().pattern(), equalTo("sample(.*)"));
assertThat(patternReplaceCharFilterFactory.getReplacement(), equalTo("replacedSample $1"));
// check custom class name (my)
analyzer = indexAnalyzers.get("custom4").analyzer();
assertThat(analyzer, instanceOf(CustomAnalyzer.class));
CustomAnalyzer custom4 = (CustomAnalyzer) analyzer;
assertThat(custom4.tokenFilters()[0], instanceOf(MyFilterTokenFilterFactory.class));
// // verify Czech stemmer
// analyzer = analysisService.analyzer("czechAnalyzerWithStemmer").analyzer();
// assertThat(analyzer, instanceOf(CustomAnalyzer.class));
// CustomAnalyzer czechstemmeranalyzer = (CustomAnalyzer) analyzer;
// assertThat(czechstemmeranalyzer.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
// assertThat(czechstemmeranalyzer.tokenFilters().length, equalTo(4));
// assertThat(czechstemmeranalyzer.tokenFilters()[3], instanceOf(CzechStemTokenFilterFactory.class));
//
// // check dictionary decompounder
// analyzer = analysisService.analyzer("decompoundingAnalyzer").analyzer();
// assertThat(analyzer, instanceOf(CustomAnalyzer.class));
// CustomAnalyzer dictionaryDecompounderAnalyze = (CustomAnalyzer) analyzer;
// assertThat(dictionaryDecompounderAnalyze.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
// assertThat(dictionaryDecompounderAnalyze.tokenFilters().length, equalTo(1));
// assertThat(dictionaryDecompounderAnalyze.tokenFilters()[0], instanceOf(DictionaryCompoundWordTokenFilterFactory.class));
Set<?> wordList = Analysis.getWordSet(null, Version.CURRENT, settings, "index.analysis.filter.dict_dec.word_list");
MatcherAssert.assertThat(wordList.size(), equalTo(6));
// MatcherAssert.assertThat(wordList, hasItems("donau", "dampf", "schiff", "spargel", "creme", "suppe"));
}
Aggregations