Search in sources :

Example 1 with StopTokenFilterFactory

Use of org.elasticsearch.index.analysis.StopTokenFilterFactory in the elasticsearch project by elastic.

From the class AnalysisModuleTests, method testSimpleConfiguration.

/**
 * Builds the index analyzers described by {@code settings} and checks that the
 * analyzers named {@code custom1} through {@code custom6} are {@link CustomAnalyzer}
 * instances wired with the expected tokenizer, token filters, char filters and
 * position increment gap. Finally checks the size of the word list configured under
 * {@code index.analysis.filter.dict_dec.word_list}.
 *
 * @param settings the settings the analyzers and the word list are read from
 * @throws IOException declared for the analyzer/word-list loading calls made here
 */
private void testSimpleConfiguration(Settings settings) throws IOException {
    IndexAnalyzers analyzers = getIndexAnalyzers(settings);

    // custom1: standard tokenizer, two token filters, the first one a stop filter holding a single stop word
    CustomAnalyzer stopWordAnalyzer = requireCustomAnalyzer(analyzers, "custom1");
    assertThat(stopWordAnalyzer.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    assertThat(stopWordAnalyzer.tokenFilters().length, equalTo(2));
    StopTokenFilterFactory stopFilter = (StopTokenFilterFactory) stopWordAnalyzer.tokenFilters()[0];
    assertThat(stopFilter.stopWords().size(), equalTo(1));

    // custom2: only its type is checked
    requireCustomAnalyzer(analyzers, "custom2");

    // custom6: position increment gap
    assertThat(requireCustomAnalyzer(analyzers, "custom6").getPositionIncrementGap("any_string"), equalTo(256));

    // custom5: first char filter is the mapping char filter
    assertThat(requireCustomAnalyzer(analyzers, "custom5").charFilters()[0], instanceOf(MappingCharFilterFactory.class));

    // custom3: pattern-replace char filter carries the configured pattern and replacement
    PatternReplaceCharFilterFactory patternReplace =
        (PatternReplaceCharFilterFactory) requireCustomAnalyzer(analyzers, "custom3").charFilters()[0];
    assertThat(patternReplace.getPattern().pattern(), equalTo("sample(.*)"));
    assertThat(patternReplace.getReplacement(), equalTo("replacedSample $1"));

    // custom4: first token filter is the custom class referenced by name ("my")
    assertThat(requireCustomAnalyzer(analyzers, "custom4").tokenFilters()[0], instanceOf(MyFilterTokenFilterFactory.class));

    // NOTE(review): the two checks below are disabled. They reference `analysisService`,
    // which this method does not define, so they cannot be re-enabled as written.
    //        // verify Czech stemmer
    //        analyzer = analysisService.analyzer("czechAnalyzerWithStemmer").analyzer();
    //        assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    //        CustomAnalyzer czechstemmeranalyzer = (CustomAnalyzer) analyzer;
    //        assertThat(czechstemmeranalyzer.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    //        assertThat(czechstemmeranalyzer.tokenFilters().length, equalTo(4));
    //        assertThat(czechstemmeranalyzer.tokenFilters()[3], instanceOf(CzechStemTokenFilterFactory.class));
    //
    //        // check dictionary decompounder
    //        analyzer = analysisService.analyzer("decompoundingAnalyzer").analyzer();
    //        assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    //        CustomAnalyzer dictionaryDecompounderAnalyze = (CustomAnalyzer) analyzer;
    //        assertThat(dictionaryDecompounderAnalyze.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    //        assertThat(dictionaryDecompounderAnalyze.tokenFilters().length, equalTo(1));
    //        assertThat(dictionaryDecompounderAnalyze.tokenFilters()[0], instanceOf(DictionaryCompoundWordTokenFilterFactory.class));

    // The decompounder word list is read straight from the settings; only its size is asserted.
    Set<?> dictionaryWords = Analysis.getWordSet(null, Version.CURRENT, settings, "index.analysis.filter.dict_dec.word_list");
    MatcherAssert.assertThat(dictionaryWords.size(), equalTo(6));
//        MatcherAssert.assertThat(dictionaryWords, hasItems("donau", "dampf", "schiff", "spargel", "creme", "suppe"));
}

/**
 * Fetches the analyzer registered under {@code name}, asserts that it is a
 * {@link CustomAnalyzer}, and returns it as one.
 *
 * @param indexAnalyzers the registry to look the analyzer up in
 * @param name the analyzer name
 * @return the unwrapped analyzer, cast to {@link CustomAnalyzer}
 */
private CustomAnalyzer requireCustomAnalyzer(IndexAnalyzers indexAnalyzers, String name) {
    Analyzer resolved = indexAnalyzers.get(name).analyzer();
    assertThat(resolved, instanceOf(CustomAnalyzer.class));
    return (CustomAnalyzer) resolved;
}
Also used : StopTokenFilterFactory(org.elasticsearch.index.analysis.StopTokenFilterFactory) CustomAnalyzer(org.elasticsearch.index.analysis.CustomAnalyzer) StandardTokenizerFactory(org.elasticsearch.index.analysis.StandardTokenizerFactory) IndexAnalyzers(org.elasticsearch.index.analysis.IndexAnalyzers) MappingCharFilterFactory(org.elasticsearch.index.analysis.MappingCharFilterFactory) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) GermanAnalyzer(org.apache.lucene.analysis.de.GermanAnalyzer) NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) EnglishAnalyzer(org.apache.lucene.analysis.en.EnglishAnalyzer) CustomAnalyzer(org.elasticsearch.index.analysis.CustomAnalyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) PatternReplaceCharFilterFactory(org.elasticsearch.index.analysis.PatternReplaceCharFilterFactory) MyFilterTokenFilterFactory(org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory)

Aggregations

Analyzer (org.apache.lucene.analysis.Analyzer)1 KeywordAnalyzer (org.apache.lucene.analysis.core.KeywordAnalyzer)1 GermanAnalyzer (org.apache.lucene.analysis.de.GermanAnalyzer)1 EnglishAnalyzer (org.apache.lucene.analysis.en.EnglishAnalyzer)1 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)1 CustomAnalyzer (org.elasticsearch.index.analysis.CustomAnalyzer)1 IndexAnalyzers (org.elasticsearch.index.analysis.IndexAnalyzers)1 MappingCharFilterFactory (org.elasticsearch.index.analysis.MappingCharFilterFactory)1 NamedAnalyzer (org.elasticsearch.index.analysis.NamedAnalyzer)1 PatternReplaceCharFilterFactory (org.elasticsearch.index.analysis.PatternReplaceCharFilterFactory)1 StandardTokenizerFactory (org.elasticsearch.index.analysis.StandardTokenizerFactory)1 StopTokenFilterFactory (org.elasticsearch.index.analysis.StopTokenFilterFactory)1 MyFilterTokenFilterFactory (org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory)1