Search in sources :

Example 6 with IndexAnalyzers

use of org.elasticsearch.index.analysis.IndexAnalyzers in project elasticsearch by elastic.

the class CommonGramsTokenFilterFactoryTests method testCommonGramsAnalysis.

/**
 * Loads analysis settings from the {@code commongrams.json} classpath resource and verifies
 * that the analyzers registered as {@code commongramsAnalyzer} and
 * {@code commongramsAnalyzer_file} both emit the same expected token sequence for a sample
 * sentence.
 *
 * @throws IOException if the settings resource cannot be read or analysis setup fails
 */
public void testCommonGramsAnalysis() throws IOException {
    final String resource = "/org/elasticsearch/index/analysis/commongrams/commongrams.json";
    final Settings settings = Settings.builder()
            .loadFromStream(resource, getClass().getResourceAsStream(resource))
            .put(Environment.PATH_HOME_SETTING.getKey(), createHome())
            .build();
    final String source = "the quick brown is a fox or not";

    // Both analyzer definitions are expected to yield identical output; a fresh test analysis
    // is built for each one, exactly one per analyzer name.
    for (String analyzerName : new String[] { "commongramsAnalyzer", "commongramsAnalyzer_file" }) {
        IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings).indexAnalyzers;
        Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();
        String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox", "fox", "fox_or", "or", "not" };
        assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
    }
}
Also used : IndexAnalyzers(org.elasticsearch.index.analysis.IndexAnalyzers) Analyzer(org.apache.lucene.analysis.Analyzer) Settings(org.elasticsearch.common.settings.Settings)

Example 7 with IndexAnalyzers

use of org.elasticsearch.index.analysis.IndexAnalyzers in project elasticsearch by elastic.

the class AnalysisModuleTests method testAnalyzerAlias.

/**
 * Verifies analyzer aliases on an index whose creation version is picked at random between
 * {@code V_2_0_0} and {@code V_2_3_5}: {@code foobar} (keyword) is aliased to {@code default}
 * and {@code foobar_search} (english) is aliased to {@code default_search}. Both aliases must
 * resolve to the aliased analyzer type, and a warning is expected for each alias setting.
 *
 * @throws IOException if building the analysis registry or index analyzers fails
 */
public void testAnalyzerAlias() throws IOException {
    // Settings are added one at a time, in the same order as a single chained expression
    // would add them, so the temp dir is created before the random version is drawn.
    final Settings.Builder builder = Settings.builder();
    builder.put("index.analysis.analyzer.foobar.alias", "default");
    builder.put("index.analysis.analyzer.foobar.type", "keyword");
    builder.put("index.analysis.analyzer.foobar_search.alias", "default_search");
    builder.put("index.analysis.analyzer.foobar_search.type", "english");
    builder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString());
    builder.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_2_0_0, Version.V_2_3_5));
    final Settings settings = builder.build();

    final AnalysisRegistry registry = getNewRegistry(settings);
    final IndexAnalyzers analyzers = getIndexAnalyzers(registry, settings);

    assertThat(analyzers.get("default").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
    assertThat(analyzers.get("default_search").analyzer(), is(instanceOf(EnglishAnalyzer.class)));

    final String indexAliasWarning = "setting [index.analysis.analyzer.foobar.alias] is only allowed on index [test] because it was created before " + "5.x; analyzer aliases can no longer be created on new indices.";
    final String searchAliasWarning = "setting [index.analysis.analyzer.foobar_search.alias] is only allowed on index [test] because it was created before " + "5.x; analyzer aliases can no longer be created on new indices.";
    assertWarnings(indexAliasWarning, searchAliasWarning);
}
Also used : AnalysisRegistry(org.elasticsearch.index.analysis.AnalysisRegistry) IndexAnalyzers(org.elasticsearch.index.analysis.IndexAnalyzers) Settings(org.elasticsearch.common.settings.Settings) IndexSettings(org.elasticsearch.index.IndexSettings)

Example 8 with IndexAnalyzers

use of org.elasticsearch.index.analysis.IndexAnalyzers in project elasticsearch by elastic.

the class AnalysisModuleTests method testAnalyzerAliasDefault.

/**
 * Verifies that when only {@code foobar} (keyword) is aliased to {@code default}, on an index
 * whose creation version is picked at random between {@code V_2_0_0} and {@code V_2_3_5},
 * both {@code default} and {@code default_search} resolve to a {@code KeywordAnalyzer}, and
 * that a single warning is raised for the alias setting.
 *
 * @throws IOException if building the analysis registry or index analyzers fails
 */
public void testAnalyzerAliasDefault() throws IOException {
    // Same insertion order as the chained form: temp dir first, then the random version.
    final Settings.Builder builder = Settings.builder();
    builder.put("index.analysis.analyzer.foobar.alias", "default");
    builder.put("index.analysis.analyzer.foobar.type", "keyword");
    builder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString());
    builder.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_2_0_0, Version.V_2_3_5));
    final Settings settings = builder.build();

    final AnalysisRegistry registry = getNewRegistry(settings);
    final IndexAnalyzers analyzers = getIndexAnalyzers(registry, settings);

    // No explicit search alias is configured, yet default_search is asserted to be keyword too.
    assertThat(analyzers.get("default").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
    assertThat(analyzers.get("default_search").analyzer(), is(instanceOf(KeywordAnalyzer.class)));

    final String aliasWarning = "setting [index.analysis.analyzer.foobar.alias] is only allowed on index [test] because it was created before " + "5.x; analyzer aliases can no longer be created on new indices.";
    assertWarnings(aliasWarning);
}
Also used : AnalysisRegistry(org.elasticsearch.index.analysis.AnalysisRegistry) IndexAnalyzers(org.elasticsearch.index.analysis.IndexAnalyzers) Settings(org.elasticsearch.common.settings.Settings) IndexSettings(org.elasticsearch.index.IndexSettings)

Example 9 with IndexAnalyzers

use of org.elasticsearch.index.analysis.IndexAnalyzers in project elasticsearch by elastic.

the class AnalysisModuleTests method testSimpleConfiguration.

/**
 * Builds index analyzers from the given settings and checks the analyzers named
 * {@code custom1} through {@code custom6}: each must be a {@code CustomAnalyzer}, and selected
 * ones are inspected for their tokenizer, token filters, char filters or position increment
 * gap. Finally checks the size of the word list configured under
 * {@code index.analysis.filter.dict_dec.word_list}.
 *
 * @param settings analysis settings expected to define the analyzers and word list above
 * @throws IOException if building the index analyzers fails
 */
private void testSimpleConfiguration(Settings settings) throws IOException {
    final IndexAnalyzers analyzers = getIndexAnalyzers(settings);

    // custom1: standard tokenizer, two token filters, the first being a one-word stop filter
    Analyzer current = analyzers.get("custom1").analyzer();
    assertThat(current, instanceOf(CustomAnalyzer.class));
    final CustomAnalyzer firstCustom = (CustomAnalyzer) current;
    assertThat(firstCustom.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    assertThat(firstCustom.tokenFilters().length, equalTo(2));
    final StopTokenFilterFactory stopFilter = (StopTokenFilterFactory) firstCustom.tokenFilters()[0];
    assertThat(stopFilter.stopWords().size(), equalTo(1));

    // custom2: only its type is checked
    current = analyzers.get("custom2").analyzer();
    assertThat(current, instanceOf(CustomAnalyzer.class));

    // custom6: position increment gap
    current = analyzers.get("custom6").analyzer();
    assertThat(current, instanceOf(CustomAnalyzer.class));
    final CustomAnalyzer gapAnalyzer = (CustomAnalyzer) current;
    assertThat(gapAnalyzer.getPositionIncrementGap("any_string"), equalTo(256));

    // custom5: character mapping char filter
    current = analyzers.get("custom5").analyzer();
    assertThat(current, instanceOf(CustomAnalyzer.class));
    final CustomAnalyzer mappingAnalyzer = (CustomAnalyzer) current;
    assertThat(mappingAnalyzer.charFilters()[0], instanceOf(MappingCharFilterFactory.class));

    // custom3: pattern-replace char filter with its pattern and replacement
    current = analyzers.get("custom3").analyzer();
    assertThat(current, instanceOf(CustomAnalyzer.class));
    final CustomAnalyzer patternAnalyzer = (CustomAnalyzer) current;
    final PatternReplaceCharFilterFactory replaceFilter = (PatternReplaceCharFilterFactory) patternAnalyzer.charFilters()[0];
    assertThat(replaceFilter.getPattern().pattern(), equalTo("sample(.*)"));
    assertThat(replaceFilter.getReplacement(), equalTo("replacedSample $1"));

    // custom4: custom filter class (my)
    current = analyzers.get("custom4").analyzer();
    assertThat(current, instanceOf(CustomAnalyzer.class));
    final CustomAnalyzer myFilterAnalyzer = (CustomAnalyzer) current;
    assertThat(myFilterAnalyzer.tokenFilters()[0], instanceOf(MyFilterTokenFilterFactory.class));

    // NOTE(review): the checks below are disabled and still refer to an `analysisService`
    // variable that this method does not declare; kept for reference only.
    //        // verify Czech stemmer
    //        analyzer = analysisService.analyzer("czechAnalyzerWithStemmer").analyzer();
    //        assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    //        CustomAnalyzer czechstemmeranalyzer = (CustomAnalyzer) analyzer;
    //        assertThat(czechstemmeranalyzer.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    //        assertThat(czechstemmeranalyzer.tokenFilters().length, equalTo(4));
    //        assertThat(czechstemmeranalyzer.tokenFilters()[3], instanceOf(CzechStemTokenFilterFactory.class));
    //
    //        // check dictionary decompounder
    //        analyzer = analysisService.analyzer("decompoundingAnalyzer").analyzer();
    //        assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    //        CustomAnalyzer dictionaryDecompounderAnalyze = (CustomAnalyzer) analyzer;
    //        assertThat(dictionaryDecompounderAnalyze.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    //        assertThat(dictionaryDecompounderAnalyze.tokenFilters().length, equalTo(1));
    //        assertThat(dictionaryDecompounderAnalyze.tokenFilters()[0], instanceOf(DictionaryCompoundWordTokenFilterFactory.class));

    // Only the number of entries in the word list is verified; the content check is disabled.
    final Set<?> words = Analysis.getWordSet(null, Version.CURRENT, settings, "index.analysis.filter.dict_dec.word_list");
    MatcherAssert.assertThat(words.size(), equalTo(6));
//        MatcherAssert.assertThat(wordList, hasItems("donau", "dampf", "schiff", "spargel", "creme", "suppe"));
}
Also used : StopTokenFilterFactory(org.elasticsearch.index.analysis.StopTokenFilterFactory) CustomAnalyzer(org.elasticsearch.index.analysis.CustomAnalyzer) StandardTokenizerFactory(org.elasticsearch.index.analysis.StandardTokenizerFactory) IndexAnalyzers(org.elasticsearch.index.analysis.IndexAnalyzers) MappingCharFilterFactory(org.elasticsearch.index.analysis.MappingCharFilterFactory) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) GermanAnalyzer(org.apache.lucene.analysis.de.GermanAnalyzer) NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) EnglishAnalyzer(org.apache.lucene.analysis.en.EnglishAnalyzer) CustomAnalyzer(org.elasticsearch.index.analysis.CustomAnalyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) PatternReplaceCharFilterFactory(org.elasticsearch.index.analysis.PatternReplaceCharFilterFactory) MyFilterTokenFilterFactory(org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory)

Example 10 with IndexAnalyzers

use of org.elasticsearch.index.analysis.IndexAnalyzers in project elasticsearch by elastic.

the class MapperTestUtils method newMapperService.

/**
 * Creates a {@code MapperService} for an index named {@code "test"}.
 *
 * <p>The effective settings are the home path (set to {@code tempDir}) overlaid with the
 * caller's {@code settings}; if the caller did not specify an index creation version,
 * {@code Version.CURRENT} is used.
 *
 * @param xContentRegistry registry handed to the created mapper service
 * @param tempDir          directory used as the value of the path-home setting
 * @param settings         caller-supplied settings, applied after the home path
 * @param indicesModule    source of the mapper registry
 * @return a mapper service backed by test analysis and a similarity service with no custom
 *         similarities
 * @throws IOException if creating the test analysis fails
 */
public static MapperService newMapperService(NamedXContentRegistry xContentRegistry, Path tempDir, Settings settings, IndicesModule indicesModule) throws IOException {
    final Settings.Builder builder = Settings.builder();
    builder.put(Environment.PATH_HOME_SETTING.getKey(), tempDir);
    builder.put(settings);

    // Default the creation version only when the caller left it unset.
    final boolean versionUnset = settings.get(IndexMetaData.SETTING_VERSION_CREATED) == null;
    if (versionUnset) {
        builder.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
    }
    final Settings effectiveSettings = builder.build();

    final MapperRegistry registry = indicesModule.getMapperRegistry();
    final IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", effectiveSettings);
    final IndexAnalyzers analyzers = createTestAnalysis(idxSettings, effectiveSettings).indexAnalyzers;
    final SimilarityService similarities = new SimilarityService(idxSettings, Collections.emptyMap());

    // The final constructor argument is a supplier that always yields null.
    final MapperService service = new MapperService(idxSettings, analyzers, xContentRegistry, similarities, registry, () -> null);
    return service;
}
Also used : MapperRegistry(org.elasticsearch.indices.mapper.MapperRegistry) SimilarityService(org.elasticsearch.index.similarity.SimilarityService) IndexAnalyzers(org.elasticsearch.index.analysis.IndexAnalyzers) Settings(org.elasticsearch.common.settings.Settings) MapperService(org.elasticsearch.index.mapper.MapperService)

Aggregations

IndexAnalyzers (org.elasticsearch.index.analysis.IndexAnalyzers) 11; Settings (org.elasticsearch.common.settings.Settings) 8; IndexSettings (org.elasticsearch.index.IndexSettings) 7; Analyzer (org.apache.lucene.analysis.Analyzer) 4; AnalysisRegistry (org.elasticsearch.index.analysis.AnalysisRegistry) 4; NamedAnalyzer (org.elasticsearch.index.analysis.NamedAnalyzer) 4; SimilarityService (org.elasticsearch.index.similarity.SimilarityService) 4; MapperService (org.elasticsearch.index.mapper.MapperService) 3; StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer) 2; MapperRegistry (org.elasticsearch.indices.mapper.MapperRegistry) 2; AbstractMap (java.util.AbstractMap) 1; HashSet (java.util.HashSet) 1; KeywordAnalyzer (org.apache.lucene.analysis.core.KeywordAnalyzer) 1; GermanAnalyzer (org.apache.lucene.analysis.de.GermanAnalyzer) 1; EnglishAnalyzer (org.apache.lucene.analysis.en.EnglishAnalyzer) 1; CompressedXContent (org.elasticsearch.common.compress.CompressedXContent) 1; XContentBuilder (org.elasticsearch.common.xcontent.XContentBuilder) 1; Index (org.elasticsearch.index.Index) 1; CustomAnalyzer (org.elasticsearch.index.analysis.CustomAnalyzer) 1; MappingCharFilterFactory (org.elasticsearch.index.analysis.MappingCharFilterFactory) 1