Search in sources :

Example 6 with AnalysisModule

use of org.elasticsearch.indices.analysis.AnalysisModule in project elasticsearch by elastic.

the class AnalysisRegistryTests method testConfigureCamelCaseTokenFilter.

/**
 * Checks that a token filter registered under a camelCase name ({@code wordDelimiter}) is
 * resolved with its own settings, and that it stays distinct from the filter referenced as
 * {@code word_delimiter}, a name that is not defined in the index settings.
 *
 * <p>{@code custom_analyzer} uses {@code wordDelimiter} with {@code split_on_numerics=false},
 * so "J2SE j2ee" is expected to yield two lower-cased tokens. {@code custom_analyzer_1} uses
 * {@code word_delimiter} and is expected to yield six tokens, split on the digit.
 *
 * @throws IOException if building the analyzers or consuming a token stream fails
 */
public void testConfigureCamelCaseTokenFilter() throws IOException {
    Settings settings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();
    Settings indexSettings = Settings.builder()
        .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
        .put("index.analysis.filter.wordDelimiter.type", "word_delimiter")
        .put("index.analysis.filter.wordDelimiter.split_on_numerics", false)
        .put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
        .putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
        .put("index.analysis.analyzer.custom_analyzer_1.tokenizer", "whitespace")
        .putArray("index.analysis.analyzer.custom_analyzer_1.filter", "lowercase", "word_delimiter")
        .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
    IndexAnalyzers indexAnalyzers = new AnalysisModule(new Environment(settings), emptyList()).getAnalysisRegistry().build(idxSettings);

    // Analyzer wired to the camelCase-named filter: digits must not split the words.
    try (NamedAnalyzer customAnalyzer = indexAnalyzers.get("custom_analyzer")) {
        assertNotNull(customAnalyzer);
        List<String> tokens = collectTerms(customAnalyzer, "foo", "J2SE j2ee");
        assertEquals(tokens.toString(), 2, tokens.size());
        assertEquals("j2se", tokens.get(0));
        assertEquals("j2ee", tokens.get(1));
    }

    // Analyzer wired to the "word_delimiter" name: each word is split around the digit.
    try (NamedAnalyzer customAnalyzer = indexAnalyzers.get("custom_analyzer_1")) {
        assertNotNull(customAnalyzer);
        List<String> tokens = collectTerms(customAnalyzer, "foo", "J2SE j2ee");
        assertEquals(tokens.toString(), 6, tokens.size());
        assertEquals("j", tokens.get(0));
        assertEquals("2", tokens.get(1));
        assertEquals("se", tokens.get(2));
        assertEquals("j", tokens.get(3));
        assertEquals("2", tokens.get(4));
        assertEquals("ee", tokens.get(5));
    }
}

/**
 * Runs {@code text} through {@code analyzer} and returns the emitted terms in order.
 *
 * <p>The stream is consumed with the full reset / incrementToken / end / close sequence so
 * that it is always released, even when consumption throws.
 *
 * @param analyzer the analyzer under test
 * @param field    the field name passed to {@code tokenStream}
 * @param text     the text to analyze
 * @return the terms produced by the analyzer, in emission order
 * @throws IOException if the token stream fails while being consumed
 */
private static List<String> collectTerms(NamedAnalyzer analyzer, String field, String text) throws IOException {
    List<String> terms = new ArrayList<>();
    try (TokenStream stream = analyzer.tokenStream(field, text)) {
        CharTermAttribute termAttribute = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            terms.add(termAttribute.toString());
        }
        // Signals end-of-stream to the filter chain before the stream is closed.
        stream.end();
    }
    return terms;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) IndexSettings(org.elasticsearch.index.IndexSettings) ArrayList(java.util.ArrayList) Environment(org.elasticsearch.env.Environment) AnalysisModule(org.elasticsearch.indices.analysis.AnalysisModule) Settings(org.elasticsearch.common.settings.Settings)

Example 7 with AnalysisModule

use of org.elasticsearch.indices.analysis.AnalysisModule in project elasticsearch by elastic.

the class AnalysisTestsHelper method createTestAnalysisFromSettings.

/**
 * Builds a {@code TestAnalysis} from the given settings.
 *
 * <p>When the settings carry no {@code IndexMetaData.SETTING_VERSION_CREATED} entry, a copy
 * with that entry set to {@code Version.CURRENT} is used instead. The resulting settings feed
 * both the index settings (index name "test") and the {@code Environment} handed to the
 * {@code AnalysisModule}.
 *
 * @param settings the node/index settings to build the analysis components from
 * @return a test analysis holding the index analyzers and the token filter, tokenizer and
 *         char filter factories built by the analysis registry
 * @throws IOException if the analysis module or any analysis component cannot be built
 */
public static ESTestCase.TestAnalysis createTestAnalysisFromSettings(Settings settings) throws IOException {
    final boolean versionMissing = settings.get(IndexMetaData.SETTING_VERSION_CREATED) == null;
    final Settings effectiveSettings = versionMissing
        ? Settings.builder().put(settings).put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build()
        : settings;

    final IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", effectiveSettings);
    final Environment environment = new Environment(effectiveSettings);
    final AnalysisModule module = new AnalysisModule(environment, emptyList());
    final AnalysisRegistry registry = module.getAnalysisRegistry();

    return new ESTestCase.TestAnalysis(
        registry.build(idxSettings),
        registry.buildTokenFilterFactories(idxSettings),
        registry.buildTokenizerFactories(idxSettings),
        registry.buildCharFilterFactories(idxSettings));
}
Also used : IndexSettings(org.elasticsearch.index.IndexSettings) Environment(org.elasticsearch.env.Environment) AnalysisModule(org.elasticsearch.indices.analysis.AnalysisModule)

Example 8 with AnalysisModule

use of org.elasticsearch.indices.analysis.AnalysisModule in project crate by crate.

the class AnalysisTestsHelper method createTestAnalysisFromSettings.

/**
 * Builds a {@code TestAnalysis} from the given settings, config path and analysis plugins.
 *
 * <p>When the settings carry no {@code IndexMetadata.SETTING_VERSION_CREATED} entry, a copy
 * with that entry set to {@code Version.CURRENT} is used; otherwise the settings are used
 * unchanged. The resulting settings feed both the index settings (index name "test") and the
 * {@code Environment}.
 *
 * @param settings   the settings to build the analysis components from
 * @param configPath the config path passed to the {@code Environment}
 * @param plugins    the analysis plugins to register with the {@code AnalysisModule}
 * @return a test analysis holding the index analyzers and the token filter, tokenizer and
 *         char filter factories built by the analysis registry
 * @throws IOException if the analysis module or any analysis component cannot be built
 */
public static ESTestCase.TestAnalysis createTestAnalysisFromSettings(final Settings settings, final Path configPath, final AnalysisPlugin... plugins) throws IOException {
    final boolean hasCreatedVersion = settings.get(IndexMetadata.SETTING_VERSION_CREATED) != null;
    final Settings effectiveSettings = hasCreatedVersion
        ? settings
        : Settings.builder().put(settings).put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();

    final IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", effectiveSettings);
    final Environment environment = new Environment(effectiveSettings, configPath);
    final AnalysisModule module = new AnalysisModule(environment, Arrays.asList(plugins));
    final AnalysisRegistry registry = module.getAnalysisRegistry();

    return new ESTestCase.TestAnalysis(
        registry.build(idxSettings),
        registry.buildTokenFilterFactories(idxSettings),
        registry.buildTokenizerFactories(idxSettings),
        registry.buildCharFilterFactories(idxSettings));
}
Also used : IndexSettings(org.elasticsearch.index.IndexSettings) Environment(org.elasticsearch.env.Environment) AnalysisModule(org.elasticsearch.indices.analysis.AnalysisModule) Settings(org.elasticsearch.common.settings.Settings)

Aggregations

Environment (org.elasticsearch.env.Environment)8 AnalysisModule (org.elasticsearch.indices.analysis.AnalysisModule)8 IndexSettings (org.elasticsearch.index.IndexSettings)6 Settings (org.elasticsearch.common.settings.Settings)4 ArrayList (java.util.ArrayList)2 TokenStream (org.apache.lucene.analysis.TokenStream)2 CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute)2 NodeEnvironment (org.elasticsearch.env.NodeEnvironment)2 AnalysisRegistry (org.elasticsearch.index.analysis.AnalysisRegistry)2 MyFilterTokenFilterFactory (org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory)2 AnalysisProvider (org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider)2 AnalysisPlugin (org.elasticsearch.plugins.AnalysisPlugin)2 Analyzer (org.apache.lucene.analysis.Analyzer)1 AllEntries (org.elasticsearch.common.lucene.all.AllEntries)1 AllTokenStream (org.elasticsearch.common.lucene.all.AllTokenStream)1 TestEnvironment (org.elasticsearch.env.TestEnvironment)1 DictionaryCompoundWordTokenFilterFactory (org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory)1