Search in sources :

Example 1 with AnalysisModule

use of org.elasticsearch.indices.analysis.AnalysisModule in project elasticsearch by elastic.

the class CompoundAnalysisTests method testDefaultsCompoundAnalysis.

/**
 * Checks that the token filter factory registered under {@code "dict_dec"} is a
 * {@link DictionaryCompoundWordTokenFilterFactory} when the analysis registry is built
 * from the settings returned by {@code getJsonSettings()}.
 */
public void testDefaultsCompoundAnalysis() throws Exception {
    final Settings jsonSettings = getJsonSettings();
    final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", jsonSettings);
    final Environment environment = new Environment(jsonSettings);
    // Plugin contributing the additional "myfilter" token filter to the module.
    final AnalysisPlugin myFilterPlugin = new AnalysisPlugin() {

        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
        }
    };
    final AnalysisModule module = new AnalysisModule(environment, singletonList(myFilterPlugin));
    final TokenFilterFactory dictDecFactory = module.getAnalysisRegistry()
        .buildTokenFilterFactories(indexSettings)
        .get("dict_dec");
    MatcherAssert.assertThat(dictDecFactory, instanceOf(DictionaryCompoundWordTokenFilterFactory.class));
}
Also used : IndexSettings(org.elasticsearch.index.IndexSettings) Environment(org.elasticsearch.env.Environment) AnalysisModule(org.elasticsearch.indices.analysis.AnalysisModule) MyFilterTokenFilterFactory(org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory) AnalysisProvider(org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider) Settings(org.elasticsearch.common.settings.Settings) IndexSettings(org.elasticsearch.index.IndexSettings) MyFilterTokenFilterFactory(org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory) DictionaryCompoundWordTokenFilterFactory(org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory) DictionaryCompoundWordTokenFilterFactory(org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory) AnalysisPlugin(org.elasticsearch.plugins.AnalysisPlugin)

Example 2 with AnalysisModule

use of org.elasticsearch.indices.analysis.AnalysisModule in project elasticsearch by elastic.

the class CompoundAnalysisTests method analyze.

/**
 * Runs {@code text} through the named analyzer and collects the terms it emits.
 *
 * @param settings     settings used to build both the index settings and the environment
 * @param analyzerName name of the analyzer to look up in the built index analyzers
 * @param text         text to tokenize
 * @return the term text of every token produced, in the order the stream emitted them
 * @throws IOException if building the analyzers or consuming the token stream fails
 */
private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
    AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() {

        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
        }
    }));
    IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
    Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();
    List<String> terms = new ArrayList<>();
    // Follow the TokenStream consumer workflow: register attributes, reset(), drain with
    // incrementToken(), then end() and close(). try-with-resources guarantees the close
    // even if tokenization throws.
    try (TokenStream stream = AllTokenStream.allTokenStream("_all", text, 1.0f, analyzer)) {
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            terms.add(termAtt.toString());
        }
        stream.end();
    }
    return terms;
}
Also used : AllTokenStream(org.elasticsearch.common.lucene.all.AllTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) IndexSettings(org.elasticsearch.index.IndexSettings) ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) AllEntries(org.elasticsearch.common.lucene.all.AllEntries) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) Environment(org.elasticsearch.env.Environment) AnalysisModule(org.elasticsearch.indices.analysis.AnalysisModule) MyFilterTokenFilterFactory(org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory) AnalysisProvider(org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider) AnalysisPlugin(org.elasticsearch.plugins.AnalysisPlugin)

Example 3 with AnalysisModule

use of org.elasticsearch.indices.analysis.AnalysisModule in project elasticsearch by elastic.

the class ESTestCase method createTestAnalysis.

/**
 * Creates a TestAnalysis with all the default analyzers configured.
 *
 * <p>An {@link AnalysisModule} is created from an {@link Environment} over {@code nodeSettings}
 * and the supplied plugins; its registry then builds the analyzers, token filter factories,
 * tokenizer factories and char filter factories for {@code indexSettings}.
 *
 * @param indexSettings   index settings passed to every registry build call
 * @param nodeSettings    settings used to construct the environment
 * @param analysisPlugins plugins handed to the analysis module
 * @return a TestAnalysis wrapping the four built components
 * @throws IOException declared by the signature; propagated from the registry build calls
 */
public static TestAnalysis createTestAnalysis(IndexSettings indexSettings, Settings nodeSettings, AnalysisPlugin... analysisPlugins) throws IOException {
    final AnalysisRegistry registry =
        new AnalysisModule(new Environment(nodeSettings), Arrays.asList(analysisPlugins)).getAnalysisRegistry();
    // Arguments are evaluated left to right, so the build order matches the parameter order.
    return new TestAnalysis(
        registry.build(indexSettings),
        registry.buildTokenFilterFactories(indexSettings),
        registry.buildTokenizerFactories(indexSettings),
        registry.buildCharFilterFactories(indexSettings));
}
Also used : AnalysisRegistry(org.elasticsearch.index.analysis.AnalysisRegistry) Environment(org.elasticsearch.env.Environment) NodeEnvironment(org.elasticsearch.env.NodeEnvironment) AnalysisModule(org.elasticsearch.indices.analysis.AnalysisModule)

Example 4 with AnalysisModule

use of org.elasticsearch.indices.analysis.AnalysisModule in project crate by crate.

the class ESTestCase method createTestAnalysis.

/**
 * Creates a TestAnalysis with all the default analyzers configured.
 *
 * <p>The environment comes from {@code TestEnvironment.newEnvironment(nodeSettings)}; the
 * analysis registry of an {@link AnalysisModule} built on it produces the analyzers, token
 * filter factories, tokenizer factories and char filter factories for {@code indexSettings}.
 *
 * @param indexSettings   index settings passed to every registry build call
 * @param nodeSettings    settings used to create the test environment
 * @param analysisPlugins plugins handed to the analysis module
 * @return a TestAnalysis wrapping the four built components
 * @throws IOException declared by the signature; propagated from the registry build calls
 */
public static TestAnalysis createTestAnalysis(IndexSettings indexSettings, Settings nodeSettings, AnalysisPlugin... analysisPlugins) throws IOException {
    final Environment testEnvironment = TestEnvironment.newEnvironment(nodeSettings);
    final AnalysisRegistry registry =
        new AnalysisModule(testEnvironment, Arrays.asList(analysisPlugins)).getAnalysisRegistry();
    // Arguments are evaluated left to right, so the build order matches the parameter order.
    return new TestAnalysis(
        registry.build(indexSettings),
        registry.buildTokenFilterFactories(indexSettings),
        registry.buildTokenizerFactories(indexSettings),
        registry.buildCharFilterFactories(indexSettings));
}
Also used : AnalysisRegistry(org.elasticsearch.index.analysis.AnalysisRegistry) Environment(org.elasticsearch.env.Environment) NodeEnvironment(org.elasticsearch.env.NodeEnvironment) TestEnvironment(org.elasticsearch.env.TestEnvironment) AnalysisModule(org.elasticsearch.indices.analysis.AnalysisModule)

Example 5 with AnalysisModule

use of org.elasticsearch.indices.analysis.AnalysisModule in project elasticsearch by elastic.

the class TransportAnalyzeActionTests method setUp.

/**
 * Initializes the {@code environment}, {@code registry} and {@code indexAnalyzers} fields.
 *
 * <p>The node settings only set the home path to a fresh temp directory. The index settings
 * declare a {@code wordDelimiter} filter, a {@code custom_analyzer} analyzer, a
 * {@code trigram} tokenizer and a {@code synonym} filter, which are then built through the
 * analysis registry of an {@link AnalysisModule} created without plugins.
 *
 * @throws Exception if the superclass set-up or building the analyzers fails
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    Settings settings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();
    Settings indexSettings = Settings.builder()
        .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
        .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
        // "wordDelimiter" token filter
        .put("index.analysis.filter.wordDelimiter.type", "word_delimiter")
        .put("index.analysis.filter.wordDelimiter.split_on_numerics", false)
        // "custom_analyzer" analyzer; these two entries used to be registered twice in a row
        // with identical values, the repeat was dropped.
        .put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
        .putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
        // "trigram" tokenizer
        .put("index.analysis.tokenizer.trigram.type", "ngram")
        .put("index.analysis.tokenizer.trigram.min_gram", 3)
        .put("index.analysis.tokenizer.trigram.max_gram", 3)
        // "synonym" token filter
        .put("index.analysis.filter.synonym.type", "synonym")
        .putArray("index.analysis.filter.synonym.synonyms", "kimchy => shay")
        .put("index.analysis.filter.synonym.tokenizer", "trigram")
        .put("index.analysis.filter.synonym.min_gram", 3)
        .put("index.analysis.filter.synonym.max_gram", 3)
        .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
    environment = new Environment(settings);
    registry = new AnalysisModule(environment, emptyList()).getAnalysisRegistry();
    indexAnalyzers = registry.build(idxSettings);
}
Also used : IndexSettings(org.elasticsearch.index.IndexSettings) Environment(org.elasticsearch.env.Environment) AnalysisModule(org.elasticsearch.indices.analysis.AnalysisModule) Settings(org.elasticsearch.common.settings.Settings) IndexSettings(org.elasticsearch.index.IndexSettings)

Aggregations

Environment (org.elasticsearch.env.Environment): 8, AnalysisModule (org.elasticsearch.indices.analysis.AnalysisModule): 8, IndexSettings (org.elasticsearch.index.IndexSettings): 6, Settings (org.elasticsearch.common.settings.Settings): 4, ArrayList (java.util.ArrayList): 2, TokenStream (org.apache.lucene.analysis.TokenStream): 2, CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute): 2, NodeEnvironment (org.elasticsearch.env.NodeEnvironment): 2, AnalysisRegistry (org.elasticsearch.index.analysis.AnalysisRegistry): 2, MyFilterTokenFilterFactory (org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory): 2, AnalysisProvider (org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider): 2, AnalysisPlugin (org.elasticsearch.plugins.AnalysisPlugin): 2, Analyzer (org.apache.lucene.analysis.Analyzer): 1, AllEntries (org.elasticsearch.common.lucene.all.AllEntries): 1, AllTokenStream (org.elasticsearch.common.lucene.all.AllTokenStream): 1, TestEnvironment (org.elasticsearch.env.TestEnvironment): 1, DictionaryCompoundWordTokenFilterFactory (org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory): 1