Search in sources :

Example 6 with AnalysisModule

use of org.opensearch.indices.analysis.AnalysisModule in project OpenSearch by opensearch-project.

the class MultiplexerTokenFilterTests method testMultiplexingFilter.

/**
 * Checks the token stream produced by a {@code multiplexer} token filter that is configured
 * with two branches: {@code "lowercase, t"} (where {@code t} is a {@code truncate} filter of
 * length 2) and {@code "uppercase"}.
 *
 * <p>The analyzer under test is a {@code custom} analyzer using the {@code standard} tokenizer
 * followed by the multiplexer. Each assertion supplies the expected terms together with one
 * position increment per expected term.
 *
 * @throws IOException declared by the analysis/test helpers invoked here
 */
public void testMultiplexingFilter() throws IOException {
    Settings settings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();
    Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put("index.analysis.filter.t.type", "truncate")
        .put("index.analysis.filter.t.length", "2")
        .put("index.analysis.filter.multiplexFilter.type", "multiplexer")
        .putList("index.analysis.filter.multiplexFilter.filters", "lowercase, t", "uppercase")
        .put("index.analysis.analyzer.myAnalyzer.type", "custom")
        .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
        .putList("index.analysis.analyzer.myAnalyzer.filter", "multiplexFilter")
        .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
    IndexAnalyzers indexAnalyzers = new AnalysisModule(
        TestEnvironment.newEnvironment(settings),
        Collections.singletonList(new CommonAnalysisPlugin())
    ).getAnalysisRegistry().build(idxSettings);
    try (NamedAnalyzer analyzer = indexAnalyzers.get("myAnalyzer")) {
        assertNotNull(analyzer);
        // Each input word yields the original term plus one term per multiplexer branch,
        // all stacked at the same position (increment 0 after the first).
        assertAnalyzesTo(
            analyzer,
            "ONe tHree",
            new String[] { "ONe", "on", "ONE", "tHree", "th", "THREE" },
            new int[] { 1, 0, 0, 1, 0, 0 }
        );
        // Duplicates are removed: "THREE" is already upper case, so only five terms are
        // expected and the increments array carries exactly one entry per term.
        assertAnalyzesTo(
            analyzer,
            "ONe THREE",
            new String[] { "ONe", "on", "ONE", "THREE", "th" },
            new int[] { 1, 0, 0, 1, 0 }
        );
    }
}
Also used : NamedAnalyzer(org.opensearch.index.analysis.NamedAnalyzer) IndexSettings(org.opensearch.index.IndexSettings) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) AnalysisModule(org.opensearch.indices.analysis.AnalysisModule) Settings(org.opensearch.common.settings.Settings)

Example 7 with AnalysisModule

use of org.opensearch.indices.analysis.AnalysisModule in project OpenSearch by opensearch-project.

the class MultiplexerTokenFilterTests method testMultiplexingNoOriginal.

/**
 * Exercises the {@code multiplexer} token filter with {@code preserve_original} set to
 * {@code "false"}.
 *
 * <p>The multiplexer has the branches {@code "lowercase, t"} ({@code t} being a
 * {@code truncate} filter of length 2) and {@code "uppercase"}. The expected output for
 * {@code "ONe tHree"} contains only the branch outputs, not the original terms.
 *
 * @throws IOException declared by the analysis/test helpers invoked here
 */
public void testMultiplexingNoOriginal() throws IOException {
    // Node-level settings: only the home path is required to build an Environment.
    Settings.Builder nodeBuilder = Settings.builder();
    nodeBuilder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString());
    Settings nodeSettings = nodeBuilder.build();

    // Index-level analysis configuration: truncate filter, multiplexer, custom analyzer.
    Settings.Builder analysisBuilder = Settings.builder();
    analysisBuilder.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT);
    analysisBuilder.put("index.analysis.filter.t.type", "truncate");
    analysisBuilder.put("index.analysis.filter.t.length", "2");
    analysisBuilder.put("index.analysis.filter.multiplexFilter.type", "multiplexer");
    analysisBuilder.put("index.analysis.filter.multiplexFilter.preserve_original", "false");
    analysisBuilder.putList("index.analysis.filter.multiplexFilter.filters", "lowercase, t", "uppercase");
    analysisBuilder.put("index.analysis.analyzer.myAnalyzer.type", "custom");
    analysisBuilder.put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard");
    analysisBuilder.putList("index.analysis.analyzer.myAnalyzer.filter", "multiplexFilter");
    Settings analysisSettings = analysisBuilder.build();

    IndexSettings indexScoped = IndexSettingsModule.newIndexSettings("index", analysisSettings);

    Environment environment = TestEnvironment.newEnvironment(nodeSettings);
    AnalysisModule analysisModule = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisPlugin()));
    IndexAnalyzers builtAnalyzers = analysisModule.getAnalysisRegistry().build(indexScoped);

    try (NamedAnalyzer myAnalyzer = builtAnalyzers.get("myAnalyzer")) {
        assertNotNull(myAnalyzer);
        String[] expectedTerms = new String[] { "on", "ONE", "th", "THREE" };
        int[] expectedIncrements = new int[] { 1, 0, 1, 0 };
        assertAnalyzesTo(myAnalyzer, "ONe tHree", expectedTerms, expectedIncrements);
    }
}
Also used : NamedAnalyzer(org.opensearch.index.analysis.NamedAnalyzer) IndexSettings(org.opensearch.index.IndexSettings) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) AnalysisModule(org.opensearch.indices.analysis.AnalysisModule) Settings(org.opensearch.common.settings.Settings)

Example 8 with AnalysisModule

use of org.opensearch.indices.analysis.AnalysisModule in project OpenSearch by opensearch-project.

the class PredicateTokenScriptFilterTests method testSimpleFilter.

/**
 * Checks a {@code predicate_token_filter} whose script source is {@code "my_script"}.
 *
 * <p>No real script engine is involved: a {@link ScriptService} subclass overrides
 * {@code compile} to verify the requested context and script and to hand back a stub
 * factory. The stub accepts a token when its position is below 2 or above 4; for the
 * input sentence the expected output keeps the first two and the last two words.
 *
 * @throws IOException declared by the analysis/test helpers invoked here
 */
public void testSimpleFilter() throws IOException {
    Settings settings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();
    Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put("index.analysis.filter.f.type", "predicate_token_filter")
        .put("index.analysis.filter.f.script.source", "my_script")
        .put("index.analysis.analyzer.myAnalyzer.type", "custom")
        .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
        .putList("index.analysis.analyzer.myAnalyzer.filter", "f")
        .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
    // Stub predicate standing in for the compiled "my_script".
    AnalysisPredicateScript.Factory factory = () -> new AnalysisPredicateScript() {

        @Override
        public boolean execute(Token token) {
            return token.getPosition() < 2 || token.getPosition() > 4;
        }
    };
    @SuppressWarnings("unchecked") ScriptService scriptService = new ScriptService(indexSettings, Collections.emptyMap(), Collections.emptyMap()) {

        @Override
        public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryType> context) {
            // Expected value first, actual second, so a failure message reads correctly.
            assertEquals(AnalysisPredicateScript.CONTEXT, context);
            assertEquals(new Script("my_script"), script);
            // Unchecked cast: the context assertion above pins FactoryType for this test.
            return (FactoryType) factory;
        }
    };
    CommonAnalysisPlugin plugin = new CommonAnalysisPlugin();
    // Only the script service argument is supplied; all other components are passed as null.
    plugin.createComponents(null, null, null, null, scriptService, null, null, null, null, null, null);
    AnalysisModule module = new AnalysisModule(TestEnvironment.newEnvironment(settings), Collections.singletonList(plugin));
    IndexAnalyzers analyzers = module.getAnalysisRegistry().build(idxSettings);
    try (NamedAnalyzer analyzer = analyzers.get("myAnalyzer")) {
        assertNotNull(analyzer);
        assertAnalyzesTo(analyzer, "Oh what a wonderful thing to be", new String[] { "Oh", "what", "to", "be" });
    }
}
Also used : Script(org.opensearch.script.Script) NamedAnalyzer(org.opensearch.index.analysis.NamedAnalyzer) IndexSettings(org.opensearch.index.IndexSettings) ScriptContext(org.opensearch.script.ScriptContext) ScriptService(org.opensearch.script.ScriptService) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) AnalysisModule(org.opensearch.indices.analysis.AnalysisModule) Settings(org.opensearch.common.settings.Settings)

Example 9 with AnalysisModule

use of org.opensearch.indices.analysis.AnalysisModule in project OpenSearch by opensearch-project.

the class ScriptedConditionTokenFilterTests method testSimpleCondition.

/**
 * Checks a {@code condition} token filter that applies {@code uppercase} only to tokens
 * accepted by its script, whose source is {@code "token.getPosition() > 1"}.
 *
 * <p>No real script engine is involved: a {@link ScriptService} subclass overrides
 * {@code compile} to verify the requested context and script and to hand back a stub
 * factory implementing the same predicate in Java. For the three-word input, only the
 * last word is expected to be upper-cased.
 *
 * @throws Exception declared by the analysis/test helpers invoked here
 */
public void testSimpleCondition() throws Exception {
    Settings settings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();
    Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put("index.analysis.filter.cond.type", "condition")
        .put("index.analysis.filter.cond.script.source", "token.getPosition() > 1")
        .putList("index.analysis.filter.cond.filter", "uppercase")
        .put("index.analysis.analyzer.myAnalyzer.type", "custom")
        .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
        .putList("index.analysis.analyzer.myAnalyzer.filter", "cond")
        .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
    // Java stand-in for the configured script source.
    AnalysisPredicateScript.Factory factory = () -> new AnalysisPredicateScript() {

        @Override
        public boolean execute(Token token) {
            return token.getPosition() > 1;
        }
    };
    @SuppressWarnings("unchecked") ScriptService scriptService = new ScriptService(indexSettings, Collections.emptyMap(), Collections.emptyMap()) {

        @Override
        public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryType> context) {
            // Expected value first, actual second, so a failure message reads correctly.
            assertEquals(AnalysisPredicateScript.CONTEXT, context);
            assertEquals(new Script("token.getPosition() > 1"), script);
            // Unchecked cast: the context assertion above pins FactoryType for this test.
            return (FactoryType) factory;
        }
    };
    CommonAnalysisPlugin plugin = new CommonAnalysisPlugin();
    // Only the script service argument is supplied; all other components are passed as null.
    plugin.createComponents(null, null, null, null, scriptService, null, null, null, null, null, null);
    AnalysisModule module = new AnalysisModule(TestEnvironment.newEnvironment(settings), Collections.singletonList(plugin));
    IndexAnalyzers analyzers = module.getAnalysisRegistry().build(idxSettings);
    try (NamedAnalyzer analyzer = analyzers.get("myAnalyzer")) {
        assertNotNull(analyzer);
        assertAnalyzesTo(analyzer, "Vorsprung Durch Technik", new String[] { "Vorsprung", "Durch", "TECHNIK" });
    }
}
Also used : Script(org.opensearch.script.Script) NamedAnalyzer(org.opensearch.index.analysis.NamedAnalyzer) IndexSettings(org.opensearch.index.IndexSettings) ScriptContext(org.opensearch.script.ScriptContext) ScriptService(org.opensearch.script.ScriptService) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) AnalysisModule(org.opensearch.indices.analysis.AnalysisModule) Settings(org.opensearch.common.settings.Settings)

Example 10 with AnalysisModule

use of org.opensearch.indices.analysis.AnalysisModule in project OpenSearch by opensearch-project.

the class CompoundAnalysisTests method testDefaultsCompoundAnalysis.

/**
 * Asserts that the token filter registered under the name {@code "dict_dec"} is built as a
 * {@link DictionaryCompoundWordTokenFilterFactory}.
 *
 * <p>The settings come from {@code getJsonSettings()} and the analysis module from
 * {@code createAnalysisModule(...)}, both defined elsewhere in the test class.
 *
 * @throws Exception declared by the helpers invoked here
 */
public void testDefaultsCompoundAnalysis() throws Exception {
    final Settings jsonSettings = getJsonSettings();
    final IndexSettings indexScoped = IndexSettingsModule.newIndexSettings("test", jsonSettings);
    final AnalysisModule module = createAnalysisModule(jsonSettings);

    // Build every configured token filter factory, then pick out the one under test.
    final TokenFilterFactory dictDecFactory = module.getAnalysisRegistry()
        .buildTokenFilterFactories(indexScoped)
        .get("dict_dec");

    MatcherAssert.assertThat(dictDecFactory, instanceOf(DictionaryCompoundWordTokenFilterFactory.class));
}
Also used : IndexSettings(org.opensearch.index.IndexSettings) AnalysisModule(org.opensearch.indices.analysis.AnalysisModule) Settings(org.opensearch.common.settings.Settings) TokenFilterFactory(org.opensearch.index.analysis.TokenFilterFactory) MyFilterTokenFilterFactory(org.opensearch.index.analysis.MyFilterTokenFilterFactory)

Aggregations

AnalysisModule (org.opensearch.indices.analysis.AnalysisModule)13 IndexSettings (org.opensearch.index.IndexSettings)12 Settings (org.opensearch.common.settings.Settings)11 IndexAnalyzers (org.opensearch.index.analysis.IndexAnalyzers)6 Environment (org.opensearch.env.Environment)5 TokenStream (org.apache.lucene.analysis.TokenStream)4 TestEnvironment (org.opensearch.env.TestEnvironment)4 NamedAnalyzer (org.opensearch.index.analysis.NamedAnalyzer)4 AnalysisProvider (org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider)3 AnalysisPlugin (org.opensearch.plugins.AnalysisPlugin)3 HashMap (java.util.HashMap)2 MockTokenFilter (org.apache.lucene.analysis.MockTokenFilter)2 CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute)2 AnalysisRegistry (org.opensearch.index.analysis.AnalysisRegistry)2 TokenFilterFactory (org.opensearch.index.analysis.TokenFilterFactory)2 Script (org.opensearch.script.Script)2 ScriptContext (org.opensearch.script.ScriptContext)2 ScriptService (org.opensearch.script.ScriptService)2 IOException (java.io.IOException)1 Reader (java.io.Reader)1