Use of org.opensearch.indices.analysis.AnalysisModule in the OpenSearch project by opensearch-project.
From the class MultiplexerTokenFilterTests, method testMultiplexingFilter:
/**
 * Checks the token stream produced by a {@code multiplexer} filter configured with two branches:
 * {@code "lowercase, t"} (where {@code t} is a {@code truncate} filter of length 2) and
 * {@code "uppercase"}.
 *
 * <p>{@code preserve_original} is not set in this test, and the expected output contains the
 * untouched token followed by one token per branch, stacked on the same position
 * (position increments 1, 0, 0).
 *
 * @throws IOException propagated from building the analyzers or from the token stream assertions
 */
public void testMultiplexingFilter() throws IOException {
    Settings settings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();
    Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put("index.analysis.filter.t.type", "truncate")
        .put("index.analysis.filter.t.length", "2")
        .put("index.analysis.filter.multiplexFilter.type", "multiplexer")
        .putList("index.analysis.filter.multiplexFilter.filters", "lowercase, t", "uppercase")
        .put("index.analysis.analyzer.myAnalyzer.type", "custom")
        .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
        .putList("index.analysis.analyzer.myAnalyzer.filter", "multiplexFilter")
        .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);

    IndexAnalyzers indexAnalyzers = new AnalysisModule(
        TestEnvironment.newEnvironment(settings),
        Collections.singletonList(new CommonAnalysisPlugin())
    ).getAnalysisRegistry().build(idxSettings);

    try (NamedAnalyzer analyzer = indexAnalyzers.get("myAnalyzer")) {
        assertNotNull(analyzer);
        assertAnalyzesTo(
            analyzer,
            "ONe tHree",
            new String[] { "ONe", "on", "ONE", "tHree", "th", "THREE" },
            new int[] { 1, 0, 0, 1, 0, 0 }
        );
        // Duplicates are removed: the uppercase branch yields "THREE", identical to the original
        // token, so only five tokens are expected. The increments array therefore has five
        // entries, one per expected token.
        assertAnalyzesTo(
            analyzer,
            "ONe THREE",
            new String[] { "ONe", "on", "ONE", "THREE", "th" },
            new int[] { 1, 0, 0, 1, 0 }
        );
    }
}
Use of org.opensearch.indices.analysis.AnalysisModule in the OpenSearch project by opensearch-project.
From the class MultiplexerTokenFilterTests, method testMultiplexingNoOriginal:
/**
 * Runs a {@code multiplexer} filter with {@code preserve_original} set to {@code "false"} and two
 * branches ({@code "lowercase, t"} and {@code "uppercase"}), and asserts that only the branch
 * outputs are emitted: two stacked tokens per input word.
 *
 * @throws IOException propagated from building the analyzers or from the token stream assertion
 */
public void testMultiplexingNoOriginal() throws IOException {
    final String homeDir = createTempDir().toString();
    final Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), homeDir).build();

    final Settings analysisSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put("index.analysis.filter.t.type", "truncate")
        .put("index.analysis.filter.t.length", "2")
        .put("index.analysis.filter.multiplexFilter.type", "multiplexer")
        .put("index.analysis.filter.multiplexFilter.preserve_original", "false")
        .putList("index.analysis.filter.multiplexFilter.filters", "lowercase, t", "uppercase")
        .put("index.analysis.analyzer.myAnalyzer.type", "custom")
        .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
        .putList("index.analysis.analyzer.myAnalyzer.filter", "multiplexFilter")
        .build();
    final IndexSettings indexScoped = IndexSettingsModule.newIndexSettings("index", analysisSettings);

    final Environment environment = TestEnvironment.newEnvironment(nodeSettings);
    final AnalysisModule analysisModule = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisPlugin()));
    final IndexAnalyzers builtAnalyzers = analysisModule.getAnalysisRegistry().build(indexScoped);

    // One lowercase+truncated token and one uppercase token per word; no original token.
    final String[] expectedTerms = { "on", "ONE", "th", "THREE" };
    final int[] expectedIncrements = { 1, 0, 1, 0 };

    try (NamedAnalyzer custom = builtAnalyzers.get("myAnalyzer")) {
        assertNotNull(custom);
        assertAnalyzesTo(custom, "ONe tHree", expectedTerms, expectedIncrements);
    }
}
Use of org.opensearch.indices.analysis.AnalysisModule in the OpenSearch project by opensearch-project.
From the class PredicateTokenScriptFilterTests, method testSimpleFilter:
/**
 * Exercises the {@code predicate_token_filter} with a stubbed script: tokens are kept only when
 * their position is below 2 or above 4, so the seven-word sample sentence is reduced to its first
 * two and last two words.
 *
 * <p>The {@link ScriptService} is replaced by an anonymous subclass whose {@code compile} method
 * checks the requested context and script, then hands back the hard-coded predicate factory
 * instead of compiling anything.
 *
 * @throws IOException propagated from building the analyzers or from the token stream assertion
 */
public void testSimpleFilter() throws IOException {
    Settings settings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();
    Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put("index.analysis.filter.f.type", "predicate_token_filter")
        .put("index.analysis.filter.f.script.source", "my_script")
        .put("index.analysis.analyzer.myAnalyzer.type", "custom")
        .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
        .putList("index.analysis.analyzer.myAnalyzer.filter", "f")
        .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);

    // Stand-in for the compiled "my_script": keep tokens at positions 0, 1 and anything past 4.
    AnalysisPredicateScript.Factory factory = () -> new AnalysisPredicateScript() {
        @Override
        public boolean execute(Token token) {
            return token.getPosition() < 2 || token.getPosition() > 4;
        }
    };

    @SuppressWarnings("unchecked")
    ScriptService scriptService = new ScriptService(indexSettings, Collections.emptyMap(), Collections.emptyMap()) {
        @Override
        public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryType> context) {
            // Expected value goes first so a failure message labels the two sides correctly.
            assertEquals(AnalysisPredicateScript.CONTEXT, context);
            assertEquals(new Script("my_script"), script);
            return (FactoryType) factory;
        }
    };

    CommonAnalysisPlugin plugin = new CommonAnalysisPlugin();
    // Only the script service argument is supplied; every other component is passed as null.
    plugin.createComponents(null, null, null, null, scriptService, null, null, null, null, null, null);
    AnalysisModule module = new AnalysisModule(TestEnvironment.newEnvironment(settings), Collections.singletonList(plugin));
    IndexAnalyzers analyzers = module.getAnalysisRegistry().build(idxSettings);

    try (NamedAnalyzer analyzer = analyzers.get("myAnalyzer")) {
        assertNotNull(analyzer);
        assertAnalyzesTo(analyzer, "Oh what a wonderful thing to be", new String[] { "Oh", "what", "to", "be" });
    }
}
Use of org.opensearch.indices.analysis.AnalysisModule in the OpenSearch project by opensearch-project.
From the class ScriptedConditionTokenFilterTests, method testSimpleCondition:
/**
 * Exercises the {@code condition} token filter: the wrapped {@code uppercase} filter is applied
 * only to tokens for which the stubbed predicate ({@code token.getPosition() > 1}) returns true,
 * so only the third word of the sample text is upper-cased.
 *
 * <p>The {@link ScriptService} is replaced by an anonymous subclass whose {@code compile} method
 * checks the requested context and script, then hands back the hard-coded predicate factory
 * instead of compiling anything.
 *
 * @throws Exception propagated from building the analyzers or from the token stream assertion
 */
public void testSimpleCondition() throws Exception {
    Settings settings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();
    Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put("index.analysis.filter.cond.type", "condition")
        .put("index.analysis.filter.cond.script.source", "token.getPosition() > 1")
        .putList("index.analysis.filter.cond.filter", "uppercase")
        .put("index.analysis.analyzer.myAnalyzer.type", "custom")
        .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
        .putList("index.analysis.analyzer.myAnalyzer.filter", "cond")
        .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);

    // Java stand-in mirroring the configured script source "token.getPosition() > 1".
    AnalysisPredicateScript.Factory factory = () -> new AnalysisPredicateScript() {
        @Override
        public boolean execute(Token token) {
            return token.getPosition() > 1;
        }
    };

    @SuppressWarnings("unchecked")
    ScriptService scriptService = new ScriptService(indexSettings, Collections.emptyMap(), Collections.emptyMap()) {
        @Override
        public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryType> context) {
            // Expected value goes first so a failure message labels the two sides correctly.
            assertEquals(AnalysisPredicateScript.CONTEXT, context);
            assertEquals(new Script("token.getPosition() > 1"), script);
            return (FactoryType) factory;
        }
    };

    CommonAnalysisPlugin plugin = new CommonAnalysisPlugin();
    // Only the script service argument is supplied; every other component is passed as null.
    plugin.createComponents(null, null, null, null, scriptService, null, null, null, null, null, null);
    AnalysisModule module = new AnalysisModule(TestEnvironment.newEnvironment(settings), Collections.singletonList(plugin));
    IndexAnalyzers analyzers = module.getAnalysisRegistry().build(idxSettings);

    try (NamedAnalyzer analyzer = analyzers.get("myAnalyzer")) {
        assertNotNull(analyzer);
        assertAnalyzesTo(analyzer, "Vorsprung Durch Technik", new String[] { "Vorsprung", "Durch", "TECHNIK" });
    }
}
Use of org.opensearch.indices.analysis.AnalysisModule in the OpenSearch project by opensearch-project.
From the class CompoundAnalysisTests, method testDefaultsCompoundAnalysis:
/**
 * Builds the token filter factories from the JSON-based test settings and asserts that the
 * filter registered under {@code "dict_dec"} is a {@link DictionaryCompoundWordTokenFilterFactory}.
 *
 * @throws Exception propagated from loading the settings or building the filter factories
 */
public void testDefaultsCompoundAnalysis() throws Exception {
    final Settings jsonSettings = getJsonSettings();
    final IndexSettings indexScoped = IndexSettingsModule.newIndexSettings("test", jsonSettings);
    final AnalysisModule module = createAnalysisModule(jsonSettings);

    // Look up the factory by the name used in the test settings.
    final TokenFilterFactory dictDecompounder = module.getAnalysisRegistry()
        .buildTokenFilterFactories(indexScoped)
        .get("dict_dec");

    MatcherAssert.assertThat(dictDecompounder, instanceOf(DictionaryCompoundWordTokenFilterFactory.class));
}
Aggregations