use of org.opensearch.index.analysis.PreConfiguredCharFilter in project OpenSearch by opensearch-project.
the class AnalysisModuleTests method testPluginPreConfiguredCharFilters.
/**
* Tests that plugins can register pre-configured char filters that vary in behavior based on OpenSearch version, Lucene version,
* and that do not vary based on version at all.
*/
public void testPluginPreConfiguredCharFilters() throws IOException {
boolean noVersionSupportsMultiTerm = randomBoolean();
boolean luceneVersionSupportsMultiTerm = randomBoolean();
boolean opensearchVersionSupportsMultiTerm = randomBoolean();
AnalysisRegistry registry = new AnalysisModule(TestEnvironment.newEnvironment(emptyNodeSettings), singletonList(new AnalysisPlugin() {
@Override
public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
return Arrays.asList(PreConfiguredCharFilter.singleton("no_version", noVersionSupportsMultiTerm, tokenStream -> new AppendCharFilter(tokenStream, "no_version")), PreConfiguredCharFilter.luceneVersion("lucene_version", luceneVersionSupportsMultiTerm, (tokenStream, luceneVersion) -> new AppendCharFilter(tokenStream, luceneVersion.toString())), PreConfiguredCharFilter.openSearchVersion("opensearch_version", opensearchVersionSupportsMultiTerm, (tokenStream, esVersion) -> new AppendCharFilter(tokenStream, esVersion.toString())));
}
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
// Need mock keyword tokenizer here, because alpha / beta versions are broken up by the dash.
return singletonMap("keyword", (indexSettings, environment, name, settings) -> TokenizerFactory.newFactory(name, () -> new MockTokenizer(MockTokenizer.KEYWORD, false)));
}
})).getAnalysisRegistry();
Version version = VersionUtils.randomVersion(random());
IndexAnalyzers analyzers = getIndexAnalyzers(registry, Settings.builder().put("index.analysis.analyzer.no_version.tokenizer", "keyword").put("index.analysis.analyzer.no_version.char_filter", "no_version").put("index.analysis.analyzer.lucene_version.tokenizer", "keyword").put("index.analysis.analyzer.lucene_version.char_filter", "lucene_version").put("index.analysis.analyzer.opensearch_version.tokenizer", "keyword").put("index.analysis.analyzer.opensearch_version.char_filter", "opensearch_version").put(IndexMetadata.SETTING_VERSION_CREATED, version).build());
assertTokenStreamContents(analyzers.get("no_version").tokenStream("", "test"), new String[] { "testno_version" });
assertTokenStreamContents(analyzers.get("lucene_version").tokenStream("", "test"), new String[] { "test" + version.luceneVersion });
assertTokenStreamContents(analyzers.get("opensearch_version").tokenStream("", "test"), new String[] { "test" + version });
assertEquals("test" + (noVersionSupportsMultiTerm ? "no_version" : ""), analyzers.get("no_version").normalize("", "test").utf8ToString());
assertEquals("test" + (luceneVersionSupportsMultiTerm ? version.luceneVersion.toString() : ""), analyzers.get("lucene_version").normalize("", "test").utf8ToString());
assertEquals("test" + (opensearchVersionSupportsMultiTerm ? version.toString() : ""), analyzers.get("opensearch_version").normalize("", "test").utf8ToString());
}
use of org.opensearch.index.analysis.PreConfiguredCharFilter in project OpenSearch by opensearch-project.
the class TransportAnalyzeActionTests method setUp.
@Override
public void setUp() throws Exception {
super.setUp();
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).put(IndexMetadata.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()).put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard").put("index.analysis.analyzer.custom_analyzer.filter", "mock").put("index.analysis.normalizer.my_normalizer.type", "custom").put("index.analysis.char_filter.my_append.type", "append").put("index.analysis.char_filter.my_append.suffix", "baz").put("index.analyze.max_token_count", 100).putList("index.analysis.normalizer.my_normalizer.filter", "lowercase").build();
this.indexSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
Environment environment = TestEnvironment.newEnvironment(settings);
AnalysisPlugin plugin = new AnalysisPlugin() {
class MockFactory extends AbstractTokenFilterFactory {
final CharacterRunAutomaton stopset;
MockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
if (settings.hasValue("stopword")) {
this.stopset = new CharacterRunAutomaton(Automata.makeString(settings.get("stopword")));
} else {
this.stopset = MockTokenFilter.ENGLISH_STOPSET;
}
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new MockTokenFilter(tokenStream, this.stopset);
}
}
class DeprecatedTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
DeprecatedTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
}
@Override
public TokenStream create(TokenStream tokenStream) {
deprecationLogger.deprecate("deprecated_token_filter_create", "Using deprecated token filter [deprecated]");
return tokenStream;
}
@Override
public TokenStream normalize(TokenStream tokenStream) {
deprecationLogger.deprecate("deprecated_token_filter_normalize", "Using deprecated token filter [deprecated]");
return tokenStream;
}
}
class AppendCharFilterFactory extends AbstractCharFilterFactory {
final String suffix;
AppendCharFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name);
this.suffix = settings.get("suffix", "bar");
}
@Override
public Reader create(Reader reader) {
return new AppendCharFilter(reader, suffix);
}
}
@Override
public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
return singletonMap("append", AppendCharFilterFactory::new);
}
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
return singletonMap("keyword", (indexSettings, environment, name, settings) -> TokenizerFactory.newFactory(name, () -> new MockTokenizer(MockTokenizer.KEYWORD, false)));
}
@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
Map<String, AnalysisProvider<TokenFilterFactory>> filters = new HashMap<>();
filters.put("mock", MockFactory::new);
filters.put("deprecated", DeprecatedTokenFilterFactory::new);
return filters;
}
@Override
public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
return singletonList(PreConfiguredCharFilter.singleton("append", false, reader -> new AppendCharFilter(reader, "foo")));
}
};
registry = new AnalysisModule(environment, singletonList(plugin)).getAnalysisRegistry();
indexAnalyzers = registry.build(this.indexSettings);
maxTokenCount = IndexSettings.MAX_TOKEN_COUNT_SETTING.getDefault(settings);
idxMaxTokenCount = this.indexSettings.getMaxTokenCount();
}
Aggregations