Use of org.opensearch.index.analysis.AbstractCharFilterFactory in the OpenSearch project by opensearch-project.
From the class TransportAnalyzeActionTests, method setUp.
/**
 * Prepares the analysis fixtures for this test class.
 *
 * <p>Builds node-level settings whose home path is a fresh temp directory, and index-level
 * settings that declare a {@code custom_analyzer} (standard tokenizer + {@code mock} filter),
 * a {@code my_normalizer} normalizer (with the {@code lowercase} filter), a {@code my_append}
 * char filter of type {@code append} with suffix {@code baz}, and
 * {@code index.analyze.max_token_count = 100}.
 *
 * <p>An inline {@link AnalysisPlugin} contributes:
 * <ul>
 *   <li>an {@code append} char filter, both as a configurable factory and as a pre-configured
 *       singleton constructed with {@code "foo"};</li>
 *   <li>a {@code keyword} tokenizer backed by {@code MockTokenizer};</li>
 *   <li>the {@code mock} and {@code deprecated} token filters.</li>
 * </ul>
 *
 * <p>Finally assigns {@code indexSettings}, {@code registry}, {@code indexAnalyzers},
 * {@code maxTokenCount} and {@code idxMaxTokenCount} on this instance.
 *
 * @throws Exception if the superclass set-up or building the analysis registry fails
 */
@Override
public void setUp() throws Exception {
    super.setUp();

    // Node-level settings: only the home path, pointing at a fresh temp directory.
    final Settings nodeSettings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();

    // Index-level settings, added one key at a time in the same order as they are declared.
    final Settings.Builder indexConfig = Settings.builder();
    indexConfig.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT);
    indexConfig.put(IndexMetadata.SETTING_INDEX_UUID, UUIDs.randomBase64UUID());
    indexConfig.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard");
    indexConfig.put("index.analysis.analyzer.custom_analyzer.filter", "mock");
    indexConfig.put("index.analysis.normalizer.my_normalizer.type", "custom");
    indexConfig.put("index.analysis.char_filter.my_append.type", "append");
    indexConfig.put("index.analysis.char_filter.my_append.suffix", "baz");
    indexConfig.put("index.analyze.max_token_count", 100);
    indexConfig.putList("index.analysis.normalizer.my_normalizer.filter", "lowercase");
    this.indexSettings = IndexSettingsModule.newIndexSettings("index", indexConfig.build());

    final Environment testEnvironment = TestEnvironment.newEnvironment(nodeSettings);

    final AnalysisPlugin testPlugin = new AnalysisPlugin() {

        /** Char filter factory that reads its suffix from the "suffix" setting, defaulting to "bar". */
        class AppendCharFilterFactory extends AbstractCharFilterFactory {
            final String suffix;

            AppendCharFilterFactory(IndexSettings idxSettings, Environment env, String filterName, Settings filterSettings) {
                super(idxSettings, filterName);
                this.suffix = filterSettings.get("suffix", "bar");
            }

            @Override
            public Reader create(Reader input) {
                return new AppendCharFilter(input, suffix);
            }
        }

        /** Token filter factory wrapping streams in a MockTokenFilter with a configurable stop set. */
        class MockFactory extends AbstractTokenFilterFactory {
            final CharacterRunAutomaton stopset;

            MockFactory(IndexSettings idxSettings, Environment env, String filterName, Settings filterSettings) {
                super(idxSettings, filterName, filterSettings);
                // A configured "stopword" yields a single-string automaton; otherwise use the English stop set.
                this.stopset = filterSettings.hasValue("stopword")
                    ? new CharacterRunAutomaton(Automata.makeString(filterSettings.get("stopword")))
                    : MockTokenFilter.ENGLISH_STOPSET;
            }

            @Override
            public TokenStream create(TokenStream input) {
                return new MockTokenFilter(input, this.stopset);
            }
        }

        /** Pass-through token filter that logs a deprecation on both create and normalize. */
        class DeprecatedTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {

            DeprecatedTokenFilterFactory(IndexSettings idxSettings, Environment env, String filterName, Settings filterSettings) {
                super(idxSettings, filterName, filterSettings);
            }

            @Override
            public TokenStream create(TokenStream input) {
                deprecationLogger.deprecate("deprecated_token_filter_create", "Using deprecated token filter [deprecated]");
                return input;
            }

            @Override
            public TokenStream normalize(TokenStream input) {
                deprecationLogger.deprecate("deprecated_token_filter_normalize", "Using deprecated token filter [deprecated]");
                return input;
            }
        }

        @Override
        public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
            final PreConfiguredCharFilter appendFoo = PreConfiguredCharFilter.singleton(
                "append",
                false,
                input -> new AppendCharFilter(input, "foo")
            );
            return singletonList(appendFoo);
        }

        @Override
        public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
            final AnalysisProvider<CharFilterFactory> appendProvider = AppendCharFilterFactory::new;
            return singletonMap("append", appendProvider);
        }

        @Override
        public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
            final AnalysisProvider<TokenizerFactory> keywordProvider = (idxSettings, env, tokenizerName, tokenizerSettings) ->
                TokenizerFactory.newFactory(tokenizerName, () -> new MockTokenizer(MockTokenizer.KEYWORD, false));
            return singletonMap("keyword", keywordProvider);
        }

        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            final Map<String, AnalysisProvider<TokenFilterFactory>> providers = new HashMap<>();
            providers.put("mock", MockFactory::new);
            providers.put("deprecated", DeprecatedTokenFilterFactory::new);
            return providers;
        }
    };

    final AnalysisModule analysisModule = new AnalysisModule(testEnvironment, singletonList(testPlugin));
    registry = analysisModule.getAnalysisRegistry();
    indexAnalyzers = registry.build(this.indexSettings);

    // Default token limit as resolved against the node settings, and the limit read from the index settings.
    maxTokenCount = IndexSettings.MAX_TOKEN_COUNT_SETTING.getDefault(nodeSettings);
    idxMaxTokenCount = this.indexSettings.getMaxTokenCount();
}
Aggregations