Use of org.opensearch.index.analysis.AbstractTokenFilterFactory in the OpenSearch project (opensearch-project).
From the class AnalysisModule, method setupTokenFilters:
/**
 * Builds the registry of token filter factories: the built-in filters
 * ("stop", "standard" for bwc, "shingle", "hunspell") plus any filters
 * contributed by the supplied {@link AnalysisPlugin}s.
 *
 * @param plugins          analysis plugins whose token filters are merged in
 * @param hunspellService  dictionary service injected into the hunspell factory
 * @return the populated "token_filter" registry
 */
private NamedRegistry<AnalysisProvider<TokenFilterFactory>> setupTokenFilters(List<AnalysisPlugin> plugins, HunspellService hunspellService) {
    final NamedRegistry<AnalysisProvider<TokenFilterFactory>> registry = new NamedRegistry<>("token_filter");
    registry.register("stop", StopTokenFilterFactory::new);
    // "standard" is kept only so that indices created before 7.0 can still be
    // opened; for newer indices the name is rejected outright.
    registry.register("standard", new AnalysisProvider<TokenFilterFactory>() {
        @Override
        public TokenFilterFactory get(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
            if (indexSettings.getIndexVersionCreated().before(LegacyESVersion.V_7_0_0) == false) {
                throw new IllegalArgumentException("The [standard] token filter has been removed.");
            }
            deprecationLogger.deprecate("standard_deprecation", "The [standard] token filter name is deprecated and will be removed in a future version.");
            // Pass-through factory: the legacy "standard" filter never altered the stream.
            return new AbstractTokenFilterFactory(indexSettings, name, settings) {
                @Override
                public TokenStream create(TokenStream tokenStream) {
                    return tokenStream;
                }
            };
        }

        @Override
        public boolean requiresAnalysisSettings() {
            return false;
        }
    });
    registry.register("shingle", ShingleTokenFilterFactory::new);
    registry.register(
        "hunspell",
        requiresAnalysisSettings(
            (indexSettings, env, name, settings) -> new HunspellTokenFilterFactory(indexSettings, name, settings, hunspellService)
        )
    );
    // Plugin-provided filters are registered last, after the built-ins.
    registry.extractAndRegister(plugins, AnalysisPlugin::getTokenFilters);
    return registry;
}
Use of org.opensearch.index.analysis.AbstractTokenFilterFactory in the OpenSearch project (opensearch-project).
From the class TransportAnalyzeActionTests, method setUp:
@Override
public void setUp() throws Exception {
    super.setUp();
    // Node-level settings: the test environment only needs a home path.
    Settings nodeSettings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();
    // Index settings: a custom analyzer and normalizer wired to the mock
    // components registered by the plugin below, a custom char filter with a
    // "baz" suffix, and a per-index analyze token cap of 100.
    Settings idxSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put(IndexMetadata.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
        .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
        .put("index.analysis.analyzer.custom_analyzer.filter", "mock")
        .put("index.analysis.normalizer.my_normalizer.type", "custom")
        .put("index.analysis.char_filter.my_append.type", "append")
        .put("index.analysis.char_filter.my_append.suffix", "baz")
        .put("index.analyze.max_token_count", 100)
        .putList("index.analysis.normalizer.my_normalizer.filter", "lowercase")
        .build();
    this.indexSettings = IndexSettingsModule.newIndexSettings("index", idxSettings);
    Environment environment = TestEnvironment.newEnvironment(nodeSettings);
    // Anonymous plugin supplying the mock analysis components referenced in
    // the index settings above.
    AnalysisPlugin analysisPlugin = new AnalysisPlugin() {
        // Stop-word filter backed by MockTokenFilter; the stop set comes from
        // the "stopword" setting when present, else the English default.
        class MockTokenFilterFactory extends AbstractTokenFilterFactory {
            final CharacterRunAutomaton stopWords;

            MockTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
                super(indexSettings, name, settings);
                this.stopWords = settings.hasValue("stopword")
                    ? new CharacterRunAutomaton(Automata.makeString(settings.get("stopword")))
                    : MockTokenFilter.ENGLISH_STOPSET;
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new MockTokenFilter(tokenStream, this.stopWords);
            }
        }

        // Pass-through filter that emits a deprecation warning on every use,
        // both as a regular filter and as a normalizer component.
        class DeprecatedTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
            DeprecatedTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
                super(indexSettings, name, settings);
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                deprecationLogger.deprecate("deprecated_token_filter_create", "Using deprecated token filter [deprecated]");
                return tokenStream;
            }

            @Override
            public TokenStream normalize(TokenStream tokenStream) {
                deprecationLogger.deprecate("deprecated_token_filter_normalize", "Using deprecated token filter [deprecated]");
                return tokenStream;
            }
        }

        // Char filter that appends a configurable suffix (default "bar").
        class AppendCharFilterFactory extends AbstractCharFilterFactory {
            final String suffix;

            AppendCharFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
                super(indexSettings, name);
                this.suffix = settings.get("suffix", "bar");
            }

            @Override
            public Reader create(Reader reader) {
                return new AppendCharFilter(reader, suffix);
            }
        }

        @Override
        public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
            return singletonMap("append", AppendCharFilterFactory::new);
        }

        @Override
        public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
            // Keyword tokenizer: the whole input becomes a single token.
            return singletonMap(
                "keyword",
                (indexSettings, environment, name, settings) -> TokenizerFactory.newFactory(
                    name,
                    () -> new MockTokenizer(MockTokenizer.KEYWORD, false)
                )
            );
        }

        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            Map<String, AnalysisProvider<TokenFilterFactory>> tokenFilters = new HashMap<>();
            tokenFilters.put("mock", MockTokenFilterFactory::new);
            tokenFilters.put("deprecated", DeprecatedTokenFilterFactory::new);
            return tokenFilters;
        }

        @Override
        public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
            // Pre-configured variant of "append" with a fixed "foo" suffix.
            return singletonList(PreConfiguredCharFilter.singleton("append", false, reader -> new AppendCharFilter(reader, "foo")));
        }
    };
    registry = new AnalysisModule(environment, singletonList(analysisPlugin)).getAnalysisRegistry();
    indexAnalyzers = registry.build(this.indexSettings);
    // Node-wide default cap vs. the per-index cap (100) set above.
    maxTokenCount = IndexSettings.MAX_TOKEN_COUNT_SETTING.getDefault(nodeSettings);
    idxMaxTokenCount = this.indexSettings.getMaxTokenCount();
}
Aggregations