Search in sources :

Example 1 with StopTokenFilterFactory

Use of org.opensearch.index.analysis.StopTokenFilterFactory in the OpenSearch project by opensearch-project.

From the class AnalysisModuleTests, method testSimpleConfiguration.

/**
 * Builds the index analyzers from {@code settings} and asserts on three of the
 * configured custom analyzers:
 * <ul>
 *   <li>{@code custom1} is a {@link CustomAnalyzer} using a {@link StandardTokenizerFactory}
 *       and two token filters, the first of which is a stop filter holding one stop word;</li>
 *   <li>{@code custom6} is a {@link CustomAnalyzer} reporting a position increment gap of 256;</li>
 *   <li>{@code custom4} is a {@link CustomAnalyzer} whose first token filter is a
 *       {@link MyFilterTokenFilterFactory}.</li>
 * </ul>
 *
 * @param settings the settings the index analyzers are built from
 * @throws IOException declared by the method; propagated from the calls it makes
 */
private void testSimpleConfiguration(Settings settings) throws IOException {
    final IndexAnalyzers analyzers = getIndexAnalyzers(settings);

    // custom1: tokenizer type, number of filters, and the stop filter's word count
    final Analyzer firstAnalyzer = analyzers.get("custom1").analyzer();
    assertThat(firstAnalyzer, instanceOf(CustomAnalyzer.class));
    final CustomAnalyzer customOne = (CustomAnalyzer) firstAnalyzer;
    assertThat(customOne.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    assertThat(customOne.tokenFilters().length, equalTo(2));
    final StopTokenFilterFactory stopFilter = (StopTokenFilterFactory) customOne.tokenFilters()[0];
    assertThat(stopFilter.stopWords().size(), equalTo(1));

    // custom6: position increment gap
    final Analyzer gapAnalyzer = analyzers.get("custom6").analyzer();
    assertThat(gapAnalyzer, instanceOf(CustomAnalyzer.class));
    assertThat(((CustomAnalyzer) gapAnalyzer).getPositionIncrementGap("any_string"), equalTo(256));

    // custom4: filter registered through a custom class name (my)
    final Analyzer customClassAnalyzer = analyzers.get("custom4").analyzer();
    assertThat(customClassAnalyzer, instanceOf(CustomAnalyzer.class));
    assertThat(((CustomAnalyzer) customClassAnalyzer).tokenFilters()[0], instanceOf(MyFilterTokenFilterFactory.class));
}
Also used : StopTokenFilterFactory(org.opensearch.index.analysis.StopTokenFilterFactory) CustomAnalyzer(org.opensearch.index.analysis.CustomAnalyzer) StandardTokenizerFactory(org.opensearch.index.analysis.StandardTokenizerFactory) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) CustomAnalyzer(org.opensearch.index.analysis.CustomAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) MyFilterTokenFilterFactory(org.opensearch.index.analysis.MyFilterTokenFilterFactory)

Example 2 with StopTokenFilterFactory

Use of org.opensearch.index.analysis.StopTokenFilterFactory in the OpenSearch project by opensearch-project.

From the class AnalysisModule, method setupTokenFilters.

/**
 * Creates the {@code "token_filter"} registry and fills it with the token filters
 * registered here ({@code stop}, {@code standard}, {@code shingle}, {@code hunspell}),
 * followed by whatever each plugin exposes through {@link AnalysisPlugin#getTokenFilters}.
 *
 * <p>The {@code standard} entry only exists for backwards compatibility: for an index
 * created before {@code LegacyESVersion.V_7_0_0} it logs a deprecation and yields a
 * factory whose {@code create} returns the incoming stream unchanged; for any other
 * index it throws {@link IllegalArgumentException}.
 *
 * @param plugins         analysis plugins whose token filters are added to the registry
 * @param hunspellService service handed to every {@link HunspellTokenFilterFactory} that is created
 * @return the populated token filter registry
 */
private NamedRegistry<AnalysisProvider<TokenFilterFactory>> setupTokenFilters(List<AnalysisPlugin> plugins, HunspellService hunspellService) {
    final NamedRegistry<AnalysisProvider<TokenFilterFactory>> registry = new NamedRegistry<>("token_filter");
    registry.register("stop", StopTokenFilterFactory::new);

    // Add "standard" for old indices (bwc)
    final AnalysisProvider<TokenFilterFactory> legacyStandard = new AnalysisProvider<TokenFilterFactory>() {

        @Override
        public boolean requiresAnalysisSettings() {
            return false;
        }

        @Override
        public TokenFilterFactory get(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
            final boolean createdBeforeSeven = indexSettings.getIndexVersionCreated().before(LegacyESVersion.V_7_0_0);
            if (!createdBeforeSeven) {
                throw new IllegalArgumentException("The [standard] token filter has been removed.");
            }
            deprecationLogger.deprecate("standard_deprecation", "The [standard] token filter name is deprecated and will be removed in a future version.");
            // Pass-through factory: the stream is handed back untouched.
            return new AbstractTokenFilterFactory(indexSettings, name, settings) {

                @Override
                public TokenStream create(TokenStream tokenStream) {
                    return tokenStream;
                }
            };
        }
    };
    registry.register("standard", legacyStandard);

    registry.register("shingle", ShingleTokenFilterFactory::new);
    registry.register(
        "hunspell",
        requiresAnalysisSettings(
            (idxSettings, environment, filterName, filterSettings) -> new HunspellTokenFilterFactory(idxSettings, filterName, filterSettings, hunspellService)
        )
    );

    // Plugin-provided filters are added after the ones registered above.
    registry.extractAndRegister(plugins, AnalysisPlugin::getTokenFilters);
    return registry;
}
Also used : TokenizerFactory(org.opensearch.index.analysis.TokenizerFactory) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) PreBuiltAnalyzerProviderFactory(org.opensearch.index.analysis.PreBuiltAnalyzerProviderFactory) StopAnalyzerProvider(org.opensearch.index.analysis.StopAnalyzerProvider) TokenFilterFactory(org.opensearch.index.analysis.TokenFilterFactory) Version(org.opensearch.Version) StopTokenFilterFactory(org.opensearch.index.analysis.StopTokenFilterFactory) DeprecationLogger(org.opensearch.common.logging.DeprecationLogger) SimpleAnalyzerProvider(org.opensearch.index.analysis.SimpleAnalyzerProvider) AnalysisRegistry(org.opensearch.index.analysis.AnalysisRegistry) LegacyESVersion(org.opensearch.LegacyESVersion) KeywordAnalyzerProvider(org.opensearch.index.analysis.KeywordAnalyzerProvider) AnalysisPlugin.requiresAnalysisSettings(org.opensearch.plugins.AnalysisPlugin.requiresAnalysisSettings) CharFilterFactory(org.opensearch.index.analysis.CharFilterFactory) Locale(java.util.Locale) Map(java.util.Map) StandardTokenizerFactory(org.opensearch.index.analysis.StandardTokenizerFactory) PreConfiguredTokenizer(org.opensearch.index.analysis.PreConfiguredTokenizer) Environment(org.opensearch.env.Environment) LowerCaseFilter(org.apache.lucene.analysis.LowerCaseFilter) TokenStream(org.apache.lucene.analysis.TokenStream) PreConfiguredTokenFilter(org.opensearch.index.analysis.PreConfiguredTokenFilter) AbstractTokenFilterFactory(org.opensearch.index.analysis.AbstractTokenFilterFactory) Settings(org.opensearch.common.settings.Settings) IOException(java.io.IOException) PreConfiguredCharFilter(org.opensearch.index.analysis.PreConfiguredCharFilter) ShingleTokenFilterFactory(org.opensearch.index.analysis.ShingleTokenFilterFactory) LowercaseNormalizerProvider(org.opensearch.index.analysis.LowercaseNormalizerProvider) AnalysisPlugin(org.opensearch.plugins.AnalysisPlugin) List(java.util.List) AnalyzerProvider(org.opensearch.index.analysis.AnalyzerProvider) 
NamedRegistry(org.opensearch.common.NamedRegistry) IndexSettings(org.opensearch.index.IndexSettings) WhitespaceAnalyzerProvider(org.opensearch.index.analysis.WhitespaceAnalyzerProvider) HunspellTokenFilterFactory(org.opensearch.index.analysis.HunspellTokenFilterFactory) Collections.unmodifiableMap(java.util.Collections.unmodifiableMap) StandardAnalyzerProvider(org.opensearch.index.analysis.StandardAnalyzerProvider) StopTokenFilterFactory(org.opensearch.index.analysis.StopTokenFilterFactory) TokenStream(org.apache.lucene.analysis.TokenStream) ShingleTokenFilterFactory(org.opensearch.index.analysis.ShingleTokenFilterFactory) IndexSettings(org.opensearch.index.IndexSettings) AbstractTokenFilterFactory(org.opensearch.index.analysis.AbstractTokenFilterFactory) TokenFilterFactory(org.opensearch.index.analysis.TokenFilterFactory) StopTokenFilterFactory(org.opensearch.index.analysis.StopTokenFilterFactory) AbstractTokenFilterFactory(org.opensearch.index.analysis.AbstractTokenFilterFactory) ShingleTokenFilterFactory(org.opensearch.index.analysis.ShingleTokenFilterFactory) HunspellTokenFilterFactory(org.opensearch.index.analysis.HunspellTokenFilterFactory) NamedRegistry(org.opensearch.common.NamedRegistry) Environment(org.opensearch.env.Environment) AnalysisPlugin.requiresAnalysisSettings(org.opensearch.plugins.AnalysisPlugin.requiresAnalysisSettings) Settings(org.opensearch.common.settings.Settings) IndexSettings(org.opensearch.index.IndexSettings) HunspellTokenFilterFactory(org.opensearch.index.analysis.HunspellTokenFilterFactory) AnalysisPlugin(org.opensearch.plugins.AnalysisPlugin)

Aggregations

StandardTokenizerFactory (org.opensearch.index.analysis.StandardTokenizerFactory)2 StopTokenFilterFactory (org.opensearch.index.analysis.StopTokenFilterFactory)2 IOException (java.io.IOException)1 Collections.unmodifiableMap (java.util.Collections.unmodifiableMap)1 List (java.util.List)1 Locale (java.util.Locale)1 Map (java.util.Map)1 Analyzer (org.apache.lucene.analysis.Analyzer)1 LowerCaseFilter (org.apache.lucene.analysis.LowerCaseFilter)1 TokenStream (org.apache.lucene.analysis.TokenStream)1 LegacyESVersion (org.opensearch.LegacyESVersion)1 Version (org.opensearch.Version)1 IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata)1 NamedRegistry (org.opensearch.common.NamedRegistry)1 DeprecationLogger (org.opensearch.common.logging.DeprecationLogger)1 Settings (org.opensearch.common.settings.Settings)1 Environment (org.opensearch.env.Environment)1 IndexSettings (org.opensearch.index.IndexSettings)1 AbstractTokenFilterFactory (org.opensearch.index.analysis.AbstractTokenFilterFactory)1 AnalysisRegistry (org.opensearch.index.analysis.AnalysisRegistry)1