Example 6 with AnalysisPlugin

Use of org.opensearch.plugins.AnalysisPlugin in project OpenSearch by opensearch-project.

From class AnalysisModuleTests, method testPluginPreConfiguredCharFilters.

/**
 * Tests that plugins can register pre-configured char filters that vary in behavior based on OpenSearch version, based on
 * Lucene version, or that do not vary by version at all.
 */
public void testPluginPreConfiguredCharFilters() throws IOException {
    boolean noVersionSupportsMultiTerm = randomBoolean();
    boolean luceneVersionSupportsMultiTerm = randomBoolean();
    boolean opensearchVersionSupportsMultiTerm = randomBoolean();
    AnalysisRegistry registry = new AnalysisModule(TestEnvironment.newEnvironment(emptyNodeSettings), singletonList(new AnalysisPlugin() {

        @Override
        public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
            return Arrays.asList(
                PreConfiguredCharFilter.singleton(
                    "no_version",
                    noVersionSupportsMultiTerm,
                    tokenStream -> new AppendCharFilter(tokenStream, "no_version")
                ),
                PreConfiguredCharFilter.luceneVersion(
                    "lucene_version",
                    luceneVersionSupportsMultiTerm,
                    (tokenStream, luceneVersion) -> new AppendCharFilter(tokenStream, luceneVersion.toString())
                ),
                PreConfiguredCharFilter.openSearchVersion(
                    "opensearch_version",
                    opensearchVersionSupportsMultiTerm,
                    (tokenStream, esVersion) -> new AppendCharFilter(tokenStream, esVersion.toString())
                )
            );
        }

        @Override
        public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
            // Need a mock keyword tokenizer here, because alpha/beta versions are broken up by the dash.
            return singletonMap(
                "keyword",
                (indexSettings, environment, name, settings) -> TokenizerFactory.newFactory(
                    name,
                    () -> new MockTokenizer(MockTokenizer.KEYWORD, false)
                )
            );
        }
    })).getAnalysisRegistry();
    Version version = VersionUtils.randomVersion(random());
    IndexAnalyzers analyzers = getIndexAnalyzers(
        registry,
        Settings.builder()
            .put("index.analysis.analyzer.no_version.tokenizer", "keyword")
            .put("index.analysis.analyzer.no_version.char_filter", "no_version")
            .put("index.analysis.analyzer.lucene_version.tokenizer", "keyword")
            .put("index.analysis.analyzer.lucene_version.char_filter", "lucene_version")
            .put("index.analysis.analyzer.opensearch_version.tokenizer", "keyword")
            .put("index.analysis.analyzer.opensearch_version.char_filter", "opensearch_version")
            .put(IndexMetadata.SETTING_VERSION_CREATED, version)
            .build()
    );
    assertTokenStreamContents(analyzers.get("no_version").tokenStream("", "test"), new String[] { "testno_version" });
    assertTokenStreamContents(analyzers.get("lucene_version").tokenStream("", "test"), new String[] { "test" + version.luceneVersion });
    assertTokenStreamContents(analyzers.get("opensearch_version").tokenStream("", "test"), new String[] { "test" + version });
    assertEquals("test" + (noVersionSupportsMultiTerm ? "no_version" : ""), analyzers.get("no_version").normalize("", "test").utf8ToString());
    assertEquals("test" + (luceneVersionSupportsMultiTerm ? version.luceneVersion.toString() : ""), analyzers.get("lucene_version").normalize("", "test").utf8ToString());
    assertEquals("test" + (opensearchVersionSupportsMultiTerm ? version.toString() : ""), analyzers.get("opensearch_version").normalize("", "test").utf8ToString());
}
Also used : Arrays(java.util.Arrays) Matchers.either(org.hamcrest.Matchers.either) Version(org.opensearch.Version) StopTokenFilterFactory(org.opensearch.index.analysis.StopTokenFilterFactory) Collections.singletonList(java.util.Collections.singletonList) AnalysisRegistry(org.opensearch.index.analysis.AnalysisRegistry) Directory(org.apache.lucene.store.Directory) Map(java.util.Map) PreConfiguredTokenizer(org.opensearch.index.analysis.PreConfiguredTokenizer) CustomAnalyzer(org.opensearch.index.analysis.CustomAnalyzer) Path(java.nio.file.Path) PreConfiguredTokenFilter(org.opensearch.index.analysis.PreConfiguredTokenFilter) OpenSearchTestCase(org.opensearch.test.OpenSearchTestCase) Set(java.util.Set) Settings(org.opensearch.common.settings.Settings) Reader(java.io.Reader) StandardCharsets(java.nio.charset.StandardCharsets) UncheckedIOException(java.io.UncheckedIOException) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) List(java.util.List) MatcherAssert(org.hamcrest.MatcherAssert) MockTokenizer(org.apache.lucene.tests.analysis.MockTokenizer) Matchers.equalTo(org.hamcrest.Matchers.equalTo) IndexSettings(org.opensearch.index.IndexSettings) TokenFilter(org.apache.lucene.analysis.TokenFilter) XContentType(org.opensearch.common.xcontent.XContentType) Dictionary(org.apache.lucene.analysis.hunspell.Dictionary) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) MyFilterTokenFilterFactory(org.opensearch.index.analysis.MyFilterTokenFilterFactory) IndexSettingsModule(org.opensearch.test.IndexSettingsModule) TestEnvironment(org.opensearch.env.TestEnvironment) TokenizerFactory(org.opensearch.index.analysis.TokenizerFactory) Tokenizer(org.apache.lucene.analysis.Tokenizer) BaseTokenStreamTestCase.assertTokenStreamContents(org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) TokenFilterFactory(org.opensearch.index.analysis.TokenFilterFactory) CharFilter(org.apache.lucene.analysis.CharFilter) Analysis(org.opensearch.index.analysis.Analysis) VersionUtils(org.opensearch.test.VersionUtils) Streams(org.opensearch.common.io.Streams) CharFilterFactory(org.opensearch.index.analysis.CharFilterFactory) StandardTokenizerFactory(org.opensearch.index.analysis.StandardTokenizerFactory) AnalysisProvider(org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider) Collections.singletonMap(java.util.Collections.singletonMap) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) Environment(org.opensearch.env.Environment) TokenStream(org.apache.lucene.analysis.TokenStream) Files(java.nio.file.Files) BufferedWriter(java.io.BufferedWriter) Analyzer(org.apache.lucene.analysis.Analyzer) IOException(java.io.IOException) PreConfiguredCharFilter(org.opensearch.index.analysis.PreConfiguredCharFilter) AnalysisPlugin(org.opensearch.plugins.AnalysisPlugin) StringReader(java.io.StringReader) NIOFSDirectory(org.apache.lucene.store.NIOFSDirectory) InputStream(java.io.InputStream)
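The three PreConfiguredCharFilter factory methods used above differ only in what the wrapping function receives: nothing beyond the Reader, the index's Lucene version, or the index's OpenSearch version. A minimal standalone sketch that registers all three variants, reusing the test's AppendCharFilter; the plugin class and filter names are illustrative, not from the OpenSearch code base:

import java.io.Reader;
import java.util.Arrays;
import java.util.List;

import org.opensearch.index.analysis.PreConfiguredCharFilter;
import org.opensearch.indices.analysis.AnalysisModuleTests.AppendCharFilter;
import org.opensearch.plugins.AnalysisPlugin;
import org.opensearch.plugins.Plugin;

// Illustrative plugin class, not taken from the OpenSearch code base.
public class VersionedCharFilterPlugin extends Plugin implements AnalysisPlugin {

    @Override
    public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
        return Arrays.asList(
            // Same behavior on every version; the boolean marks the filter as
            // usable for multi-term queries (normalization).
            PreConfiguredCharFilter.singleton("fixed", true, reader -> new AppendCharFilter(reader, "fixed")),
            // The wrap function additionally receives the index's Lucene version...
            PreConfiguredCharFilter.luceneVersion(
                "by_lucene",
                false,
                (reader, luceneVersion) -> new AppendCharFilter(reader, luceneVersion.toString())
            ),
            // ...or the index's OpenSearch version.
            PreConfiguredCharFilter.openSearchVersion(
                "by_opensearch",
                false,
                (reader, openSearchVersion) -> new AppendCharFilter(reader, openSearchVersion.toString())
            )
        );
    }
}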

Example 7 with AnalysisPlugin

Use of org.opensearch.plugins.AnalysisPlugin in project OpenSearch by opensearch-project.

From class AnalysisRegistryTests, method testDeprecationsAndExceptions.

public void testDeprecationsAndExceptions() throws IOException {
    AnalysisPlugin plugin = new AnalysisPlugin() {

        class MockFactory extends AbstractTokenFilterFactory {

            MockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
                super(indexSettings, name, settings);
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                if (indexSettings.getIndexVersionCreated().equals(Version.CURRENT)) {
                    deprecationLogger.deprecate("deprecated_token_filter", "Using deprecated token filter [deprecated]");
                }
                return tokenStream;
            }
        }

        class ExceptionFactory extends AbstractTokenFilterFactory {

            ExceptionFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
                super(indexSettings, name, settings);
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                if (indexSettings.getIndexVersionCreated().equals(Version.CURRENT)) {
                    throw new IllegalArgumentException("Cannot use token filter [exception]");
                }
                return tokenStream;
            }
        }

        class UnusedMockFactory extends AbstractTokenFilterFactory {

            UnusedMockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
                super(indexSettings, name, settings);
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                deprecationLogger.deprecate("unused_token_filter", "Using deprecated token filter [unused]");
                return tokenStream;
            }
        }

        class NormalizerFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {

            NormalizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
                super(indexSettings, name, settings);
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                deprecationLogger.deprecate("deprecated_normalizer", "Using deprecated token filter [deprecated_normalizer]");
                return tokenStream;
            }
        }

        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            Map<String, AnalysisProvider<TokenFilterFactory>> filters = new HashMap<>();
            filters.put("deprecated", MockFactory::new);
            filters.put("unused", UnusedMockFactory::new);
            filters.put("deprecated_normalizer", NormalizerFactory::new);
            filters.put("exception", ExceptionFactory::new);
            return filters;
        }
    };
    Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
    Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put("index.analysis.filter.deprecated.type", "deprecated")
        .put("index.analysis.analyzer.custom.tokenizer", "standard")
        .putList("index.analysis.analyzer.custom.filter", "lowercase", "deprecated")
        .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
    new AnalysisModule(TestEnvironment.newEnvironment(settings), singletonList(plugin)).getAnalysisRegistry().build(idxSettings);
    // We should only get a warning from the token filter that is referenced in settings
    assertWarnings("Using deprecated token filter [deprecated]");
    indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, VersionUtils.getPreviousVersion())
        .put("index.analysis.filter.deprecated.type", "deprecated_normalizer")
        .putList("index.analysis.normalizer.custom.filter", "lowercase", "deprecated_normalizer")
        .put("index.analysis.filter.deprecated.type", "deprecated")
        .put("index.analysis.filter.exception.type", "exception")
        .put("index.analysis.analyzer.custom.tokenizer", "standard")
        .putList("index.analysis.analyzer.custom.filter", "lowercase", "deprecated", "exception")
        .build();
    idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
    new AnalysisModule(TestEnvironment.newEnvironment(settings), singletonList(plugin)).getAnalysisRegistry().build(idxSettings);
    // We should only get a warning from the normalizer, because we're on a version where 'deprecated'
    // works fine
    assertWarnings("Using deprecated token filter [deprecated_normalizer]");
    indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put("index.analysis.filter.exception.type", "exception")
        .put("index.analysis.analyzer.custom.tokenizer", "standard")
        .putList("index.analysis.analyzer.custom.filter", "lowercase", "exception")
        .build();
    IndexSettings exceptionSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
    IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> {
        new AnalysisModule(TestEnvironment.newEnvironment(settings), singletonList(plugin)).getAnalysisRegistry().build(exceptionSettings);
    });
    assertEquals("Cannot use token filter [exception]", e.getMessage());
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) HashMap(java.util.HashMap) IndexSettings(org.opensearch.index.IndexSettings) TestEnvironment(org.opensearch.env.TestEnvironment) Environment(org.opensearch.env.Environment) AnalysisModule(org.opensearch.indices.analysis.AnalysisModule) AnalysisProvider(org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider) Settings(org.opensearch.common.settings.Settings) AnalysisPlugin(org.opensearch.plugins.AnalysisPlugin)
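Worth noting: the deprecated_normalizer filter above is accepted inside a normalizer chain because its factory implements NormalizingTokenFilterFactory, whereas a plain token filter factory would be rejected there. A minimal sketch of such a factory, assuming Lucene's LowerCaseFilter as the wrapped filter (the class name is illustrative):

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.AbstractTokenFilterFactory;
import org.opensearch.index.analysis.NormalizingTokenFilterFactory;

// Illustrative factory: implementing NormalizingTokenFilterFactory marks the
// filter as safe for normalizers, as the test's NormalizerFactory does above.
class LowercasingNormalizerFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {

    LowercasingNormalizerFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
        super(indexSettings, name, settings);
    }

    @Override
    public TokenStream create(TokenStream tokenStream) {
        return new LowerCaseFilter(tokenStream);
    }
}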

Example 8 with AnalysisPlugin

Use of org.opensearch.plugins.AnalysisPlugin in project OpenSearch by opensearch-project.

From class AnalysisRegistryTests, method testConfigureCamelCaseTokenFilter.

/**
 * Tests that {@code camelCase} filter names and {@code snake_case} filter names don't collide.
 */
public void testConfigureCamelCaseTokenFilter() throws IOException {
    Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
    Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put("index.analysis.filter.testFilter.type", "mock")
        .put("index.analysis.filter.test_filter.type", "mock")
        .put("index.analysis.analyzer.custom_analyzer_with_camel_case.tokenizer", "standard")
        .putList("index.analysis.analyzer.custom_analyzer_with_camel_case.filter", "lowercase", "testFilter")
        .put("index.analysis.analyzer.custom_analyzer_with_snake_case.tokenizer", "standard")
        .putList("index.analysis.analyzer.custom_analyzer_with_snake_case.filter", "lowercase", "test_filter")
        .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
    /* The snake_case version of the name should not filter out any stopwords while the
     * camelCase version will filter out English stopwords. */
    AnalysisPlugin plugin = new AnalysisPlugin() {

        class MockFactory extends AbstractTokenFilterFactory {

            MockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
                super(indexSettings, name, settings);
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                if (name().equals("test_filter")) {
                    return new MockTokenFilter(tokenStream, MockTokenFilter.EMPTY_STOPSET);
                }
                return new MockTokenFilter(tokenStream, MockTokenFilter.ENGLISH_STOPSET);
            }
        }

        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            return singletonMap("mock", MockFactory::new);
        }
    };
    IndexAnalyzers indexAnalyzers = new AnalysisModule(TestEnvironment.newEnvironment(settings), singletonList(plugin)).getAnalysisRegistry().build(idxSettings);
    // This shouldn't contain English stopwords
    try (NamedAnalyzer custom_analyser = indexAnalyzers.get("custom_analyzer_with_camel_case")) {
        assertNotNull(custom_analyser);
        TokenStream tokenStream = custom_analyser.tokenStream("foo", "has a foo");
        tokenStream.reset();
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        assertTrue(tokenStream.incrementToken());
        assertEquals("has", charTermAttribute.toString());
        assertTrue(tokenStream.incrementToken());
        assertEquals("foo", charTermAttribute.toString());
        assertFalse(tokenStream.incrementToken());
    }
    // This *should* contain English stopwords
    try (NamedAnalyzer custom_analyser = indexAnalyzers.get("custom_analyzer_with_snake_case")) {
        assertNotNull(custom_analyser);
        TokenStream tokenStream = custom_analyser.tokenStream("foo", "has a foo");
        tokenStream.reset();
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        assertTrue(tokenStream.incrementToken());
        assertEquals("has", charTermAttribute.toString());
        assertTrue(tokenStream.incrementToken());
        assertEquals("a", charTermAttribute.toString());
        assertTrue(tokenStream.incrementToken());
        assertEquals("foo", charTermAttribute.toString());
        assertFalse(tokenStream.incrementToken());
    }
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) MockTokenFilter(org.apache.lucene.tests.analysis.MockTokenFilter) IndexSettings(org.opensearch.index.IndexSettings) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) TestEnvironment(org.opensearch.env.TestEnvironment) Environment(org.opensearch.env.Environment) AnalysisModule(org.opensearch.indices.analysis.AnalysisModule) AnalysisProvider(org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider) Settings(org.opensearch.common.settings.Settings) AnalysisPlugin(org.opensearch.plugins.AnalysisPlugin)
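Each try-with-resources block above walks the TokenStream contract by hand: reset(), then incrementToken() until it returns false. A small hypothetical helper that captures the full required lifecycle, including the end() call the test omits:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

final class TokenStreamUtil {

    private TokenStreamUtil() {}

    // Drains an analyzer's token stream into a list of terms, honoring the
    // reset()/incrementToken()/end()/close() lifecycle TokenStream requires.
    static List<String> terms(Analyzer analyzer, String field, String text) throws IOException {
        List<String> terms = new ArrayList<>();
        try (TokenStream ts = analyzer.tokenStream(field, text)) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                terms.add(term.toString());
            }
            ts.end();
        }
        return terms;
    }
}

With a helper like this, the camelCase assertions collapse to a single assertEquals(Arrays.asList("has", "foo"), TokenStreamUtil.terms(custom_analyser, "foo", "has a foo")).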

Example 9 with AnalysisPlugin

Use of org.opensearch.plugins.AnalysisPlugin in project OpenSearch by opensearch-project.

From class TransportAnalyzeActionTests, method setUp.

@Override
public void setUp() throws Exception {
    super.setUp();
    Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
    Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put(IndexMetadata.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
        .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
        .put("index.analysis.analyzer.custom_analyzer.filter", "mock")
        .put("index.analysis.normalizer.my_normalizer.type", "custom")
        .put("index.analysis.char_filter.my_append.type", "append")
        .put("index.analysis.char_filter.my_append.suffix", "baz")
        .put("index.analyze.max_token_count", 100)
        .putList("index.analysis.normalizer.my_normalizer.filter", "lowercase")
        .build();
    this.indexSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
    Environment environment = TestEnvironment.newEnvironment(settings);
    AnalysisPlugin plugin = new AnalysisPlugin() {

        class MockFactory extends AbstractTokenFilterFactory {

            final CharacterRunAutomaton stopset;

            MockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
                super(indexSettings, name, settings);
                if (settings.hasValue("stopword")) {
                    this.stopset = new CharacterRunAutomaton(Automata.makeString(settings.get("stopword")));
                } else {
                    this.stopset = MockTokenFilter.ENGLISH_STOPSET;
                }
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new MockTokenFilter(tokenStream, this.stopset);
            }
        }

        class DeprecatedTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {

            DeprecatedTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
                super(indexSettings, name, settings);
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                deprecationLogger.deprecate("deprecated_token_filter_create", "Using deprecated token filter [deprecated]");
                return tokenStream;
            }

            @Override
            public TokenStream normalize(TokenStream tokenStream) {
                deprecationLogger.deprecate("deprecated_token_filter_normalize", "Using deprecated token filter [deprecated]");
                return tokenStream;
            }
        }

        class AppendCharFilterFactory extends AbstractCharFilterFactory {

            final String suffix;

            AppendCharFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
                super(indexSettings, name);
                this.suffix = settings.get("suffix", "bar");
            }

            @Override
            public Reader create(Reader reader) {
                return new AppendCharFilter(reader, suffix);
            }
        }

        @Override
        public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
            return singletonMap("append", AppendCharFilterFactory::new);
        }

        @Override
        public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
            return singletonMap("keyword", (indexSettings, environment, name, settings) -> TokenizerFactory.newFactory(name, () -> new MockTokenizer(MockTokenizer.KEYWORD, false)));
        }

        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            Map<String, AnalysisProvider<TokenFilterFactory>> filters = new HashMap<>();
            filters.put("mock", MockFactory::new);
            filters.put("deprecated", DeprecatedTokenFilterFactory::new);
            return filters;
        }

        @Override
        public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
            return singletonList(PreConfiguredCharFilter.singleton("append", false, reader -> new AppendCharFilter(reader, "foo")));
        }
    };
    registry = new AnalysisModule(environment, singletonList(plugin)).getAnalysisRegistry();
    indexAnalyzers = registry.build(this.indexSettings);
    maxTokenCount = IndexSettings.MAX_TOKEN_COUNT_SETTING.getDefault(settings);
    idxMaxTokenCount = this.indexSettings.getMaxTokenCount();
}
Also used : TestEnvironment(org.opensearch.env.TestEnvironment) TokenizerFactory(org.opensearch.index.analysis.TokenizerFactory) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) TokenFilterFactory(org.opensearch.index.analysis.TokenFilterFactory) Version(org.opensearch.Version) HashMap(java.util.HashMap) Collections.singletonList(java.util.Collections.singletonList) AnalysisRegistry(org.opensearch.index.analysis.AnalysisRegistry) AppendCharFilter(org.opensearch.indices.analysis.AnalysisModuleTests.AppendCharFilter) CharFilterFactory(org.opensearch.index.analysis.CharFilterFactory) AnalyzeAction(org.opensearch.action.admin.indices.analyze.AnalyzeAction) AbstractCharFilterFactory(org.opensearch.index.analysis.AbstractCharFilterFactory) Map(java.util.Map) AnalysisProvider(org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider) Collections.singletonMap(java.util.Collections.singletonMap) UUIDs(org.opensearch.common.UUIDs) Automata(org.apache.lucene.util.automaton.Automata) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) Environment(org.opensearch.env.Environment) NormalizingTokenFilterFactory(org.opensearch.index.analysis.NormalizingTokenFilterFactory) TokenStream(org.apache.lucene.analysis.TokenStream) AbstractTokenFilterFactory(org.opensearch.index.analysis.AbstractTokenFilterFactory) OpenSearchTestCase(org.opensearch.test.OpenSearchTestCase) MockTokenFilter(org.apache.lucene.tests.analysis.MockTokenFilter) TransportAnalyzeAction(org.opensearch.action.admin.indices.analyze.TransportAnalyzeAction) Settings(org.opensearch.common.settings.Settings) IOException(java.io.IOException) Reader(java.io.Reader) Mockito.when(org.mockito.Mockito.when) IndexService(org.opensearch.index.IndexService) PreConfiguredCharFilter(org.opensearch.index.analysis.PreConfiguredCharFilter) AnalysisPlugin(org.opensearch.plugins.AnalysisPlugin) List(java.util.List) AnalysisModule(org.opensearch.indices.analysis.AnalysisModule) MockTokenizer(org.apache.lucene.tests.analysis.MockTokenizer) IndexSettings(org.opensearch.index.IndexSettings) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) IndexSettingsModule(org.opensearch.test.IndexSettingsModule) Mockito.mock(org.mockito.Mockito.mock)
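The setup imports AppendCharFilter from AnalysisModuleTests rather than showing it. A char filter of the same shape, written from scratch to illustrate the fixture (a sketch, not the actual AppendCharFilter source): it buffers the whole input, appends a fixed suffix, and uses identity offset correction:

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.UncheckedIOException;

import org.apache.lucene.analysis.CharFilter;

// Illustrative fixture class, not taken from the OpenSearch code base.
class SuffixCharFilter extends CharFilter {

    // Reads everything from the delegate and returns a new Reader with the suffix attached.
    private static Reader withSuffix(Reader input, String suffix) {
        try {
            StringBuilder sb = new StringBuilder();
            char[] buf = new char[1024];
            for (int n = input.read(buf); n != -1; n = input.read(buf)) {
                sb.append(buf, 0, n);
            }
            return new StringReader(sb.append(suffix).toString());
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    SuffixCharFilter(Reader input, String suffix) {
        super(withSuffix(input, suffix));
    }

    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        return input.read(cbuf, off, len);
    }

    @Override
    protected int correct(int currentOff) {
        // Identity mapping: fine for tests that never inspect offsets inside the suffix.
        return currentOff;
    }
}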

Example 10 with AnalysisPlugin

Use of org.opensearch.plugins.AnalysisPlugin in project OpenSearch by opensearch-project.

From class AnalysisModuleTests, method testPluginPreConfiguredTokenizers.

/**
 * Tests that plugins can register pre-configured tokenizers that vary in behavior based on OpenSearch version, based on
 * Lucene version, or that do not vary by version at all.
 */
public void testPluginPreConfiguredTokenizers() throws IOException {
    // Simple tokenizer that always spits out a single token with some preconfigured characters
    final class FixedTokenizer extends Tokenizer {

        private final CharTermAttribute term = addAttribute(CharTermAttribute.class);

        private final char[] chars;

        private boolean read = false;

        protected FixedTokenizer(String chars) {
            this.chars = chars.toCharArray();
        }

        @Override
        public boolean incrementToken() throws IOException {
            if (read) {
                return false;
            }
            clearAttributes();
            read = true;
            term.resizeBuffer(chars.length);
            System.arraycopy(chars, 0, term.buffer(), 0, chars.length);
            term.setLength(chars.length);
            return true;
        }

        @Override
        public void reset() throws IOException {
            super.reset();
            read = false;
        }
    }
    AnalysisRegistry registry = new AnalysisModule(TestEnvironment.newEnvironment(emptyNodeSettings), singletonList(new AnalysisPlugin() {

        @Override
        public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
            return Arrays.asList(
                PreConfiguredTokenizer.singleton("no_version", () -> new FixedTokenizer("no_version")),
                PreConfiguredTokenizer.luceneVersion("lucene_version", luceneVersion -> new FixedTokenizer(luceneVersion.toString())),
                PreConfiguredTokenizer.openSearchVersion("opensearch_version", esVersion -> new FixedTokenizer(esVersion.toString()))
            );
        }
    })).getAnalysisRegistry();
    Version version = VersionUtils.randomVersion(random());
    IndexAnalyzers analyzers = getIndexAnalyzers(
        registry,
        Settings.builder()
            .put("index.analysis.analyzer.no_version.tokenizer", "no_version")
            .put("index.analysis.analyzer.lucene_version.tokenizer", "lucene_version")
            .put("index.analysis.analyzer.opensearch_version.tokenizer", "opensearch_version")
            .put(IndexMetadata.SETTING_VERSION_CREATED, version)
            .build()
    );
    assertTokenStreamContents(analyzers.get("no_version").tokenStream("", "test"), new String[] { "no_version" });
    assertTokenStreamContents(analyzers.get("lucene_version").tokenStream("", "test"), new String[] { version.luceneVersion.toString() });
    assertTokenStreamContents(analyzers.get("opensearch_version").tokenStream("", "test"), new String[] { version.toString() });
    // These are currently broken by https://github.com/elastic/elasticsearch/issues/24752
    // assertEquals("test" + (noVersionSupportsMultiTerm ? "no_version" : ""),
    //     analyzers.get("no_version").normalize("", "test").utf8ToString());
    // assertEquals("test" + (luceneVersionSupportsMultiTerm ? version.luceneVersion.toString() : ""),
    //     analyzers.get("lucene_version").normalize("", "test").utf8ToString());
    // assertEquals("test" + (opensearchVersionSupportsMultiTerm ? version.toString() : ""),
    //     analyzers.get("opensearch_version").normalize("", "test").utf8ToString());
}
Also used : AnalysisRegistry(org.opensearch.index.analysis.AnalysisRegistry) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) Version(org.opensearch.Version) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) Collections.singletonList(java.util.Collections.singletonList) List(java.util.List) PreConfiguredTokenizer(org.opensearch.index.analysis.PreConfiguredTokenizer) MockTokenizer(org.apache.lucene.tests.analysis.MockTokenizer) Tokenizer(org.apache.lucene.analysis.Tokenizer) AnalysisPlugin(org.opensearch.plugins.AnalysisPlugin)
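PreConfiguredTokenizer exposes the same three version-keyed variants as PreConfiguredCharFilter in Example 6, minus the multi-term flag. A condensed sketch using Lucene's KeywordTokenizer so it compiles on its own (plugin class and tokenizer names are illustrative):

import java.util.Arrays;
import java.util.List;

import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.opensearch.index.analysis.PreConfiguredTokenizer;
import org.opensearch.plugins.AnalysisPlugin;
import org.opensearch.plugins.Plugin;

// Illustrative plugin class, not taken from the OpenSearch code base.
public class VersionedTokenizerPlugin extends Plugin implements AnalysisPlugin {

    @Override
    public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
        return Arrays.asList(
            // One supplier, identical on every version.
            PreConfiguredTokenizer.singleton("keyword_fixed", KeywordTokenizer::new),
            // Factory keyed by the index's Lucene version.
            PreConfiguredTokenizer.luceneVersion("keyword_by_lucene", luceneVersion -> new KeywordTokenizer()),
            // Factory keyed by the index's OpenSearch version.
            PreConfiguredTokenizer.openSearchVersion("keyword_by_opensearch", openSearchVersion -> new KeywordTokenizer())
        );
    }
}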

Aggregations

AnalysisPlugin (org.opensearch.plugins.AnalysisPlugin): 10
Settings (org.opensearch.common.settings.Settings): 7
Environment (org.opensearch.env.Environment): 7
IndexSettings (org.opensearch.index.IndexSettings): 7
List (java.util.List): 6
TokenStream (org.apache.lucene.analysis.TokenStream): 6
Version (org.opensearch.Version): 6
AnalysisRegistry (org.opensearch.index.analysis.AnalysisRegistry): 6
TestEnvironment (org.opensearch.env.TestEnvironment): 5
PreConfiguredTokenizer (org.opensearch.index.analysis.PreConfiguredTokenizer): 5
IOException (java.io.IOException): 4
Collections.singletonList (java.util.Collections.singletonList): 4
Map (java.util.Map): 4
IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata): 4
CharFilterFactory (org.opensearch.index.analysis.CharFilterFactory): 4
IndexAnalyzers (org.opensearch.index.analysis.IndexAnalyzers): 4
PreConfiguredCharFilter (org.opensearch.index.analysis.PreConfiguredCharFilter): 4
TokenFilterFactory (org.opensearch.index.analysis.TokenFilterFactory): 4
TokenizerFactory (org.opensearch.index.analysis.TokenizerFactory): 4
NamedRegistry (org.opensearch.common.NamedRegistry): 3