Search in sources :

Example 1 with AnalysisRegistry

use of org.opensearch.index.analysis.AnalysisRegistry in project OpenSearch by opensearch-project.

the class AnalysisModuleTests method testPluginPreConfiguredCharFilters.

/**
 * Registers three pre-configured char filters through a plugin (one version-agnostic, one keyed on the
 * Lucene version and one keyed on the OpenSearch version) and checks both what each resulting analyzer
 * emits for a token stream and what it produces from {@code normalize}, the latter depending on the
 * randomly chosen multi-term flag of each filter.
 */
public void testPluginPreConfiguredCharFilters() throws IOException {
    // Drawn in a fixed order (no_version, lucene_version, opensearch_version) so a given seed reproduces.
    boolean multiTermForNoVersion = randomBoolean();
    boolean multiTermForLucene = randomBoolean();
    boolean multiTermForOpenSearch = randomBoolean();

    AnalysisPlugin plugin = new AnalysisPlugin() {

        @Override
        public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
            PreConfiguredCharFilter unversioned = PreConfiguredCharFilter.singleton(
                "no_version",
                multiTermForNoVersion,
                reader -> new AppendCharFilter(reader, "no_version")
            );
            PreConfiguredCharFilter byLucene = PreConfiguredCharFilter.luceneVersion(
                "lucene_version",
                multiTermForLucene,
                (reader, luceneVersion) -> new AppendCharFilter(reader, luceneVersion.toString())
            );
            PreConfiguredCharFilter byOpenSearch = PreConfiguredCharFilter.openSearchVersion(
                "opensearch_version",
                multiTermForOpenSearch,
                (reader, openSearchVersion) -> new AppendCharFilter(reader, openSearchVersion.toString())
            );
            return Arrays.asList(unversioned, byLucene, byOpenSearch);
        }

        @Override
        public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
            // A mock keyword tokenizer is required: alpha / beta version strings would otherwise be split at the dash.
            AnalysisProvider<TokenizerFactory> keywordProvider = (indexSettings, environment, name, settings) -> TokenizerFactory
                .newFactory(name, () -> new MockTokenizer(MockTokenizer.KEYWORD, false));
            return singletonMap("keyword", keywordProvider);
        }
    };
    AnalysisRegistry registry = new AnalysisModule(TestEnvironment.newEnvironment(emptyNodeSettings), singletonList(plugin))
        .getAnalysisRegistry();

    Version version = VersionUtils.randomVersion(random());
    // One analyzer per char filter, each paired with the mock "keyword" tokenizer registered above.
    Settings analyzerSettings = Settings.builder()
        .put("index.analysis.analyzer.no_version.tokenizer", "keyword")
        .put("index.analysis.analyzer.no_version.char_filter", "no_version")
        .put("index.analysis.analyzer.lucene_version.tokenizer", "keyword")
        .put("index.analysis.analyzer.lucene_version.char_filter", "lucene_version")
        .put("index.analysis.analyzer.opensearch_version.tokenizer", "keyword")
        .put("index.analysis.analyzer.opensearch_version.char_filter", "opensearch_version")
        .put(IndexMetadata.SETTING_VERSION_CREATED, version)
        .build();
    IndexAnalyzers analyzers = getIndexAnalyzers(registry, analyzerSettings);

    // Token streams always run through the char filter, so the suffix is always appended.
    assertTokenStreamContents(analyzers.get("no_version").tokenStream("", "test"), new String[] { "testno_version" });
    assertTokenStreamContents(analyzers.get("lucene_version").tokenStream("", "test"), new String[] { "test" + version.luceneVersion });
    assertTokenStreamContents(analyzers.get("opensearch_version").tokenStream("", "test"), new String[] { "test" + version });

    // normalize() is expected to carry the suffix only when the filter was registered with its multi-term flag set.
    String normalizedNoVersion = "test" + (multiTermForNoVersion ? "no_version" : "");
    assertEquals(normalizedNoVersion, analyzers.get("no_version").normalize("", "test").utf8ToString());
    String normalizedLucene = "test" + (multiTermForLucene ? version.luceneVersion.toString() : "");
    assertEquals(normalizedLucene, analyzers.get("lucene_version").normalize("", "test").utf8ToString());
    String normalizedOpenSearch = "test" + (multiTermForOpenSearch ? version.toString() : "");
    assertEquals(normalizedOpenSearch, analyzers.get("opensearch_version").normalize("", "test").utf8ToString());
}
Also used : Arrays(java.util.Arrays) Matchers.either(org.hamcrest.Matchers.either) Version(org.opensearch.Version) StopTokenFilterFactory(org.opensearch.index.analysis.StopTokenFilterFactory) Collections.singletonList(java.util.Collections.singletonList) AnalysisRegistry(org.opensearch.index.analysis.AnalysisRegistry) Directory(org.apache.lucene.store.Directory) Map(java.util.Map) PreConfiguredTokenizer(org.opensearch.index.analysis.PreConfiguredTokenizer) CustomAnalyzer(org.opensearch.index.analysis.CustomAnalyzer) Path(java.nio.file.Path) PreConfiguredTokenFilter(org.opensearch.index.analysis.PreConfiguredTokenFilter) OpenSearchTestCase(org.opensearch.test.OpenSearchTestCase) Set(java.util.Set) Settings(org.opensearch.common.settings.Settings) Reader(java.io.Reader) StandardCharsets(java.nio.charset.StandardCharsets) UncheckedIOException(java.io.UncheckedIOException) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) List(java.util.List) MatcherAssert(org.hamcrest.MatcherAssert) Matchers.equalTo(org.hamcrest.Matchers.equalTo) IndexSettings(org.opensearch.index.IndexSettings) TokenFilter(org.apache.lucene.analysis.TokenFilter) BaseTokenStreamTestCase.assertTokenStreamContents(org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents) XContentType(org.opensearch.common.xcontent.XContentType) Dictionary(org.apache.lucene.analysis.hunspell.Dictionary) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) MyFilterTokenFilterFactory(org.opensearch.index.analysis.MyFilterTokenFilterFactory) IndexSettingsModule(org.opensearch.test.IndexSettingsModule) TestEnvironment(org.opensearch.env.TestEnvironment) TokenizerFactory(org.opensearch.index.analysis.TokenizerFactory) Tokenizer(org.apache.lucene.analysis.Tokenizer) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) TokenFilterFactory(org.opensearch.index.analysis.TokenFilterFactory) 
CharFilter(org.apache.lucene.analysis.CharFilter) LegacyESVersion(org.opensearch.LegacyESVersion) Analysis(org.opensearch.index.analysis.Analysis) VersionUtils(org.opensearch.test.VersionUtils) Streams(org.opensearch.common.io.Streams) CharFilterFactory(org.opensearch.index.analysis.CharFilterFactory) StandardTokenizerFactory(org.opensearch.index.analysis.StandardTokenizerFactory) AnalysisProvider(org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider) Collections.singletonMap(java.util.Collections.singletonMap) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) Environment(org.opensearch.env.Environment) TokenStream(org.apache.lucene.analysis.TokenStream) Files(java.nio.file.Files) BufferedWriter(java.io.BufferedWriter) Analyzer(org.apache.lucene.analysis.Analyzer) IOException(java.io.IOException) PreConfiguredCharFilter(org.opensearch.index.analysis.PreConfiguredCharFilter) AnalysisPlugin(org.opensearch.plugins.AnalysisPlugin) StringReader(java.io.StringReader) NIOFSDirectory(org.apache.lucene.store.NIOFSDirectory) InputStream(java.io.InputStream) TokenizerFactory(org.opensearch.index.analysis.TokenizerFactory) StandardTokenizerFactory(org.opensearch.index.analysis.StandardTokenizerFactory) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) AnalysisRegistry(org.opensearch.index.analysis.AnalysisRegistry) Version(org.opensearch.Version) LegacyESVersion(org.opensearch.LegacyESVersion) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) Collections.singletonList(java.util.Collections.singletonList) List(java.util.List) Map(java.util.Map) Collections.singletonMap(java.util.Collections.singletonMap) AnalysisPlugin(org.opensearch.plugins.AnalysisPlugin)

Example 2 with AnalysisRegistry

use of org.opensearch.index.analysis.AnalysisRegistry in project OpenSearch by opensearch-project.

the class AnalysisModuleTests method testPluginPreConfiguredTokenFilters.

/**
 * Registers three pre-configured token filters through a plugin (one version-agnostic, one keyed on the
 * Lucene version and one keyed on the OpenSearch version) and checks both what each resulting analyzer
 * emits for a token stream and what it produces from {@code normalize}, the latter depending on the
 * randomly chosen multi-term flag of each filter.
 */
public void testPluginPreConfiguredTokenFilters() throws IOException {
    // Drawn in a fixed order (no_version, lucene_version, opensearch_version) so a given seed reproduces.
    boolean multiTermForNoVersion = randomBoolean();
    boolean multiTermForLucene = randomBoolean();
    boolean multiTermForOpenSearch = randomBoolean();

    AnalysisPlugin plugin = new AnalysisPlugin() {

        @Override
        public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
            PreConfiguredTokenFilter unversioned = PreConfiguredTokenFilter.singleton(
                "no_version",
                multiTermForNoVersion,
                stream -> new AppendTokenFilter(stream, "no_version")
            );
            PreConfiguredTokenFilter byLucene = PreConfiguredTokenFilter.luceneVersion(
                "lucene_version",
                multiTermForLucene,
                (stream, luceneVersion) -> new AppendTokenFilter(stream, luceneVersion.toString())
            );
            PreConfiguredTokenFilter byOpenSearch = PreConfiguredTokenFilter.openSearchVersion(
                "opensearch_version",
                multiTermForOpenSearch,
                (stream, openSearchVersion) -> new AppendTokenFilter(stream, openSearchVersion.toString())
            );
            return Arrays.asList(unversioned, byLucene, byOpenSearch);
        }
    };
    AnalysisRegistry registry = new AnalysisModule(TestEnvironment.newEnvironment(emptyNodeSettings), singletonList(plugin))
        .getAnalysisRegistry();

    Version version = VersionUtils.randomVersion(random());
    // One analyzer per token filter, each on top of the "standard" tokenizer.
    Settings analyzerSettings = Settings.builder()
        .put("index.analysis.analyzer.no_version.tokenizer", "standard")
        .put("index.analysis.analyzer.no_version.filter", "no_version")
        .put("index.analysis.analyzer.lucene_version.tokenizer", "standard")
        .put("index.analysis.analyzer.lucene_version.filter", "lucene_version")
        .put("index.analysis.analyzer.opensearch_version.tokenizer", "standard")
        .put("index.analysis.analyzer.opensearch_version.filter", "opensearch_version")
        .put(IndexMetadata.SETTING_VERSION_CREATED, version)
        .build();
    IndexAnalyzers analyzers = getIndexAnalyzers(registry, analyzerSettings);

    // Token streams always run through the token filter, so the suffix is always appended.
    assertTokenStreamContents(analyzers.get("no_version").tokenStream("", "test"), new String[] { "testno_version" });
    assertTokenStreamContents(analyzers.get("lucene_version").tokenStream("", "test"), new String[] { "test" + version.luceneVersion });
    assertTokenStreamContents(analyzers.get("opensearch_version").tokenStream("", "test"), new String[] { "test" + version });

    // normalize() is expected to carry the suffix only when the filter was registered with its multi-term flag set.
    String normalizedNoVersion = "test" + (multiTermForNoVersion ? "no_version" : "");
    assertEquals(normalizedNoVersion, analyzers.get("no_version").normalize("", "test").utf8ToString());
    String normalizedLucene = "test" + (multiTermForLucene ? version.luceneVersion.toString() : "");
    assertEquals(normalizedLucene, analyzers.get("lucene_version").normalize("", "test").utf8ToString());
    String normalizedOpenSearch = "test" + (multiTermForOpenSearch ? version.toString() : "");
    assertEquals(normalizedOpenSearch, analyzers.get("opensearch_version").normalize("", "test").utf8ToString());
}
Also used : AnalysisRegistry(org.opensearch.index.analysis.AnalysisRegistry) Version(org.opensearch.Version) LegacyESVersion(org.opensearch.LegacyESVersion) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) Collections.singletonList(java.util.Collections.singletonList) List(java.util.List) AnalysisPlugin(org.opensearch.plugins.AnalysisPlugin)

Example 3 with AnalysisRegistry

use of org.opensearch.index.analysis.AnalysisRegistry in project OpenSearch by opensearch-project.

the class OpenSearchTestCase method createTestAnalysis.

/**
 * Builds a {@code TestAnalysis} from an analysis registry created for the given node settings and plugins.
 * The result bundles the index analyzers together with the token filter, tokenizer and char filter
 * factories that the registry builds for {@code indexSettings}.
 *
 * @param indexSettings   index settings handed to every registry build call
 * @param nodeSettings    node settings used to create the test environment
 * @param analysisPlugins plugins passed to the analysis module
 * @return the assembled test analysis
 * @throws IOException declared by this method; may be raised while the registry is created or its components are built
 */
public static TestAnalysis createTestAnalysis(IndexSettings indexSettings, Settings nodeSettings, AnalysisPlugin... analysisPlugins) throws IOException {
    final AnalysisRegistry registry = new AnalysisModule(
        TestEnvironment.newEnvironment(nodeSettings),
        Arrays.asList(analysisPlugins)
    ).getAnalysisRegistry();
    // Arguments are evaluated left to right: analyzers, token filters, tokenizers, char filters.
    return new TestAnalysis(
        registry.build(indexSettings),
        registry.buildTokenFilterFactories(indexSettings),
        registry.buildTokenizerFactories(indexSettings),
        registry.buildCharFilterFactories(indexSettings)
    );
}
Also used : AnalysisRegistry(org.opensearch.index.analysis.AnalysisRegistry) NodeEnvironment(org.opensearch.env.NodeEnvironment) Environment(org.opensearch.env.Environment) TestEnvironment(org.opensearch.env.TestEnvironment) AnalysisModule(org.opensearch.indices.analysis.AnalysisModule)

Example 4 with AnalysisRegistry

use of org.opensearch.index.analysis.AnalysisRegistry in project OpenSearch by opensearch-project.

the class IndexModuleTests method testIndexAnalyzersCleanedUpIfIndexServiceCreationFails.

/**
 * Checks that analyzers handed out by a custom provider are closed again when creating the index
 * service fails. Every analyzer the provider creates is recorded in a set and removes itself on
 * {@code close()}; after the failed creation the set must be empty.
 */
public void testIndexAnalyzersCleanedUpIfIndexServiceCreationFails() {
    Settings settings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .build();
    final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("foo", settings);

    // Analyzers that have been created by the provider below and not yet closed.
    final HashSet<Analyzer> liveAnalyzers = new HashSet<>();
    final AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> trackingProvider = (
        unusedIndexSettings,
        unusedEnvironment,
        unusedName,
        unusedSettings
    ) -> new AnalyzerProvider<Analyzer>() {

        @Override
        public Analyzer get() {
            final Analyzer tracked = new Analyzer() {

                @Override
                public void close() {
                    super.close();
                    // Closing is what takes the analyzer back out of the tracking set.
                    liveAnalyzers.remove(this);
                }

                @Override
                protected TokenStreamComponents createComponents(String fieldName) {
                    return new TokenStreamComponents(new StandardTokenizer());
                }
            };
            liveAnalyzers.add(tracked);
            return tracked;
        }

        @Override
        public AnalyzerScope scope() {
            return AnalyzerScope.INDEX;
        }

        @Override
        public String name() {
            return "test";
        }
    };

    // Only the analyzer-provider slot is populated; every other registry map stays empty.
    final AnalysisRegistry registryWithTracking = new AnalysisRegistry(
        environment,
        emptyMap(),
        emptyMap(),
        emptyMap(),
        singletonMap("test", trackingProvider),
        emptyMap(),
        emptyMap(),
        emptyMap(),
        emptyMap(),
        emptyMap()
    );
    IndexModule indexModule = createIndexModule(indexSettings, registryWithTracking);

    // Shutting the pool down first is what makes index service creation fail.
    threadPool.shutdown();
    expectThrows(OpenSearchRejectedExecutionException.class, () -> newIndexService(indexModule));
    assertThat(liveAnalyzers, empty());
}
Also used : OpenSearchRejectedExecutionException(org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException) IndexRemovalReason(org.opensearch.indices.cluster.IndicesClusterStateService.AllocatedIndices.IndexRemovalReason) CREATE_INDEX(org.opensearch.index.IndexService.IndexCreationContext.CREATE_INDEX) IndicesModule(org.opensearch.indices.IndicesModule) Matchers.hasToString(org.hamcrest.Matchers.hasToString) CheckedFunction(org.opensearch.common.CheckedFunction) Term(org.apache.lucene.index.Term) NoneCircuitBreakerService(org.opensearch.indices.breaker.NoneCircuitBreakerService) TestThreadPool(org.opensearch.threadpool.TestThreadPool) Version(org.opensearch.Version) AnalyzerScope(org.opensearch.index.analysis.AnalyzerScope) CircuitBreaker(org.opensearch.common.breaker.CircuitBreaker) ThreadContext(org.opensearch.common.util.concurrent.ThreadContext) AssertingDirectoryReader(org.apache.lucene.index.AssertingDirectoryReader) IndexStorePlugin(org.opensearch.plugins.IndexStorePlugin) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) AnalysisRegistry(org.opensearch.index.analysis.AnalysisRegistry) IndexFieldDataCache(org.opensearch.index.fielddata.IndexFieldDataCache) RecoveryState(org.opensearch.indices.recovery.RecoveryState) Directory(org.apache.lucene.store.Directory) Property(org.opensearch.common.settings.Setting.Property) Map(java.util.Map) MockEngineFactory(org.opensearch.test.engine.MockEngineFactory) QueryCachingPolicy(org.apache.lucene.search.QueryCachingPolicy) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) AlreadySetException(org.apache.lucene.util.SetOnce.AlreadySetException) NodeEnvironment(org.opensearch.env.NodeEnvironment) ScriptService(org.opensearch.script.ScriptService) IndexEventListener(org.opensearch.index.shard.IndexEventListener) DirectoryReader(org.apache.lucene.index.DirectoryReader) OpenSearchTestCase(org.opensearch.test.OpenSearchTestCase) Set(java.util.Set) 
Settings(org.opensearch.common.settings.Settings) Engine(org.opensearch.index.engine.Engine) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) FieldInvertState(org.apache.lucene.index.FieldInvertState) EngineConfigFactory(org.opensearch.index.engine.EngineConfigFactory) FsDirectoryFactory(org.opensearch.index.store.FsDirectoryFactory) AnalyzerProvider(org.opensearch.index.analysis.AnalyzerProvider) Matchers.is(org.hamcrest.Matchers.is) BigArrays(org.opensearch.common.util.BigArrays) ShardLock(org.opensearch.env.ShardLock) ClusterServiceUtils(org.opensearch.test.ClusterServiceUtils) Matchers.containsString(org.hamcrest.Matchers.containsString) BM25Similarity(org.apache.lucene.search.similarities.BM25Similarity) IndexNameExpressionResolver(org.opensearch.cluster.metadata.IndexNameExpressionResolver) Uid(org.opensearch.index.mapper.Uid) IndexSettingsModule(org.opensearch.test.IndexSettingsModule) Mockito.mock(org.mockito.Mockito.mock) TestEnvironment(org.opensearch.env.TestEnvironment) StandardTokenizer(org.apache.lucene.analysis.standard.StandardTokenizer) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) SimilarityService(org.opensearch.index.similarity.SimilarityService) QueryCache(org.opensearch.index.cache.query.QueryCache) ThreadPool(org.opensearch.threadpool.ThreadPool) Weight(org.apache.lucene.search.Weight) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) IndicesFieldDataCache(org.opensearch.indices.fielddata.cache.IndicesFieldDataCache) RecoverySource(org.opensearch.cluster.routing.RecoverySource) InternalEngineTests(org.opensearch.index.engine.InternalEngineTests) HashSet(java.util.HashSet) ReaderContext(org.opensearch.search.internal.ReaderContext) IndexingOperationListener(org.opensearch.index.shard.IndexingOperationListener) MapperRegistry(org.opensearch.indices.mapper.MapperRegistry) Similarity(org.apache.lucene.search.similarities.Similarity) ParsedDocument(org.opensearch.index.mapper.ParsedDocument) 
Collections.singletonMap(java.util.Collections.singletonMap) ShardPath(org.opensearch.index.shard.ShardPath) IndicesQueryCache(org.opensearch.indices.IndicesQueryCache) TermStatistics(org.apache.lucene.search.TermStatistics) IndexQueryCache(org.opensearch.index.cache.query.IndexQueryCache) Environment(org.opensearch.env.Environment) InternalEngineFactory(org.opensearch.index.engine.InternalEngineFactory) Collections.emptyMap(java.util.Collections.emptyMap) Matchers.empty(org.hamcrest.Matchers.empty) Setting(org.opensearch.common.settings.Setting) Analyzer(org.apache.lucene.analysis.Analyzer) DisabledQueryCache(org.opensearch.index.cache.query.DisabledQueryCache) Matchers(org.hamcrest.Matchers) IOException(java.io.IOException) SearchOperationListener(org.opensearch.index.shard.SearchOperationListener) ShardRouting(org.opensearch.cluster.routing.ShardRouting) IOUtils(org.opensearch.core.internal.io.IOUtils) ShardId(org.opensearch.index.shard.ShardId) TimeUnit(java.util.concurrent.TimeUnit) CollectionStatistics(org.apache.lucene.search.CollectionStatistics) AnalysisModule(org.opensearch.indices.analysis.AnalysisModule) CircuitBreakerService(org.opensearch.indices.breaker.CircuitBreakerService) ClusterService(org.opensearch.cluster.service.ClusterService) Collections(java.util.Collections) PageCacheRecycler(org.opensearch.common.util.PageCacheRecycler) NonNegativeScoresSimilarity(org.opensearch.index.similarity.NonNegativeScoresSimilarity) Matchers.hasToString(org.hamcrest.Matchers.hasToString) Matchers.containsString(org.hamcrest.Matchers.containsString) Analyzer(org.apache.lucene.analysis.Analyzer) AnalyzerProvider(org.opensearch.index.analysis.AnalyzerProvider) AnalysisRegistry(org.opensearch.index.analysis.AnalysisRegistry) StandardTokenizer(org.apache.lucene.analysis.standard.StandardTokenizer) AnalysisModule(org.opensearch.indices.analysis.AnalysisModule) Settings(org.opensearch.common.settings.Settings) HashSet(java.util.HashSet)

Example 5 with AnalysisRegistry

use of org.opensearch.index.analysis.AnalysisRegistry in project OpenSearch by opensearch-project.

the class IndexModuleTests method setUp.

/**
 * Initialises the fixtures shared by the tests in this class: settings rooted at a fresh temporary
 * home directory, the index settings for index {@code "foo"}, an analysis registry with no registered
 * components, and the supporting services (thread pool, circuit breaker, big arrays, script service,
 * cluster service, node environment and mapper registry).
 */
@Override
public void setUp() throws Exception {
    super.setUp();

    final String homePath = createTempDir().toString();
    settings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put(Environment.PATH_HOME_SETTING.getKey(), homePath)
        .build();

    indicesQueryCache = new IndicesQueryCache(settings);
    indexSettings = IndexSettingsModule.newIndexSettings("foo", settings);
    index = indexSettings.getIndex();
    environment = TestEnvironment.newEnvironment(settings);

    // Registry with every component map left empty.
    emptyAnalysisRegistry = new AnalysisRegistry(
        environment,
        emptyMap(),
        emptyMap(),
        emptyMap(),
        emptyMap(),
        emptyMap(),
        emptyMap(),
        emptyMap(),
        emptyMap(),
        emptyMap()
    );

    // The thread pool is created here because the cluster service below is built from it.
    threadPool = new TestThreadPool("test");
    circuitBreakerService = new NoneCircuitBreakerService();
    bigArrays = new BigArrays(new PageCacheRecycler(settings), circuitBreakerService, CircuitBreaker.REQUEST);
    scriptService = new ScriptService(settings, emptyMap(), emptyMap());
    clusterService = ClusterServiceUtils.createClusterService(threadPool);
    nodeEnvironment = new NodeEnvironment(settings, environment);
    mapperRegistry = new IndicesModule(Collections.emptyList()).getMapperRegistry();
}
Also used : IndicesQueryCache(org.opensearch.indices.IndicesQueryCache) ScriptService(org.opensearch.script.ScriptService) AnalysisRegistry(org.opensearch.index.analysis.AnalysisRegistry) BigArrays(org.opensearch.common.util.BigArrays) IndicesModule(org.opensearch.indices.IndicesModule) PageCacheRecycler(org.opensearch.common.util.PageCacheRecycler) NodeEnvironment(org.opensearch.env.NodeEnvironment) TestThreadPool(org.opensearch.threadpool.TestThreadPool) NoneCircuitBreakerService(org.opensearch.indices.breaker.NoneCircuitBreakerService)

Aggregations

AnalysisRegistry (org.opensearch.index.analysis.AnalysisRegistry)6 Version (org.opensearch.Version)4 Collections.singletonList (java.util.Collections.singletonList)3 List (java.util.List)3 IOException (java.io.IOException)2 Collections.singletonMap (java.util.Collections.singletonMap)2 Map (java.util.Map)2 Set (java.util.Set)2 Analyzer (org.apache.lucene.analysis.Analyzer)2 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)2 Tokenizer (org.apache.lucene.analysis.Tokenizer)2 CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute)2 Directory (org.apache.lucene.store.Directory)2 LegacyESVersion (org.opensearch.LegacyESVersion)2 BigArrays (org.opensearch.common.util.BigArrays)2 PageCacheRecycler (org.opensearch.common.util.PageCacheRecycler)2 Environment (org.opensearch.env.Environment)2 NodeEnvironment (org.opensearch.env.NodeEnvironment)2 TestEnvironment (org.opensearch.env.TestEnvironment)2 IndexAnalyzers (org.opensearch.index.analysis.IndexAnalyzers)2