Search in sources :

Example 26 with Settings

Use of org.opensearch.common.settings.Settings in the OpenSearch project (opensearch-project).

The class KeepTypesFilterFactoryTests defines the method testKeepTypesInclude.

/**
 * Checks the {@code keep_types} token filter in "include" mode: only tokens whose
 * type appears in the configured keep-types list are retained. Include mode is
 * exercised both implicitly (it is the default) and explicitly, chosen at random.
 */
public void testKeepTypesInclude() throws IOException {
    Settings.Builder builder = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .put(BASE_SETTING + ".type", "keep_types")
        .putList(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_KEY, new String[] { "<NUM>", "<SOMETHINGELSE>" });
    // "include" is the default mode; sometimes spell it out explicitly so both
    // configuration paths are covered across test runs
    if (random().nextBoolean()) {
        builder.put(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_MODE_KEY, KeepTypesFilterFactory.KeepTypesMode.INCLUDE);
    }
    OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(builder.build(), new CommonAnalysisPlugin());
    TokenFilterFactory filterFactory = analysis.tokenFilter.get("keep_numbers");
    assertThat(filterFactory, instanceOf(KeepTypesFilterFactory.class));
    Tokenizer tokenizer = new StandardTokenizer();
    tokenizer.setReader(new StringReader("Hello 123 world"));
    // Only the numeric token survives; position increment 2 shows "Hello" was dropped
    // but its position is preserved.
    assertTokenStreamContents(filterFactory.create(tokenizer), new String[] { "123" }, new int[] { 2 });
}
Also used : OpenSearchTestCase(org.opensearch.test.OpenSearchTestCase) StandardTokenizer(org.apache.lucene.analysis.standard.StandardTokenizer) StringReader(java.io.StringReader) Tokenizer(org.apache.lucene.analysis.Tokenizer) StandardTokenizer(org.apache.lucene.analysis.standard.StandardTokenizer) Settings(org.opensearch.common.settings.Settings) TokenFilterFactory(org.opensearch.index.analysis.TokenFilterFactory)

Example 27 with Settings

use of org.opensearch.common.settings.Settings in project OpenSearch by opensearch-project.

The class KeepTypesFilterFactoryTests defines the method testKeepTypesException.

/**
 * An unrecognized value for the keep_types "mode" setting must be rejected with a
 * descriptive {@link IllegalArgumentException} when the analysis chain is built.
 */
public void testKeepTypesException() throws IOException {
    final Settings badModeSettings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .put(BASE_SETTING + ".type", "keep_types")
        .putList(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_KEY, new String[] { "<NUM>", "<SOMETHINGELSE>" })
        .put(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_MODE_KEY, "bad_parameter")
        .build();
    IllegalArgumentException ex = expectThrows(
        IllegalArgumentException.class,
        () -> AnalysisTestsHelper.createTestAnalysisFromSettings(badModeSettings, new CommonAnalysisPlugin())
    );
    assertEquals("`keep_types` tokenfilter mode can only be [include] or [exclude] but was [bad_parameter].", ex.getMessage());
}
Also used : Settings(org.opensearch.common.settings.Settings)

Example 28 with Settings

use of org.opensearch.common.settings.Settings in project OpenSearch by opensearch-project.

The class MinHashFilterFactoryTests defines the method testSettings.

/**
 * Verifies explicit min_hash settings: with {@code with_rotation=false} empty
 * buckets are not back-filled, so a single input token yields exactly one
 * output token even though bucket_count would allow more.
 */
public void testSettings() throws IOException {
    final String filterPrefix = "index.analysis.filter.test_min_hash.";
    Settings settings = Settings.builder()
        .put(filterPrefix + "type", "min_hash")
        .put(filterPrefix + "hash_count", "1")
        .put(filterPrefix + "bucket_count", "2")
        .put(filterPrefix + "hash_set_size", "1")
        .put(filterPrefix + "with_rotation", false)
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();
    OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
    TokenFilterFactory minHashFilter = analysis.tokenFilter.get("test_min_hash");
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("sushi"));
    // bucket_count=2 and hash_set_size=1 could produce 2 tokens, but because
    // with_rotation is false, only 1 token is expected here.
    assertStreamHasNumberOfTokens(minHashFilter.create(tokenizer), 1);
}
Also used : WhitespaceTokenizer(org.apache.lucene.analysis.core.WhitespaceTokenizer) OpenSearchTestCase(org.opensearch.test.OpenSearchTestCase) StringReader(java.io.StringReader) WhitespaceTokenizer(org.apache.lucene.analysis.core.WhitespaceTokenizer) Tokenizer(org.apache.lucene.analysis.Tokenizer) Settings(org.opensearch.common.settings.Settings) TokenFilterFactory(org.opensearch.index.analysis.TokenFilterFactory)

Example 29 with Settings

use of org.opensearch.common.settings.Settings in project OpenSearch by opensearch-project.

The class MinHashFilterFactoryTests defines the method testDefault.

/**
 * Verifies the min_hash filter defaults (hash_count=1, bucket_count=512,
 * hash_set_size=1, with_rotation=true): rotation back-fills every bucket, so a
 * short input still produces hashCount * bucketCount * hashSetSize tokens.
 */
public void testDefault() throws IOException {
    // Documented defaults of the min_hash token filter. Renamed from snake_case
    // to lowerCamelCase per Java convention; "bucketCount" also corrects the
    // previous misnomer "default_bucket_size" (512 is the number of buckets).
    final int defaultHashCount = 1;
    final int defaultBucketCount = 512;
    final int defaultHashSetSize = 1;
    Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
    OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
    TokenFilterFactory tokenFilter = analysis.tokenFilter.get("min_hash");
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("the quick brown fox"));
    // with_rotation is true by default, and hash_set_size is 1, so even though the
    // source doesn't have enough tokens to fill all the buckets, we still expect
    // 512 tokens.
    assertStreamHasNumberOfTokens(tokenFilter.create(tokenizer), defaultHashCount * defaultBucketCount * defaultHashSetSize);
}
Also used : WhitespaceTokenizer(org.apache.lucene.analysis.core.WhitespaceTokenizer) OpenSearchTestCase(org.opensearch.test.OpenSearchTestCase) StringReader(java.io.StringReader) WhitespaceTokenizer(org.apache.lucene.analysis.core.WhitespaceTokenizer) Tokenizer(org.apache.lucene.analysis.Tokenizer) Settings(org.opensearch.common.settings.Settings) TokenFilterFactory(org.opensearch.index.analysis.TokenFilterFactory)

Example 30 with Settings

use of org.opensearch.common.settings.Settings in project OpenSearch by opensearch-project.

The class WhitespaceTokenizerFactoryTests defines the method testMaxTokenLength.

/**
 * Exercises the whitespace tokenizer's max_token_length setting: a small
 * explicit limit chops tokens into pieces, the default limit is 255, and
 * out-of-range values (too large or non-positive) are rejected with a
 * descriptive IllegalArgumentException.
 */
public void testMaxTokenLength() throws IOException {
    final Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();
    final IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(new Index("test", "_na_"), indexSettings);

    // max_token_length = 2: every run of non-whitespace chars is split into 2-char pieces
    final Settings shortLimitSettings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, 2).build();
    WhitespaceTokenizer tokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", shortLimitSettings).create();
    try (Reader reader = new StringReader("one, two, three")) {
        tokenizer.setReader(reader);
        assertTokenStreamContents(tokenizer, new String[] { "on", "e,", "tw", "o,", "th", "re", "e" });
    }

    // no setting: the default limit of 255 splits a 256-char token into 255 + 1
    tokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", Settings.EMPTY).create();
    final String veryLongToken = RandomStrings.randomAsciiAlphanumOfLength(random(), 256);
    try (Reader reader = new StringReader(veryLongToken)) {
        tokenizer.setReader(reader);
        assertTokenStreamContents(tokenizer, new String[] { veryLongToken.substring(0, 255), veryLongToken.substring(255) });
    }

    // a limit above 1024 * 1024 is rejected at create() time
    final Settings tooLongSettings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, 1024 * 1024 + 1).build();
    IllegalArgumentException e = expectThrows(
        IllegalArgumentException.class,
        () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", tooLongSettings).create()
    );
    assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 1048577", e.getMessage());

    // ... and so is a non-positive limit
    final Settings negativeSettings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, -1).build();
    e = expectThrows(
        IllegalArgumentException.class,
        () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", negativeSettings).create()
    );
    assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", e.getMessage());
}
Also used : WhitespaceTokenizer(org.apache.lucene.analysis.core.WhitespaceTokenizer) IndexSettings(org.opensearch.index.IndexSettings) StringReader(java.io.StringReader) Reader(java.io.Reader) StringReader(java.io.StringReader) Index(org.opensearch.index.Index) Settings(org.opensearch.common.settings.Settings) IndexSettings(org.opensearch.index.IndexSettings)

Aggregations

Settings (org.opensearch.common.settings.Settings)1293 IndexSettings (org.opensearch.index.IndexSettings)304 Matchers.containsString (org.hamcrest.Matchers.containsString)221 ClusterSettings (org.opensearch.common.settings.ClusterSettings)196 IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata)158 ClusterState (org.opensearch.cluster.ClusterState)150 IOException (java.io.IOException)127 ArrayList (java.util.ArrayList)127 Version (org.opensearch.Version)122 DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode)121 List (java.util.List)108 Index (org.opensearch.index.Index)106 Path (java.nio.file.Path)104 Map (java.util.Map)97 HashMap (java.util.HashMap)95 HashSet (java.util.HashSet)86 ShardId (org.opensearch.index.shard.ShardId)86 IndexScopedSettings (org.opensearch.common.settings.IndexScopedSettings)83 Environment (org.opensearch.env.Environment)82 ShardRouting (org.opensearch.cluster.routing.ShardRouting)71