use of org.opensearch.common.settings.Settings in project OpenSearch by opensearch-project.
the class KeepTypesFilterFactoryTests method testKeepTypesInclude.
public void testKeepTypesInclude() throws IOException {
Settings.Builder settingsBuilder = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).put(BASE_SETTING + ".type", "keep_types").putList(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_KEY, new String[] { "<NUM>", "<SOMETHINGELSE>" });
// either use default mode or set "include" mode explicitly
if (random().nextBoolean()) {
settingsBuilder.put(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_MODE_KEY, KeepTypesFilterFactory.KeepTypesMode.INCLUDE);
}
Settings settings = settingsBuilder.build();
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("keep_numbers");
assertThat(tokenFilter, instanceOf(KeepTypesFilterFactory.class));
String source = "Hello 123 world";
String[] expected = new String[] { "123" };
Tokenizer tokenizer = new StandardTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected, new int[] { 2 });
}
use of org.opensearch.common.settings.Settings in project OpenSearch by opensearch-project.
the class KeepTypesFilterFactoryTests method testKeepTypesException.
public void testKeepTypesException() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).put(BASE_SETTING + ".type", "keep_types").putList(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_KEY, new String[] { "<NUM>", "<SOMETHINGELSE>" }).put(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_MODE_KEY, "bad_parameter").build();
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin()));
assertEquals("`keep_types` tokenfilter mode can only be [include] or [exclude] but was [bad_parameter].", ex.getMessage());
}
use of org.opensearch.common.settings.Settings in project OpenSearch by opensearch-project.
the class MinHashFilterFactoryTests method testSettings.
public void testSettings() throws IOException {
Settings settings = Settings.builder().put("index.analysis.filter.test_min_hash.type", "min_hash").put("index.analysis.filter.test_min_hash.hash_count", "1").put("index.analysis.filter.test_min_hash.bucket_count", "2").put("index.analysis.filter.test_min_hash.hash_set_size", "1").put("index.analysis.filter.test_min_hash.with_rotation", false).put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("test_min_hash");
String source = "sushi";
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
// despite the fact that bucket_count is 2 and hash_set_size is 1,
// because with_rotation is false, we only expect 1 token here.
assertStreamHasNumberOfTokens(tokenFilter.create(tokenizer), 1);
}
use of org.opensearch.common.settings.Settings in project OpenSearch by opensearch-project.
the class MinHashFilterFactoryTests method testDefault.
public void testDefault() throws IOException {
int default_hash_count = 1;
int default_bucket_size = 512;
int default_hash_set_size = 1;
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("min_hash");
String source = "the quick brown fox";
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
// with_rotation is true by default, and hash_set_size is 1, so even though the source doesn't
// have enough tokens to fill all the buckets, we still expect 512 tokens.
assertStreamHasNumberOfTokens(tokenFilter.create(tokenizer), default_hash_count * default_bucket_size * default_hash_set_size);
}
use of org.opensearch.common.settings.Settings in project OpenSearch by opensearch-project.
the class WhitespaceTokenizerFactoryTests method testMaxTokenLength.
public void testMaxTokenLength() throws IOException {
final Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();
IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(new Index("test", "_na_"), indexSettings);
final Settings settings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, 2).build();
WhitespaceTokenizer tokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", settings).create();
try (Reader reader = new StringReader("one, two, three")) {
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer, new String[] { "on", "e,", "tw", "o,", "th", "re", "e" });
}
final Settings defaultSettings = Settings.EMPTY;
tokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", defaultSettings).create();
String veryLongToken = RandomStrings.randomAsciiAlphanumOfLength(random(), 256);
try (Reader reader = new StringReader(veryLongToken)) {
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer, new String[] { veryLongToken.substring(0, 255), veryLongToken.substring(255) });
}
final Settings tooLongSettings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, 1024 * 1024 + 1).build();
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", tooLongSettings).create());
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 1048577", e.getMessage());
final Settings negativeSettings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, -1).build();
e = expectThrows(IllegalArgumentException.class, () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", negativeSettings).create());
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", e.getMessage());
}
Aggregations