use of org.opensearch.common.settings.Settings in project OpenSearch by opensearch-project.
the class KeepFilterFactoryTests method testKeepWordsPathSettings.
/**
 * Checks that invalid {@code keep_words_path} configurations of a {@code keep} token filter
 * are rejected with an {@link IllegalArgumentException}.
 *
 * <p>Two configurations are exercised, and each is expected to fail while the analysis is
 * being built:
 * <ol>
 *   <li>{@code keep_words_path} set to {@code does/not/exists.txt};</li>
 *   <li>the same settings with an additional inline {@code keep_words} list.</li>
 * </ol>
 * Any {@link IOException} is treated as a test failure and is attached as the cause of the
 * resulting {@link AssertionError} so the underlying problem stays visible in the report.
 */
public void testKeepWordsPathSettings() {
    Settings settings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .put("index.analysis.filter.non_broken_keep_filter.type", "keep")
        .put("index.analysis.filter.non_broken_keep_filter.keep_words_path", "does/not/exists.txt")
        .build();
    try {
        // the keep_words_path above is expected to be rejected
        AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
        fail("expected an exception due to non existent keep_words_path");
    } catch (IllegalArgumentException expected) {
        // expected: this is the exception type the test is looking for
    } catch (IOException e) {
        // same failure message as before, but keep the original exception as the cause
        throw new AssertionError("expected IAE", e);
    }

    settings = Settings.builder()
        .put(settings)
        .putList("index.analysis.filter.non_broken_keep_filter.keep_words", "test")
        .build();
    try {
        // keep_words and keep_words_path are now both set; the combination must be rejected
        AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
        fail("expected an exception indicating that you can't use [keep_words_path] with [keep_words] ");
    } catch (IllegalArgumentException expected) {
        // expected: this is the exception type the test is looking for
    } catch (IOException e) {
        // same failure message as before, but keep the original exception as the cause
        throw new AssertionError("expected IAE", e);
    }
}
use of org.opensearch.common.settings.Settings in project OpenSearch by opensearch-project.
the class CommonGramsTokenFilterFactoryTests method testWithoutCommonWordsMatch.
/**
 * Runs a {@code common_grams} filter over input that contains none of the configured
 * {@code common_words} ("chromosome", "protein") and asserts that the tokens come out exactly
 * as the whitespace tokenizer produced them.
 *
 * <p>The check is done twice: once without a {@code query_mode} setting and once with
 * {@code query_mode} explicitly set to {@code false}.
 *
 * @throws IOException if building the analysis or consuming the token stream fails
 */
public void testWithoutCommonWordsMatch() throws IOException {
    // --- first pass: query_mode is not configured ---
    Settings unsetModeSettings = Settings.builder()
        .put("index.analysis.filter.common_grams_default.type", "common_grams")
        .putList("index.analysis.filter.common_grams_default.common_words", "chromosome", "protein")
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();
    OpenSearchTestCase.TestAnalysis unsetModeAnalysis = createTestAnalysisFromSettings(unsetModeSettings);
    TokenFilterFactory unsetModeFilter = unsetModeAnalysis.tokenFilter.get("common_grams_default");
    String firstInput = "the quick brown is a fox Or noT";
    String[] firstExpected = new String[] { "the", "quick", "brown", "is", "a", "fox", "Or", "noT" };
    Tokenizer firstTokenizer = new WhitespaceTokenizer();
    firstTokenizer.setReader(new StringReader(firstInput));
    assertTokenStreamContents(unsetModeFilter.create(firstTokenizer), firstExpected);

    // --- second pass: query_mode is explicitly false ---
    Settings indexModeSettings = Settings.builder()
        .put("index.analysis.filter.common_grams_default.type", "common_grams")
        .put("index.analysis.filter.common_grams_default.query_mode", false)
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .putList("index.analysis.filter.common_grams_default.common_words", "chromosome", "protein")
        .build();
    OpenSearchTestCase.TestAnalysis indexModeAnalysis = createTestAnalysisFromSettings(indexModeSettings);
    TokenFilterFactory indexModeFilter = indexModeAnalysis.tokenFilter.get("common_grams_default");
    String secondInput = "the quick brown is a fox Or noT";
    String[] secondExpected = new String[] { "the", "quick", "brown", "is", "a", "fox", "Or", "noT" };
    Tokenizer secondTokenizer = new WhitespaceTokenizer();
    secondTokenizer.setReader(new StringReader(secondInput));
    assertTokenStreamContents(indexModeFilter.create(secondTokenizer), secondExpected);
}
use of org.opensearch.common.settings.Settings in project OpenSearch by opensearch-project.
the class CommonGramsTokenFilterFactoryTests method testQueryModeCommonGramsAnalysis.
/**
 * Loads analysis settings from {@code commongrams_query_mode.json} and asserts the tokens
 * produced by the {@code commongramsAnalyzer} and {@code commongramsAnalyzer_file} analyzers
 * for the same input sentence.
 *
 * <p>The analysis is rebuilt from the settings for each analyzer that is checked.
 *
 * @throws IOException if loading the settings, building the analysis or reading tokens fails
 */
public void testQueryModeCommonGramsAnalysis() throws IOException {
    String json = "/org/opensearch/analysis/common/commongrams_query_mode.json";
    Settings settings = Settings.builder()
        .loadFromStream(json, getClass().getResourceAsStream(json), false)
        .put(Environment.PATH_HOME_SETTING.getKey(), createHome())
        .build();

    // both analyzers are expected to produce the same output for the same sentence
    String[] analyzerNames = new String[] { "commongramsAnalyzer", "commongramsAnalyzer_file" };
    for (String analyzerName : analyzerNames) {
        IndexAnalyzers analyzers = createTestAnalysisFromSettings(settings).indexAnalyzers;
        Analyzer underTest = analyzers.get(analyzerName).analyzer();
        String sentence = "the quick brown is a fox or not";
        String[] expectedTokens = new String[] { "the", "quick_brown", "brown_is", "is", "a_fox", "fox_or", "or", "not" };
        assertTokenStreamContents(underTest.tokenStream("test", sentence), expectedTokens);
    }
}
use of org.opensearch.common.settings.Settings in project OpenSearch by opensearch-project.
the class DisableGraphQueryTests method setup.
/**
 * Creates the {@code test} index with two shingle-based analyzers and precomputes the queries
 * the tests compare against.
 *
 * <p>The index defines a {@code shingle} filter (no unigrams) and a {@code shingle_unigram}
 * filter (with unigrams), both with min and max size 2, and maps one text field to each of the
 * resulting analyzers. The four expected-query fields are then filled in.
 */
@Before
public void setup() {
    Settings settings = Settings.builder()
        // shingle filter without unigrams
        .put("index.analysis.filter.shingle.type", "shingle")
        .put("index.analysis.filter.shingle.output_unigrams", false)
        .put("index.analysis.filter.shingle.min_size", 2)
        .put("index.analysis.filter.shingle.max_size", 2)
        // shingle filter that also emits unigrams
        .put("index.analysis.filter.shingle_unigram.type", "shingle")
        .put("index.analysis.filter.shingle_unigram.output_unigrams", true)
        .put("index.analysis.filter.shingle_unigram.min_size", 2)
        .put("index.analysis.filter.shingle_unigram.max_size", 2)
        // analyzers wiring the filters behind a whitespace tokenizer
        .put("index.analysis.analyzer.text_shingle.tokenizer", "whitespace")
        .put("index.analysis.analyzer.text_shingle.filter", "lowercase, shingle")
        .put("index.analysis.analyzer.text_shingle_unigram.tokenizer", "whitespace")
        .put("index.analysis.analyzer.text_shingle_unigram.filter", "lowercase, shingle_unigram")
        .build();
    indexService = createIndex(
        "test",
        settings,
        "t",
        "text_shingle",
        "type=text,analyzer=text_shingle",
        "text_shingle_unigram",
        "type=text,analyzer=text_shingle_unigram"
    );
    shardContext = indexService.newQueryShardContext(0, null, () -> 0L, null);

    final String unigramField = "text_shingle_unigram";
    final String shingleField = "text_shingle";

    // parsed queries for "text_shingle_unigram:(foo bar baz)" with query parsers
    // that ignores position length attribute
    BooleanQuery.Builder unigramBool = new BooleanQuery.Builder();
    unigramBool.add(
        new SynonymQuery(new Term(unigramField, "foo"), new Term(unigramField, "foo bar")),
        BooleanClause.Occur.SHOULD
    );
    unigramBool.add(
        new SynonymQuery(new Term(unigramField, "bar"), new Term(unigramField, "bar baz")),
        BooleanClause.Occur.SHOULD
    );
    unigramBool.add(new TermQuery(new Term(unigramField, "baz")), BooleanClause.Occur.SHOULD);
    expectedQueryWithUnigram = unigramBool.build();

    // parsed query for "text_shingle_unigram:\"foo bar baz\" with query parsers
    // that ignores position length attribute
    MultiPhraseQuery.Builder unigramPhrase = new MultiPhraseQuery.Builder();
    unigramPhrase.add(new Term[] { new Term(unigramField, "foo"), new Term(unigramField, "foo bar") }, 0);
    unigramPhrase.add(new Term[] { new Term(unigramField, "bar"), new Term(unigramField, "bar baz") }, 1);
    unigramPhrase.add(new Term[] { new Term(unigramField, "baz") }, 2);
    expectedPhraseQueryWithUnigram = unigramPhrase.build();

    // parsed query for "text_shingle:(foo bar baz)
    BooleanQuery.Builder shingleBool = new BooleanQuery.Builder();
    shingleBool.add(new TermQuery(new Term(shingleField, "foo bar")), BooleanClause.Occur.SHOULD);
    shingleBool.add(new TermQuery(new Term(shingleField, "bar baz")), BooleanClause.Occur.SHOULD);
    shingleBool.add(new TermQuery(new Term(shingleField, "baz biz")), BooleanClause.Occur.SHOULD);
    expectedQuery = shingleBool.build();

    // parsed query for "text_shingle:"foo bar baz"
    PhraseQuery.Builder shinglePhrase = new PhraseQuery.Builder();
    shinglePhrase.add(new Term(shingleField, "foo bar"));
    shinglePhrase.add(new Term(shingleField, "bar baz"));
    shinglePhrase.add(new Term(shingleField, "baz biz"));
    expectedPhraseQuery = shinglePhrase.build();
}
use of org.opensearch.common.settings.Settings in project OpenSearch by opensearch-project.
the class EdgeNGramTokenizerTests method buildAnalyzers.
/**
 * Builds the index analyzers for an index named {@code index} whose only custom analyzer,
 * {@code my_analyzer}, uses the given tokenizer.
 *
 * @param version   value stored under {@code IndexMetadata.SETTING_VERSION_CREATED}
 * @param tokenizer tokenizer name assigned to {@code my_analyzer}
 * @return the analyzers built by an {@code AnalysisModule} that has only the
 *         {@code CommonAnalysisPlugin} registered
 * @throws IOException if the analysis module or the analyzers cannot be built
 */
private IndexAnalyzers buildAnalyzers(Version version, String tokenizer) throws IOException {
    // node-level settings: only a temporary home directory
    Settings nodeSettings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();

    // index-level settings: creation version plus the analyzer under test
    Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, version)
        .put("index.analysis.analyzer.my_analyzer.tokenizer", tokenizer)
        .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);

    Environment environment = TestEnvironment.newEnvironment(nodeSettings);
    AnalysisModule analysisModule = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisPlugin()));
    return analysisModule.getAnalysisRegistry().build(idxSettings);
}
Aggregations