use of org.opensearch.index.analysis.IndexAnalyzers in project OpenSearch by opensearch-project.
the class CommonGramsTokenFilterFactoryTests method testQueryModeCommonGramsAnalysis.
public void testQueryModeCommonGramsAnalysis() throws IOException {
String json = "/org/opensearch/analysis/common/commongrams_query_mode.json";
Settings settings = Settings.builder().loadFromStream(json, getClass().getResourceAsStream(json), false).put(Environment.PATH_HOME_SETTING.getKey(), createHome()).build();
{
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings).indexAnalyzers;
Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer").analyzer();
String source = "the quick brown is a fox or not";
String[] expected = new String[] { "the", "quick_brown", "brown_is", "is", "a_fox", "fox_or", "or", "not" };
assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
}
{
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings).indexAnalyzers;
Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer_file").analyzer();
String source = "the quick brown is a fox or not";
String[] expected = new String[] { "the", "quick_brown", "brown_is", "is", "a_fox", "fox_or", "or", "not" };
assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
}
}
use of org.opensearch.index.analysis.IndexAnalyzers in project OpenSearch by opensearch-project.
the class CompoundAnalysisTests method analyze.
private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
AnalysisModule analysisModule = createAnalysisModule(settings);
IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();
TokenStream stream = analyzer.tokenStream("", text);
stream.reset();
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
List<String> terms = new ArrayList<>();
while (stream.incrementToken()) {
String tokText = termAtt.toString();
terms.add(tokText);
}
return terms;
}
use of org.opensearch.index.analysis.IndexAnalyzers in project OpenSearch by opensearch-project.
the class StemmerTokenFilterFactoryTests method testEnglishFilterFactory.
public void testEnglishFilterFactory() throws IOException {
int iters = scaledRandomIntBetween(20, 100);
for (int i = 0; i < iters; i++) {
Version v = VersionUtils.randomVersion(random());
Settings settings = Settings.builder().put("index.analysis.filter.my_english.type", "stemmer").put("index.analysis.filter.my_english.language", "english").put("index.analysis.analyzer.my_english.tokenizer", "whitespace").put("index.analysis.analyzer.my_english.filter", "my_english").put(SETTING_VERSION_CREATED, v).put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_english");
assertThat(tokenFilter, instanceOf(StemmerTokenFilterFactory.class));
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader("foo bar"));
TokenStream create = tokenFilter.create(tokenizer);
IndexAnalyzers indexAnalyzers = analysis.indexAnalyzers;
NamedAnalyzer analyzer = indexAnalyzers.get("my_english");
assertThat(create, instanceOf(PorterStemFilter.class));
assertAnalyzesTo(analyzer, "consolingly", new String[] { "consolingli" });
}
}
use of org.opensearch.index.analysis.IndexAnalyzers in project OpenSearch by opensearch-project.
the class PatternCaptureTokenFilterTests method testPatternCaptureTokenFilter.
public void testPatternCaptureTokenFilter() throws Exception {
String json = "/org/opensearch/analysis/common/pattern_capture.json";
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).loadFromStream(json, getClass().getResourceAsStream(json), false).put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
IndexAnalyzers indexAnalyzers = createTestAnalysis(idxSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers;
NamedAnalyzer analyzer1 = indexAnalyzers.get("single");
assertTokenStreamContents(analyzer1.tokenStream("test", "foobarbaz"), new String[] { "foobarbaz", "foobar", "foo" });
NamedAnalyzer analyzer2 = indexAnalyzers.get("multi");
assertTokenStreamContents(analyzer2.tokenStream("test", "abc123def"), new String[] { "abc123def", "abc", "123", "def" });
NamedAnalyzer analyzer3 = indexAnalyzers.get("preserve");
assertTokenStreamContents(analyzer3.tokenStream("test", "foobarbaz"), new String[] { "foobar", "foo" });
}
use of org.opensearch.index.analysis.IndexAnalyzers in project OpenSearch by opensearch-project.
the class SearchAsYouTypeFieldMapperTests method createIndexAnalyzers.
@Override
protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) {
NamedAnalyzer dflt = new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer(), TextFieldMapper.Defaults.POSITION_INCREMENT_GAP);
NamedAnalyzer standard = new NamedAnalyzer("standard", AnalyzerScope.INDEX, new StandardAnalyzer());
NamedAnalyzer keyword = new NamedAnalyzer("keyword", AnalyzerScope.INDEX, new KeywordAnalyzer());
NamedAnalyzer simple = new NamedAnalyzer("simple", AnalyzerScope.INDEX, new SimpleAnalyzer());
NamedAnalyzer whitespace = new NamedAnalyzer("whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer());
return new IndexAnalyzers(org.opensearch.common.collect.Map.of("default", dflt, "standard", standard, "keyword", keyword, "simple", simple, "whitespace", whitespace), org.opensearch.common.collect.Map.of(), org.opensearch.common.collect.Map.of());
}
Aggregations