Use of org.opensearch.index.analysis.NamedAnalyzer in project OpenSearch by opensearch-project.
From class ConcatenateGraphTokenFilterFactoryTests, method testGraph.
/**
 * Verifies that a {@code concatenate_graph} filter placed after a {@code word_delimiter_graph}
 * filter (with {@code catenate_words} enabled) emits one concatenated token for every path
 * through the token graph, with all of them stacked on the same position.
 */
public void testGraph() throws IOException {
    Settings analysisSettings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .put("index.analysis.filter.my_word_delimiter.type", "word_delimiter_graph")
        .put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
        .put("index.analysis.filter.my_concatenate_graph.type", "concatenate_graph")
        .put("index.analysis.analyzer.my_analyzer.type", "custom")
        .put("index.analysis.analyzer.my_analyzer.tokenizer", "whitespace")
        .put("index.analysis.analyzer.my_analyzer.filter", "my_word_delimiter, my_concatenate_graph")
        .build();
    OpenSearchTestCase.TestAnalysis testAnalysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
        analysisSettings,
        new CommonAnalysisPlugin()
    );
    NamedAnalyzer graphAnalyzer = testAnalysis.indexAnalyzers.get("my_analyzer");

    String input = "PowerShot Is AweSome";

    // The whitespace tokenizer splits only on whitespace: "PowerShot" --> "Is" --> "AweSome".
    // word_delimiter_graph then splits on case changes and, because catenate_words is on, also
    // keeps the joined form, which yields this graph:
    //
    // <start> -+-> "Power" --> "Shot" -+-> "Is" -+-> "Awe" --> "Some" -+-> <end>
    //          |                       |         |                     |
    //          +-----> "PowerShot" ----+         +-----> "AweSome" ----+
    //
    // concatenate_graph walks every path through that graph and emits one token per path.
    String[] expectedTokens = new String[] {
        "Power Shot Is Awe Some",
        "Power Shot Is AweSome",
        "PowerShot Is Awe Some",
        "PowerShot Is AweSome" };

    // Every emitted token sits on the same position: only the first one advances it.
    int[] positionIncrements = new int[] { 1, 0, 0, 0 };
    int[] positionLengths = new int[] { 1, 1, 1, 1 };

    assertAnalyzesToPositions(graphAnalyzer, input, expectedTokens, positionIncrements, positionLengths);
}
Use of org.opensearch.index.analysis.NamedAnalyzer in project OpenSearch by opensearch-project.
From class StemmerTokenFilterFactoryTests, method testEnglishFilterFactory.
/**
 * Runs a randomized number of rounds, each with a randomly chosen index-created version, and
 * checks that a {@code stemmer} filter configured with language {@code english} is backed by a
 * {@link StemmerTokenFilterFactory}, creates a {@link PorterStemFilter}, and stems
 * "consolingly" to "consolingli" when used through the {@code my_english} analyzer.
 */
public void testEnglishFilterFactory() throws IOException {
    final int rounds = scaledRandomIntBetween(20, 100);
    for (int round = 0; round < rounds; round++) {
        // Pick the version first so the sequence of random draws matches on every round.
        Version createdVersion = VersionUtils.randomVersion(random());
        Settings indexSettings = Settings.builder()
            .put("index.analysis.filter.my_english.type", "stemmer")
            .put("index.analysis.filter.my_english.language", "english")
            .put("index.analysis.analyzer.my_english.tokenizer", "whitespace")
            .put("index.analysis.analyzer.my_english.filter", "my_english")
            .put(SETTING_VERSION_CREATED, createdVersion)
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
        OpenSearchTestCase.TestAnalysis testAnalysis = AnalysisTestsHelper.createTestAnalysisFromSettings(indexSettings, PLUGIN);

        // The registered factory must be the stemmer factory ...
        TokenFilterFactory stemmerFactory = testAnalysis.tokenFilter.get("my_english");
        assertThat(stemmerFactory, instanceOf(StemmerTokenFilterFactory.class));

        // ... and the filter it builds on top of a tokenizer must be the Porter stemmer.
        Tokenizer whitespaceTokenizer = new WhitespaceTokenizer();
        whitespaceTokenizer.setReader(new StringReader("foo bar"));
        TokenStream stemmed = stemmerFactory.create(whitespaceTokenizer);
        assertThat(stemmed, instanceOf(PorterStemFilter.class));

        // End-to-end check through the analyzer that wires the same filter.
        NamedAnalyzer englishAnalyzer = testAnalysis.indexAnalyzers.get("my_english");
        assertAnalyzesTo(englishAnalyzer, "consolingly", new String[] { "consolingli" });
    }
}
Use of org.opensearch.index.analysis.NamedAnalyzer in project OpenSearch by opensearch-project.
From class PatternCaptureTokenFilterTests, method testPatternCaptureTokenFilter.
/**
 * Loads analyzer definitions from the {@code pattern_capture.json} classpath resource and checks
 * the tokens produced by the analyzers named {@code single}, {@code multi} and {@code preserve}.
 */
public void testPatternCaptureTokenFilter() throws Exception {
    final String resourcePath = "/org/opensearch/analysis/common/pattern_capture.json";
    Settings settings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
        .loadFromStream(resourcePath, getClass().getResourceAsStream(resourcePath), false)
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .build();

    IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("index", settings);
    IndexAnalyzers analyzers = createTestAnalysis(indexSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers;

    // "single": the whole input plus the captured prefixes.
    assertTokenStreamContents(
        analyzers.get("single").tokenStream("test", "foobarbaz"),
        new String[] { "foobarbaz", "foobar", "foo" }
    );

    // "multi": the whole input plus each letter run and digit run.
    assertTokenStreamContents(
        analyzers.get("multi").tokenStream("test", "abc123def"),
        new String[] { "abc123def", "abc", "123", "def" }
    );

    // "preserve": only the captured prefixes; the full input "foobarbaz" is not expected here.
    assertTokenStreamContents(
        analyzers.get("preserve").tokenStream("test", "foobarbaz"),
        new String[] { "foobar", "foo" }
    );
}
Use of org.opensearch.index.analysis.NamedAnalyzer in project OpenSearch by opensearch-project.
From class SearchAsYouTypeFieldMapperTests, method assertShingleFieldType.
/**
 * Asserts that a shingle field type carries the expected shingle size, analyzer name and prefix
 * field type.
 *
 * @param fieldType       the shingle field type under test
 * @param shingleSize     the shingle size the field type and its analyzers must report
 * @param analyzerName    the name expected on both the index and the search analyzer
 * @param prefixFieldType the prefix field type expected on {@code fieldType}
 */
private static void assertShingleFieldType(ShingleFieldType fieldType, int shingleSize, String analyzerName, PrefixFieldType prefixFieldType) {
    assertThat(fieldType.shingleSize, equalTo(shingleSize));

    // The index-time and search-time analyzers are held to the same expectations.
    for (NamedAnalyzer named : asList(fieldType.indexAnalyzer(), fieldType.getTextSearchInfo().getSearchAnalyzer())) {
        assertThat(named.name(), equalTo(analyzerName));

        // The wrapped-analyzer checks only apply when the shingle size is above 1.
        if (shingleSize <= 1) {
            continue;
        }
        final SearchAsYouTypeAnalyzer shingleAnalyzer = (SearchAsYouTypeAnalyzer) named.analyzer();
        assertThat(shingleAnalyzer.shingleSize(), equalTo(shingleSize));
        assertThat(shingleAnalyzer.indexPrefixes(), equalTo(false));
    }

    assertThat(fieldType.prefixFieldType, equalTo(prefixFieldType));
}
Use of org.opensearch.index.analysis.NamedAnalyzer in project OpenSearch by opensearch-project.
From class SearchAsYouTypeFieldMapperTests, method createIndexAnalyzers.
/**
 * Builds the analyzer registry used by these mapper tests: index-scoped analyzers registered
 * under the names {@code default}, {@code standard}, {@code keyword}, {@code simple} and
 * {@code whitespace}. The two remaining map arguments of {@link IndexAnalyzers} are left empty.
 *
 * @param indexSettings unused by this override
 */
@Override
protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) {
    return new IndexAnalyzers(
        org.opensearch.common.collect.Map.of(
            // Only the default analyzer is given an explicit position increment gap.
            "default",
            new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer(), TextFieldMapper.Defaults.POSITION_INCREMENT_GAP),
            "standard",
            new NamedAnalyzer("standard", AnalyzerScope.INDEX, new StandardAnalyzer()),
            "keyword",
            new NamedAnalyzer("keyword", AnalyzerScope.INDEX, new KeywordAnalyzer()),
            "simple",
            new NamedAnalyzer("simple", AnalyzerScope.INDEX, new SimpleAnalyzer()),
            "whitespace",
            new NamedAnalyzer("whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer())
        ),
        org.opensearch.common.collect.Map.of(),
        org.opensearch.common.collect.Map.of()
    );
}
Aggregations