use of org.opensearch.plugins.AnalysisPlugin in project OpenSearch by opensearch-project.
the class AnalysisModuleTests method testRegisterHunspellDictionary.
/**
 * Verifies that a Hunspell dictionary exposed by an {@link AnalysisPlugin} under the name {@code "foo"}
 * is the very same instance that the module's Hunspell service returns for that name.
 *
 * @throws Exception if the environment, the temporary directory or the dictionary cannot be set up
 */
public void testRegisterHunspellDictionary() throws Exception {
    Settings settings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .build();
    Environment environment = TestEnvironment.newEnvironment(settings);
    Dictionary dictionary;
    // The affix and dictionary streams are declared as resources so that they are closed once the
    // dictionary has been built; the Dictionary constructor leaves closing them to the caller.
    try (
        InputStream aff = getClass().getResourceAsStream("/indices/analyze/conf_dir/hunspell/en_US/en_US.aff");
        InputStream dic = getClass().getResourceAsStream("/indices/analyze/conf_dir/hunspell/en_US/en_US.dic");
        Directory tmp = new NIOFSDirectory(environment.tmpFile())
    ) {
        dictionary = new Dictionary(tmp, "hunspell", aff, dic);
    }
    AnalysisModule module = new AnalysisModule(environment, singletonList(new AnalysisPlugin() {
        @Override
        public Map<String, Dictionary> getHunspellDictionaries() {
            return singletonMap("foo", dictionary);
        }
    }));
    assertSame(dictionary, module.getHunspellService().getDictionary("foo"));
}
use of org.opensearch.plugins.AnalysisPlugin in project OpenSearch by opensearch-project.
the class AnalysisModuleTests method testPluginPreConfiguredCharFilters.
/**
 * Checks that a plugin can contribute pre-configured char filters in three flavours: one that ignores
 * versions, one that receives the Lucene version and one that receives the OpenSearch version.
 * Each filter appends a marker to the input; the test asserts the marker in the token stream output, and in the
 * normalized output only when the corresponding randomly chosen multi-term flag is set.
 */
public void testPluginPreConfiguredCharFilters() throws IOException {
    final boolean noVersionSupportsMultiTerm = randomBoolean();
    final boolean luceneVersionSupportsMultiTerm = randomBoolean();
    final boolean opensearchVersionSupportsMultiTerm = randomBoolean();

    Environment nodeEnvironment = TestEnvironment.newEnvironment(emptyNodeSettings);
    AnalysisPlugin plugin = new AnalysisPlugin() {
        @Override
        public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
            PreConfiguredCharFilter unversioned = PreConfiguredCharFilter.singleton(
                "no_version",
                noVersionSupportsMultiTerm,
                reader -> new AppendCharFilter(reader, "no_version")
            );
            PreConfiguredCharFilter byLucene = PreConfiguredCharFilter.luceneVersion(
                "lucene_version",
                luceneVersionSupportsMultiTerm,
                (reader, lucene) -> new AppendCharFilter(reader, lucene.toString())
            );
            PreConfiguredCharFilter byOpenSearch = PreConfiguredCharFilter.openSearchVersion(
                "opensearch_version",
                opensearchVersionSupportsMultiTerm,
                (reader, osVersion) -> new AppendCharFilter(reader, osVersion.toString())
            );
            return Arrays.asList(unversioned, byLucene, byOpenSearch);
        }

        @Override
        public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
            // A mock keyword tokenizer is required: alpha / beta version strings would otherwise be split at the dash.
            return singletonMap(
                "keyword",
                (idxSettings, env, name, tokenizerSettings) -> TokenizerFactory.newFactory(
                    name,
                    () -> new MockTokenizer(MockTokenizer.KEYWORD, false)
                )
            );
        }
    };
    AnalysisRegistry registry = new AnalysisModule(nodeEnvironment, singletonList(plugin)).getAnalysisRegistry();

    Version version = VersionUtils.randomVersion(random());
    Settings analyzerSettings = Settings.builder()
        .put("index.analysis.analyzer.no_version.tokenizer", "keyword")
        .put("index.analysis.analyzer.no_version.char_filter", "no_version")
        .put("index.analysis.analyzer.lucene_version.tokenizer", "keyword")
        .put("index.analysis.analyzer.lucene_version.char_filter", "lucene_version")
        .put("index.analysis.analyzer.opensearch_version.tokenizer", "keyword")
        .put("index.analysis.analyzer.opensearch_version.char_filter", "opensearch_version")
        .put(IndexMetadata.SETTING_VERSION_CREATED, version)
        .build();
    IndexAnalyzers analyzers = getIndexAnalyzers(registry, analyzerSettings);

    // Regular analysis always runs the char filter.
    assertTokenStreamContents(analyzers.get("no_version").tokenStream("", "test"), new String[] { "testno_version" });
    assertTokenStreamContents(
        analyzers.get("lucene_version").tokenStream("", "test"),
        new String[] { "test" + version.luceneVersion }
    );
    assertTokenStreamContents(analyzers.get("opensearch_version").tokenStream("", "test"), new String[] { "test" + version });

    // Normalization is expected to show the marker only when the filter was registered with the multi-term flag set.
    assertEquals(
        "test" + (noVersionSupportsMultiTerm ? "no_version" : ""),
        analyzers.get("no_version").normalize("", "test").utf8ToString()
    );
    assertEquals(
        "test" + (luceneVersionSupportsMultiTerm ? version.luceneVersion.toString() : ""),
        analyzers.get("lucene_version").normalize("", "test").utf8ToString()
    );
    assertEquals(
        "test" + (opensearchVersionSupportsMultiTerm ? version.toString() : ""),
        analyzers.get("opensearch_version").normalize("", "test").utf8ToString()
    );
}
use of org.opensearch.plugins.AnalysisPlugin in project OpenSearch by opensearch-project.
the class AnalysisModuleTests method testPluginPreConfiguredTokenFilters.
/**
 * Checks that a plugin can contribute pre-configured token filters in three flavours: one that ignores
 * versions, one that receives the Lucene version and one that receives the OpenSearch version.
 * Each filter appends a marker to the token; the test asserts the marker in the token stream output, and in the
 * normalized output only when the corresponding randomly chosen multi-term flag is set.
 */
public void testPluginPreConfiguredTokenFilters() throws IOException {
    final boolean noVersionSupportsMultiTerm = randomBoolean();
    final boolean luceneVersionSupportsMultiTerm = randomBoolean();
    final boolean opensearchVersionSupportsMultiTerm = randomBoolean();

    Environment nodeEnvironment = TestEnvironment.newEnvironment(emptyNodeSettings);
    AnalysisPlugin plugin = new AnalysisPlugin() {
        @Override
        public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
            PreConfiguredTokenFilter unversioned = PreConfiguredTokenFilter.singleton(
                "no_version",
                noVersionSupportsMultiTerm,
                input -> new AppendTokenFilter(input, "no_version")
            );
            PreConfiguredTokenFilter byLucene = PreConfiguredTokenFilter.luceneVersion(
                "lucene_version",
                luceneVersionSupportsMultiTerm,
                (input, lucene) -> new AppendTokenFilter(input, lucene.toString())
            );
            PreConfiguredTokenFilter byOpenSearch = PreConfiguredTokenFilter.openSearchVersion(
                "opensearch_version",
                opensearchVersionSupportsMultiTerm,
                (input, osVersion) -> new AppendTokenFilter(input, osVersion.toString())
            );
            return Arrays.asList(unversioned, byLucene, byOpenSearch);
        }
    };
    AnalysisRegistry registry = new AnalysisModule(nodeEnvironment, singletonList(plugin)).getAnalysisRegistry();

    Version version = VersionUtils.randomVersion(random());
    Settings analyzerSettings = Settings.builder()
        .put("index.analysis.analyzer.no_version.tokenizer", "standard")
        .put("index.analysis.analyzer.no_version.filter", "no_version")
        .put("index.analysis.analyzer.lucene_version.tokenizer", "standard")
        .put("index.analysis.analyzer.lucene_version.filter", "lucene_version")
        .put("index.analysis.analyzer.opensearch_version.tokenizer", "standard")
        .put("index.analysis.analyzer.opensearch_version.filter", "opensearch_version")
        .put(IndexMetadata.SETTING_VERSION_CREATED, version)
        .build();
    IndexAnalyzers analyzers = getIndexAnalyzers(registry, analyzerSettings);

    // Regular analysis always runs the token filter.
    assertTokenStreamContents(analyzers.get("no_version").tokenStream("", "test"), new String[] { "testno_version" });
    assertTokenStreamContents(
        analyzers.get("lucene_version").tokenStream("", "test"),
        new String[] { "test" + version.luceneVersion }
    );
    assertTokenStreamContents(analyzers.get("opensearch_version").tokenStream("", "test"), new String[] { "test" + version });

    // Normalization is expected to show the marker only when the filter was registered with the multi-term flag set.
    assertEquals(
        "test" + (noVersionSupportsMultiTerm ? "no_version" : ""),
        analyzers.get("no_version").normalize("", "test").utf8ToString()
    );
    assertEquals(
        "test" + (luceneVersionSupportsMultiTerm ? version.luceneVersion.toString() : ""),
        analyzers.get("lucene_version").normalize("", "test").utf8ToString()
    );
    assertEquals(
        "test" + (opensearchVersionSupportsMultiTerm ? version.toString() : ""),
        analyzers.get("opensearch_version").normalize("", "test").utf8ToString()
    );
}
use of org.opensearch.plugins.AnalysisPlugin in project OpenSearch by opensearch-project.
the class AnalysisModule method setupPreConfiguredTokenFilters.
/**
 * Assembles the pre-configured token filters: the built-in {@code lowercase} filter, a backwards-compatibility
 * {@code standard} filter, and every filter returned by the given plugins, each registered under its own name.
 * <p>
 * When the {@code standard} filter is applied, it passes the stream through unchanged and logs a deprecation for
 * versions before {@code LegacyESVersion.V_7_6_0}; for any other version it throws an
 * {@link IllegalArgumentException}.
 *
 * @param plugins the analysis plugins whose pre-configured token filters should be registered
 * @return an unmodifiable view of the registry, keyed by filter name
 */
static Map<String, PreConfiguredTokenFilter> setupPreConfiguredTokenFilters(List<AnalysisPlugin> plugins) {
    NamedRegistry<PreConfiguredTokenFilter> registry = new NamedRegistry<>("pre-configured token_filter");

    // Filters that ship with lucene-core
    PreConfiguredTokenFilter lowercase = PreConfiguredTokenFilter.singleton("lowercase", true, LowerCaseFilter::new);
    registry.register("lowercase", lowercase);

    // "standard" is kept only so that old indices keep working (bwc)
    PreConfiguredTokenFilter legacyStandard = PreConfiguredTokenFilter.openSearchVersion("standard", true, (stream, indexVersion) -> {
        // accepted (with a deprecation warning) until version 7_5_2, rejected afterwards
        if (indexVersion.before(LegacyESVersion.V_7_6_0) == false) {
            throw new IllegalArgumentException("The [standard] token filter has been removed.");
        }
        deprecationLogger.deprecate(
            "standard_deprecation",
            "The [standard] token filter is deprecated and will be removed in a future version."
        );
        return stream;
    });
    registry.register("standard", legacyStandard);

    // Plugin-provided filters are added last, in plugin order
    plugins.forEach(
        plugin -> plugin.getPreConfiguredTokenFilters().forEach(provided -> registry.register(provided.getName(), provided))
    );

    return unmodifiableMap(registry.getRegistry());
}
use of org.opensearch.plugins.AnalysisPlugin in project OpenSearch by opensearch-project.
the class AnalysisModule method setupPreConfiguredTokenizers.
/**
 * Assembles the pre-configured tokenizers: one entry per {@code PreBuiltTokenizers} constant, registered under
 * the constant's lower-cased name, followed by every tokenizer returned by the given plugins.
 *
 * @param plugins the analysis plugins whose pre-configured tokenizers should be registered
 * @return an unmodifiable view of the registry, keyed by tokenizer name
 * @throws UnsupportedOperationException if a pre-built tokenizer reports a caching strategy other than {@code ONE}
 */
static Map<String, PreConfiguredTokenizer> setupPreConfiguredTokenizers(List<AnalysisPlugin> plugins) {
    NamedRegistry<PreConfiguredTokenizer> registry = new NamedRegistry<>("pre-configured tokenizer");

    // Temporary shim that adapts the old style pre-built tokenizers to the pre-configured form
    for (PreBuiltTokenizers preBuilt : PreBuiltTokenizers.values()) {
        String registeredName = preBuilt.name().toLowerCase(Locale.ROOT);
        switch (preBuilt.getCachingStrategy()) {
            case ONE:
                // the singleton form always creates the tokenizer for the current version
                registry.register(
                    registeredName,
                    PreConfiguredTokenizer.singleton(registeredName, () -> preBuilt.create(Version.CURRENT))
                );
                break;
            default:
                throw new UnsupportedOperationException("Caching strategy unsupported by temporary shim [" + preBuilt + "]");
        }
    }

    // Plugin-provided tokenizers are added after the built-in ones, in plugin order
    plugins.forEach(
        plugin -> plugin.getPreConfiguredTokenizers().forEach(provided -> registry.register(provided.getName(), provided))
    );

    return unmodifiableMap(registry.getRegistry());
}
Aggregations