Use of org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider in project elasticsearch by elastic.
The class CompoundAnalysisTests, method testDefaultsCompoundAnalysis.
public void testDefaultsCompoundAnalysis() throws Exception {
    Settings settings = getJsonSettings();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
    AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() {

        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
        }
    }));
    TokenFilterFactory filterFactory = analysisModule.getAnalysisRegistry().buildTokenFilterFactories(idxSettings).get("dict_dec");
    MatcherAssert.assertThat(filterFactory, instanceOf(DictionaryCompoundWordTokenFilterFactory.class));
}
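The test registers its filter through AnalysisPlugin#getTokenFilters, mapping the name "myfilter" to a constructor reference. Below is a minimal sketch of what such a factory could look like; the class body is an assumption for illustration rather than the actual test helper, but the constructor shape is the important part: it must line up with AnalysisProvider#get(IndexSettings, Environment, String, Settings) for the MyFilterTokenFilterFactory::new reference to compile.

import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

public class MyFilterTokenFilterFactory extends AbstractTokenFilterFactory {

    // Signature mirrors AnalysisProvider#get(IndexSettings, Environment, String, Settings),
    // so the method reference MyFilterTokenFilterFactory::new satisfies the provider interface.
    public MyFilterTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
        super(indexSettings, name, settings);
    }

    @Override
    public TokenStream create(TokenStream tokenStream) {
        // Any TokenFilter would do here; dropping English stop words keeps the sketch small.
        return new StopFilter(tokenStream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    }
}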
Use of org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider in project elasticsearch by elastic.
The class CompoundAnalysisTests, method analyze.
private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
    AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() {

        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
        }
    }));
    IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
    Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();
    AllEntries allEntries = new AllEntries();
    allEntries.addText("field1", text, 1.0f);
    TokenStream stream = AllTokenStream.allTokenStream("_all", text, 1.0f, analyzer);
    stream.reset();
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    List<String> terms = new ArrayList<>();
    while (stream.incrementToken()) {
        String tokText = termAtt.toString();
        terms.add(tokText);
    }
    return terms;
}
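A hedged usage sketch of the helper above (not a test copied from the file): the settings wire a dictionary_decompounder filter into a custom analyzer, and analyze() returns the terms the analyzer emits. The setting keys follow the standard index.analysis.* layout; the analyzer name, word list, and expected terms are illustrative assumptions.

public void testDictionaryDecompounderSketch() throws Exception {
    Settings settings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .put("index.analysis.analyzer.decomp.type", "custom")
            .put("index.analysis.analyzer.decomp.tokenizer", "standard")
            .putArray("index.analysis.analyzer.decomp.filter", "lowercase", "dict_dec")
            .put("index.analysis.filter.dict_dec.type", "dictionary_decompounder")
            .putArray("index.analysis.filter.dict_dec.word_list", "dampf", "schiff")
            .build();
    List<String> terms = analyze(settings, "decomp", "Dampfschiff");
    // The decompounder keeps the original token and adds the matching dictionary subwords.
    assertThat(terms, hasItems("dampfschiff", "dampf", "schiff"));
}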
Use of org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider in project elasticsearch by elastic.
The class AnalysisRegistry, method buildMapping.
private <T> Map<String, T> buildMapping(Component component, IndexSettings settings, Map<String, Settings> settingsMap,
        Map<String, AnalysisModule.AnalysisProvider<T>> providerMap, Map<String, AnalysisModule.AnalysisProvider<T>> defaultInstance) throws IOException {
    Settings defaultSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, settings.getIndexVersionCreated()).build();
    Map<String, T> factories = new HashMap<>();
    for (Map.Entry<String, Settings> entry : settingsMap.entrySet()) {
        String name = entry.getKey();
        Settings currentSettings = entry.getValue();
        String typeName = currentSettings.get("type");
        if (component == Component.ANALYZER) {
            T factory = null;
            if (typeName == null) {
                if (currentSettings.get("tokenizer") != null) {
                    factory = (T) new CustomAnalyzerProvider(settings, name, currentSettings);
                } else {
                    throw new IllegalArgumentException(component + " [" + name + "] must specify either an analyzer type, or a tokenizer");
                }
            } else if (typeName.equals("custom")) {
                factory = (T) new CustomAnalyzerProvider(settings, name, currentSettings);
            }
            if (factory != null) {
                factories.put(name, factory);
                continue;
            }
        } else if (component == Component.NORMALIZER) {
            if (typeName == null || typeName.equals("custom")) {
                T factory = (T) new CustomNormalizerProvider(settings, name, currentSettings);
                factories.put(name, factory);
                continue;
            }
        }
        AnalysisProvider<T> type = getAnalysisProvider(component, providerMap, name, typeName);
        if (type == null) {
            throw new IllegalArgumentException("Unknown " + component + " type [" + typeName + "] for [" + name + "]");
        }
        final T factory = type.get(settings, environment, name, currentSettings);
        factories.put(name, factory);
    }
    // go over the providers in the bindings and register the ones that are not configured
    for (Map.Entry<String, AnalysisModule.AnalysisProvider<T>> entry : providerMap.entrySet()) {
        String name = entry.getKey();
        AnalysisModule.AnalysisProvider<T> provider = entry.getValue();
        // we don't want to re-register one that already exists
        if (settingsMap.containsKey(name)) {
            continue;
        }
        // if it requires settings, don't register it; the default instance has no settings to offer
        if (provider.requiresAnalysisSettings()) {
            continue;
        }
        AnalysisModule.AnalysisProvider<T> defaultProvider = defaultInstance.get(name);
        final T instance;
        if (defaultProvider == null) {
            instance = provider.get(settings, environment, name, defaultSettings);
        } else {
            instance = defaultProvider.get(settings, environment, name, defaultSettings);
        }
        factories.put(name, instance);
    }
    // finally, add any pre-built defaults that have not been registered yet
    for (Map.Entry<String, AnalysisModule.AnalysisProvider<T>> entry : defaultInstance.entrySet()) {
        final String name = entry.getKey();
        final AnalysisModule.AnalysisProvider<T> provider = entry.getValue();
        if (factories.containsKey(name) == false) {
            final T instance = provider.get(settings, environment, name, defaultSettings);
            factories.put(name, instance);
        }
    }
    return factories;
}
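buildMapping() consumes AnalysisProvider instances from three sources: the per-index settings, the globally registered providers, and the pre-built defaults. Providers that report requiresAnalysisSettings() == true are deliberately skipped in the "unconfigured" pass, because there is nothing meaningful to instantiate them with. A minimal sketch of a provider, reusing the hypothetical factory from the first example and spelled out as an anonymous class (it is usually a lambda or constructor reference):

AnalysisProvider<TokenFilterFactory> myFilterProvider = new AnalysisProvider<TokenFilterFactory>() {

    @Override
    public TokenFilterFactory get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
        return new MyFilterTokenFilterFactory(indexSettings, environment, name, settings);
    }

    @Override
    public boolean requiresAnalysisSettings() {
        return false; // safe to build with defaultSettings when the index does not configure it
    }
};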
Use of org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider in project elasticsearch by elastic.
The class AnalysisRegistry, method getTokenFilterProvider.
/**
 * Returns a registered {@link TokenFilterFactory} provider for the given {@link IndexSettings},
 * or a registered {@link TokenFilterFactory} provider for a predefined name,
 * or <code>null</code> if the token filter was not registered.
 * @param tokenFilter global or index-defined token filter name
 * @param indexSettings the index settings
 * @return {@link TokenFilterFactory} provider or <code>null</code>
 */
public AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenFilter, IndexSettings indexSettings) {
    final Map<String, Settings> tokenFilterSettings = indexSettings.getSettings().getGroups("index.analysis.filter");
    if (tokenFilterSettings.containsKey(tokenFilter)) {
        Settings currentSettings = tokenFilterSettings.get(tokenFilter);
        String typeName = currentSettings.get("type");
        /*
         * synonym and synonym_graph are different from everything else since they need access to the tokenizer
         * factories for the index. Instead of building the infrastructure for plugins we rather make it a real
         * exception, to not pollute the general interface and to hide internal data structures as much as possible.
         */
        if ("synonym".equals(typeName)) {
            return requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
        } else if ("synonym_graph".equals(typeName)) {
            return requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings));
        } else {
            return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName);
        }
    } else {
        return getTokenFilterProvider(tokenFilter);
    }
}
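The requiresAnalysisSettings(...) calls above wrap a lambda so that the returned provider reports requiresAnalysisSettings() == true, which keeps the synonym factories out of the "pre-built with default settings" pass in buildMapping(). A sketch of such a wrapper, inferred from the call sites rather than copied from the Elasticsearch sources:

private static <T> AnalysisProvider<T> requiresAnalysisSettings(AnalysisProvider<T> provider) {
    return new AnalysisProvider<T>() {

        @Override
        public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
            return provider.get(indexSettings, environment, name, settings);
        }

        @Override
        public boolean requiresAnalysisSettings() {
            // force per-index settings: a synonym filter is meaningless without its configuration
            return true;
        }
    };
}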