use of org.apache.lucene.analysis.hunspell.Dictionary in project elasticsearch by elastic.
the class AnalysisModuleTests method testRegisterHunspellDictionary.
/**
 * Checks that a Hunspell dictionary contributed by an {@link AnalysisPlugin} under the name
 * {@code "foo"} is handed back, as the very same instance, by the hunspell service of the
 * {@link AnalysisModule} the plugin was registered with.
 */
public void testRegisterHunspellDictionary() throws Exception {
    Settings settings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
        .build();
    Environment environment = new Environment(settings);
    Dictionary dictionary;
    // The affix/dictionary resource streams are only needed while the Dictionary is being built,
    // so they are managed by the same try-with-resources as the temporary directory and are
    // closed as soon as construction finishes (or fails).
    try (InputStream aff = getClass().getResourceAsStream("/indices/analyze/conf_dir/hunspell/en_US/en_US.aff");
         InputStream dic = getClass().getResourceAsStream("/indices/analyze/conf_dir/hunspell/en_US/en_US.dic");
         Directory tmp = new SimpleFSDirectory(environment.tmpFile())) {
        dictionary = new Dictionary(tmp, "hunspell", aff, dic);
    }
    AnalysisModule module = new AnalysisModule(environment, singletonList(new AnalysisPlugin() {
        @Override
        public Map<String, Dictionary> getHunspellDictionaries() {
            return singletonMap("foo", dictionary);
        }
    }));
    assertSame(dictionary, module.getHunspellService().getDictionary("foo"));
}
use of org.apache.lucene.analysis.hunspell.Dictionary in project elasticsearch by elastic.
the class HunspellServiceTests method testLocaleDirectoryWithNodeLevelConfig.
/**
 * Loads the {@code en_US} dictionary from the test configuration directory with the node-level
 * ignore-case setting switched on, and expects the loaded dictionary to report ignore-case.
 */
public void testLocaleDirectoryWithNodeLevelConfig() throws Exception {
    // The order of the put(...) calls is kept as-is so that randomBoolean() and createTempDir()
    // are invoked in the same sequence.
    Settings nodeSettings = Settings.builder()
        .put(Environment.PATH_CONF_SETTING.getKey(), getDataPath("/indices/analyze/conf_dir"))
        .put(HUNSPELL_LAZY_LOAD.getKey(), randomBoolean())
        .put(HUNSPELL_IGNORE_CASE.getKey(), true)
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
        .build();

    HunspellService hunspellService = new HunspellService(nodeSettings, new Environment(nodeSettings), emptyMap());
    Dictionary enUs = hunspellService.getDictionary("en_US");

    assertThat(enUs, notNullValue());
    assertTrue(enUs.getIgnoreCase());
}
use of org.apache.lucene.analysis.hunspell.Dictionary in project elasticsearch by elastic.
the class HunspellService method loadDictionary.
/**
 * Loads the hunspell dictionary for the given locale.
 * <p>
 * The dictionary is read from the sub-directory of the hunspell directory named after the locale.
 * That directory must contain exactly one {@code *.aff} file; every {@code *.dic} file found in it
 * is passed to the dictionary.
 *
 * @param locale       The locale of the hunspell dictionary to be loaded.
 * @param nodeSettings The node level settings
 * @param env          The node environment (its temporary directory is used while building the dictionary)
 * @return The loaded Hunspell dictionary
 * @throws Exception when loading fails (due to IO errors or malformed dictionary files)
 */
private Dictionary loadDictionary(String locale, Settings nodeSettings, Environment env) throws Exception {
    if (logger.isDebugEnabled()) {
        logger.debug("Loading hunspell dictionary [{}]...", locale);
    }
    Path dicDir = hunspellDir.resolve(locale);
    if (FileSystemUtils.isAccessibleDirectory(dicDir, logger) == false) {
        throw new ElasticsearchException(String.format(Locale.ROOT, "Could not find hunspell dictionary [%s]", locale));
    }

    // merging node settings with hunspell dictionary specific settings; the result is kept in its
    // own local instead of overwriting the nodeSettings parameter
    Settings dictSettings = HUNSPELL_DICTIONARY_OPTIONS.get(nodeSettings);
    Settings mergedSettings = loadDictionarySettings(dicDir, dictSettings.getByPrefix(locale + "."));
    boolean ignoreCase = mergedSettings.getAsBoolean("ignore_case", defaultIgnoreCase);

    Path[] affixFiles = FileSystemUtils.files(dicDir, "*.aff");
    if (affixFiles.length == 0) {
        throw new ElasticsearchException(String.format(Locale.ROOT, "Missing affix file for hunspell dictionary [%s]", locale));
    }
    if (affixFiles.length != 1) {
        throw new ElasticsearchException(String.format(Locale.ROOT, "Too many affix files exist for hunspell dictionary [%s]", locale));
    }

    InputStream affixStream = null;
    Path[] dicFiles = FileSystemUtils.files(dicDir, "*.dic");
    List<InputStream> dicStreams = new ArrayList<>(dicFiles.length);
    try {
        for (Path dicFile : dicFiles) {
            dicStreams.add(Files.newInputStream(dicFile));
        }
        affixStream = Files.newInputStream(affixFiles[0]);
        try (Directory tmp = new SimpleFSDirectory(env.tmpFile())) {
            return new Dictionary(tmp, "hunspell", affixStream, dicStreams, ignoreCase);
        }
    } catch (Exception e) {
        logger.error((Supplier<?>) () -> new ParameterizedMessage("Could not load hunspell dictionary [{}]", locale), e);
        throw e;
    } finally {
        // Close everything that was opened with ONE IOUtils.close call: with two separate calls,
        // an exception while closing the affix stream would skip closing the dictionary streams.
        List<InputStream> openedStreams = new ArrayList<>(dicStreams.size() + 1);
        if (affixStream != null) {
            openedStreams.add(affixStream);
        }
        openedStreams.addAll(dicStreams);
        IOUtils.close(openedStreams);
    }
}
use of org.apache.lucene.analysis.hunspell.Dictionary in project elasticsearch by elastic.
the class HunspellServiceTests method testLocaleDirectoryWithLocaleSpecificConfig.
/**
 * Exercises dictionary-specific hunspell settings: node-wide ignore-case is enabled, while the
 * {@code en_US} dictionary is explicitly configured with {@code ignore_case=false}.
 */
public void testLocaleDirectoryWithLocaleSpecificConfig() throws Exception {
    // The order of the put(...) calls is kept as-is so that randomBoolean() and createTempDir()
    // are invoked in the same sequence.
    Settings nodeSettings = Settings.builder()
        .put(Environment.PATH_CONF_SETTING.getKey(), getDataPath("/indices/analyze/conf_dir"))
        .put(HUNSPELL_LAZY_LOAD.getKey(), randomBoolean())
        .put(HUNSPELL_IGNORE_CASE.getKey(), true)
        .put("indices.analysis.hunspell.dictionary.en_US.strict_affix_parsing", false)
        .put("indices.analysis.hunspell.dictionary.en_US.ignore_case", false)
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
        .build();

    // en_US: the dictionary-specific ignore_case=false is expected to win over the node-wide value
    HunspellService firstService = new HunspellService(nodeSettings, new Environment(nodeSettings), emptyMap());
    Dictionary enUs = firstService.getDictionary("en_US");
    assertThat(enUs, notNullValue());
    assertFalse(enUs.getIgnoreCase());

    // testing that dictionary specific settings override node level settings
    // (a fresh service instance is used for the second dictionary, as before)
    HunspellService secondService = new HunspellService(nodeSettings, new Environment(nodeSettings), emptyMap());
    Dictionary enUsCustom = secondService.getDictionary("en_US_custom");
    assertThat(enUsCustom, notNullValue());
    assertTrue(enUsCustom.getIgnoreCase());
}
Aggregations