Search in sources :

Example 1 with Dictionary

use of org.apache.lucene.analysis.hunspell.Dictionary in project elasticsearch by elastic.

the class AnalysisModuleTests method testRegisterHunspellDictionary.

/**
 * Verifies that a hunspell dictionary contributed by an {@link AnalysisPlugin} is returned, as the very
 * same instance, by the hunspell service of the {@link AnalysisModule} under the name it was registered with.
 */
public void testRegisterHunspellDictionary() throws Exception {
    Settings settings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
        .build();
    Environment environment = new Environment(settings);
    Dictionary dictionary;
    // The affix/dictionary resource streams are handed to the Dictionary constructor only, so they are
    // managed by the same try-with-resources as the temp directory instead of being left open.
    try (InputStream aff = getClass().getResourceAsStream("/indices/analyze/conf_dir/hunspell/en_US/en_US.aff");
         InputStream dic = getClass().getResourceAsStream("/indices/analyze/conf_dir/hunspell/en_US/en_US.dic");
         Directory tmp = new SimpleFSDirectory(environment.tmpFile())) {
        dictionary = new Dictionary(tmp, "hunspell", aff, dic);
    }
    AnalysisModule module = new AnalysisModule(environment, singletonList(new AnalysisPlugin() {

        @Override
        public Map<String, Dictionary> getHunspellDictionaries() {
            // Register the pre-built dictionary under the name "foo".
            return singletonMap("foo", dictionary);
        }
    }));
    // The module must hand back the plugin-provided instance itself, not a copy or a reloaded dictionary.
    assertSame(dictionary, module.getHunspellService().getDictionary("foo"));
}
Also used : Dictionary(org.apache.lucene.analysis.hunspell.Dictionary) InputStream(java.io.InputStream) Environment(org.elasticsearch.env.Environment) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) Settings(org.elasticsearch.common.settings.Settings) IndexSettings(org.elasticsearch.index.IndexSettings) Directory(org.apache.lucene.store.Directory) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) AnalysisPlugin(org.elasticsearch.plugins.AnalysisPlugin)

Example 2 with Dictionary

use of org.apache.lucene.analysis.hunspell.Dictionary in project elasticsearch by elastic.

the class HunspellServiceTests method testLocaleDirectoryWithNodeLevelConfig.

/**
 * Loads the {@code en_US} dictionary with {@code ignore_case} enabled through the node level setting
 * and checks that the resulting dictionary reports case-insensitivity.
 */
public void testLocaleDirectoryWithNodeLevelConfig() throws Exception {
    Settings nodeSettings = Settings.builder()
        .put(Environment.PATH_CONF_SETTING.getKey(), getDataPath("/indices/analyze/conf_dir"))
        .put(HUNSPELL_LAZY_LOAD.getKey(), randomBoolean())
        .put(HUNSPELL_IGNORE_CASE.getKey(), true)
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
        .build();
    Environment nodeEnvironment = new Environment(nodeSettings);
    HunspellService hunspellService = new HunspellService(nodeSettings, nodeEnvironment, emptyMap());

    Dictionary loaded = hunspellService.getDictionary("en_US");

    assertThat(loaded, notNullValue());
    assertTrue(loaded.getIgnoreCase());
}
Also used : Dictionary(org.apache.lucene.analysis.hunspell.Dictionary) HunspellService(org.elasticsearch.indices.analysis.HunspellService) Environment(org.elasticsearch.env.Environment) Settings(org.elasticsearch.common.settings.Settings)

Example 3 with Dictionary

use of org.apache.lucene.analysis.hunspell.Dictionary in project elasticsearch by elastic.

the class HunspellService method loadDictionary.

/**
 * Loads the hunspell dictionary for the given locale.
 *
 * The dictionary directory is resolved as {@code hunspellDir/<locale>}. It must contain exactly one
 * {@code *.aff} file; every {@code *.dic} file found in it is passed to the dictionary.
 *
 * @param locale       The locale of the hunspell dictionary to be loaded.
 * @param nodeSettings The node level settings
 * @param env          The node environment (supplies the temp directory used while building the dictionary)
 * @return The loaded Hunspell dictionary
 * @throws Exception when loading fails (due to IO errors or malformed dictionary files)
 */
private Dictionary loadDictionary(String locale, Settings nodeSettings, Environment env) throws Exception {
    logger.debug("Loading hunspell dictionary [{}]...", locale);
    Path dicDir = hunspellDir.resolve(locale);
    if (FileSystemUtils.isAccessibleDirectory(dicDir, logger) == false) {
        throw new ElasticsearchException(String.format(Locale.ROOT, "Could not find hunspell dictionary [%s]", locale));
    }
    // merging node settings with hunspell dictionary specific settings
    Settings dictSettings = HUNSPELL_DICTIONARY_OPTIONS.get(nodeSettings);
    Settings mergedSettings = loadDictionarySettings(dicDir, dictSettings.getByPrefix(locale + "."));
    boolean ignoreCase = mergedSettings.getAsBoolean("ignore_case", defaultIgnoreCase);
    Path[] affixFiles = FileSystemUtils.files(dicDir, "*.aff");
    if (affixFiles.length == 0) {
        throw new ElasticsearchException(String.format(Locale.ROOT, "Missing affix file for hunspell dictionary [%s]", locale));
    }
    if (affixFiles.length != 1) {
        throw new ElasticsearchException(String.format(Locale.ROOT, "Too many affix files exist for hunspell dictionary [%s]", locale));
    }
    Path[] dicFiles = FileSystemUtils.files(dicDir, "*.dic");
    List<InputStream> dicStreams = new ArrayList<>(dicFiles.length);
    InputStream affixStream = null;
    try {
        for (Path dicFile : dicFiles) {
            dicStreams.add(Files.newInputStream(dicFile));
        }
        affixStream = Files.newInputStream(affixFiles[0]);
        try (Directory tmp = new SimpleFSDirectory(env.tmpFile())) {
            return new Dictionary(tmp, "hunspell", affixStream, dicStreams, ignoreCase);
        }
    } catch (Exception e) {
        logger.error((Supplier<?>) () -> new ParameterizedMessage("Could not load hunspell dictionary [{}]", locale), e);
        throw e;
    } finally {
        // Close everything in a single call: with two separate close calls, a failure while closing the
        // affix stream would skip closing the dictionary streams. affixStream may still be null here if
        // opening a stream failed; IOUtils.close is relied on to skip null entries, as the original
        // IOUtils.close(affixStream) call already did.
        List<InputStream> openedStreams = new ArrayList<>(dicStreams.size() + 1);
        openedStreams.add(affixStream);
        openedStreams.addAll(dicStreams);
        IOUtils.close(openedStreams);
    }
}
Also used : Path(java.nio.file.Path) Dictionary(org.apache.lucene.analysis.hunspell.Dictionary) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) ElasticsearchException(org.elasticsearch.ElasticsearchException) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) ElasticsearchException(org.elasticsearch.ElasticsearchException) IOException(java.io.IOException) Supplier(org.apache.logging.log4j.util.Supplier) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) Settings(org.elasticsearch.common.settings.Settings) Directory(org.apache.lucene.store.Directory) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory)

Example 4 with Dictionary

use of org.apache.lucene.analysis.hunspell.Dictionary in project elasticsearch by elastic.

the class HunspellServiceTests method testLocaleDirectoryWithLocaleSpecificConfig.

/**
 * Checks how node level and dictionary specific hunspell settings combine: the node enables
 * {@code ignore_case} globally while the {@code en_US} specific settings disable it.
 */
public void testLocaleDirectoryWithLocaleSpecificConfig() throws Exception {
    Settings nodeSettings = Settings.builder()
        .put(Environment.PATH_CONF_SETTING.getKey(), getDataPath("/indices/analyze/conf_dir"))
        .put(HUNSPELL_LAZY_LOAD.getKey(), randomBoolean())
        .put(HUNSPELL_IGNORE_CASE.getKey(), true)
        .put("indices.analysis.hunspell.dictionary.en_US.strict_affix_parsing", false)
        .put("indices.analysis.hunspell.dictionary.en_US.ignore_case", false)
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
        .build();

    // en_US: the dictionary specific ignore_case=false is expected to win over the node level true.
    HunspellService firstService = new HunspellService(nodeSettings, new Environment(nodeSettings), emptyMap());
    Dictionary enUs = firstService.getDictionary("en_US");
    assertThat(enUs, notNullValue());
    assertFalse(enUs.getIgnoreCase());

    // testing that dictionary specific settings override node level settings
    // NOTE(review): en_US_custom has no dictionary specific keys in the settings built above; where its
    // ignore_case=true comes from (node default vs. files in its directory) is not visible here - confirm.
    HunspellService secondService = new HunspellService(nodeSettings, new Environment(nodeSettings), emptyMap());
    Dictionary enUsCustom = secondService.getDictionary("en_US_custom");
    assertThat(enUsCustom, notNullValue());
    assertTrue(enUsCustom.getIgnoreCase());
}
Also used : Dictionary(org.apache.lucene.analysis.hunspell.Dictionary) HunspellService(org.elasticsearch.indices.analysis.HunspellService) Environment(org.elasticsearch.env.Environment) Settings(org.elasticsearch.common.settings.Settings)

Aggregations

Dictionary (org.apache.lucene.analysis.hunspell.Dictionary): 4; Settings (org.elasticsearch.common.settings.Settings): 4; Environment (org.elasticsearch.env.Environment): 3; InputStream (java.io.InputStream): 2; Directory (org.apache.lucene.store.Directory): 2; SimpleFSDirectory (org.apache.lucene.store.SimpleFSDirectory): 2; HunspellService (org.elasticsearch.indices.analysis.HunspellService): 2; IOException (java.io.IOException): 1; Path (java.nio.file.Path): 1; ArrayList (java.util.ArrayList): 1; ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage): 1; Supplier (org.apache.logging.log4j.util.Supplier): 1; ElasticsearchException (org.elasticsearch.ElasticsearchException): 1; IndexSettings (org.elasticsearch.index.IndexSettings): 1; AnalysisPlugin (org.elasticsearch.plugins.AnalysisPlugin): 1