Search in sources :

Example 1 with AnalysisICUPlugin

use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.

the class IcuTokenizerFactoryTests method createTestAnalysis.

/**
 * Builds the {@link TestAnalysis} used by these tests from {@code icu_analysis.json}.
 *
 * <p>The two RBBI rule files are copied from the test classpath into a {@code config}
 * directory created under a fresh temporary home directory, and the node settings point
 * {@code Environment.PATH_HOME_SETTING} at that home directory.
 *
 * @return the analysis created for index {@code "test"} with the ICU plugin supplied
 * @throws IOException if the config directory cannot be created or a resource cannot be
 *         copied or loaded
 */
private static TestAnalysis createTestAnalysis() throws IOException {
    Path home = createTempDir();
    Path config = home.resolve("config");
    Files.createDirectory(config);
    // Files.copy(InputStream, Path) leaves the source stream open, so close the
    // classpath streams explicitly once both rule files have been copied.
    try (InputStream keywords = IcuTokenizerFactoryTests.class.getResourceAsStream("KeywordTokenizer.rbbi");
         InputStream latin = IcuTokenizerFactoryTests.class.getResourceAsStream("Latin-dont-break-on-hyphens.rbbi")) {
        Files.copy(keywords, config.resolve("KeywordTokenizer.rbbi"));
        Files.copy(latin, config.resolve("Latin-dont-break-on-hyphens.rbbi"));
    }
    String json = "/org/elasticsearch/index/analysis/icu_analysis.json";
    Settings settings;
    // Own the JSON stream here so it is released regardless of how the loader treats it.
    try (InputStream jsonStream = IcuTokenizerFactoryTests.class.getResourceAsStream(json)) {
        settings = Settings.builder()
            .loadFromStream(json, jsonStream)
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .build();
    }
    Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), home).build();
    return createTestAnalysis(new Index("test", "_na_"), nodeSettings, settings, new AnalysisICUPlugin());
}
Also used : Path(java.nio.file.Path) InputStream(java.io.InputStream) Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Example 2 with AnalysisICUPlugin

use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.

the class SimpleIcuAnalysisTests method testDefaultsIcuAnalysis.

/**
 * Verifies that, with empty settings and the ICU plugin supplied, the analysis exposes the
 * ICU tokenizer, token filters and char filter under their default names, each backed by
 * the expected factory class.
 */
public void testDefaultsIcuAnalysis() throws IOException {
    final Index index = new Index("test", "_na_");
    final TestAnalysis icu = createTestAnalysis(index, Settings.EMPTY, new AnalysisICUPlugin());

    // Tokenizer registered by default.
    final TokenizerFactory tokenizer = icu.tokenizer.get("icu_tokenizer");
    assertThat(tokenizer, instanceOf(IcuTokenizerFactory.class));

    // Token filters registered by default.
    final TokenFilterFactory normalizer = icu.tokenFilter.get("icu_normalizer");
    assertThat(normalizer, instanceOf(IcuNormalizerTokenFilterFactory.class));

    final TokenFilterFactory folding = icu.tokenFilter.get("icu_folding");
    assertThat(folding, instanceOf(IcuFoldingTokenFilterFactory.class));

    final TokenFilterFactory collation = icu.tokenFilter.get("icu_collation");
    assertThat(collation, instanceOf(IcuCollationTokenFilterFactory.class));

    final TokenFilterFactory transform = icu.tokenFilter.get("icu_transform");
    assertThat(transform, instanceOf(IcuTransformTokenFilterFactory.class));

    // Char filter registered by default.
    final CharFilterFactory normalizerCharFilter = icu.charFilter.get("icu_normalizer");
    assertThat(normalizerCharFilter, instanceOf(IcuNormalizerCharFilterFactory.class));
}
Also used : Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin)

Example 3 with AnalysisICUPlugin

use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.

the class SimpleIcuCollationTokenFilterTests method testSecondaryStrength.

/**
 * Tests secondary strength: for English, case is not significant, so
 * {@code "TESTING"} and {@code "testing"} must collate to the same key.
 */
public void testSecondaryStrength() throws IOException {
    final Settings collatorSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "en")
        .put("index.analysis.filter.myCollator.strength", "secondary")
        .put("index.analysis.filter.myCollator.decomposition", "no")
        .build();
    final Index index = new Index("test", "_na_");
    final TestAnalysis icu = createTestAnalysis(index, collatorSettings, new AnalysisICUPlugin());
    final TokenFilterFactory collator = icu.tokenFilter.get("myCollator");
    assertCollatesToSame(collator, "TESTING", "testing");
}
Also used : Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Example 4 with AnalysisICUPlugin

use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.

the class SimpleIcuCollationTokenFilterTests method testIgnoreWhitespace.

/**
 * Sets {@code alternate=shifted} together with {@code variableTop=" "} so that whitespace,
 * but not punctuation or symbols, is shifted to the quaternary level.
 */
public void testIgnoreWhitespace() throws IOException {
    final Settings collatorSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "en")
        .put("index.analysis.filter.myCollator.strength", "primary")
        .put("index.analysis.filter.myCollator.alternate", "shifted")
        .put("index.analysis.filter.myCollator.variableTop", " ")
        .build();
    final Index index = new Index("test", "_na_");
    final TestAnalysis icu = createTestAnalysis(index, collatorSettings, new AnalysisICUPlugin());
    final TokenFilterFactory collator = icu.tokenFilter.get("myCollator");

    // Whitespace is ignored at primary strength.
    assertCollatesToSame(collator, "foo bar", "foobar");
    // Punctuation must still matter: foo-bar < foo bar.
    assertCollation(collator, "foo-bar", "foo bar", -1);
}
Also used : Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Example 5 with AnalysisICUPlugin

use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.

the class SimpleIcuCollationTokenFilterTests method testIgnorePunctuation.

/**
 * Sets {@code alternate=shifted} so that whitespace, punctuation and symbols are all
 * shifted to the quaternary level; {@code "foo-bar"} and {@code "foo bar"} must then
 * collate to the same key at primary strength.
 */
public void testIgnorePunctuation() throws IOException {
    final Settings collatorSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "en")
        .put("index.analysis.filter.myCollator.strength", "primary")
        .put("index.analysis.filter.myCollator.alternate", "shifted")
        .build();
    final Index index = new Index("test", "_na_");
    final TestAnalysis icu = createTestAnalysis(index, collatorSettings, new AnalysisICUPlugin());
    final TokenFilterFactory collator = icu.tokenFilter.get("myCollator");
    assertCollatesToSame(collator, "foo-bar", "foo bar");
}
Also used : Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Aggregations

Index (org.elasticsearch.index.Index)13 AnalysisICUPlugin (org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin)13 Settings (org.elasticsearch.common.settings.Settings)12 Normalizer2 (com.ibm.icu.text.Normalizer2)2 StringReader (java.io.StringReader)2 CharFilter (org.apache.lucene.analysis.CharFilter)2 RuleBasedCollator (com.ibm.icu.text.RuleBasedCollator)1 ULocale (com.ibm.icu.util.ULocale)1 InputStream (java.io.InputStream)1 Path (java.nio.file.Path)1