Search in sources :

Example 11 with AnalysisICUPlugin

use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.

the class SimpleIcuCollationTokenFilterTests method testBasicUsage.

/*
    * Turkish casing does not follow the usual upper/lower mapping
    * (dotted İ vs. dotless ı), so plain lowercasing is not enough.
    * Rather than relying on LowerCaseFilter, this test configures an
    * icu_collation filter for language "tr" at primary strength and
    * checks that the upper- and lower-case phrases collate identically.
    */
public void testBasicUsage() throws Exception {
    // Filter definition: Turkish collator, primary strength.
    Settings collatorSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "tr")
        .put("index.analysis.filter.myCollator.strength", "primary")
        .build();
    Index testIndex = new Index("test", "_na_");
    TestAnalysis testAnalysis = createTestAnalysis(testIndex, collatorSettings, new AnalysisICUPlugin());
    TokenFilterFactory collatorFactory = testAnalysis.tokenFilter.get("myCollator");
    // Both spellings must produce the same collation key.
    assertCollatesToSame(collatorFactory, "I WİLL USE TURKİSH CASING", "ı will use turkish casıng");
}
Also used : Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Example 12 with AnalysisICUPlugin

use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.

the class SimpleIcuCollationTokenFilterTests method testNormalization.

/*
    * Exercises the "decomposition" option (unicode normalization):
    * same Turkish, primary-strength collator as the basic test, but
    * with decomposition set to "canonical".
    */
public void testNormalization() throws IOException {
    // Filter definition: Turkish collator, primary strength, canonical decomposition.
    Settings collatorSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "tr")
        .put("index.analysis.filter.myCollator.strength", "primary")
        .put("index.analysis.filter.myCollator.decomposition", "canonical")
        .build();
    Index testIndex = new Index("test", "_na_");
    TestAnalysis testAnalysis = createTestAnalysis(testIndex, collatorSettings, new AnalysisICUPlugin());
    TokenFilterFactory collatorFactory = testAnalysis.tokenFilter.get("myCollator");
    // Both spellings must produce the same collation key.
    assertCollatesToSame(collatorFactory, "I WİLL USE TURKİSH CASING", "ı will use turkish casıng");
}
Also used : Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Example 13 with AnalysisICUPlugin

use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.

the class SimpleIcuCollationTokenFilterTests method testUpperCaseFirst.

/*
    * With caseFirst=upper, uppercase strings are expected to sort
    * ahead of their lowercase counterparts.
    */
public void testUpperCaseFirst() throws IOException {
    // Filter definition: English collator, tertiary strength, uppercase first.
    Settings collatorSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "en")
        .put("index.analysis.filter.myCollator.strength", "tertiary")
        .put("index.analysis.filter.myCollator.caseFirst", "upper")
        .build();
    Index testIndex = new Index("test", "_na_");
    TestAnalysis testAnalysis = createTestAnalysis(testIndex, collatorSettings, new AnalysisICUPlugin());
    TokenFilterFactory collatorFactory = testAnalysis.tokenFilter.get("myCollator");
    // Expected comparison result of -1: "Resume" orders before "resume".
    assertCollation(collatorFactory, "Resume", "resume", -1);
}
Also used : Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Aggregations

Index (org.elasticsearch.index.Index)13 AnalysisICUPlugin (org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin)13 Settings (org.elasticsearch.common.settings.Settings)12 Normalizer2 (com.ibm.icu.text.Normalizer2)2 StringReader (java.io.StringReader)2 CharFilter (org.apache.lucene.analysis.CharFilter)2 RuleBasedCollator (com.ibm.icu.text.RuleBasedCollator)1 ULocale (com.ibm.icu.util.ULocale)1 InputStream (java.io.InputStream)1 Path (java.nio.file.Path)1