use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method testBasicUsage.
/*
* Turkish has some funny casing.
* This test shows how you can solve this kind of thing easily with collation.
* Instead of using LowerCaseFilter, use a Turkish collator with primary strength.
* Then things will sort and match correctly.
*/
public void testBasicUsage() throws Exception {
    // Configure an "icu_collation" filter named "myCollator" with language "tr" and primary strength.
    Settings collatorSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "tr")
        .put("index.analysis.filter.myCollator.strength", "primary")
        .build();

    // Build the test analysis registry with the ICU plugin loaded and look the filter up by name.
    Index testIndex = new Index("test", "_na_");
    TestAnalysis icuAnalysis = createTestAnalysis(testIndex, collatorSettings, new AnalysisICUPlugin());
    TokenFilterFactory turkishCollator = icuAnalysis.tokenFilter.get("myCollator");

    // The two inputs differ only in letter case, including the Turkish dotted/dotless i pairs.
    String upperCased = "I WİLL USE TURKİSH CASING";
    String lowerCased = "ı will use turkish casıng";
    assertCollatesToSame(turkishCollator, upperCased, lowerCased);
}
use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method testNormalization.
/*
* Test usage of the decomposition option for Unicode normalization.
*/
public void testNormalization() throws IOException {
    // Same Turkish primary-strength collator as the basic test, plus the "decomposition" option
    // set to "canonical".
    Settings normalizingSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "tr")
        .put("index.analysis.filter.myCollator.strength", "primary")
        .put("index.analysis.filter.myCollator.decomposition", "canonical")
        .build();

    // Resolve the configured filter from a test analysis registry that has the ICU plugin installed.
    TokenFilterFactory normalizingCollator =
        createTestAnalysis(new Index("test", "_na_"), normalizingSettings, new AnalysisICUPlugin())
            .tokenFilter
            .get("myCollator");

    // Both spellings are checked with assertCollatesToSame while decomposition is enabled.
    assertCollatesToSame(normalizingCollator, "I WİLL USE TURKİSH CASING", "ı will use turkish casıng");
}
use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method testUpperCaseFirst.
/*
* Setting caseFirst=upper to cause uppercase strings to sort
* before lowercase ones.
*/
public void testUpperCaseFirst() throws IOException {
    // English collator at tertiary strength, with caseFirst set to "upper".
    Settings upperFirstSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "en")
        .put("index.analysis.filter.myCollator.strength", "tertiary")
        .put("index.analysis.filter.myCollator.caseFirst", "upper")
        .build();

    Index testIndex = new Index("test", "_na_");
    TestAnalysis icuAnalysis = createTestAnalysis(testIndex, upperFirstSettings, new AnalysisICUPlugin());
    TokenFilterFactory upperFirstCollator = icuAnalysis.tokenFilter.get("myCollator");

    // Expected comparison result passed to assertCollation: "Resume" orders before "resume".
    int expectedComparison = -1;
    assertCollation(upperFirstCollator, "Resume", "resume", expectedComparison);
}
Aggregations