use of org.elasticsearch.common.settings.Settings in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method testIgnoreWhitespace.
/*
* Setting alternate=shifted and variableTop to shift whitespace, but not
* punctuation or symbols, to quaternary level
*/
public void testIgnoreWhitespace() throws IOException {
    // Configure an "icu_collation" filter named myCollator: English, primary strength,
    // alternate=shifted, with variableTop set to a single space character.
    Settings collatorSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "en")
        .put("index.analysis.filter.myCollator.strength", "primary")
        .put("index.analysis.filter.myCollator.alternate", "shifted")
        .put("index.analysis.filter.myCollator.variableTop", " ")
        .build();

    TestAnalysis testAnalysis = createTestAnalysis(
        new Index("test", "_na_"),
        collatorSettings,
        new AnalysisICUPlugin()
    );
    TokenFilterFactory collationFilter = testAnalysis.tokenFilter.get("myCollator");

    // With whitespace shifted, "foo bar" and "foobar" are expected to collate the same.
    assertCollatesToSame(collationFilter, "foo bar", "foobar");

    // Punctuation must still be significant: foo-bar < foo bar
    assertCollation(collationFilter, "foo-bar", "foo bar", -1);
}
use of org.elasticsearch.common.settings.Settings in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method testIgnorePunctuation.
/*
* Setting alternate=shifted to shift whitespace, punctuation and symbols
* to quaternary level
*/
public void testIgnorePunctuation() throws IOException {
    // Configure an "icu_collation" filter named myCollator: English, primary strength,
    // alternate=shifted and no variableTop override.
    Settings collatorSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "en")
        .put("index.analysis.filter.myCollator.strength", "primary")
        .put("index.analysis.filter.myCollator.alternate", "shifted")
        .build();

    TestAnalysis testAnalysis = createTestAnalysis(
        new Index("test", "_na_"),
        collatorSettings,
        new AnalysisICUPlugin()
    );
    TokenFilterFactory collationFilter = testAnalysis.tokenFilter.get("myCollator");

    // A hyphen and a space are both expected to be ignored here, so the inputs collate the same.
    assertCollatesToSame(collationFilter, "foo-bar", "foo bar");
}
use of org.elasticsearch.common.settings.Settings in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method testIgnoreAccentsButNotCase.
/*
* Setting caseLevel=true to create an additional case level between
* secondary and tertiary
*/
public void testIgnoreAccentsButNotCase() throws IOException {
    // Configure an "icu_collation" filter named myCollator: English, primary strength,
    // with caseLevel switched on.
    Settings collatorSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "en")
        .put("index.analysis.filter.myCollator.strength", "primary")
        .put("index.analysis.filter.myCollator.caseLevel", "true")
        .build();

    TestAnalysis testAnalysis = createTestAnalysis(
        new Index("test", "_na_"),
        collatorSettings,
        new AnalysisICUPlugin()
    );
    TokenFilterFactory collationFilter = testAnalysis.tokenFilter.get("myCollator");

    // Accents are expected to be ignored, for both the lower-case and capitalised spellings.
    assertCollatesToSame(collationFilter, "résumé", "resume");
    assertCollatesToSame(collationFilter, "Résumé", "Resume");

    // Case must still be significant: resume < Resume
    assertCollation(collationFilter, "resume", "Resume", -1);
}
use of org.elasticsearch.common.settings.Settings in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method testNumerics.
/*
* Setting numeric to encode digits with numeric value, so that
* foobar-9 sorts before foobar-10
*/
public void testNumerics() throws IOException {
    // Configure an "icu_collation" filter named myCollator: English with numeric=true.
    Settings collatorSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "en")
        .put("index.analysis.filter.myCollator.numeric", "true")
        .build();

    TestAnalysis testAnalysis = createTestAnalysis(
        new Index("test", "_na_"),
        collatorSettings,
        new AnalysisICUPlugin()
    );
    TokenFilterFactory collationFilter = testAnalysis.tokenFilter.get("myCollator");

    // Digits are compared by value, so 9 must come before 10: foobar-9 < foobar-10
    assertCollation(collationFilter, "foobar-9", "foobar-10", -1);
}
use of org.elasticsearch.common.settings.Settings in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method testCustomRules.
/*
* For German, you might want oe to sort and match with o umlaut.
* This is not the default, but you can make a customized ruleset to do this.
*
* The default is DIN 5007-1; this shows how to tailor a collator to get DIN 5007-2 behavior.
* http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4423383
*/
public void testCustomRules() throws Exception {
    // Start from the stock de_DE collator and append tailorings to its rule string.
    RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de_DE"));

    // Each umlaut is tailored next to its two-letter spelling (ae/oe/ue), once for lower case
    // and once for upper case. The final rule targets "Ü": it previously repeated "ü", which
    // re-tailored the lower-case letter a second time and left the upper-case "Ü" untailored.
    String dinTailorings = "& ae , ä & AE , Ä" + "& oe , ö & OE , Ö" + "& ue , ü & UE , Ü";

    // Round-trip through a RuleBasedCollator and pass the rule string it reports
    // to the filter via the "rules" setting.
    RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + dinTailorings);
    String tailoredRules = tailoredCollator.getRules();

    // Configure an "icu_collation" filter named myCollator from the custom rules at primary strength.
    Settings settings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.rules", tailoredRules)
        .put("index.analysis.filter.myCollator.strength", "primary")
        .build();
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
    TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");

    // With the tailoring in place, "ö" and "oe" are expected to collate the same.
    assertCollatesToSame(filterFactory, "Töne", "Toene");
}
Aggregations