use of org.elasticsearch.index.Index in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method testIgnorePunctuation.
/*
* Setting alternate=shifted to shift whitespace, punctuation and symbols
* to quaternary level
*/
public void testIgnorePunctuation() throws IOException {
Settings settings = Settings.builder().put("index.analysis.filter.myCollator.type", "icu_collation").put("index.analysis.filter.myCollator.language", "en").put("index.analysis.filter.myCollator.strength", "primary").put("index.analysis.filter.myCollator.alternate", "shifted").build();
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
assertCollatesToSame(filterFactory, "foo-bar", "foo bar");
}
use of org.elasticsearch.index.Index in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method testIgnoreAccentsButNotCase.
/*
* Setting caseLevel=true to create an additional case level between
* secondary and tertiary
*/
public void testIgnoreAccentsButNotCase() throws IOException {
Settings settings = Settings.builder().put("index.analysis.filter.myCollator.type", "icu_collation").put("index.analysis.filter.myCollator.language", "en").put("index.analysis.filter.myCollator.strength", "primary").put("index.analysis.filter.myCollator.caseLevel", "true").build();
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
assertCollatesToSame(filterFactory, "résumé", "resume");
assertCollatesToSame(filterFactory, "Résumé", "Resume");
// now assert that case still matters: resume < Resume
assertCollation(filterFactory, "resume", "Resume", -1);
}
use of org.elasticsearch.index.Index in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method testNumerics.
/*
* Setting numeric to encode digits with numeric value, so that
* foobar-9 sorts before foobar-10
*/
public void testNumerics() throws IOException {
Settings settings = Settings.builder().put("index.analysis.filter.myCollator.type", "icu_collation").put("index.analysis.filter.myCollator.language", "en").put("index.analysis.filter.myCollator.numeric", "true").build();
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
assertCollation(filterFactory, "foobar-9", "foobar-10", -1);
}
use of org.elasticsearch.index.Index in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method testCustomRules.
/*
* For german, you might want oe to sort and match with o umlaut.
* This is not the default, but you can make a customized ruleset to do this.
*
* The default is DIN 5007-1, this shows how to tailor a collator to get DIN 5007-2 behavior.
* http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4423383
*/
public void testCustomRules() throws Exception {
RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de_DE"));
String DIN5007_2_tailorings = "& ae , ä & AE , Ä" + "& oe , ö & OE , Ö" + "& ue , ü & UE , ü";
RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
String tailoredRules = tailoredCollator.getRules();
Settings settings = Settings.builder().put("index.analysis.filter.myCollator.type", "icu_collation").put("index.analysis.filter.myCollator.rules", tailoredRules).put("index.analysis.filter.myCollator.strength", "primary").build();
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
assertCollatesToSame(filterFactory, "Töne", "Toene");
}
use of org.elasticsearch.index.Index in project elasticsearch by elastic.
the class SimpleIcuNormalizerCharFilterTests method testDefaultSetting.
public void testDefaultSetting() throws Exception {
Settings settings = Settings.builder().put("index.analysis.char_filter.myNormalizerChar.type", "icu_normalizer").build();
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
CharFilterFactory charFilterFactory = analysis.charFilter.get("myNormalizerChar");
String input = "ʰ㌰゙5℃№㈱㌘,バッファーの正規化のテスト.㋐㋑㋒㋓㋔カキクケコザジズゼゾg̈각/각நிเกषिchkʷक्षि";
Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
String expectedOutput = normalizer.normalize(input);
CharFilter inputReader = (CharFilter) charFilterFactory.create(new StringReader(input));
char[] tempBuff = new char[10];
StringBuilder output = new StringBuilder();
while (true) {
int length = inputReader.read(tempBuff);
if (length == -1)
break;
output.append(tempBuff, 0, length);
assertEquals(output.toString(), normalizer.normalize(input.substring(0, inputReader.correctOffset(output.length()))));
}
assertEquals(expectedOutput, output.toString());
}
Aggregations