use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.
the class IcuTokenizerFactoryTests method createTestAnalysis.
private static TestAnalysis createTestAnalysis() throws IOException {
InputStream keywords = IcuTokenizerFactoryTests.class.getResourceAsStream("KeywordTokenizer.rbbi");
InputStream latin = IcuTokenizerFactoryTests.class.getResourceAsStream("Latin-dont-break-on-hyphens.rbbi");
Path home = createTempDir();
Path config = home.resolve("config");
Files.createDirectory(config);
Files.copy(keywords, config.resolve("KeywordTokenizer.rbbi"));
Files.copy(latin, config.resolve("Latin-dont-break-on-hyphens.rbbi"));
String json = "/org/elasticsearch/index/analysis/icu_analysis.json";
Settings settings = Settings.builder().loadFromStream(json, IcuTokenizerFactoryTests.class.getResourceAsStream(json)).put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), home).build();
return createTestAnalysis(new Index("test", "_na_"), nodeSettings, settings, new AnalysisICUPlugin());
}
use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.
the class SimpleIcuAnalysisTests method testDefaultsIcuAnalysis.
public void testDefaultsIcuAnalysis() throws IOException {
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisICUPlugin());
TokenizerFactory tokenizerFactory = analysis.tokenizer.get("icu_tokenizer");
assertThat(tokenizerFactory, instanceOf(IcuTokenizerFactory.class));
TokenFilterFactory filterFactory = analysis.tokenFilter.get("icu_normalizer");
assertThat(filterFactory, instanceOf(IcuNormalizerTokenFilterFactory.class));
filterFactory = analysis.tokenFilter.get("icu_folding");
assertThat(filterFactory, instanceOf(IcuFoldingTokenFilterFactory.class));
filterFactory = analysis.tokenFilter.get("icu_collation");
assertThat(filterFactory, instanceOf(IcuCollationTokenFilterFactory.class));
filterFactory = analysis.tokenFilter.get("icu_transform");
assertThat(filterFactory, instanceOf(IcuTransformTokenFilterFactory.class));
CharFilterFactory charFilterFactory = analysis.charFilter.get("icu_normalizer");
assertThat(charFilterFactory, instanceOf(IcuNormalizerCharFilterFactory.class));
}
use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method testSecondaryStrength.
/*
* Test secondary strength, for english case is not significant.
*/
public void testSecondaryStrength() throws IOException {
Settings settings = Settings.builder().put("index.analysis.filter.myCollator.type", "icu_collation").put("index.analysis.filter.myCollator.language", "en").put("index.analysis.filter.myCollator.strength", "secondary").put("index.analysis.filter.myCollator.decomposition", "no").build();
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
assertCollatesToSame(filterFactory, "TESTING", "testing");
}
use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method testIgnoreWhitespace.
/*
* Setting alternate=shifted and variableTop to shift whitespace, but not
* punctuation or symbols, to quaternary level
*/
public void testIgnoreWhitespace() throws IOException {
Settings settings = Settings.builder().put("index.analysis.filter.myCollator.type", "icu_collation").put("index.analysis.filter.myCollator.language", "en").put("index.analysis.filter.myCollator.strength", "primary").put("index.analysis.filter.myCollator.alternate", "shifted").put("index.analysis.filter.myCollator.variableTop", " ").build();
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
assertCollatesToSame(filterFactory, "foo bar", "foobar");
// now assert that punctuation still matters: foo-bar < foo bar
assertCollation(filterFactory, "foo-bar", "foo bar", -1);
}
use of org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method testIgnorePunctuation.
/*
* Setting alternate=shifted to shift whitespace, punctuation and symbols
* to quaternary level
*/
public void testIgnorePunctuation() throws IOException {
Settings settings = Settings.builder().put("index.analysis.filter.myCollator.type", "icu_collation").put("index.analysis.filter.myCollator.language", "en").put("index.analysis.filter.myCollator.strength", "primary").put("index.analysis.filter.myCollator.alternate", "shifted").build();
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
assertCollatesToSame(filterFactory, "foo-bar", "foo bar");
}
Aggregations