Search in sources :

Example 96 with Settings

use of org.elasticsearch.common.settings.Settings in project elasticsearch by elastic.

the class SimpleIcuCollationTokenFilterTests method testIgnoreWhitespace.

/**
 * Checks that whitespace is ignored by the collation filter while punctuation is not.
 * <p>
 * The filter is configured with {@code alternate=shifted} and {@code variableTop} set to a
 * single space, so that whitespace, but not punctuation or symbols, is shifted to the
 * quaternary level.
 *
 * @throws IOException if building the test analysis or collating the inputs fails
 */
public void testIgnoreWhitespace() throws IOException {
    Settings collatorSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "en")
        .put("index.analysis.filter.myCollator.strength", "primary")
        .put("index.analysis.filter.myCollator.alternate", "shifted")
        .put("index.analysis.filter.myCollator.variableTop", " ")
        .build();
    Index index = new Index("test", "_na_");
    TestAnalysis testAnalysis = createTestAnalysis(index, collatorSettings, new AnalysisICUPlugin());
    TokenFilterFactory collationFilter = testAnalysis.tokenFilter.get("myCollator");

    // With whitespace shifted away, both spellings must collate to the same key.
    assertCollatesToSame(collationFilter, "foo bar", "foobar");
    // Punctuation is not shifted and still matters: foo-bar < foo bar
    assertCollation(collationFilter, "foo-bar", "foo bar", -1);
}
Also used : Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Example 97 with Settings

use of org.elasticsearch.common.settings.Settings in project elasticsearch by elastic.

the class SimpleIcuCollationTokenFilterTests method testIgnorePunctuation.

/**
 * Checks that punctuation is ignored by the collation filter.
 * <p>
 * The filter is configured with {@code alternate=shifted} and no {@code variableTop}, so
 * that whitespace, punctuation and symbols are all shifted to the quaternary level.
 *
 * @throws IOException if building the test analysis or collating the inputs fails
 */
public void testIgnorePunctuation() throws IOException {
    Settings collatorSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "en")
        .put("index.analysis.filter.myCollator.strength", "primary")
        .put("index.analysis.filter.myCollator.alternate", "shifted")
        .build();
    Index index = new Index("test", "_na_");
    TestAnalysis testAnalysis = createTestAnalysis(index, collatorSettings, new AnalysisICUPlugin());
    TokenFilterFactory collationFilter = testAnalysis.tokenFilter.get("myCollator");

    // The hyphen and the space are both shifted, so the two inputs must collate equally.
    assertCollatesToSame(collationFilter, "foo-bar", "foo bar");
}
Also used : Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Example 98 with Settings

use of org.elasticsearch.common.settings.Settings in project elasticsearch by elastic.

the class SimpleIcuCollationTokenFilterTests method testIgnoreAccentsButNotCase.

/**
 * Checks that accents are ignored by the collation filter while case is not.
 * <p>
 * The filter is configured with primary strength plus {@code caseLevel=true}, which
 * creates an additional case level between the secondary and tertiary levels.
 *
 * @throws IOException if building the test analysis or collating the inputs fails
 */
public void testIgnoreAccentsButNotCase() throws IOException {
    Settings collatorSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "en")
        .put("index.analysis.filter.myCollator.strength", "primary")
        .put("index.analysis.filter.myCollator.caseLevel", "true")
        .build();
    Index index = new Index("test", "_na_");
    TestAnalysis testAnalysis = createTestAnalysis(index, collatorSettings, new AnalysisICUPlugin());
    TokenFilterFactory collationFilter = testAnalysis.tokenFilter.get("myCollator");

    // Accented and unaccented spellings must collate equally, in either case.
    assertCollatesToSame(collationFilter, "résumé", "resume");
    assertCollatesToSame(collationFilter, "Résumé", "Resume");
    // Case still matters: resume < Resume
    assertCollation(collationFilter, "resume", "Resume", -1);
}
Also used : Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Example 99 with Settings

use of org.elasticsearch.common.settings.Settings in project elasticsearch by elastic.

the class SimpleIcuCollationTokenFilterTests method testNumerics.

/**
 * Checks numeric collation.
 * <p>
 * The filter is configured with {@code numeric=true} so that digits are encoded by their
 * numeric value, making {@code foobar-9} sort before {@code foobar-10}.
 *
 * @throws IOException if building the test analysis or collating the inputs fails
 */
public void testNumerics() throws IOException {
    Settings collatorSettings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.language", "en")
        .put("index.analysis.filter.myCollator.numeric", "true")
        .build();
    Index index = new Index("test", "_na_");
    TestAnalysis testAnalysis = createTestAnalysis(index, collatorSettings, new AnalysisICUPlugin());
    TokenFilterFactory collationFilter = testAnalysis.tokenFilter.get("myCollator");

    // Compared numerically, 9 comes before 10.
    assertCollation(collationFilter, "foobar-9", "foobar-10", -1);
}
Also used : Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Example 100 with Settings

use of org.elasticsearch.common.settings.Settings in project elasticsearch by elastic.

the class SimpleIcuCollationTokenFilterTests method testCustomRules.

/**
 * Checks that a custom rule set can be supplied to the collation filter.
 * <p>
 * For German, you might want "oe" to sort and match with o-umlaut. This is not the
 * default, but a customized rule set can do this. The default is DIN 5007-1; this test
 * shows how to tailor a collator to get DIN 5007-2 behavior.
 * See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4423383
 *
 * @throws Exception if the tailored rules cannot be parsed, or if building the test
 *         analysis or collating the inputs fails
 */
public void testCustomRules() throws Exception {
    RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de_DE"));
    // Each umlaut is tailored next to its two-letter transcription, in both lower and
    // upper case. The upper-case "UE" must map to "Ü"; it was previously mapped to the
    // lower-case "ü", which left "Ü" untailored.
    String din5007Tailorings = "& ae , ä & AE , Ä" + "& oe , ö & OE , Ö" + "& ue , ü & UE , Ü";
    RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + din5007Tailorings);
    String tailoredRules = tailoredCollator.getRules();
    Settings settings = Settings.builder()
        .put("index.analysis.filter.myCollator.type", "icu_collation")
        .put("index.analysis.filter.myCollator.rules", tailoredRules)
        .put("index.analysis.filter.myCollator.strength", "primary")
        .build();
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
    TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
    // With the tailoring in place, the umlaut and its transcription must collate equally.
    assertCollatesToSame(filterFactory, "Töne", "Toene");
}
Also used : RuleBasedCollator(com.ibm.icu.text.RuleBasedCollator) ULocale(com.ibm.icu.util.ULocale) Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Aggregations

Settings (org.elasticsearch.common.settings.Settings): 866, IndexSettings (org.elasticsearch.index.IndexSettings): 112, Path (java.nio.file.Path): 91, IOException (java.io.IOException): 83, ClusterState (org.elasticsearch.cluster.ClusterState): 76, IndexMetaData (org.elasticsearch.cluster.metadata.IndexMetaData): 72, ClusterSettings (org.elasticsearch.common.settings.ClusterSettings): 68, HashMap (java.util.HashMap): 66, ArrayList (java.util.ArrayList): 64, Version (org.elasticsearch.Version): 63, Environment (org.elasticsearch.env.Environment): 63, DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode): 61, Test (org.junit.Test): 60, Map (java.util.Map): 55, Index (org.elasticsearch.index.Index): 55, Matchers.containsString (org.hamcrest.Matchers.containsString): 54, List (java.util.List): 45, ThreadPool (org.elasticsearch.threadpool.ThreadPool): 40, XContentBuilder (org.elasticsearch.common.xcontent.XContentBuilder): 37, MetaData (org.elasticsearch.cluster.metadata.MetaData): 36