Search in sources :

Example 71 with Index

use of org.elasticsearch.index.Index in project elasticsearch by elastic.

the class SimpleIcuCollationTokenFilterTests method testIgnorePunctuation.

/*
    * Setting alternate=shifted to shift whitespace, punctuation and symbols
    * to quaternary level
    */
public void testIgnorePunctuation() throws IOException {
    Settings settings = Settings.builder().put("index.analysis.filter.myCollator.type", "icu_collation").put("index.analysis.filter.myCollator.language", "en").put("index.analysis.filter.myCollator.strength", "primary").put("index.analysis.filter.myCollator.alternate", "shifted").build();
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
    TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
    assertCollatesToSame(filterFactory, "foo-bar", "foo bar");
}
Also used : Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Example 72 with Index

use of org.elasticsearch.index.Index in project elasticsearch by elastic.

the class SimpleIcuCollationTokenFilterTests method testIgnoreAccentsButNotCase.

/*
    * Setting caseLevel=true to create an additional case level between
    * secondary and tertiary
    */
public void testIgnoreAccentsButNotCase() throws IOException {
    Settings settings = Settings.builder().put("index.analysis.filter.myCollator.type", "icu_collation").put("index.analysis.filter.myCollator.language", "en").put("index.analysis.filter.myCollator.strength", "primary").put("index.analysis.filter.myCollator.caseLevel", "true").build();
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
    TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
    assertCollatesToSame(filterFactory, "résumé", "resume");
    assertCollatesToSame(filterFactory, "Résumé", "Resume");
    // now assert that case still matters: resume < Resume
    assertCollation(filterFactory, "resume", "Resume", -1);
}
Also used : Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Example 73 with Index

use of org.elasticsearch.index.Index in project elasticsearch by elastic.

the class SimpleIcuCollationTokenFilterTests method testNumerics.

/*
    * Setting numeric to encode digits with numeric value, so that
    * foobar-9 sorts before foobar-10
    */
public void testNumerics() throws IOException {
    Settings settings = Settings.builder().put("index.analysis.filter.myCollator.type", "icu_collation").put("index.analysis.filter.myCollator.language", "en").put("index.analysis.filter.myCollator.numeric", "true").build();
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
    TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
    assertCollation(filterFactory, "foobar-9", "foobar-10", -1);
}
Also used : Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Example 74 with Index

use of org.elasticsearch.index.Index in project elasticsearch by elastic.

the class SimpleIcuCollationTokenFilterTests method testCustomRules.

/*
    * For german, you might want oe to sort and match with o umlaut.
    * This is not the default, but you can make a customized ruleset to do this.
    *
    * The default is DIN 5007-1, this shows how to tailor a collator to get DIN 5007-2 behavior.
    *  http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4423383
    */
public void testCustomRules() throws Exception {
    RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de_DE"));
    String DIN5007_2_tailorings = "& ae , ä & AE , Ä" + "& oe , ö & OE , Ö" + "& ue , ü & UE , ü";
    RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
    String tailoredRules = tailoredCollator.getRules();
    Settings settings = Settings.builder().put("index.analysis.filter.myCollator.type", "icu_collation").put("index.analysis.filter.myCollator.rules", tailoredRules).put("index.analysis.filter.myCollator.strength", "primary").build();
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
    TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
    assertCollatesToSame(filterFactory, "Töne", "Toene");
}
Also used : RuleBasedCollator(com.ibm.icu.text.RuleBasedCollator) ULocale(com.ibm.icu.util.ULocale) Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Example 75 with Index

use of org.elasticsearch.index.Index in project elasticsearch by elastic.

the class SimpleIcuNormalizerCharFilterTests method testDefaultSetting.

public void testDefaultSetting() throws Exception {
    Settings settings = Settings.builder().put("index.analysis.char_filter.myNormalizerChar.type", "icu_normalizer").build();
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
    CharFilterFactory charFilterFactory = analysis.charFilter.get("myNormalizerChar");
    String input = "ʰ㌰゙5℃№㈱㌘,バッファーの正規化のテスト.㋐㋑㋒㋓㋔カキクケコザジズゼゾg̈각/각நிเกषिchkʷक्षि";
    Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
    String expectedOutput = normalizer.normalize(input);
    CharFilter inputReader = (CharFilter) charFilterFactory.create(new StringReader(input));
    char[] tempBuff = new char[10];
    StringBuilder output = new StringBuilder();
    while (true) {
        int length = inputReader.read(tempBuff);
        if (length == -1)
            break;
        output.append(tempBuff, 0, length);
        assertEquals(output.toString(), normalizer.normalize(input.substring(0, inputReader.correctOffset(output.length()))));
    }
    assertEquals(expectedOutput, output.toString());
}
Also used : Normalizer2(com.ibm.icu.text.Normalizer2) CharFilter(org.apache.lucene.analysis.CharFilter) StringReader(java.io.StringReader) Index(org.elasticsearch.index.Index) AnalysisICUPlugin(org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin) Settings(org.elasticsearch.common.settings.Settings)

Aggregations

Index (org.elasticsearch.index.Index)366 ShardId (org.elasticsearch.index.shard.ShardId)108 Settings (org.elasticsearch.common.settings.Settings)95 ClusterState (org.elasticsearch.cluster.ClusterState)88 ArrayList (java.util.ArrayList)79 IOException (java.io.IOException)74 IndexMetaData (org.elasticsearch.cluster.metadata.IndexMetaData)65 IndexMetadata (org.elasticsearch.cluster.metadata.IndexMetadata)65 HashMap (java.util.HashMap)61 Map (java.util.Map)61 ShardRouting (org.elasticsearch.cluster.routing.ShardRouting)59 List (java.util.List)56 HashSet (java.util.HashSet)50 Path (java.nio.file.Path)45 IndexSettings (org.elasticsearch.index.IndexSettings)44 IndexService (org.elasticsearch.index.IndexService)43 Set (java.util.Set)40 Metadata (org.elasticsearch.cluster.metadata.Metadata)39 ClusterService (org.elasticsearch.cluster.service.ClusterService)39 ActionListener (org.elasticsearch.action.ActionListener)35