Search in sources :

Example 36 with RuleBasedCollator

use of android.icu.text.RuleBasedCollator in project j2objc by google.

the class CollationAPITest method TestGetContractions.

// Java counterpart of capitst.c/TestGetContractionsAndUnsafes()
/**
 * For each locale in the table, fetches the collator's contraction and expansion sets
 * and hands them to {@code doSetsTest} together with the row's "in" and "out" patterns.
 * If either check fails, the collator's rule string is logged to help diagnosis.
 */
@Test
public void TestGetContractions() throws Exception {
    /*
     * Row layout, mirroring the struct used by the C test:
     *   [0] locale
     *   [1] inConts          [2] outConts
     *   [3] inExp            [4] outExp
     *   [5] unsafeCodeUnits  [6] safeCodeUnits
     *
     * Note on the "ja" row:
     * The "collv2" builder omits mappings if the collator maps their
     * character sequences to the same CEs.
     * For example, it omits Japanese contractions for NFD forms
     * of the voiced iteration mark (U+309E = U+309D + U+3099), such as
     * {\u3053\u3099\u309D\u3099}{\u3053\u309D\u3099}
     * {\u30B3\u3099\u30FD\u3099}{\u30B3\u30FD\u3099}.
     * It does add mappings for the precomposed forms.
     */
    String[][] tests = {
        {
            "ru",
            "[{\u0418\u0306}{\u0438\u0306}]",
            "[\u0439\u0457]",
            "[\u00e6]",
            "[ae]",
            "[\u0418\u0438]",
            "[aAbBxv]"
        },
        {
            "uk",
            "[{\u0406\u0308}{\u0456\u0308}{\u0418\u0306}{\u0438\u0306}]",
            "[\u0407\u0419\u0439\u0457]",
            "[\u00e6]",
            "[ae]",
            "[\u0406\u0456\u0418\u0438]",
            "[aAbBxv]"
        },
        {
            "sh",
            "[{C\u0301}{C\u030C}{C\u0341}{DZ\u030C}{Dz\u030C}{D\u017D}{D\u017E}{lj}{nj}]",
            "[{\u309d\u3099}{\u30fd\u3099}]",
            "[\u00e6]",
            "[a]",
            "[nlcdzNLCDZ]",
            "[jabv]"
        },
        {
            "ja",
            "[{\u3053\u3099\u309D}{\u3053\u3099\u309E}{\u3053\u3099\u30FC}"
                + "{\u3053\u309D}{\u3053\u309E}{\u3053\u30FC}"
                + "{\u30B3\u3099\u30FC}{\u30B3\u3099\u30FD}{\u30B3\u3099\u30FE}"
                + "{\u30B3\u30FC}{\u30B3\u30FD}{\u30B3\u30FE}]",
            "[{\u30FD\u3099}{\u309D\u3099}{\u3053\u3099}{\u30B3\u3099}{lj}{nj}]",
            "[\u30FE\u00e6]",
            "[a]",
            "[\u3099]",
            "[]"
        }
    };

    // These three sets are shared by every iteration, exactly one instance each.
    UnicodeSet contractions = new UnicodeSet();
    UnicodeSet expansions = new UnicodeSet();
    UnicodeSet scratch = new UnicodeSet();

    for (String[] row : tests) {
        String localeId = row[0];
        logln("Testing locale: " + localeId);

        RuleBasedCollator collator =
                (RuleBasedCollator) Collator.getInstance(new ULocale(localeId));
        collator.getContractionsAndExpansions(contractions, expansions, true);

        // Both checks always run, even when the first one has already failed.
        logln("Contractions " + contractions.size() + ":\n" + contractions.toPattern(true));
        boolean contractionsOk = doSetsTest(contractions, scratch, row[1], row[2]);

        logln("Expansions " + expansions.size() + ":\n" + expansions.toPattern(true));
        boolean expansionsOk = doSetsTest(expansions, scratch, row[3], row[4]);

        if (!contractionsOk || !expansionsOk) {
            // On failure, dump the rule string so the mismatch can be investigated.
            String ruleText = collator.getRules(false);
            logln("Collation rules (getLocale()=" + collator.getLocale(ULocale.ACTUAL_LOCALE).toString() + "): " + Utility.escape(ruleText));
        }

        // ICU4J has no unsafe set, so columns [5] and [6] are not checked.
        // The C test does the equivalent of:
        //   noConts = ucol_getUnsafeSet(coll, conts, &status);
        //   doSetsTest(conts, set, tests[i][5], tests[i][6]);
        //   log_verbose("Unsafes "+conts.size()+":\n"+conts.toPattern(true)+"\n");
    }
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) ULocale(android.icu.util.ULocale) UnicodeSet(android.icu.text.UnicodeSet) Test(org.junit.Test)

Example 37 with RuleBasedCollator

use of android.icu.text.RuleBasedCollator in project j2objc by google.

the class CollationAPITest method TestGetLocale.

/**
 * Exercises {@code Collator.getLocale()} for four situations: the root collator
 * (requested as "root" and as ""), a table of requested locales with their expected
 * valid/actual locales, a locale ID that does not exist, and a collator built directly
 * from a rule string.
 */
@Test
public void TestGetLocale() {
    // --- Root collator, requested by the name "root" ---
    Collator rootByName = Collator.getInstance(new ULocale("root"));
    ULocale rootByNameActual = rootByName.getLocale(ULocale.ACTUAL_LOCALE);
    if (!rootByNameActual.equals(ULocale.ROOT)) {
        errln("Collator.getInstance(\"root\").getLocale(actual) != ULocale.ROOT; " + "getLocale().getName() = \"" + rootByNameActual.getName() + "\"");
    }

    // --- Root collator, requested by the empty name ---
    Collator rootByEmpty = Collator.getInstance(new ULocale(""));
    ULocale rootByEmptyActual = rootByEmpty.getLocale(ULocale.ACTUAL_LOCALE);
    if (!rootByEmptyActual.equals(ULocale.ROOT)) {
        errln("Collator.getInstance(\"\").getLocale(actual) != ULocale.ROOT; " + "getLocale().getName() = \"" + rootByEmptyActual.getName() + "\"");
    }

    // Each row: { requested locale, expected valid locale, expected actual locale }.
    String[][] testStruct = {
        // Note: ULocale.ROOT.getName() == "" not "root".
        { "de_DE", "de", "" },
        { "sr_RS", "sr_Cyrl_RS", "sr" },
        { "en_US_CALIFORNIA", "en_US", "" },
        { "fr_FR_NONEXISTANT", "fr", "" },
        // pinyin is the default, therefore suppressed.
        { "zh_CN", "zh_Hans_CN", "zh" },
        // zh_Hant has default=stroke but the data is in zh.
        { "zh_TW", "zh_Hant_TW", "zh@collation=stroke" },
        { "zh_TW@collation=pinyin", "zh_Hant_TW@collation=pinyin", "zh" },
        { "zh_CN@collation=stroke", "zh_Hans_CN@collation=stroke", "zh@collation=stroke" }
    };

    // --- Open a collator for every requested locale and inspect what it reports ---
    for (String[] row : testStruct) {
        String requestedLocale = row[0];
        String validLocale = row[1];
        String actualLocale = row[2];

        Collator opened;
        try {
            opened = Collator.getInstance(new ULocale(requestedLocale));
        } catch (Exception e) {
            errln(String.format("Failed to open collator for %s with %s", requestedLocale, e));
            continue;
        }

        // Note: C++ getLocale() recognizes ULOC_REQUESTED_LOCALE
        // which does not exist in Java.
        ULocale reportedValid = opened.getLocale(ULocale.VALID_LOCALE);
        if (!reportedValid.equals(new ULocale(validLocale))) {
            errln(String.format("[Coll %s]: Error in valid locale, expected %s, got %s", requestedLocale, validLocale, reportedValid.getName()));
        }

        ULocale reportedActual = opened.getLocale(ULocale.ACTUAL_LOCALE);
        if (!reportedActual.equals(new ULocale(actualLocale))) {
            errln(String.format("[Coll %s]: Error in actual locale, expected %s, got %s", requestedLocale, actualLocale, reportedActual.getName()));
        }

        // Re-opening with the reported actual locale should yield an equivalent collator.
        Collator reopened;
        try {
            reopened = Collator.getInstance(reportedActual);
        } catch (Exception e) {
            errln(String.format("Failed to open collator for actual locale \"%s\" with %s", reportedActual.getName(), e));
            continue;
        }

        ULocale reopenedActual = reopened.getLocale(ULocale.ACTUAL_LOCALE);
        if (!reopenedActual.equals(reportedActual)) {
            errln(String.format("[Coll actual \"%s\"]: Error in actual locale, got different one: \"%s\"", reportedActual.getName(), reopenedActual.getName()));
        }
        if (!reopened.equals(opened)) {
            errln(String.format("[Coll actual \"%s\"]: Got different collator than before", reportedActual.getName()));
        }
    }

    // --- A completely non-existent locale should fall back to the root collator ---
    Collator fallback;
    try {
        fallback = Collator.getInstance(new ULocale("blahaha"));
    } catch (Exception e) {
        errln("Failed to open collator with " + e);
        return;
    }

    // Root may be spelled either "" or "root"; anything else is an error.
    String fallbackValidName = fallback.getLocale(ULocale.VALID_LOCALE).getName();
    if (!(fallbackValidName.isEmpty() || fallbackValidName.equals("root"))) {
        errln("Valid locale for nonexisting locale collator is \"" + fallbackValidName + "\" not root");
    }
    String fallbackActualName = fallback.getLocale(ULocale.ACTUAL_LOCALE).getName();
    if (!(fallbackActualName.isEmpty() || fallbackActualName.equals("root"))) {
        errln("Actual locale for nonexisting locale collator is \"" + fallbackActualName + "\" not root");
    }

    // --- A collator instantiated from rules should report null for all locales ---
    String rules = "&a<x<y<z";
    Collator fromRules;
    try {
        fromRules = new RuleBasedCollator(rules);
    } catch (Exception e) {
        errln("RuleBasedCollator(" + rules + ") failed: " + e);
        return;
    }

    ULocale rulesValid = fromRules.getLocale(ULocale.VALID_LOCALE);
    if (rulesValid != null) {
        errln(String.format("For collator instantiated from rules, valid locale %s is not bogus", rulesValid.getName()));
    }
    ULocale rulesActual = fromRules.getLocale(ULocale.ACTUAL_LOCALE);
    if (rulesActual != null) {
        errln(String.format("For collator instantiated from rules, actual locale %s is not bogus", rulesActual.getName()));
    }
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) ULocale(android.icu.util.ULocale) MissingResourceException(java.util.MissingResourceException) Collator(android.icu.text.Collator) RuleBasedCollator(android.icu.text.RuleBasedCollator) Test(org.junit.Test)

Example 38 with RuleBasedCollator

use of android.icu.text.RuleBasedCollator in project j2objc by google.

the class CollationAPITest method TestIterNumeric.

/**
 * Regression test for ticket #9915: with script reordering plus numeric collation
 * enabled, "40" must still sort before "72".
 *
 * <p>The name is a misnomer for Java; it is kept parallel with the C++ test.
 */
@Test
public void TestIterNumeric() throws Exception {
    // Background, per the original ticket:
    // - The collation code sometimes masked the continuation marker away
    //   but later tested the result for isContinuation().
    // - This case failed because the third bytes of the computed numeric-collation
    //   primaries were permutated with the script reordering table.
    // - It should have been possible to reproduce this with the root collator
    //   and characters with appropriate 3-byte primary weights.
    // - How effective this test is depends completely on the collation elements
    //   and on the implementation code.
    RuleBasedCollator reordered = new RuleBasedCollator("[reorder Hang Hani]");
    reordered.setNumericCollation(true);
    assertTrue("40<72", reordered.compare("40", "72") < 0);
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) Test(org.junit.Test)

Example 39 with RuleBasedCollator

use of android.icu.text.RuleBasedCollator in project j2objc by google.

the class CollationAPITest method TestGetTailoredSet.

/**
 * Builds a collator from each rule string and checks that the set returned by
 * {@code getTailoredSet()} has at least as many elements as the matching expectation
 * row and contains every string listed in that row.
 *
 * <p>Any exception raised while building or inspecting a collator is reported as a
 * warning that includes the exception itself, so the real cause is not lost.
 */
@Test
public void TestGetTailoredSet() {
    logln("testing getTailoredSet...");
    // rules[i] is checked against the strings in data[i].
    String[] rules = { "&a < \u212b", "& S < \u0161 <<< \u0160" };
    String[][] data = { { "\u212b", "A\u030a", "\u00c5" }, { "\u0161", "s\u030C", "\u0160", "S\u030C" } };
    for (int i = 0; i < rules.length; i++) {
        try {
            logln("Instantiating a collator from " + rules[i]);
            RuleBasedCollator coll = new RuleBasedCollator(rules[i]);
            UnicodeSet set = coll.getTailoredSet();
            logln("Got set: " + set.toPattern(true));
            if (set.size() < data[i].length) {
                errln("Tailored set size smaller (" + set.size() + ") than expected (" + data[i].length + ")");
            }
            for (int j = 0; j < data[i].length; j++) {
                logln("Checking to see whether " + data[i][j] + " is in set");
                if (!set.contains(data[i][j])) {
                    errln("Tailored set doesn't contain " + data[i][j] + "... It should");
                }
            }
        } catch (Exception e) {
            // The try block covers more than construction, so report the actual
            // exception rather than only the generic "couldn't open" text.
            warnln("Couldn't open collator with rules " + rules[i] + ": " + e);
        }
    }
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) UnicodeSet(android.icu.text.UnicodeSet) MissingResourceException(java.util.MissingResourceException) Test(org.junit.Test)

Example 40 with RuleBasedCollator

use of android.icu.text.RuleBasedCollator in project j2objc by google.

the class CollationDummyTest method TestVariableTop.

// TestVariableTop() is ported from cintltst/callcoll.c
/**
 * Tests the [variable top] tag in rule syntax. Since the default [alternate]
 * tag has the value shifted, any codepoints before [variable top] should give
 * a primary ce of 0.
 *
 * <p>The test returns immediately unless {@code SUPPORT_VARIABLE_TOP_RELATION} is set.
 * Otherwise it compares the collation key bytes of a space (English collator) and of
 * each letter from 'a' up to but not including 'z' (rule-based collator) against the
 * single expected byte 0. A key longer than the expected array is reported as a
 * mismatch.
 */
@Test
public void TestVariableTop() {
    /*
         * Starting with ICU 53, setting the variable top via a pseudo relation string
         * is not supported any more.
         * It was replaced by the [maxVariable symbol] setting.
         * See ICU tickets #9958 and #8032.
         */
    if (!SUPPORT_VARIABLE_TOP_RELATION) {
        return;
    }
    String rule = "&z = [variable top]";
    Collator myColl;
    Collator enColl;
    // Expected key bytes: just a single 0.
    int[] expected = { 0 };
    try {
        enColl = Collator.getInstance(Locale.ENGLISH);
    } catch (Exception e) {
        errln("ERROR: Failed to create the collator for ENGLISH");
        return;
    }
    try {
        myColl = new RuleBasedCollator(rule);
    } catch (Exception e) {
        errln("Fail to create RuleBasedCollator with rules:" + rule);
        return;
    }
    enColl.setStrength(Collator.PRIMARY);
    myColl.setStrength(Collator.PRIMARY);
    ((RuleBasedCollator) enColl).setAlternateHandlingShifted(true);
    ((RuleBasedCollator) myColl).setAlternateHandlingShifted(true);
    if (!((RuleBasedCollator) enColl).isAlternateHandlingShifted()) {
        errln("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }
    // space is supposed to be a variable
    CollationKey key = enColl.getCollationKey(" ");
    byte[] result = key.toByteArray();
    for (int i = 0; i < result.length; i++) {
        // The bounds guard turns an over-long key into a reported failure instead of
        // an ArrayIndexOutOfBoundsException on expected[i].
        if (i >= expected.length || result[i] != expected[i]) {
            errln("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
            break;
        }
    }
    // Letters 'a' through 'y'; 'z' itself is excluded by the loop bound.
    for (char ch = 'a'; ch < 'z'; ch++) {
        key = myColl.getCollationKey(String.valueOf(ch));
        result = key.toByteArray();
        for (int i = 0; i < result.length; i++) {
            if (i >= expected.length || result[i] != expected[i]) {
                // Name the character actually tested rather than "space".
                errln("ERROR: SHIFTED alternate does not return 0 for primary of " + ch + "\n");
                break;
            }
        }
    }
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) CollationKey(android.icu.text.CollationKey) RuleBasedCollator(android.icu.text.RuleBasedCollator) Collator(android.icu.text.Collator) Test(org.junit.Test)

Aggregations

RuleBasedCollator (android.icu.text.RuleBasedCollator)140 Test (org.junit.Test)124 Collator (android.icu.text.Collator)42 ULocale (android.icu.util.ULocale)26 CollationElementIterator (android.icu.text.CollationElementIterator)25 Locale (java.util.Locale)22 CollationKey (android.icu.text.CollationKey)17 StringCharacterIterator (java.text.StringCharacterIterator)16 StringSearch (android.icu.text.StringSearch)14 RawCollationKey (android.icu.text.RawCollationKey)11 ParseException (java.text.ParseException)10 UnicodeSet (android.icu.text.UnicodeSet)8 AlphabeticIndex (android.icu.text.AlphabeticIndex)6 BreakIterator (android.icu.text.BreakIterator)6 MissingResourceException (java.util.MissingResourceException)5 IOException (java.io.IOException)4 UnicodeSetIterator (android.icu.text.UnicodeSetIterator)3 UCharacterIterator (android.icu.text.UCharacterIterator)2 CharacterIterator (java.text.CharacterIterator)2 ArrayList (java.util.ArrayList)2