Examples with UnicodeSetIterator - android.icu.text.UnicodeSetIterator

Example 16 with UnicodeSetIterator

use of android.icu.text.UnicodeSetIterator in project j2objc by google.

the class LocaleDataTest method TestExemplarSet2.

/**
 * For every entry of {@code availableLocales}, fetches the standard and auxiliary
 * exemplar sets through a {@code LocaleData} instance, once with option 0 and once
 * with {@code UnicodeSet.CASE}, and checks that:
 * <ul>
 *   <li>each not-yet-tested standard exemplar set has at least one range or string
 *       contained in one of the locale's script sets (auxiliary sets are computed
 *       but not reported, see the TODO below);</li>
 *   <li>each {@code UnicodeSet.CASE} set contains all of its option-0 counterpart;</li>
 *   <li>over all locales, the two variants are sometimes equal and sometimes not.</li>
 * </ul>
 * Locales for which {@code UScript.getCode} returns null are skipped; this is
 * reported as an error unless the locale ID contains the substring "in".
 */
@Test
public void TestExemplarSet2() {
    int equalCount = 0;
    // Exemplar-set/script-code combinations that have already been verified.
    HashSet<ExemplarGroup> testedExemplars = new HashSet<ExemplarGroup>();
    for (ULocale locale : availableLocales) {
        LocaleData ld = LocaleData.getInstance(locale);
        int[] scriptCodes = UScript.getCode(locale);
        if (scriptCodes == null) {
            if (locale.toString().indexOf("in") < 0) {
                errln("UScript.getCode returned null for locale: " + locale);
            }
            continue;
        }
        // Layout: [0] standard, [1] auxiliary, [2] CASE standard, [3] CASE auxiliary.
        UnicodeSet[] exemplarSets = new UnicodeSet[4];
        for (int k = 0; k < 2; ++k) {
            // for casing option in (normal, uncased)
            int option = (k == 0) ? 0 : UnicodeSet.CASE;
            for (int h = 0; h < 2; ++h) {
                int type = (h == 0) ? LocaleData.ES_STANDARD : LocaleData.ES_AUXILIARY;
                UnicodeSet exemplarSet = ld.getExemplarSet(option, type);
                exemplarSets[k * 2 + h] = exemplarSet;
                ExemplarGroup exGrp = new ExemplarGroup(exemplarSet, scriptCodes);
                // add() returns false when an equal group was recorded earlier,
                // in which case the containment check has already been done.
                if (!testedExemplars.add(exGrp)) {
                    continue;
                }
                // create the UnicodeSets for the script
                UnicodeSet[] sets = new UnicodeSet[scriptCodes.length];
                for (int j = 0; j < scriptCodes.length; j++) {
                    sets[j] = new UnicodeSet("[:" + UScript.getShortName(scriptCodes[j]) + ":]");
                }
                // Iterate over the ranges and strings of the exemplar set until one
                // of them is found in any of the script sets.
                boolean existsInScript = false;
                UnicodeSetIterator iter = new UnicodeSetIterator(exemplarSet);
                while (!existsInScript && iter.nextRange()) {
                    boolean isString = iter.codepoint == UnicodeSetIterator.IS_STRING;
                    for (UnicodeSet scriptSet : sets) {
                        boolean contained = isString
                                ? scriptSet.contains(iter.string)
                                : scriptSet.contains(iter.codepoint, iter.codepointEnd);
                        if (contained) {
                            existsInScript = true;
                            break;
                        }
                    }
                }
                // TODO: How to verify LocaleData.ES_AUXILIARY ???
                // Only the standard type (h == 0) is reported for now.
                if (!existsInScript && h == 0) {
                    errln("ExemplarSet containment failed for locale,option,type : " + locale + ", " + option + ", " + type);
                }
            }
        }
        // This is expensive, so only do it if it will be visible
        if (isVerbose()) {
            logln(locale.toString() + " exemplar(ES_STANDARD)" + exemplarSets[0]);
            logln(locale.toString() + " exemplar(ES_AUXILIARY) " + exemplarSets[1]);
            logln(locale.toString() + " exemplar(case-folded,ES_STANDARD) " + exemplarSets[2]);
            logln(locale.toString() + " exemplar(case-folded,ES_AUXILIARY) " + exemplarSets[3]);
        }
        assertTrue(locale.toString() + " case-folded is a superset", exemplarSets[2].containsAll(exemplarSets[0]));
        assertTrue(locale.toString() + " case-folded is a superset", exemplarSets[3].containsAll(exemplarSets[1]));
        if (exemplarSets[2].equals(exemplarSets[0])) {
            ++equalCount;
        }
        if (exemplarSets[3].equals(exemplarSets[1])) {
            ++equalCount;
        }
    }
    // Note: The case-folded set should sometimes be a strict superset
    // and sometimes be equal.
    assertTrue("case-folded is sometimes a strict superset, and sometimes equal", equalCount > 0 && equalCount < availableLocales.length * 2);
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) LocaleData(android.icu.util.LocaleData) ULocale(android.icu.util.ULocale) UnicodeSet(android.icu.text.UnicodeSet) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 17 with UnicodeSetIterator

use of android.icu.text.UnicodeSetIterator in project j2objc by google.

the class LocaleDataTest method TestExemplarSet.

/**
 * For every entry of {@code availableLocales}, fetches the exemplar set through the
 * static {@code LocaleData.getExemplarSet(locale, option)} call, once with option 0
 * and once with {@code UnicodeSet.CASE}, and checks that:
 * <ul>
 *   <li>each not-yet-tested exemplar set has at least one range or string contained
 *       in one of the locale's script sets;</li>
 *   <li>the {@code UnicodeSet.CASE} set contains all of the option-0 set;</li>
 *   <li>over all locales, the two variants are sometimes equal and sometimes not.</li>
 * </ul>
 * Locales for which {@code UScript.getCode} returns null are skipped; this is
 * reported as an error unless the locale ID contains the substring "in".
 */
@Test
public void TestExemplarSet() {
    // Exemplar-set/script-code combinations that have already been verified.
    HashSet<ExemplarGroup> testedExemplars = new HashSet<ExemplarGroup>();
    int equalCount = 0;
    for (ULocale locale : availableLocales) {
        int[] scriptCodes = UScript.getCode(locale);
        if (scriptCodes == null) {
            // so in effect I can never test the script code for Indonesian :(
            if (locale.toString().indexOf("in") < 0) {
                errln("UScript.getCode returned null for locale: " + locale);
            }
            continue;
        }
        // Layout: [0] option 0, [1] UnicodeSet.CASE.
        UnicodeSet[] exemplarSets = new UnicodeSet[2];
        for (int k = 0; k < 2; ++k) {
            // for casing option in (normal, caseInsensitive)
            int option = (k == 0) ? 0 : UnicodeSet.CASE;
            UnicodeSet exemplarSet = LocaleData.getExemplarSet(locale, option);
            exemplarSets[k] = exemplarSet;
            ExemplarGroup exGrp = new ExemplarGroup(exemplarSet, scriptCodes);
            // add() returns false when an equal group was recorded earlier,
            // in which case the containment check has already been done.
            if (!testedExemplars.add(exGrp)) {
                continue;
            }
            // create the UnicodeSets for the script
            UnicodeSet[] sets = new UnicodeSet[scriptCodes.length];
            for (int j = 0; j < scriptCodes.length; j++) {
                sets[j] = new UnicodeSet("[:" + UScript.getShortName(scriptCodes[j]) + ":]");
            }
            // Iterate over the ranges and strings of the exemplar set until one
            // of them is found in any of the script sets.
            boolean existsInScript = false;
            UnicodeSetIterator iter = new UnicodeSetIterator(exemplarSet);
            while (!existsInScript && iter.nextRange()) {
                boolean isString = iter.codepoint == UnicodeSetIterator.IS_STRING;
                for (UnicodeSet scriptSet : sets) {
                    boolean contained = isString
                            ? scriptSet.contains(iter.string)
                            : scriptSet.contains(iter.codepoint, iter.codepointEnd);
                    if (contained) {
                        existsInScript = true;
                        break;
                    }
                }
            }
            if (!existsInScript) {
                errln("ExemplarSet containment failed for locale : " + locale);
            }
        }
        // This is expensive, so only do it if it will be visible
        if (isVerbose()) {
            logln(locale.toString() + " exemplar " + exemplarSets[0]);
            logln(locale.toString() + " exemplar(case-folded) " + exemplarSets[1]);
        }
        assertTrue(locale.toString() + " case-folded is a superset", exemplarSets[1].containsAll(exemplarSets[0]));
        if (exemplarSets[1].equals(exemplarSets[0])) {
            ++equalCount;
        }
    }
    // Note: The case-folded set should sometimes be a strict superset
    // and sometimes be equal.
    assertTrue("case-folded is sometimes a strict superset, and sometimes equal", equalCount > 0 && equalCount < availableLocales.length);
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) ULocale(android.icu.util.ULocale) UnicodeSet(android.icu.text.UnicodeSet) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 18 with UnicodeSetIterator

use of android.icu.text.UnicodeSetIterator in project j2objc by google.

the class CollationTest method TestTailoredElements.

/**
 * Walks the root locale plus every locale returned by
 * {@code Collator.getAvailableULocales()}, and for each collation type reported for
 * that locale instantiates the collator, iterates its tailored set, and reports an
 * error for every collation element that {@code isValidCE} rejects.
 * <p>
 * Each actual collator locale is examined only once; "", "root" and
 * "root@collation=standard" are pre-registered as already seen and therefore skipped.
 * Collation keywords starting with "private-" are reported as errors.
 */
@Test
public void TestTailoredElements() {
    CollationData root = CollationRoot.getData();
    CollationRootElements rootElements = new CollationRootElements(root.rootElements);
    Set<String> prevLocales = new HashSet<String>();
    prevLocales.add("");
    prevLocales.add("root");
    prevLocales.add("root@collation=standard");
    ULocale[] locales = Collator.getAvailableULocales();
    // Index -1 stands for "root"; indices 0..length-1 cover every available locale.
    // (Advancing localeID in the for-update clause would exit the loop right after
    // assigning the last locale's name, so that locale would never be tested.)
    for (int locIdx = -1; locIdx < locales.length; ++locIdx) {
        String localeID = (locIdx < 0) ? "root" : locales[locIdx].getName();
        ULocale locale = new ULocale(localeID);
        String[] types = Collator.getKeywordValuesForLocale("collation", locale, false);
        for (String type : types) {
            // first: default type
            if (type.startsWith("private-")) {
                errln("Collator.getKeywordValuesForLocale(" + localeID + ") returns private collation keyword: " + type);
            }
            ULocale localeWithType = locale.setKeywordValue("collation", type);
            Collator coll = Collator.getInstance(localeWithType);
            ULocale actual = coll.getLocale(ULocale.ACTUAL_LOCALE);
            // add() returns false when this actual locale was handled before.
            if (!prevLocales.add(actual.getName())) {
                continue;
            }
            logln("TestTailoredElements(): requested " + localeWithType.getName() + " -> actual " + actual.getName());
            if (!(coll instanceof RuleBasedCollator)) {
                continue;
            }
            RuleBasedCollator rbc = (RuleBasedCollator) coll;
            // Note: It would be better to get tailored strings such that we can
            // identify the prefix, and only get the CEs for the prefix+string,
            // not also for the prefix.
            // There is currently no API for that.
            // It would help in an unusual case where a contraction starting in the prefix
            // extends past its end, and we do not see the intended mapping.
            // For example, for a mapping p|st, if there is also a contraction ps,
            // then we get CEs(ps)+CEs(t), rather than CEs(p|st).
            UnicodeSet tailored = coll.getTailoredSet();
            UnicodeSetIterator iter = new UnicodeSetIterator(tailored);
            while (iter.next()) {
                String s = iter.getString();
                long[] ces = rbc.internalGetCEs(s);
                for (int i = 0; i < ces.length; ++i) {
                    long ce = ces[i];
                    if (!isValidCE(rootElements, root, ce)) {
                        // The offending string is logged before the error is raised.
                        logln(prettify(s));
                        errln("invalid tailored CE 0x" + Utility.hex(ce, 16) + " at CE index " + i + " from string:");
                    }
                }
            }
        }
    }
}

Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) ULocale(android.icu.util.ULocale) UnicodeSet(android.icu.text.UnicodeSet) Collator(android.icu.text.Collator) RuleBasedCollator(android.icu.text.RuleBasedCollator) CollationRootElements(android.icu.impl.coll.CollationRootElements) UnicodeSetIterator(android.icu.text.UnicodeSetIterator) CollationData(android.icu.impl.coll.CollationData) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 19 with UnicodeSetIterator

use of android.icu.text.UnicodeSetIterator in project j2objc by google.

the class CollationTest method TestImplicits.

/**
 * Iterates three character sets (core Han, other Han, unassigned/surrogate/private-use)
 * in that order and checks, for every element, that the root collation iterator
 * yields exactly one CE with common secondary/tertiary weights. For code points in
 * the "in order" subset it also checks that the primary weight is greater than the
 * previous element's primary.
 */
@Test
public void TestImplicits() {
    CollationData rootData = CollationRoot.getData();
    // Implicit primary weights should be assigned for the following sets,
    // and sort in ascending order by set and then code point.
    // See http://www.unicode.org/reports/tr10/#Implicit_Weights
    final String mainHanBlocks = "[\\p{Block=CJK_Unified_Ideographs}" + "\\p{Block=CJK_Compatibility_Ideographs}]]";
    // core Han Unified Ideographs
    UnicodeSet coreHan = new UnicodeSet("[\\p{unified_ideograph}&" + mainHanBlocks);
    // all other Unified Han ideographs
    UnicodeSet otherHan = new UnicodeSet("[\\p{unified ideograph}-" + mainHanBlocks);
    UnicodeSet unassigned = new UnicodeSet("[[:Cn:][:Cs:][:Co:]]");
    // These have special CLDR root mappings.
    unassigned.remove(0xfffe, 0xffff);
    // Starting with CLDR 26/ICU 54, the root Han order may instead be
    // the Unihan radical-stroke order.
    // The tests should pass either way, so we only test the order of a small set of Han characters
    // whose radical-stroke order is the same as their code point order.
    UnicodeSet someHanInCPOrder = new UnicodeSet("[\\u4E00-\\u4E16\\u4E18-\\u4E2B\\u4E2D-\\u4E3C\\u4E3E-\\u4E48" + "\\u4E4A-\\u4E60\\u4E63-\\u4E8F\\u4E91-\\u4F63\\u4F65-\\u50F1\\u50F3-\\u50F6]");
    UnicodeSet inOrder = new UnicodeSet(someHanInCPOrder);
    inOrder.addAll(unassigned).freeze();
    UnicodeSet[] orderedSets = { coreHan, otherHan, unassigned };
    // Previous code point and its primary; both carry over from one set to the next.
    int lastCodePoint = 0;
    long lastPrimary = 0;
    UTF16CollationIterator ceIter = new UTF16CollationIterator(rootData, false, "", 0);
    for (UnicodeSet currentSet : orderedSets) {
        UnicodeSetIterator setIter = new UnicodeSetIterator(currentSet);
        while (setIter.next()) {
            String text = setIter.getString();
            int cp = text.codePointAt(0);
            ceIter.setText(false, text, 0);
            long firstCE = ceIter.nextCE();
            long secondCE = ceIter.nextCE();
            boolean exactlyOneCE = firstCE != Collation.NO_CE && secondCE == Collation.NO_CE;
            if (!exactlyOneCE) {
                errln("CollationIterator.nextCE(0x" + Utility.hex(cp) + ") did not yield exactly one CE");
                continue;
            }
            // The low 32 bits of the CE are compared with the common sec/ter constant.
            long lower32 = firstCE & 0xffffffffL;
            if (lower32 != Collation.COMMON_SEC_AND_TER_CE) {
                errln("CollationIterator.nextCE(U+" + Utility.hex(cp, 4) + ") has non-common sec/ter weights: 0x" + Utility.hex(lower32, 8));
                continue;
            }
            // The upper 32 bits are treated as the primary weight.
            long primary = firstCE >>> 32;
            if (primary <= lastPrimary && inOrder.contains(cp) && inOrder.contains(lastCodePoint)) {
                errln("CE(U+" + Utility.hex(cp) + ")=0x" + Utility.hex(primary) + ".. not greater than CE(U+" + Utility.hex(lastCodePoint) + ")=0x" + Utility.hex(lastPrimary) + "..");
            }
            lastCodePoint = cp;
            lastPrimary = primary;
        }
    }
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) FCDUTF16CollationIterator(android.icu.impl.coll.FCDUTF16CollationIterator) UTF16CollationIterator(android.icu.impl.coll.UTF16CollationIterator) CollationData(android.icu.impl.coll.CollationData) UnicodeSet(android.icu.text.UnicodeSet) Test(org.junit.Test)

Example 20 with UnicodeSetIterator

use of android.icu.text.UnicodeSetIterator in project j2objc by google.

the class TransliteratorTest method TestAny.

/**
 * Test Any-X transliterators with sample letters from all scripts.
 * <p>
 * Builds one long string from the first few alphabetic characters of every script
 * code below {@code UScript.CODE_LIMIT}, runs it through the "any-Latn"
 * transliterator, and logs both the input and the result.
 */
@Test
public void TestAny() {
    UnicodeSet alphabetic = (UnicodeSet) new UnicodeSet("[:alphabetic:]").freeze();
    StringBuffer testString = new StringBuffer();
    for (int scriptCode = 0; scriptCode < UScript.CODE_LIMIT; ++scriptCode) {
        String scriptName = UScript.getShortName(scriptCode);
        UnicodeSet scriptSet = new UnicodeSet().applyPropertyAlias("script", scriptName);
        UnicodeSet sample = scriptSet.retainAll(alphabetic);
        UnicodeSetIterator it = new UnicodeSetIterator(sample);
        // The counter starts at 5 and the loop stops once it drops below zero,
        // so at most six elements per script are appended.
        int remaining = 5;
        while (it.next()) {
            testString.append(it.getString());
            remaining--;
            if (remaining < 0) {
                break;
            }
        }
    }
    logln("Sample set for Any-Latin: " + testString);
    Transliterator anyLatin = Transliterator.getInstance("any-Latn");
    String input = testString.toString();
    String result = anyLatin.transliterate(input);
    logln("Sample result for Any-Latin: " + result);
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) CaseInsensitiveString(android.icu.util.CaseInsensitiveString) ReplaceableString(android.icu.text.ReplaceableString) UnicodeSet(android.icu.text.UnicodeSet) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Aggregations

UnicodeSetIterator (android.icu.text.UnicodeSetIterator): 31, UnicodeSet (android.icu.text.UnicodeSet): 25, Test (org.junit.Test): 17, ULocale (android.icu.util.ULocale): 6, Transliterator (android.icu.text.Transliterator): 5, HashSet (java.util.HashSet): 4, RuleBasedCollator (android.icu.text.RuleBasedCollator): 3, CollationData (android.icu.impl.coll.CollationData): 2, CollationKey (android.icu.text.CollationKey): 2, FilteredNormalizer2 (android.icu.text.FilteredNormalizer2): 2, Normalizer2 (android.icu.text.Normalizer2): 2, RawCollationKey (android.icu.text.RawCollationKey): 2, File (java.io.File): 2, FileOutputStream (java.io.FileOutputStream): 2, OutputStreamWriter (java.io.OutputStreamWriter): 2, PrintWriter (java.io.PrintWriter): 2, ArrayList (java.util.ArrayList): 2, Iterator (java.util.Iterator): 2, TreeSet (java.util.TreeSet): 2, Normalizer2Impl (android.icu.impl.Normalizer2Impl): 1