Examples with CollationData - android.icu.impl.coll.CollationData

Example 1 with CollationData

use of android.icu.impl.coll.CollationData in project j2objc by google.

the class CollationTest method TestFCD.

/**
 * Runs a deliberately non-FCD input string through
 * {@code FCDUTF16CollationIterator} and {@code FCDIterCollationIterator},
 * handing each iterator to {@code checkFCD} together with the list of
 * expected code points.
 */
@Test
public void TestFCD() {
    CollationData data = CollationRoot.getData();

    // Input string, not FCD. It is assembled one piece per statement so that
    // each note sits directly above the piece it describes.
    StringBuilder input = new StringBuilder();
    input.append("\u0308\u00e1\u0062\u0301\u0327\u0430\u0062");
    // MUSICAL SYMBOL QUARTER NOTE=1D158 1D165, ccc=0, 216
    input.appendCodePoint(0x1D15F);
    // ccc=202, 230
    input.append("\u0327\u0308");
    // MUSICAL SYMBOL COMBINING AUGMENTATION DOT, ccc=226
    input.appendCodePoint(0x1D16D);
    input.appendCodePoint(0x1D15F);
    input.appendCodePoint(0x1D16D);
    input.append("\uac01");
    // Character with tccc!=0 decomposed together with mis-ordered sequence.
    input.append("\u00e7");
    input.appendCodePoint(0x1D16D);
    input.appendCodePoint(0x1D165);
    // Character with tccc!=0 decomposed together with decomposed sequence.
    input.append("\u00e1");
    // Tibetan composite vowels must be decomposed.
    input.append("\u0f73\u0f75");
    input.append("\u4e00\u0f81");
    String s = input.toString();

    // Expected code points.
    int[] expected = {
        0x308, 0xe1, 0x62, 0x327, 0x301, 0x430, 0x62,
        0x1D158, 0x327, 0x1D165, 0x1D16D, 0x308,
        0x1D15F, 0x1D16D,
        0xac01,
        0x63, 0x327, 0x1D165, 0x1D16D,
        0x61,
        0xf71, 0xf71, 0xf72, 0xf74, 0x301,
        0x4e00, 0xf71, 0xf80
    };

    // First pass: iterator that reads the String directly.
    FCDUTF16CollationIterator utf16Iterator = new FCDUTF16CollationIterator(data, false, s, 0);
    CodePointIterator expectedIterator = new CodePointIterator(expected);
    checkFCD("FCDUTF16CollationIterator", utf16Iterator, expectedIterator);

    // Second pass: same text through a UCharacterIterator; the expected
    // code point iterator is rewound and reused.
    expectedIterator.resetToStart();
    UCharacterIterator textIterator = UCharacterIterator.getInstance(s);
    FCDIterCollationIterator iterIterator = new FCDIterCollationIterator(data, false, textIterator, 0);
    checkFCD("FCDIterCollationIterator", iterIterator, expectedIterator);
}

Also used : FCDUTF16CollationIterator(android.icu.impl.coll.FCDUTF16CollationIterator) UCharacterIterator(android.icu.text.UCharacterIterator) CollationData(android.icu.impl.coll.CollationData) FCDIterCollationIterator(android.icu.impl.coll.FCDIterCollationIterator) Test(org.junit.Test)

Example 2 with CollationData

use of android.icu.impl.coll.CollationData in project j2objc by google.

the class CollationTest method TestSubSequence.

// ICU4C: TestNulTerminated / renamed for ICU4J
/**
 * Creates two {@code UTF16CollationIterator}s over the same text "abab",
 * one starting at index 0 and one at index 2, and reports an error whenever
 * the first two CEs they return differ.
 */
@Test
public void TestSubSequence() {
    CollationData data = CollationRoot.getData();
    // { 0x61, 0x62, 0x61, 0x62 }
    final String text = "abab";
    UTF16CollationIterator fromStart = new UTF16CollationIterator(data, false, text, 0);
    UTF16CollationIterator fromMiddle = new UTF16CollationIterator(data, false, text, 2);

    int ceIndex = 0;
    while (ceIndex < 2) {
        long ceFromStart = fromStart.nextCE();
        long ceFromMiddle = fromMiddle.nextCE();
        boolean sameCE = ceFromStart == ceFromMiddle;
        if (!sameCE) {
            errln("CollationIterator.nextCE(with start position at 0) != " + "nextCE(with start position at 2) at CE " + ceIndex);
        }
        ceIndex++;
    }
}

Also used : FCDUTF16CollationIterator(android.icu.impl.coll.FCDUTF16CollationIterator) UTF16CollationIterator(android.icu.impl.coll.UTF16CollationIterator) CollationData(android.icu.impl.coll.CollationData) Test(org.junit.Test)

Example 3 with CollationData

use of android.icu.impl.coll.CollationData in project j2objc by google.

the class CollationTest method TestTailoredElements.

/**
 * For "root" and every locale returned by {@code Collator.getAvailableULocales()},
 * and for every collation type reported for that locale, instantiates the
 * collator and checks each CE of each string in its tailored set with
 * {@code isValidCE}.
 *
 * <p>Collators whose actual locale name was already seen are skipped, as are
 * collators that are not {@code RuleBasedCollator} instances. A collation
 * keyword starting with "private-" is reported as an error.
 */
@Test
public void TestTailoredElements() {
    CollationData root = CollationRoot.getData();
    CollationRootElements rootElements = new CollationRootElements(root.rootElements);

    // Actual locales that need no (further) checking.
    Set<String> prevLocales = new HashSet<String>();
    prevLocales.add("");
    prevLocales.add("root");
    prevLocales.add("root@collation=standard");

    ULocale[] locales = Collator.getAvailableULocales();
    // Index -1 stands for "root"; indices 0..length-1 are the available locales.
    // (Advancing localeID in the for-update clause would end the loop right
    // after fetching the last locale, so that locale would never be tested.)
    for (int locIdx = -1; locIdx < locales.length; ++locIdx) {
        String localeID = locIdx < 0 ? "root" : locales[locIdx].getName();
        ULocale locale = new ULocale(localeID);
        String[] types = Collator.getKeywordValuesForLocale("collation", locale, false);
        for (int typeIdx = 0; typeIdx < types.length; ++typeIdx) {
            // first: default type
            String type = types[typeIdx];
            if (type.startsWith("private-")) {
                errln("Collator.getKeywordValuesForLocale(" + localeID + ") returns private collation keyword: " + type);
            }
            ULocale localeWithType = locale.setKeywordValue("collation", type);
            Collator coll = Collator.getInstance(localeWithType);
            ULocale actual = coll.getLocale(ULocale.ACTUAL_LOCALE);
            if (prevLocales.contains(actual.getName())) {
                continue;
            }
            prevLocales.add(actual.getName());
            logln("TestTailoredElements(): requested " + localeWithType.getName() + " -> actual " + actual.getName());
            if (!(coll instanceof RuleBasedCollator)) {
                continue;
            }
            RuleBasedCollator rbc = (RuleBasedCollator) coll;
            // Note: It would be better to get tailored strings such that we can
            // identify the prefix, and only get the CEs for the prefix+string,
            // not also for the prefix.
            // There is currently no API for that.
            // It would help in an unusual case where a contraction starting in the prefix
            // extends past its end, and we do not see the intended mapping.
            // For example, for a mapping p|st, if there is also a contraction ps,
            // then we get CEs(ps)+CEs(t), rather than CEs(p|st).
            UnicodeSet tailored = coll.getTailoredSet();
            UnicodeSetIterator iter = new UnicodeSetIterator(tailored);
            while (iter.next()) {
                String s = iter.getString();
                long[] ces = rbc.internalGetCEs(s);
                for (int i = 0; i < ces.length; ++i) {
                    long ce = ces[i];
                    if (!isValidCE(rootElements, root, ce)) {
                        // Report the offending string in the error itself so the
                        // failure is self-contained.
                        errln("invalid tailored CE 0x" + Utility.hex(ce, 16) + " at CE index " + i + " from string: " + prettify(s));
                    }
                }
            }
        }
    }
}

Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) ULocale(android.icu.util.ULocale) UnicodeSet(android.icu.text.UnicodeSet) Collator(android.icu.text.Collator) CollationRootElements(android.icu.impl.coll.CollationRootElements) UnicodeSetIterator(android.icu.text.UnicodeSetIterator) CollationData(android.icu.impl.coll.CollationData) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 4 with CollationData

use of android.icu.impl.coll.CollationData in project j2objc by google.

the class CollationTest method TestRootElements.

/**
 * Iterates over the root collation elements and, for each CE, checks that
 * <ul>
 *   <li>its case and quaternary bits are zero,</li>
 *   <li>{@code isValidCE} accepts it, and</li>
 *   <li>{@code CollationWeights} can allocate one weight between it and the
 *       previous CE at the first level (primary, secondary, tertiary) on
 *       which the two differ; identical consecutive CEs are reported as
 *       duplicates.</li>
 * </ul>
 */
@Test
public void TestRootElements() {
    CollationData root = CollationRoot.getData();
    CollationRootElements rootElements = new CollationRootElements(root.rootElements);
    RootElementsIterator iter = new RootElementsIterator(root);

    // We check each root CE for validity,
    // and we also verify that there is a tailoring gap between each two CEs.
    // compressible primary weights
    CollationWeights cw1c = new CollationWeights();
    // uncompressible primary weights
    CollationWeights cw1u = new CollationWeights();
    CollationWeights cw2 = new CollationWeights();
    CollationWeights cw3 = new CollationWeights();
    cw1c.initForPrimary(true);
    cw1u.initForPrimary(false);
    cw2.initForSecondary();
    cw3.initForTertiary();

    // Note: The root elements do not include Han-implicit or unassigned-implicit CEs,
    // nor the special merge-separator CE for U+FFFE.
    long prevPri = 0;
    long prevSec = 0;
    long prevTer = 0;
    while (iter.next()) {
        final long pri = iter.getPrimary();
        final long secTer = iter.getSecTer();

        // CollationRootElements CEs must have 0 case and quaternary bits.
        if ((secTer & Collation.CASE_AND_QUATERNARY_MASK) != 0) {
            errln("CollationRootElements CE has non-zero case and/or quaternary bits: " + "0x" + Utility.hex(pri, 8) + " 0x" + Utility.hex(secTer, 8));
        }

        final long sec = secTer >>> 16;
        final long ter = secTer & Collation.ONLY_TERTIARY_MASK;
        // Tertiary CEs must have uppercase bits,
        // but they are not stored in the CollationRootElements.
        final boolean tertiaryCE = pri == 0 && sec == 0 && ter != 0;
        final long ctq = tertiaryCE ? (ter | 0x8000) : ter;

        if (isValidCE(rootElements, root, pri, sec, ctq)) {
            if (pri != prevPri) {
                long newWeight = 0;
                // There is currently no tailoring gap after primary ignorables,
                // and we forbid tailoring after U+FFFD and U+FFFF.
                final boolean gapExpected = prevPri != 0 && prevPri < Collation.FFFD_PRIMARY;
                if (gapExpected) {
                    final boolean compressible = root.isCompressiblePrimary(prevPri);
                    final CollationWeights cw1 = compressible ? cw1c : cw1u;
                    if (cw1.allocWeights(prevPri, pri, 1)) {
                        newWeight = cw1.nextWeight();
                    } else {
                        final String prefix = compressible
                                ? "no primary/compressible tailoring gap between "
                                : "no primary/uncompressible tailoring gap between ";
                        errln(prefix + "0x" + Utility.hex(prevPri, 8) + " and 0x" + Utility.hex(pri, 8));
                    }
                }
                // newWeight stays 0 when nothing was allocated above.
                if (newWeight != 0 && (newWeight <= prevPri || newWeight >= pri)) {
                    errln("mis-allocated primary weight, should get " + "0x" + Utility.hex(prevPri, 8) + " < 0x" + Utility.hex(newWeight, 8) + " < 0x" + Utility.hex(pri, 8));
                }
            } else if (sec != prevSec) {
                final long lowerLimit;
                if (prevSec == 0) {
                    lowerLimit = rootElements.getSecondaryBoundary() - 0x100;
                } else {
                    lowerLimit = prevSec;
                }
                if (cw2.allocWeights(lowerLimit, sec, 1)) {
                    final long newWeight = cw2.nextWeight();
                    // NOTE(review): the range check uses prevSec while the message
                    // reports lowerLimit - confirm which bound is intended.
                    if (newWeight <= prevSec || newWeight >= sec) {
                        errln("mis-allocated secondary weight, should get " + "0x" + Utility.hex(lowerLimit) + " < 0x" + Utility.hex(newWeight) + " < 0x" + Utility.hex(sec));
                    }
                } else {
                    errln("no secondary tailoring gap between " + "0x" + Utility.hex(lowerLimit) + " and 0x" + Utility.hex(sec));
                }
            } else if (ter != prevTer) {
                final long lowerLimit;
                if (prevTer == 0) {
                    lowerLimit = rootElements.getTertiaryBoundary() - 0x100;
                } else {
                    lowerLimit = prevTer;
                }
                if (cw3.allocWeights(lowerLimit, ter, 1)) {
                    final long newWeight = cw3.nextWeight();
                    // NOTE(review): the range check uses prevTer while the message
                    // reports lowerLimit - confirm which bound is intended.
                    if (newWeight <= prevTer || newWeight >= ter) {
                        errln("mis-allocated tertiary weight, should get " + "0x" + Utility.hex(lowerLimit) + " < 0x" + Utility.hex(newWeight) + " < 0x" + Utility.hex(ter));
                    }
                } else {
                    errln("no tertiary tailoring gap between " + "0x" + Utility.hex(lowerLimit) + " and 0x" + Utility.hex(ter));
                }
            } else {
                errln("duplicate root CE 0x" + Utility.hex(pri, 8) + " 0x" + Utility.hex(secTer, 8));
            }
        } else {
            errln("invalid root CE 0x" + Utility.hex(pri, 8) + " 0x" + Utility.hex(secTer, 8));
        }

        // Remember this CE for the comparison in the next iteration,
        // whether or not it was valid.
        prevPri = pri;
        prevSec = sec;
        prevTer = ter;
    }
}

Also used : CollationRootElements(android.icu.impl.coll.CollationRootElements) CollationData(android.icu.impl.coll.CollationData) CollationWeights(android.icu.impl.coll.CollationWeights) Test(org.junit.Test)

Example 5 with CollationData

use of android.icu.impl.coll.CollationData in project j2objc by google.

the class CollationTest method TestImplicits.

/**
 * Iterates over three code point sets (core Han, other Han, unassigned) in
 * that order and checks that each code point yields exactly one CE with
 * common secondary/tertiary weights. For consecutive code points that are
 * both in the {@code inOrder} set, the primary weight must be strictly
 * greater than the previous one.
 */
@Test
public void TestImplicits() {
    CollationData cd = CollationRoot.getData();

    // Implicit primary weights should be assigned for the following sets,
    // and sort in ascending order by set and then code point.
    // See http://www.unicode.org/reports/tr10/#Implicit_Weights

    // core Han Unified Ideographs
    UnicodeSet coreHan = new UnicodeSet("[\\p{unified_ideograph}&" + "[\\p{Block=CJK_Unified_Ideographs}" + "\\p{Block=CJK_Compatibility_Ideographs}]]");
    // all other Unified Han ideographs
    UnicodeSet otherHan = new UnicodeSet("[\\p{unified ideograph}-" + "[\\p{Block=CJK_Unified_Ideographs}" + "\\p{Block=CJK_Compatibility_Ideographs}]]");
    UnicodeSet unassigned = new UnicodeSet("[[:Cn:][:Cs:][:Co:]]");
    // These have special CLDR root mappings.
    unassigned.remove(0xfffe, 0xffff);

    // Starting with CLDR 26/ICU 54, the root Han order may instead be
    // the Unihan radical-stroke order.
    // The tests should pass either way, so we only test the order of a small set of Han characters
    // whose radical-stroke order is the same as their code point order.
    UnicodeSet someHanInCPOrder = new UnicodeSet("[\\u4E00-\\u4E16\\u4E18-\\u4E2B\\u4E2D-\\u4E3C\\u4E3E-\\u4E48" + "\\u4E4A-\\u4E60\\u4E63-\\u4E8F\\u4E91-\\u4F63\\u4F65-\\u50F1\\u50F3-\\u50F6]");
    UnicodeSet inOrder = new UnicodeSet(someHanInCPOrder);
    inOrder.addAll(unassigned).freeze();

    UnicodeSet[] sets = { coreHan, otherHan, unassigned };
    int prevCodePoint = 0;
    long prevPrimary = 0;
    UTF16CollationIterator ci = new UTF16CollationIterator(cd, false, "", 0);
    for (UnicodeSet set : sets) {
        for (UnicodeSetIterator iter = new UnicodeSetIterator(set); iter.next();) {
            String s = iter.getString();
            int c = s.codePointAt(0);
            ci.setText(false, s, 0);

            // Always fetch two CEs: the first must be real, the second must be NO_CE.
            long firstCE = ci.nextCE();
            long secondCE = ci.nextCE();
            boolean exactlyOneCE = firstCE != Collation.NO_CE && secondCE == Collation.NO_CE;
            if (!exactlyOneCE) {
                errln("CollationIterator.nextCE(0x" + Utility.hex(c) + ") did not yield exactly one CE");
                continue;
            }

            long lower32 = firstCE & 0xffffffffL;
            if (lower32 != Collation.COMMON_SEC_AND_TER_CE) {
                errln("CollationIterator.nextCE(U+" + Utility.hex(c, 4) + ") has non-common sec/ter weights: 0x" + Utility.hex(firstCE & 0xffffffffL, 8));
                continue;
            }

            long primary = firstCE >>> 32;
            if (primary <= prevPrimary && inOrder.contains(c) && inOrder.contains(prevCodePoint)) {
                errln("CE(U+" + Utility.hex(c) + ")=0x" + Utility.hex(primary) + ".. not greater than CE(U+" + Utility.hex(prevCodePoint) + ")=0x" + Utility.hex(prevPrimary) + "..");
            }
            // Only code points that passed both checks above become the new reference.
            prevCodePoint = c;
            prevPrimary = primary;
        }
    }
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) FCDUTF16CollationIterator(android.icu.impl.coll.FCDUTF16CollationIterator) UTF16CollationIterator(android.icu.impl.coll.UTF16CollationIterator) CollationData(android.icu.impl.coll.CollationData) UnicodeSet(android.icu.text.UnicodeSet) Test(org.junit.Test)

Aggregations

CollationData (android.icu.impl.coll.CollationData)5 Test (org.junit.Test)5 FCDUTF16CollationIterator (android.icu.impl.coll.FCDUTF16CollationIterator)3 CollationRootElements (android.icu.impl.coll.CollationRootElements)2 UTF16CollationIterator (android.icu.impl.coll.UTF16CollationIterator)2 UnicodeSet (android.icu.text.UnicodeSet)2 UnicodeSetIterator (android.icu.text.UnicodeSetIterator)2 CollationWeights (android.icu.impl.coll.CollationWeights)1 FCDIterCollationIterator (android.icu.impl.coll.FCDIterCollationIterator)1 Collator (android.icu.text.Collator)1 RuleBasedCollator (android.icu.text.RuleBasedCollator)1 UCharacterIterator (android.icu.text.UCharacterIterator)1 ULocale (android.icu.util.ULocale)1 HashSet (java.util.HashSet)1