Search in sources :

Example 6 with UnicodeSetIterator

use of android.icu.text.UnicodeSetIterator in project j2objc by google.

the class UnicodeMap method composeWith.

/**
 * Composes {@code value} into this map for every element of {@code set}.
 *
 * <p>For each code point or string yielded by iterating {@code set}, the value
 * currently returned by {@code getValue} is handed to {@code composer} together
 * with {@code value}; the result is stored with {@code put} only when it differs
 * from the current value (by identity and by {@code equals}).
 *
 * @param set      elements whose mappings should be composed
 * @param value    the value passed as the second operand to the composer
 * @param composer combines the current value with {@code value}; it receives
 *                 {@code -1} as code point for string elements and {@code null}
 *                 as string for code point elements
 * @return this map, to allow call chaining
 */
public UnicodeMap<T> composeWith(UnicodeSet set, T value, Composer<T> composer) {
    UnicodeSetIterator elements = new UnicodeSetIterator(set);
    while (elements.next()) {
        final int codePoint = elements.codepoint;

        if (codePoint != UnicodeSetIterator.IS_STRING) {
            // Single code point element.
            T current = getValue(codePoint);
            T composed = composer.compose(codePoint, null, current, value);
            boolean unchanged = current == composed || (current != null && current.equals(composed));
            if (!unchanged) {
                put(codePoint, composed);
            }
            continue;
        }

        // Multi-character string element.
        final String text = elements.string;
        T current = getValue(text);
        T composed = composer.compose(-1, text, current, value);
        boolean unchanged = current == composed || (current != null && current.equals(composed));
        if (!unchanged) {
            put(text, composed);
        }
    }
    return this;
}
Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator)

Example 7 with UnicodeSetIterator

use of android.icu.text.UnicodeSetIterator in project j2objc by google.

the class CollationDataBuilder method setDigitTags.

/**
 * Rewrites the trie entry of every code point matched by {@code [:Nd:]} so that
 * it carries a {@code Collation.DIGIT_TAG} CE32.
 *
 * <p>Code points whose current CE32 is {@code FALLBACK_CE32} or
 * {@code UNASSIGNED_CE32} are left untouched. For all others the existing CE32 is
 * stored via {@code addCE32} and replaced by a tagged CE32 that references the
 * stored index and the code point's digit value.
 *
 * @throws IndexOutOfBoundsException if the index returned by {@code addCE32}
 *         exceeds {@code Collation.MAX_INDEX}
 */
protected void setDigitTags() {
    UnicodeSetIterator digitIter = new UnicodeSetIterator(new UnicodeSet("[:Nd:]"));
    while (digitIter.next()) {
        assert (digitIter.codepoint != UnicodeSetIterator.IS_STRING);
        final int digit = digitIter.codepoint;
        final int originalCe32 = trie.get(digit);
        if (originalCe32 == Collation.FALLBACK_CE32 || originalCe32 == Collation.UNASSIGNED_CE32) {
            // Nothing to tag for this digit.
            continue;
        }

        final int index = addCE32(originalCe32);
        if (index > Collation.MAX_INDEX) {
            // BufferOverflowException is a better fit
            // but cannot be constructed with a message string.
            throw new IndexOutOfBoundsException("too many mappings");
        }

        // The original comment marks this argument as u_charDigitValue(c).
        final int digitValue = UCharacter.digit(digit);
        final int taggedCe32 =
                Collation.makeCE32FromTagIndexAndLength(Collation.DIGIT_TAG, index, digitValue);
        trie.set(digit, taggedCe32);
    }
}
Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) UnicodeSet(android.icu.text.UnicodeSet)

Example 8 with UnicodeSetIterator

use of android.icu.text.UnicodeSetIterator in project j2objc by google.

the class CollationDataBuilder method buildContexts.

/**
 * Rebuilds the context data for every code point in {@code contextChars}.
 *
 * <p>The {@code contexts} buffer is emptied first, so abandoned lists and any
 * cached {@code builtCE32} values are ignored and everything is built from
 * scratch. Each code point's trie entry is then replaced by the CE32 returned
 * from {@code buildContext}.
 *
 * @throws AssertionError if a code point in {@code contextChars} has a trie
 *         value that is not a builder-context CE32
 */
protected void buildContexts() {
    // Start over: drop everything previously written to the contexts buffer.
    contexts.setLength(0);
    for (UnicodeSetIterator iter = new UnicodeSetIterator(contextChars); iter.next(); ) {
        assert (iter.codepoint != UnicodeSetIterator.IS_STRING);
        final int c = iter.codepoint;
        final int builderCe32 = trie.get(c);
        if (isBuilderContextCE32(builderCe32)) {
            ConditionalCE32 cond = getConditionalCE32ForCE32(builderCe32);
            final int builtCe32 = buildContext(cond);
            trie.set(c, builtCe32);
        } else {
            throw new AssertionError("Impossible: No context data for c in contextChars.");
        }
    }
}
Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator)

Example 9 with UnicodeSetIterator

use of android.icu.text.UnicodeSetIterator in project j2objc by google.

the class CollationDataBuilder method clearContexts.

/**
 * Empties the {@code contexts} buffer and resets the cached {@code builtCE32}
 * of the conditional CE32 looked up for each code point in {@code contextChars}
 * to {@code Collation.NO_CE32}.
 */
protected void clearContexts() {
    contexts.setLength(0);
    for (UnicodeSetIterator iter = new UnicodeSetIterator(contextChars); iter.next(); ) {
        assert (iter.codepoint != UnicodeSetIterator.IS_STRING);
        final int builderCe32 = trie.get(iter.codepoint);
        assert (isBuilderContextCE32(builderCe32));
        // Invalidate the cached build result for this code point's conditional CE32.
        getConditionalCE32ForCE32(builderCe32).builtCE32 = Collation.NO_CE32;
    }
}
Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator)

Example 10 with UnicodeSetIterator

use of android.icu.text.UnicodeSetIterator in project j2objc by google.

the class BasicTest method TestCompare.

/**
 * Cross-checks the comparison implementations under test against their
 * reference implementations, then spot-checks decomposition and pair
 * composition on {@code Normalizer2} and {@code FilteredNormalizer2}.
 *
 * <p>Steps:
 * <ol>
 *   <li>Every pair from {@code strings[]} is compared under every entry of
 *       {@code opt[]}; the sign of {@code norm_compare} must match
 *       {@code ref_norm_compare}, and for case-insensitive options the sign of
 *       {@code UTF16.StringComparator.compare} must match {@code ref_case_compare}.</li>
 *   <li>The same two checks are run for each code point in the canonical start
 *       sets of I, i, U+0130 and U+0131 against its NFC decomposition.</li>
 *   <li>{@code getDecomposition}, {@code getRawDecomposition} and
 *       {@code composePair} are checked on fixed inputs, both unfiltered and
 *       through a {@code FilteredNormalizer2}.</li>
 * </ol>
 * Failures are reported through {@code errln}.
 */
@Test
public void TestCompare() {
    // Size the scratch array from strings[] itself instead of a fixed 100,
    // so adding test strings can never overflow it.
    final int count = strings.length;
    String[] s = new String[count];
    // create the unescaped test strings
    for (int i = 0; i < count; ++i) {
        s[i] = Utility.unescape(strings[i]);
    }
    UTF16.StringComparator comp = new UTF16.StringComparator();
    // test them each with each other
    for (int i = 0; i < count; ++i) {
        for (int j = i; j < count; ++j) {
            for (int k = 0; k < opt.length; ++k) {
                // test Normalizer::compare
                int result = norm_compare(s[i], s[j], opt[k].options);
                int refResult = ref_norm_compare(s[i], s[j], opt[k].options);
                if (sign(result) != sign(refResult)) {
                    errln("Normalizer::compare( " + i + ", " + j + ", " + k + "( " + opt[k].name + "))=" + result + " should be same sign as " + refResult);
                }
                // test UnicodeString::caseCompare - same internal implementation function
                if ((opt[k].options & Normalizer.COMPARE_IGNORE_CASE) != 0) {
                    applyCompareOptions(comp, opt[k].options);
                    result = comp.compare(s[i], s[j]);
                    refResult = ref_case_compare(s[i], s[j], opt[k].options);
                    if (sign(result) != sign(refResult)) {
                        // Name the comparator here so this failure is distinguishable
                        // from the Normalizer::compare failure above.
                        errln("UTF16.StringComparator.compare( " + i + ", " + j + ", " + k + "( " + opt[k].name + "))=" + result + " should be same sign as " + refResult);
                    }
                }
            }
        }
    }
    // test cases with i and I to make sure Turkic works
    char[] iI = new char[] { 0x49, 0x69, 0x130, 0x131 };
    UnicodeSet set = new UnicodeSet(), iSet = new UnicodeSet();
    Normalizer2Impl nfcImpl = Norm2AllModes.getNFCInstance().impl;
    nfcImpl.ensureCanonIterData();
    // collect all sets into one for contiguous output
    for (int i = 0; i < iI.length; ++i) {
        if (nfcImpl.getCanonStartSet(iI[i], iSet)) {
            set.addAll(iSet);
        }
    }
    // test all of these precomposed characters
    Normalizer2 nfcNorm2 = Normalizer2.getNFCInstance();
    UnicodeSetIterator it = new UnicodeSetIterator(set);
    while (it.next()) {
        final int c = it.codepoint;
        if (c == UnicodeSetIterator.IS_STRING) {
            // Stop at the first string element; only code points are tested.
            break;
        }
        String s1 = UTF16.valueOf(c);
        String s2 = nfcNorm2.getDecomposition(c);
        for (int k = 0; k < opt.length; ++k) {
            // test Normalizer::compare
            int result = norm_compare(s1, s2, opt[k].options);
            int refResult = ref_norm_compare(s1, s2, opt[k].options);
            if (sign(result) != sign(refResult)) {
                errln("Normalizer.compare(U+" + hex(c) + " with its NFD, " + opt[k].name + ")" + signString(result) + " should be " + signString(refResult));
            }
            // test UnicodeString::caseCompare - same internal implementation function
            if ((opt[k].options & Normalizer.COMPARE_IGNORE_CASE) != 0) {
                applyCompareOptions(comp, opt[k].options);
                result = comp.compare(s1, s2);
                refResult = ref_case_compare(s1, s2, opt[k].options);
                if (sign(result) != sign(refResult)) {
                    errln("UTF16.compare(U+" + hex(c) + " with its NFD, " + opt[k].name + ")" + signString(result) + " should be " + signString(refResult));
                }
            }
        }
    }
    // test getDecomposition() for some characters that do not decompose
    if (nfcNorm2.getDecomposition(0x20) != null || nfcNorm2.getDecomposition(0x4e00) != null || nfcNorm2.getDecomposition(0x20002) != null) {
        errln("NFC.getDecomposition() returns TRUE for characters which do not have decompositions");
    }
    // test getRawDecomposition() for some characters that do not decompose
    if (nfcNorm2.getRawDecomposition(0x20) != null || nfcNorm2.getRawDecomposition(0x4e00) != null || nfcNorm2.getRawDecomposition(0x20002) != null) {
        errln("getRawDecomposition() returns TRUE for characters which do not have decompositions");
    }
    // test composePair() for some pairs of characters that do not compose
    if (nfcNorm2.composePair(0x20, 0x301) >= 0 || nfcNorm2.composePair(0x61, 0x305) >= 0 || nfcNorm2.composePair(0x1100, 0x1160) >= 0 || nfcNorm2.composePair(0xac00, 0x11a7) >= 0) {
        errln("NFC.composePair() incorrectly composes some pairs of characters");
    }
    // test FilteredNormalizer2.getDecomposition()
    UnicodeSet filter = new UnicodeSet("[^\u00a0-\u00ff]");
    FilteredNormalizer2 fn2 = new FilteredNormalizer2(nfcNorm2, filter);
    if (fn2.getDecomposition(0xe4) != null || !"A\u0304".equals(fn2.getDecomposition(0x100))) {
        errln("FilteredNormalizer2(NFC, ^A0-FF).getDecomposition() failed");
    }
    // test FilteredNormalizer2.getRawDecomposition()
    if (fn2.getRawDecomposition(0xe4) != null || !"A\u0304".equals(fn2.getRawDecomposition(0x100))) {
        errln("FilteredNormalizer2(NFC, ^A0-FF).getRawDecomposition() failed");
    }
    // test FilteredNormalizer2::composePair()
    if (0x100 != fn2.composePair(0x41, 0x304)
            // unfiltered result: U+1E08
            || fn2.composePair(0xc7, 0x301) >= 0) {
        errln("FilteredNormalizer2(NFC, ^A0-FF).composePair() failed");
    }
}

/**
 * Configures {@code comp} for a case-insensitive comparison matching the given
 * {@code Normalizer} option bits: selects the case-folding variant from
 * {@code FOLD_CASE_EXCLUDE_SPECIAL_I} and enables code point order when
 * {@code COMPARE_CODE_POINT_ORDER} is set.
 */
private static void applyCompareOptions(UTF16.StringComparator comp, int options) {
    if ((options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0) {
        comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_DEFAULT);
    } else {
        comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
    }
    comp.setCodePointCompare((options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0);
}
Also used: UnicodeSetIterator (android.icu.text.UnicodeSetIterator), FilteredNormalizer2 (android.icu.text.FilteredNormalizer2), Normalizer2 (android.icu.text.Normalizer2), UTF16 (android.icu.text.UTF16), Normalizer2Impl (android.icu.impl.Normalizer2Impl), UnicodeSet (android.icu.text.UnicodeSet), Test (org.junit.Test)

Aggregations

UnicodeSetIterator (android.icu.text.UnicodeSetIterator): 31, UnicodeSet (android.icu.text.UnicodeSet): 25, Test (org.junit.Test): 17, ULocale (android.icu.util.ULocale): 6, Transliterator (android.icu.text.Transliterator): 5, HashSet (java.util.HashSet): 4, RuleBasedCollator (android.icu.text.RuleBasedCollator): 3, CollationData (android.icu.impl.coll.CollationData): 2, CollationKey (android.icu.text.CollationKey): 2, FilteredNormalizer2 (android.icu.text.FilteredNormalizer2): 2, Normalizer2 (android.icu.text.Normalizer2): 2, RawCollationKey (android.icu.text.RawCollationKey): 2, File (java.io.File): 2, FileOutputStream (java.io.FileOutputStream): 2, OutputStreamWriter (java.io.OutputStreamWriter): 2, PrintWriter (java.io.PrintWriter): 2, ArrayList (java.util.ArrayList): 2, Iterator (java.util.Iterator): 2, TreeSet (java.util.TreeSet): 2, Normalizer2Impl (android.icu.impl.Normalizer2Impl): 1