Examples with Normalizer2Impl - android.icu.impl.Normalizer2Impl

Example 1 with Normalizer2Impl

use of android.icu.impl.Normalizer2Impl in project j2objc by google.

the class Normalizer method getFC_NFKC_Closure.

/**
 * Computes the FC_NFKC closure value of a code point on the fly.
 *
 * <p>The value is derived in two passes: first {@code b = NFKC(Fold(a))}, then
 * {@code c = NFKC(Fold(b))}. A mapping is reported only when the second pass
 * changes the result of the first. For the derivation, see Unicode's
 * DerivedNormalizationProps.txt.
 *
 * <p>This exists for complete API coverage of Unicode properties; the value by
 * itself is not useful via API. (What could be useful is a custom normalization
 * table that combines case folding and NFKC.)
 *
 * @param c The code point whose closure value is to be retrieved
 * @return String representation of the closure value; "" if there is none
 * @deprecated ICU 56
 * @hide original deprecated declaration
 */
@Deprecated
public static String getFC_NFKC_Closure(int c) {
    final Normalizer2 nfkcNormalizer = NFKCModeImpl.INSTANCE.normalizer2;
    final StringBuilder foldedText = new StringBuilder();

    // Pass 1: b = NFKC(Fold(a)).
    final int foldResult = UCaseProps.INSTANCE.toFullFolding(c, foldedText, 0);
    if (foldResult >= 0) {
        // A result above MAX_STRING_LENGTH is itself the folded code point.
        // NOTE(review): for smaller non-negative results, toFullFolding presumably
        // has already appended the folded string to foldedText - confirm.
        if (foldResult > UCaseProps.MAX_STRING_LENGTH) {
            foldedText.appendCodePoint(foldResult);
        }
    } else {
        // Negative result: the input is used as its own folding.
        final Normalizer2Impl impl =
                ((Norm2AllModes.Normalizer2WithImpl) nfkcNormalizer).impl;
        if (impl.getCompQuickCheck(impl.getNorm16(c)) != 0) {
            // c does not change at all under CaseFolding+NFKC
            return "";
        }
        foldedText.appendCodePoint(c);
    }
    final String firstPass = nfkcNormalizer.normalize(foldedText);

    // Pass 2: c = NFKC(Fold(b)).
    final String secondPass = nfkcNormalizer.normalize(UCharacter.foldCase(firstPass, 0));

    // Only when c != b is there a mapping from a to c.
    return firstPass.equals(secondPass) ? "" : secondPass;
}

Also used : UCaseProps(android.icu.impl.UCaseProps) Normalizer2Impl(android.icu.impl.Normalizer2Impl)

Example 2 with Normalizer2Impl

use of android.icu.impl.Normalizer2Impl in project j2objc by google.

the class BasicTest method TestCanonIterData.

/**
 * Regression test for the canonical-iterator data of the NFC implementation:
 * checks one known non-starter and then verifies that
 * {@code isCanonSegmentStarter()} agrees with the {@code [:Segment_Starter:]}
 * UnicodeSet over a range of code points.
 */
@Test
public void TestCanonIterData() {
    // For now, just a regression test.
    final Normalizer2Impl nfcImpl = Norm2AllModes.getNFCInstance().impl.ensureCanonIterData();

    // U+0FB5 must not be reported as a segment starter
    // because it occurs in a decomposition mapping.
    final int nonStarter = 0xfb5;
    if (nfcImpl.isCanonSegmentStarter(nonStarter)) {
        errln("isCanonSegmentStarter(U+0fb5)=true is wrong");
    }

    // For [:Segment_Starter:] to work right, the property function alone is not
    // enough: UnicodeSet also needs a correct range starts set.
    final UnicodeSet starterSet = new UnicodeSet("[:Segment_Starter:]").freeze();
    if (starterSet.contains(nonStarter)) {
        errln("[:Segment_Starter:].contains(U+0fb5)=true is wrong");
    }

    // Try characters up to Kana and miscellaneous CJK but below Han (for expediency).
    final int lastCodePoint = 0x33ff;
    for (int cp = 0; cp <= lastCodePoint; ++cp) {
        final boolean viaImpl = nfcImpl.isCanonSegmentStarter(cp);
        final boolean viaSet = starterSet.contains(cp);
        if (viaImpl == viaSet) {
            continue;
        }
        errln(String.format(
                "discrepancy: isCanonSegmentStarter(U+%04x)=%5b != [:Segment_Starter:].contains(same)",
                cp, viaImpl));
    }
}

Also used : Normalizer2Impl(android.icu.impl.Normalizer2Impl) UnicodeSet(android.icu.text.UnicodeSet) Test(org.junit.Test)

Example 3 with Normalizer2Impl

use of android.icu.impl.Normalizer2Impl in project j2objc by google.

the class BasicTest method TestCompare.

/**
 * Cross-checks comparison results against reference implementations and then
 * probes a few Normalizer2 / FilteredNormalizer2 lookups.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Every pair from the {@code strings} fixture is compared under every entry
 *       of {@code opt}; only the sign of the result is checked against the
 *       reference helper.</li>
 *   <li>The same is repeated for each code point related to i/I (collected via
 *       {@code getCanonStartSet}) against its NFC decomposition.</li>
 *   <li>{@code getDecomposition}, {@code getRawDecomposition} and
 *       {@code composePair} are checked for inputs that must not decompose or
 *       compose, with and without a filter.</li>
 * </ol>
 *
 * <p>NOTE(review): {@code strings}, {@code opt}, {@code norm_compare},
 * {@code ref_norm_compare}, {@code ref_case_compare}, {@code sign},
 * {@code signString}, {@code hex} and {@code errln} are declared outside this
 * method; their exact contracts are assumed from usage here.
 */
@Test
public void TestCompare() {
    int i, j, k, count = strings.length;
    int result, refResult;
    // Size the working array from the fixture itself so that adding entries to
    // strings[] can never overrun a hard-coded capacity.
    String[] s = new String[count];
    // create the UnicodeStrings
    for (i = 0; i < count; ++i) {
        s[i] = Utility.unescape(strings[i]);
    }
    UTF16.StringComparator comp = new UTF16.StringComparator();
    // test them each with each other
    for (i = 0; i < count; ++i) {
        for (j = i; j < count; ++j) {
            for (k = 0; k < opt.length; ++k) {
                // test Normalizer::compare
                result = norm_compare(s[i], s[j], opt[k].options);
                refResult = ref_norm_compare(s[i], s[j], opt[k].options);
                if (sign(result) != sign(refResult)) {
                    errln("Normalizer::compare( " + i + ", " + j + ", " + k + "( " + opt[k].name + "))=" + result + " should be same sign as " + refResult);
                }
                // test UnicodeString::caseCompare - same internal implementation function
                if (0 != (opt[k].options & Normalizer.COMPARE_IGNORE_CASE)) {
                    // Mirror the Normalizer option bits onto the comparator.
                    if ((opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0) {
                        comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_DEFAULT);
                    } else {
                        comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
                    }
                    comp.setCodePointCompare((opt[k].options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0);
                    result = comp.compare(s[i], s[j]);
                    refResult = ref_case_compare(s[i], s[j], opt[k].options);
                    if (sign(result) != sign(refResult)) {
                        // Labelled as the comparator check so a failure here is not
                        // mistaken for the Normalizer::compare check above.
                        errln("UTF16.compare( " + i + ", " + j + ", " + k + "( " + opt[k].name + "))=" + result + " should be same sign as " + refResult);
                    }
                }
            }
        }
    }
    // test cases with i and I to make sure Turkic works
    char[] iI = new char[] { 0x49, 0x69, 0x130, 0x131 };
    UnicodeSet set = new UnicodeSet(), iSet = new UnicodeSet();
    Normalizer2Impl nfcImpl = Norm2AllModes.getNFCInstance().impl;
    nfcImpl.ensureCanonIterData();
    String s1, s2;
    // collect all sets into one for contiguous output
    for (i = 0; i < iI.length; ++i) {
        if (nfcImpl.getCanonStartSet(iI[i], iSet)) {
            set.addAll(iSet);
        }
    }
    // test all of these precomposed characters
    Normalizer2 nfcNorm2 = Normalizer2.getNFCInstance();
    UnicodeSetIterator it = new UnicodeSetIterator(set);
    int c;
    // Stop at the first string element; only single code points are tested.
    while (it.next() && (c = it.codepoint) != UnicodeSetIterator.IS_STRING) {
        s1 = UTF16.valueOf(c);
        s2 = nfcNorm2.getDecomposition(c);
        for (k = 0; k < opt.length; ++k) {
            // test Normalizer::compare
            result = norm_compare(s1, s2, opt[k].options);
            refResult = ref_norm_compare(s1, s2, opt[k].options);
            if (sign(result) != sign(refResult)) {
                errln("Normalizer.compare(U+" + hex(c) + " with its NFD, " + opt[k].name + ")" + signString(result) + " should be " + signString(refResult));
            }
            // test UnicodeString::caseCompare - same internal implementation function
            // Bit-flag test written as "!= 0", consistent with the pairwise loop above.
            if ((opt[k].options & Normalizer.COMPARE_IGNORE_CASE) != 0) {
                if ((opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0) {
                    comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_DEFAULT);
                } else {
                    comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
                }
                comp.setCodePointCompare((opt[k].options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0);
                result = comp.compare(s1, s2);
                refResult = ref_case_compare(s1, s2, opt[k].options);
                if (sign(result) != sign(refResult)) {
                    errln("UTF16.compare(U+" + hex(c) + " with its NFD, " + opt[k].name + ")" + signString(result) + " should be " + signString(refResult));
                }
            }
        }
    }
    // test getDecomposition() for some characters that do not decompose
    if (nfcNorm2.getDecomposition(0x20) != null || nfcNorm2.getDecomposition(0x4e00) != null || nfcNorm2.getDecomposition(0x20002) != null) {
        errln("NFC.getDecomposition() returns TRUE for characters which do not have decompositions");
    }
    // test getRawDecomposition() for some characters that do not decompose
    if (nfcNorm2.getRawDecomposition(0x20) != null || nfcNorm2.getRawDecomposition(0x4e00) != null || nfcNorm2.getRawDecomposition(0x20002) != null) {
        errln("getRawDecomposition() returns TRUE for characters which do not have decompositions");
    }
    // test composePair() for some pairs of characters that do not compose
    if (nfcNorm2.composePair(0x20, 0x301) >= 0 || nfcNorm2.composePair(0x61, 0x305) >= 0 || nfcNorm2.composePair(0x1100, 0x1160) >= 0 || nfcNorm2.composePair(0xac00, 0x11a7) >= 0) {
        errln("NFC.composePair() incorrectly composes some pairs of characters");
    }
    // test FilteredNormalizer2.getDecomposition()
    // The filter excludes U+00A0..U+00FF, so code points in that range are left alone.
    UnicodeSet filter = new UnicodeSet("[^\u00a0-\u00ff]");
    FilteredNormalizer2 fn2 = new FilteredNormalizer2(nfcNorm2, filter);
    if (fn2.getDecomposition(0xe4) != null || !"A\u0304".equals(fn2.getDecomposition(0x100))) {
        errln("FilteredNormalizer2(NFC, ^A0-FF).getDecomposition() failed");
    }
    // test FilteredNormalizer2.getRawDecomposition()
    if (fn2.getRawDecomposition(0xe4) != null || !"A\u0304".equals(fn2.getRawDecomposition(0x100))) {
        errln("FilteredNormalizer2(NFC, ^A0-FF).getRawDecomposition() failed");
    }
    // test FilteredNormalizer2::composePair()
    if (0x100 != fn2.composePair(0x41, 0x304)
            // U+00C7 is filtered out, so no composition; unfiltered result: U+1E08
            || fn2.composePair(0xc7, 0x301) >= 0) {
        errln("FilteredNormalizer2(NFC, ^A0-FF).composePair() failed");
    }
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) FilteredNormalizer2(android.icu.text.FilteredNormalizer2) Normalizer2(android.icu.text.Normalizer2) UTF16(android.icu.text.UTF16) Normalizer2Impl(android.icu.impl.Normalizer2Impl) UnicodeSet(android.icu.text.UnicodeSet) Test(org.junit.Test)

Example 4 with Normalizer2Impl

use of android.icu.impl.Normalizer2Impl in project j2objc by google.

the class CollationTailoring method ensureOwnedData.

/**
 * Makes sure this object has its own CollationData and selects it as the
 * active {@code data}.
 *
 * <p>The owned instance is created on first use from the NFC normalizer
 * implementation and reused on later calls.
 */
void ensureOwnedData() {
    if (null == ownedData) {
        // First call: build the owned data on top of the NFC implementation.
        ownedData = new CollationData(Norm2AllModes.getNFCInstance().impl);
    }
    data = ownedData;
}

Also used : Normalizer2Impl(android.icu.impl.Normalizer2Impl)

Example 5 with Normalizer2Impl

use of android.icu.impl.Normalizer2Impl in project j2objc by google.

the class BasicTest method TestGetsFromImpl.

/*
     * Smoke test for two getters of Normalizer2Impl: getNormTrie() and
     * getFCD16FromBelow180(). It only verifies that each call yields a
     * non-null object; the returned values themselves are not inspected.
     */
@Test
public void TestGetsFromImpl() {
    final Normalizer2Impl impl = Norm2AllModes.getNFCInstance().impl;

    final Object normTrie = impl.getNormTrie();
    assertNotEquals("getNormTrie() returns null", null, normTrie);

    // NOTE(review): if getFCD16FromBelow180() returns a primitive, the boxed
    // value can never be null and this check cannot fail - confirm.
    final Object fcd16 = impl.getFCD16FromBelow180(0);
    assertNotEquals("getFCD16FromBelow180() returns null", null, fcd16);
}

Also used : Normalizer2Impl(android.icu.impl.Normalizer2Impl) Test(org.junit.Test)

Aggregations

Normalizer2Impl (android.icu.impl.Normalizer2Impl): 7; Test (org.junit.Test): 3; UCaseProps (android.icu.impl.UCaseProps): 2; UnicodeSet (android.icu.text.UnicodeSet): 2; CollationSettings (android.icu.impl.coll.CollationSettings): 1; FilteredNormalizer2 (android.icu.text.FilteredNormalizer2): 1; Normalizer2 (android.icu.text.Normalizer2): 1; UTF16 (android.icu.text.UTF16): 1; UnicodeSetIterator (android.icu.text.UnicodeSetIterator): 1