Examples with UnicodeSet - android.icu.text.UnicodeSet

Example 31 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class BasicTest method TestCompare.

/**
 * Cross-checks string comparison against the reference implementations.
 *
 * <p>For every pair taken from {@code strings[]} and every entry of {@code opt[]},
 * the sign of {@code norm_compare} must match the sign of {@code ref_norm_compare};
 * when the option set includes {@code Normalizer.COMPARE_IGNORE_CASE}, the sign of a
 * {@code UTF16.StringComparator} comparison must also match {@code ref_case_compare}.
 * The same checks are then repeated for the characters collected from the canonical
 * start sets of I, i, U+0130 and U+0131, each compared with its NFC decomposition.
 * Finally a few {@code getDecomposition()}, {@code getRawDecomposition()} and
 * {@code composePair()} corner cases are verified for NFC and for a filtered NFC.
 */
@Test
public void TestCompare() {
    int i, j, k, count = strings.length;
    // Sized from strings[] itself so the test cannot overflow when more test strings are added.
    String[] s = new String[count];
    int result, refResult;
    // create the unescaped test strings
    for (i = 0; i < count; ++i) {
        s[i] = Utility.unescape(strings[i]);
    }
    UTF16.StringComparator comp = new UTF16.StringComparator();
    // test them each with each other
    for (i = 0; i < count; ++i) {
        for (j = i; j < count; ++j) {
            for (k = 0; k < opt.length; ++k) {
                // test Normalizer::compare
                result = norm_compare(s[i], s[j], opt[k].options);
                refResult = ref_norm_compare(s[i], s[j], opt[k].options);
                if (sign(result) != sign(refResult)) {
                    errln("Normalizer::compare( " + i + ", " + j + ", " + k + "( " + opt[k].name + "))=" + result + " should be same sign as " + refResult);
                }
                // test UnicodeString::caseCompare - same internal implementation function
                if ((opt[k].options & Normalizer.COMPARE_IGNORE_CASE) != 0) {
                    configureCaseComparator(comp, opt[k].options);
                    result = comp.compare(s[i], s[j]);
                    refResult = ref_case_compare(s[i], s[j], opt[k].options);
                    if (sign(result) != sign(refResult)) {
                        // Report under the comparator's name so this failure is not mistaken for the Normalizer check above.
                        errln("UTF16.compare( " + i + ", " + j + ", " + k + "( " + opt[k].name + "))=" + result + " should be same sign as " + refResult);
                    }
                }
            }
        }
    }
    // test cases with i and I to make sure Turkic works
    char[] iI = new char[] { 0x49, 0x69, 0x130, 0x131 };
    UnicodeSet set = new UnicodeSet(), iSet = new UnicodeSet();
    Normalizer2Impl nfcImpl = Norm2AllModes.getNFCInstance().impl;
    nfcImpl.ensureCanonIterData();
    String s1, s2;
    // collect all sets into one for contiguous output
    for (i = 0; i < iI.length; ++i) {
        if (nfcImpl.getCanonStartSet(iI[i], iSet)) {
            set.addAll(iSet);
        }
    }
    // test all of these precomposed characters
    Normalizer2 nfcNorm2 = Normalizer2.getNFCInstance();
    UnicodeSetIterator it = new UnicodeSetIterator(set);
    int c;
    // stop at the first string element; only single code points are tested here
    while (it.next() && (c = it.codepoint) != UnicodeSetIterator.IS_STRING) {
        s1 = UTF16.valueOf(c);
        s2 = nfcNorm2.getDecomposition(c);
        for (k = 0; k < opt.length; ++k) {
            // test Normalizer::compare
            result = norm_compare(s1, s2, opt[k].options);
            refResult = ref_norm_compare(s1, s2, opt[k].options);
            if (sign(result) != sign(refResult)) {
                errln("Normalizer.compare(U+" + hex(c) + " with its NFD, " + opt[k].name + ")" + signString(result) + " should be " + signString(refResult));
            }
            // test UnicodeString::caseCompare - same internal implementation function
            if ((opt[k].options & Normalizer.COMPARE_IGNORE_CASE) != 0) {
                configureCaseComparator(comp, opt[k].options);
                result = comp.compare(s1, s2);
                refResult = ref_case_compare(s1, s2, opt[k].options);
                if (sign(result) != sign(refResult)) {
                    errln("UTF16.compare(U+" + hex(c) + " with its NFD, " + opt[k].name + ")" + signString(result) + " should be " + signString(refResult));
                }
            }
        }
    }
    // test getDecomposition() for some characters that do not decompose
    if (nfcNorm2.getDecomposition(0x20) != null || nfcNorm2.getDecomposition(0x4e00) != null || nfcNorm2.getDecomposition(0x20002) != null) {
        errln("NFC.getDecomposition() returns TRUE for characters which do not have decompositions");
    }
    // test getRawDecomposition() for some characters that do not decompose
    if (nfcNorm2.getRawDecomposition(0x20) != null || nfcNorm2.getRawDecomposition(0x4e00) != null || nfcNorm2.getRawDecomposition(0x20002) != null) {
        errln("getRawDecomposition() returns TRUE for characters which do not have decompositions");
    }
    // test composePair() for some pairs of characters that do not compose
    if (nfcNorm2.composePair(0x20, 0x301) >= 0 || nfcNorm2.composePair(0x61, 0x305) >= 0 || nfcNorm2.composePair(0x1100, 0x1160) >= 0 || nfcNorm2.composePair(0xac00, 0x11a7) >= 0) {
        errln("NFC.composePair() incorrectly composes some pairs of characters");
    }
    // test FilteredNormalizer2.getDecomposition()
    // The filter excludes U+00A0..U+00FF, so U+00E4 must not decompose while U+0100 still does.
    UnicodeSet filter = new UnicodeSet("[^\u00a0-\u00ff]");
    FilteredNormalizer2 fn2 = new FilteredNormalizer2(nfcNorm2, filter);
    if (fn2.getDecomposition(0xe4) != null || !"A\u0304".equals(fn2.getDecomposition(0x100))) {
        errln("FilteredNormalizer2(NFC, ^A0-FF).getDecomposition() failed");
    }
    // test FilteredNormalizer2.getRawDecomposition()
    if (fn2.getRawDecomposition(0xe4) != null || !"A\u0304".equals(fn2.getRawDecomposition(0x100))) {
        errln("FilteredNormalizer2(NFC, ^A0-FF).getRawDecomposition() failed");
    }
    // test FilteredNormalizer2::composePair()
    if (0x100 != fn2.composePair(0x41, 0x304)
            // U+00C7 is excluded by the filter, so this pair must not compose (unfiltered result: U+1E08)
            || fn2.composePair(0xc7, 0x301) >= 0) {
        errln("FilteredNormalizer2(NFC, ^A0-FF).composePair() failed");
    }
}

/**
 * Configures {@code comp} for a case-insensitive comparison that mirrors the given
 * Normalizer option bits: the fold-case variant follows
 * {@code Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I} and code point ordering follows
 * {@code Normalizer.COMPARE_CODE_POINT_ORDER}.
 *
 * @param comp    the comparator to configure
 * @param options the Normalizer comparison option bits of the current test case
 */
private static void configureCaseComparator(UTF16.StringComparator comp, int options) {
    if ((options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0) {
        comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_DEFAULT);
    } else {
        comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
    }
    comp.setCodePointCompare((options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0);
}

Also used: UnicodeSetIterator (android.icu.text.UnicodeSetIterator), FilteredNormalizer2 (android.icu.text.FilteredNormalizer2), Normalizer2 (android.icu.text.Normalizer2), UTF16 (android.icu.text.UTF16), Normalizer2Impl (android.icu.impl.Normalizer2Impl), UnicodeSet (android.icu.text.UnicodeSet), Test (org.junit.Test)

Example 32 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class BasicTest method TestFilteredAppend.

/**
 * Checks that a FilteredNormalizer2 wrapping NFC produces the same text from
 * append(), normalizeSecondAndAppend() and normalize() when both input pieces
 * contain a character that the filter set excludes.
 */
@Test
public void TestFilteredAppend() {
    // Both pieces contain U+0313, which lies in the excluded range U+0310..U+031F.
    final String first = "a\u0313a";
    final String second = "\u0301\u0313";
    final String expected = "a\u0313á\u0313";

    FilteredNormalizer2 fn2 = new FilteredNormalizer2(
            Normalizer2.getNFCInstance(),
            new UnicodeSet("[^\u00a0-\u00ff\u0310-\u031f]"));

    // Plain append of the second piece onto the first.
    StringBuilder sb = new StringBuilder(first);
    String appended = fn2.append(sb, second).toString();
    assertEquals("append()", expected, appended);

    // Reset the buffer to the first piece, then append while normalizing the second piece.
    sb.setLength(0);
    sb.append(first);
    String normalizedAndAppended = fn2.normalizeSecondAndAppend(sb, second).toString();
    assertEquals("normalizeSecondAndAppend()", expected, normalizedAndAppended);

    // Normalizer2.normalize(String) uses spanQuickCheckYes() and normalizeSecondAndAppend().
    assertEquals("normalize()", expected, fn2.normalize(first + second));
}

Also used: FilteredNormalizer2 (android.icu.text.FilteredNormalizer2), Normalizer2 (android.icu.text.Normalizer2), UnicodeSet (android.icu.text.UnicodeSet), Test (org.junit.Test)

Example 33 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class UnicodeSetTest method TestAPI.

/**
 * Exercises the basic UnicodeSet API.
 *
 * <p>Covered in order: the default constructor, clear()/isEmpty(), size(),
 * contains(first, last), the ported InversionList checks (set, addAll, complement,
 * complementAll and a setToBits/bitsToSet round trip), the string-taking
 * complement/complementAll/removeAll/retainAll/retain/remove overloads,
 * containsNone/containsSome, hashCode(), and finally addAll(collection) and
 * addAllTo(collection).
 *
 * <p>Several equality checks return early on failure because the following steps
 * build on the set contents established by the failed step.
 */
@Test
public void TestAPI() {
    // default ct
    UnicodeSet set = new UnicodeSet();
    if (!set.isEmpty() || set.getRangeCount() != 0) {
        errln("FAIL, set should be empty but isn't: " + set);
    }
    // clear(), isEmpty()
    set.add('a');
    if (set.isEmpty()) {
        errln("FAIL, set shouldn't be empty but is: " + set);
    }
    set.clear();
    if (!set.isEmpty()) {
        errln("FAIL, set should be empty but isn't: " + set);
    }
    // size()
    set.clear();
    if (set.size() != 0) {
        errln("FAIL, size should be 0, but is " + set.size() + ": " + set);
    }
    set.add('a');
    if (set.size() != 1) {
        errln("FAIL, size should be 1, but is " + set.size() + ": " + set);
    }
    set.add('1', '9');
    if (set.size() != 10) {
        errln("FAIL, size should be 10, but is " + set.size() + ": " + set);
    }
    set.clear();
    set.complement();
    if (set.size() != 0x110000) {
        errln("FAIL, size should be 0x110000, but is " + set.size());
    }
    // contains(first, last)
    set.clear();
    set.applyPattern("[A-Y 1-8 b-d l-y]");
    for (int i = 0; i < set.getRangeCount(); ++i) {
        int a = set.getRangeStart(i);
        int b = set.getRangeEnd(i);
        if (!set.contains(a, b)) {
            errln("FAIL, should contain " + (char) a + '-' + (char) b + " but doesn't: " + set);
        }
        // widening the range by one on either side must no longer be fully contained
        if (set.contains((char) (a - 1), b)) {
            errln("FAIL, shouldn't contain " + (char) (a - 1) + '-' + (char) b + " but does: " + set);
        }
        if (set.contains(a, (char) (b + 1))) {
            errln("FAIL, shouldn't contain " + (char) a + '-' + (char) (b + 1) + " but does: " + set);
        }
    }
    // Ported InversionList test.
    UnicodeSet a = new UnicodeSet((char) 3, (char) 10);
    UnicodeSet b = new UnicodeSet((char) 7, (char) 15);
    UnicodeSet c = new UnicodeSet();
    logln("a [3-10]: " + a);
    logln("b [7-15]: " + b);
    c.set(a);
    c.addAll(b);
    UnicodeSet exp = new UnicodeSet((char) 3, (char) 15);
    if (c.equals(exp)) {
        logln("c.set(a).add(b): " + c);
    } else {
        errln("FAIL: c.set(a).add(b) = " + c + ", expect " + exp);
    }
    c.complement();
    exp.set((char) 0, (char) 2);
    exp.add((char) 16, UnicodeSet.MAX_VALUE);
    if (c.equals(exp)) {
        logln("c.complement(): " + c);
    } else {
        errln(Utility.escape("FAIL: c.complement() = " + c + ", expect " + exp));
    }
    // complementing twice must restore the original [3-15]
    c.complement();
    exp.set((char) 3, (char) 15);
    if (c.equals(exp)) {
        logln("c.complement(): " + c);
    } else {
        errln("FAIL: c.complement() = " + c + ", expect " + exp);
    }
    c.set(a);
    c.complementAll(b);
    exp.set((char) 3, (char) 6);
    exp.add((char) 11, (char) 15);
    if (c.equals(exp)) {
        logln("c.set(a).complement(b): " + c);
    } else {
        errln("FAIL: c.set(a).complement(b) = " + c + ", expect " + exp);
    }
    exp.set(c);
    c = bitsToSet(setToBits(c));
    if (c.equals(exp)) {
        logln("bitsToSet(setToBits(c)): " + c);
    } else {
        errln("FAIL: bitsToSet(setToBits(c)) = " + c + ", expect " + exp);
    }
    // Additional tests for coverage JB#2118
    // UnicodeSet::complement(class UnicodeString const &)
    // UnicodeSet::complementAll(class UnicodeString const &)
    // UnicodeSet::containsNone(class UnicodeSet const &)
    // UnicodeSet::containsNone(long,long)
    // UnicodeSet::containsSome(class UnicodeSet const &)
    // UnicodeSet::containsSome(long,long)
    // UnicodeSet::removeAll(class UnicodeString const &)
    // UnicodeSet::retain(long)
    // UnicodeSet::retainAll(class UnicodeString const &)
    // UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &)
    // UnicodeSetIterator::getString(void)
    set.clear();
    set.complement("ab");
    exp.applyPattern("[{ab}]");
    if (!set.equals(exp)) {
        errln("FAIL: complement(\"ab\")");
        return;
    }
    UnicodeSetIterator iset = new UnicodeSetIterator(set);
    if (!iset.next() || iset.codepoint != UnicodeSetIterator.IS_STRING) {
        errln("FAIL: UnicodeSetIterator.next/IS_STRING");
    } else if (!iset.string.equals("ab")) {
        errln("FAIL: UnicodeSetIterator.string");
    }
    set.add((char) 0x61, (char) 0x7A);
    set.complementAll("alan");
    exp.applyPattern("[{ab}b-kmo-z]");
    if (!set.equals(exp)) {
        errln("FAIL: complementAll(\"alan\")");
        return;
    }
    exp.applyPattern("[a-z]");
    if (set.containsNone(exp)) {
        errln("FAIL: containsNone(UnicodeSet)");
    }
    if (!set.containsSome(exp)) {
        errln("FAIL: containsSome(UnicodeSet)");
    }
    exp.applyPattern("[aln]");
    if (!set.containsNone(exp)) {
        errln("FAIL: containsNone(UnicodeSet)");
    }
    if (set.containsSome(exp)) {
        errln("FAIL: containsSome(UnicodeSet)");
    }
    if (set.containsNone((char) 0x61, (char) 0x7A)) {
        errln("FAIL: containsNone(char, char)");
    }
    if (!set.containsSome((char) 0x61, (char) 0x7A)) {
        errln("FAIL: containsSome(char, char)");
    }
    if (!set.containsNone((char) 0x41, (char) 0x5A)) {
        errln("FAIL: containsNone(char, char)");
    }
    if (set.containsSome((char) 0x41, (char) 0x5A)) {
        errln("FAIL: containsSome(char, char)");
    }
    set.removeAll("liu");
    exp.applyPattern("[{ab}b-hj-kmo-tv-z]");
    if (!set.equals(exp)) {
        errln("FAIL: removeAll(\"liu\")");
        return;
    }
    set.retainAll("star");
    exp.applyPattern("[rst]");
    if (!set.equals(exp)) {
        errln("FAIL: retainAll(\"star\")");
        return;
    }
    set.retain((char) 0x73);
    exp.applyPattern("[s]");
    if (!set.equals(exp)) {
        errln("FAIL: retain('s')");
        return;
    }
    // ICU 2.6 coverage tests
    // public final UnicodeSet retain(String s);
    // public final UnicodeSet remove(int c);
    // public final UnicodeSet remove(String s);
    // public int hashCode();
    set.applyPattern("[a-z{ab}{cd}]");
    set.retain("cd");
    exp.applyPattern("[{cd}]");
    if (!set.equals(exp)) {
        errln("FAIL: retain(\"cd\")");
        return;
    }
    set.applyPattern("[a-z{ab}{cd}]");
    set.remove((char) 0x63);
    exp.applyPattern("[abd-z{ab}{cd}]");
    if (!set.equals(exp)) {
        errln("FAIL: remove('c')");
        return;
    }
    set.remove("cd");
    exp.applyPattern("[abd-z{ab}]");
    if (!set.equals(exp)) {
        errln("FAIL: remove(\"cd\")");
        return;
    }
    // set and exp are equal at this point, so their hash codes must agree
    if (set.hashCode() != exp.hashCode()) {
        errln("FAIL: hashCode() unequal");
    }
    exp.clear();
    if (set.hashCode() == exp.hashCode()) {
        errln("FAIL: hashCode() equal");
    }
    {
        // Cover addAll(Collection) and addAllTo(Collection)
        // Seems that there is a bug in addAll(Collection) operation
        // Ram also add a similar test to UtilityTest.java
        logln("Testing addAll(Collection) ... ");
        String[] array = { "a", "b", "c", "de" };
        List<String> list = Arrays.asList(array);
        Set<String> aset = new HashSet<String>(list);
        logln(" *** The source set's size is: " + aset.size());
        set.clear();
        set.addAll(aset);
        if (set.size() != aset.size()) {
            errln("FAIL: After addAll, the UnicodeSet size expected " + aset.size() + ", " + set.size() + " seen instead!");
        } else {
            logln("OK: After addAll, the UnicodeSet size got " + set.size());
        }
        List<String> list2 = new ArrayList<String>();
        set.addAllTo(list2);
        // verify the result
        log(" *** The elements are: ");
        String s = set.toPattern(true);
        logln(s);
        for (String element : list2) {
            log(element + "  ");
        }
        // a new line
        logln("");
    }
}

Also used: UnicodeSetIterator (android.icu.text.UnicodeSetIterator), UnicodeSet (android.icu.text.UnicodeSet), SortedSet (java.util.SortedSet), TreeSet (java.util.TreeSet), HashSet (java.util.HashSet), LinkedHashSet (java.util.LinkedHashSet), Set (java.util.Set), ArrayList (java.util.ArrayList), Iterator (java.util.Iterator), List (java.util.List), Test (org.junit.Test)

Example 34 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class UnicodeSetTest method toPatternAux.

/**
 * Builds a set holding the single range {@code start..end} and hands it to
 * {@code checkPat} together with a hexadecimal label for that range.
 *
 * @param start first code point of the range
 * @param end   last code point of the range
 * @return whatever {@code checkPat} reports for the label and the set
 */
public boolean toPatternAux(int start, int end) {
    // Integer.toString is used because Utility.hex doesn't handle ints
    StringBuilder label = new StringBuilder("0x");
    label.append(Integer.toString(start, 16).toUpperCase());
    if (start != end) {
        // only a real range gets the "..0x<end>" suffix
        label.append("..0x").append(Integer.toString(end, 16).toUpperCase());
    }

    UnicodeSet rangeSet = new UnicodeSet();
    rangeSet.add(start, end);
    return checkPat(label.toString(), rangeSet);
}

Also used : UnicodeSet(android.icu.text.UnicodeSet)

Example 35 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class UnicodeSetTest method checkRoundTrip.

/**
 * Quick consistency check for a single set: copying it through the iterator
 * (in both copyWithIterator modes) and rebuilding it from its own pattern
 * (both toPattern variants) must each yield a set equal to the original.
 */
void checkRoundTrip(UnicodeSet s) {
    // Captured up front, before any of the copies are made.
    final String patternFalse = s.toPattern(false);

    // Iterator round trips: first with the flag off, then with it on ("try range").
    checkEqual(s, copyWithIterator(s, false), "iterator roundtrip");
    checkEqual(s, copyWithIterator(s, true), "iterator roundtrip");

    // Pattern round trips for both toPattern variants.
    checkEqual(s, new UnicodeSet(patternFalse), "toPattern(false)");
    final String patternTrue = s.toPattern(true);
    checkEqual(s, new UnicodeSet(patternTrue), "toPattern(true)");
}

Also used : UnicodeSet(android.icu.text.UnicodeSet)

Aggregations

UnicodeSet (android.icu.text.UnicodeSet): 158; Test (org.junit.Test): 112; UnicodeSetIterator (android.icu.text.UnicodeSetIterator): 25; Transliterator (android.icu.text.Transliterator): 19; ReplaceableString (android.icu.text.ReplaceableString): 14; ULocale (android.icu.util.ULocale): 13; CaseInsensitiveString (android.icu.util.CaseInsensitiveString): 9; Normalizer2 (android.icu.text.Normalizer2): 7; RuleBasedCollator (android.icu.text.RuleBasedCollator): 7; ArrayList (java.util.ArrayList): 5; HashSet (java.util.HashSet): 5; FilteredNormalizer2 (android.icu.text.FilteredNormalizer2): 4; SpoofChecker (android.icu.text.SpoofChecker): 4; TreeSet (java.util.TreeSet): 4; UnicodeMap (android.icu.dev.util.UnicodeMap): 3; AlphabeticIndex (android.icu.text.AlphabeticIndex): 3; CollationKey (android.icu.text.CollationKey): 3; RawCollationKey (android.icu.text.RawCollationKey): 3; CheckResult (android.icu.text.SpoofChecker.CheckResult): 3; SpanCondition (android.icu.text.UnicodeSet.SpanCondition): 3