Examples with UnicodeSet - android.icu.text.UnicodeSet

Example 96 with UnicodeSet

Use of android.icu.text.UnicodeSet in project j2objc by Google.

In class UnicodeSetTest, method CheckRangeSpeed.

/**
 * Times repeated construction of a UnicodeSet copied from {@code testSet} against repeated
 * construction from the string form of {@code testSet}, and reports both per-iteration
 * averages (in milliseconds) together with their relative difference.
 * <p>
 * The report is logged when the copy-based average is less than twice the pattern-based
 * average; in every other case it is reported through {@code errln}.
 *
 * @param iterations number of constructions timed for each of the two variants
 * @param testSet set used as the copy source and as the source of the pattern string
 */
private void CheckRangeSpeed(int iterations, UnicodeSet testSet) {
    testSet.complement().complement();
    final String pattern = testSet.toString();

    // Record the [first, last] code point of every range in the set.
    // NOTE(review): this array is filled but never read afterwards.
    final int[] rangeBounds = new int[testSet.getRangeCount() * 2];
    final UnicodeSetIterator ranges = new UnicodeSetIterator(testSet);
    int slot = 0;
    while (ranges.nextRange()) {
        rangeBounds[slot] = ranges.codepoint;
        rangeBounds[slot + 1] = ranges.codepointEnd;
        slot += 2;
    }

    // Sanity check before timing: a copy must compare equal to its source.
    UnicodeSet copy = new UnicodeSet(testSet);
    assertEquals("from range vs pattern", testSet, copy);

    // Phase 1: copy construction.
    final double copyBegin = System.currentTimeMillis();
    for (int remaining = iterations; remaining > 0; --remaining) {
        copy = new UnicodeSet(testSet);
    }

    // Phase 2: pattern construction (the end of phase 1 is the start of phase 2).
    final double patternBegin = System.currentTimeMillis();
    for (int remaining = iterations; remaining > 0; --remaining) {
        new UnicodeSet(pattern);
    }
    final double patternFinish = System.currentTimeMillis();

    final double rangeConstructorTime = (patternBegin - copyBegin) / iterations;
    final double patternConstructorTime = (patternFinish - patternBegin) / iterations;
    final String report = "Range constructor:\t" + rangeConstructorTime
            + ";\tPattern constructor:\t" + patternConstructorTime
            + "\t\t" + percent.format(rangeConstructorTime / patternConstructorTime - 1);

    // Keep the comparison in this direction: a NaN average must land in the error branch.
    final boolean withinBudget = rangeConstructorTime < 2 * patternConstructorTime;
    if (!withinBudget) {
        errln(report);
    } else {
        logln(report);
    }
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) UnicodeSet(android.icu.text.UnicodeSet)

Example 97 with UnicodeSet

Use of android.icu.text.UnicodeSet in project j2objc by Google.

In class BasicTest, method initSkippables.

/**
 * Populates the four entries {@code D}, {@code C}, {@code KD} and {@code KC} of
 * {@code skipSets} from quick-check/combining-class patterns, then removes from the
 * {@code C} and {@code KC} entries every character that stops being NFC-normalized when
 * one of the {@code NFC_QC=Maybe} characters is appended to it.
 *
 * @param skipSets array whose D, C, KD and KC entries are overwritten in place
 * @return the same array that was passed in
 */
private static UnicodeSet[] initSkippables(UnicodeSet[] skipSets) {
    skipSets[D].applyPattern("[[:NFD_QC=Yes:]&[:ccc=0:]]", false);
    skipSets[C].applyPattern("[[:NFC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]", false);
    skipSets[KD].applyPattern("[[:NFKD_QC=Yes:]&[:ccc=0:]]", false);
    skipSets[KC].applyPattern("[[:NFKC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]", false);

    // Gather every back-combining character together with its combining class;
    // these are the characters that will be appended to each candidate below.
    final UnicodeSet backCombining = new UnicodeSet("[:NFC_QC=Maybe:]");
    final int backCount = backCombining.size();
    final int[] backChars = new int[backCount];
    final int[] backClasses = new int[backCount];
    final UnicodeSetIterator iter = new UnicodeSetIterator(backCombining);
    for (int k = 0; k < backCount; ++k) {
        iter.next();
        final int backChar = iter.codepoint;
        backChars[k] = backChar;
        backClasses[k] = UCharacter.getCombiningClass(backChar);
    }

    // Control codes, Han characters and Hangul LVT syllables do not combine forward,
    // so they are excluded from the candidates. LV syllables were removed by the patterns.
    final UnicodeSet ignorable = new UnicodeSet("[[:C:][:Unified_Ideograph:][:HST=LVT:]]");
    final UnicodeSet nfcSnapshot = (UnicodeSet) (skipSets[C].clone());
    final UnicodeSet candidates = nfcSnapshot.removeAll(ignorable);

    // Probe each candidate: append each back-combining character in turn and drop the
    // candidate from the C and KC sets as soon as one combination is not NFC-normalized.
    final Normalizer2 nfc = Normalizer2.getNFCInstance();
    final StringBuilder probe = new StringBuilder();
    iter.reset(candidates);
    while (iter.next()) {
        final int lead = iter.codepoint;
        probe.setLength(0);
        probe.appendCodePoint(lead);
        final int leadLength = probe.length();
        final int trailClass =
                UCharacter.getIntPropertyValue(lead, UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
        for (int k = 0; k < backCount; ++k) {
            // When the candidate's decomposition ends in a non-zero combining class, it can
            // only change by combining with a character of non-zero combining class.
            if (trailClass != 0 && backClasses[k] == 0) {
                continue;
            }
            probe.appendCodePoint(backChars[k]);
            if (!nfc.isNormalized(probe)) {
                skipSets[C].remove(lead);
                skipSets[KC].remove(lead);
                break;
            }
            // Strip the appended character again before trying the next one.
            probe.setLength(leadLength);
        }
    }
    return skipSets;
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) Normalizer2(android.icu.text.Normalizer2) FilteredNormalizer2(android.icu.text.FilteredNormalizer2) UnicodeSet(android.icu.text.UnicodeSet)

Example 98 with UnicodeSet

Use of android.icu.text.UnicodeSet in project j2objc by Google.

In class BasicTest, method TestSerializedSet.

/**
 * Loads a hand-written serialized set into a USerializedSet, copies its ranges into a
 * UnicodeSet, and then checks that every code point of every range in that UnicodeSet is
 * reported as contained by the USerializedSet.
 */
@Test
public void TestSerializedSet() {
    final USerializedSet serializedSet = new USerializedSet();
    final UnicodeSet merged = new UnicodeSet();
    // First unit is the length, second is the bmpLength; the rest is the set data.
    final char[] serialized = { 0x8007, 3, 0xc0, 0xfe, 0xfffc, 1, 9, 0x10, 0xfffc };
    serializedSet.getSet(serialized, 0);

    // Fold every serialized range into one UnicodeSet so the ranges come out contiguous.
    final int[] bounds = new int[2];
    final int rangeCount = serializedSet.countRanges();
    for (int r = 0; r < rangeCount; ++r) {
        serializedSet.getRange(r, bounds);
        merged.add(bounds[0], bounds[1]);
    }

    // Walk the merged ranges and probe each code point individually.
    final UnicodeSetIterator ranges = new UnicodeSetIterator(merged);
    while (ranges.nextRange()) {
        if (ranges.codepoint == UnicodeSetIterator.IS_STRING) {
            // Stop at the first string entry, exactly where the range walk ends.
            break;
        }
        final int last = ranges.codepointEnd;
        for (int cp = ranges.codepoint; cp <= last; ++cp) {
            if (!serializedSet.contains(cp)) {
                errln("USerializedSet.contains failed for " + Utility.hex(cp, 8));
            }
        }
    }
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) USerializedSet(android.icu.impl.USerializedSet) UnicodeSet(android.icu.text.UnicodeSet) Test(org.junit.Test)

Example 99 with UnicodeSet

Use of android.icu.text.UnicodeSet in project j2objc by Google.

In class RBBITestMonkey, method RunMonkey.

/**
 *  Run a RBBI monkey test.  Common routine, for all break iterator types.
 *  <p>
 *  Each iteration builds a random test string from the character classes supplied by
 *  {@code mk}, computes the expected break positions with {@code mk.next()}, collects the
 *  break positions reported by {@code bi} through next(), previous(), isBoundary(),
 *  following() and preceding(), and reports the first position at which any of them
 *  disagrees with the expected result.
 *    Parameters:
 *       bi      - the break iterator to use
 *       mk      - MonkeyKind, abstraction for obtaining expected results
 *       name    - Name of test (char, word, etc.) for use in error messages
 *       seed    - Seed for starting random number generator (parameter from user)
 *       numIterations - number of random strings to test; -1 loops without end
 */
void RunMonkey(BreakIterator bi, RBBIMonkeyKind mk, String name, int seed, int numIterations) {
    int TESTSTRINGLEN = 500;
    StringBuffer testText = new StringBuffer();
    int numCharClasses;
    List chClasses;
    // All per-position arrays are sized for the worst case of every code point in the
    // test string taking two UTF-16 code units, plus one slot for the end-of-text position.
    int[] expected = new int[TESTSTRINGLEN * 2 + 1];
    int expectedCount = 0;
    boolean[] expectedBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
    boolean[] forwardBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
    boolean[] reverseBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
    boolean[] isBoundaryBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
    boolean[] followingBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
    boolean[] precedingBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
    int i;
    int loopCount = 0;
    boolean printTestData = false;
    boolean printBreaksFromBI = false;
    m_seed = seed;
    numCharClasses = mk.charClasses().size();
    chClasses = mk.charClasses();
    // Verify that the character classes all have at least one member.
    for (i = 0; i < numCharClasses; i++) {
        UnicodeSet s = (UnicodeSet) chClasses.get(i);
        if (s == null || s.size() == 0) {
            errln("Character Class " + i + " is null or of zero size.");
            return;
        }
    }
    // --------------------------------------------------------------------------------------------
    // 
    // Debugging settings.  Comment out everything in the following block for normal operation
    // 
    // --------------------------------------------------------------------------------------------
    // numIterations = -1;
    // numIterations = 10000;   // Same as exhaustive.
    // RuleBasedBreakIterator_New.fTrace = true;
    // m_seed = 859056465;
    // TESTSTRINGLEN = 50;
    // printTestData = true;
    // printBreaksFromBI = true;
    // ((RuleBasedBreakIterator_New)bi).dump();
    // --------------------------------------------------------------------------------------------
    // 
    // End of Debugging settings.
    // 
    // --------------------------------------------------------------------------------------------
    int dotsOnLine = 0;
    while (loopCount < numIterations || numIterations == -1) {
        if (numIterations == -1 && loopCount % 10 == 0) {
            // If test is running in an infinite loop, display a periodic tic so
            // we can tell that it is making progress.
            System.out.print(".");
            if (dotsOnLine++ >= 80) {
                System.out.println();
                dotsOnLine = 0;
            }
        }
        // Save current random number seed, so that we can recreate the random numbers
        // for this loop iteration in event of an error.
        seed = m_seed;
        testText.setLength(0);
        // Populate a test string with data.
        if (printTestData) {
            System.out.println("Test Data string ...");
        }
        for (i = 0; i < TESTSTRINGLEN; i++) {
            // Pick a random character class, then a random member of that class.
            int aClassNum = m_rand() % numCharClasses;
            UnicodeSet classSet = (UnicodeSet) chClasses.get(aClassNum);
            int charIdx = m_rand() % classSet.size();
            int c = classSet.charAt(charIdx);
            if (c < 0) {
                // TODO:  deal with sets containing strings.
                errln("c < 0");
            }
            UTF16.appendCodePoint(testText, c);
            if (printTestData) {
                System.out.print(Integer.toHexString(c) + " ");
            }
        }
        if (printTestData) {
            System.out.println();
        }
        // Reset all per-position results left over from the previous iteration.
        Arrays.fill(expected, 0);
        Arrays.fill(expectedBreaks, false);
        Arrays.fill(forwardBreaks, false);
        Arrays.fill(reverseBreaks, false);
        Arrays.fill(isBoundaryBreaks, false);
        Arrays.fill(followingBreaks, false);
        Arrays.fill(precedingBreaks, false);
        // Calculate the expected results for this test string.
        mk.setText(testText);
        expectedCount = 0;
        // Position 0 is always recorded as an expected break.
        expectedBreaks[0] = true;
        expected[expectedCount++] = 0;
        int breakPos = 0;
        int lastBreakPos = -1;
        for (; ; ) {
            lastBreakPos = breakPos;
            breakPos = mk.next(breakPos);
            if (breakPos == -1) {
                break;
            }
            if (breakPos > testText.length()) {
                errln("breakPos > testText.length()");
            }
            if (lastBreakPos >= breakPos) {
                errln("Next() not increasing.");
            // break;
            }
            expectedBreaks[breakPos] = true;
            expected[expectedCount++] = breakPos;
        }
        // Find the break positions using forward iteration
        if (printBreaksFromBI) {
            System.out.println("Breaks from BI...");
        }
        bi.setText(testText.toString());
        for (i = bi.first(); i != BreakIterator.DONE; i = bi.next()) {
            if (i < 0 || i > testText.length()) {
                errln(name + " break monkey test: Out of range value returned by breakIterator::next()");
                break;
            }
            if (printBreaksFromBI) {
                System.out.print(Integer.toHexString(i) + " ");
            }
            forwardBreaks[i] = true;
        }
        if (printBreaksFromBI) {
            System.out.println();
        }
        // Find the break positions using reverse iteration
        for (i = bi.last(); i != BreakIterator.DONE; i = bi.previous()) {
            if (i < 0 || i > testText.length()) {
                // This loop advances with previous(), so the message names previous().
                errln(name + " break monkey test: Out of range value returned by breakIterator::previous()");
                break;
            }
            reverseBreaks[i] = true;
        }
        // Find the break positions using isBoundary() tests.
        for (i = 0; i <= testText.length(); i++) {
            isBoundaryBreaks[i] = bi.isBoundary(i);
        }
        // Find the break positions using the following() function.
        lastBreakPos = 0;
        followingBreaks[0] = true;
        for (i = 0; i < testText.length(); i++) {
            breakPos = bi.following(i);
            // A valid result lies strictly after i, inside the text, and never moves
            // backwards or jumps over a previously returned break that is still ahead of i.
            if (breakPos <= i || breakPos < lastBreakPos || breakPos > testText.length() || breakPos > lastBreakPos && lastBreakPos > i) {
                errln(name + " break monkey test: " + "Out of range value returned by BreakIterator::following().\n" + "index=" + i + "following returned=" + breakPos + "lastBreak=" + lastBreakPos);
                // Forces an error.  The mismatch is recorded in followingBreaks so that the
                // comparison below attributes the failure to following().
                followingBreaks[i] = !expectedBreaks[i];
            } else {
                followingBreaks[breakPos] = true;
                lastBreakPos = breakPos;
            }
        }
        // Find the break positions using the preceding() function.
        lastBreakPos = testText.length();
        precedingBreaks[testText.length()] = true;
        for (i = testText.length(); i > 0; i--) {
            breakPos = bi.preceding(i);
            // Mirror image of the following() validity check above.
            if (breakPos >= i || breakPos > lastBreakPos || breakPos < 0 || breakPos < lastBreakPos && lastBreakPos < i) {
                errln(name + " break monkey test: " + "Out of range value returned by BreakIterator::preceding().\n" + "index=" + i + "preceding returned=" + breakPos + "lastBreak=" + lastBreakPos);
                // Forces an error.
                precedingBreaks[i] = !expectedBreaks[i];
            } else {
                precedingBreaks[breakPos] = true;
                lastBreakPos = breakPos;
            }
        }
        // Compare the expected and actual results.
        // Only the first mismatching position is reported for each test string.
        for (i = 0; i <= testText.length(); i++) {
            String errorType = null;
            if (forwardBreaks[i] != expectedBreaks[i]) {
                errorType = "next()";
            } else if (reverseBreaks[i] != forwardBreaks[i]) {
                errorType = "previous()";
            } else if (isBoundaryBreaks[i] != expectedBreaks[i]) {
                errorType = "isBoundary()";
            } else if (followingBreaks[i] != expectedBreaks[i]) {
                errorType = "following()";
            } else if (precedingBreaks[i] != expectedBreaks[i]) {
                errorType = "preceding()";
            }
            if (errorType != null) {
                // Format a range of the test text that includes the failure as
                // a data item that can be included in the rbbi test data file.
                // Start of the range is the last point where expected and actual results
                // both agreed that there was a break position.
                int startContext = i;
                int count = 0;
                for (; ; ) {
                    if (startContext == 0) {
                        break;
                    }
                    startContext--;
                    if (expectedBreaks[startContext]) {
                        if (count == 2)
                            break;
                        count++;
                    }
                }
                // End of range is two expected breaks past the start position.
                int endContext = i + 1;
                int ci;
                for (ci = 0; ci < 2; ci++) {
                    // Number of items to include in error text.
                    for (; ; ) {
                        if (endContext >= testText.length()) {
                            break;
                        }
                        if (expectedBreaks[endContext - 1]) {
                            if (count == 0)
                                break;
                            count--;
                        }
                        endContext++;
                    }
                }
                // Format looks like   "<data><>\uabcd\uabcd<>\U0001abcd...</data>"
                StringBuffer errorText = new StringBuffer();
                // Char from test data
                int c;
                for (ci = startContext; ci <= endContext && ci != -1; ci = nextCP(testText, ci)) {
                    if (ci == i) {
                        // This is the location of the error.
                        errorText.append("<?>---------------------------------\n");
                    } else if (expectedBreaks[ci]) {
                        // This a non-error expected break position.
                        errorText.append("------------------------------------\n");
                    }
                    if (ci < testText.length()) {
                        // One line per character: code point, general category,
                        // the break property under test, and the character name.
                        c = UTF16.charAt(testText, ci);
                        appendCharToBuf(errorText, c, 11);
                        String gc = UCharacter.getPropertyValueName(UProperty.GENERAL_CATEGORY, UCharacter.getType(c), UProperty.NameChoice.SHORT);
                        appendToBuf(errorText, gc, 8);
                        int extraProp = UCharacter.getIntPropertyValue(c, mk.fCharProperty);
                        String extraPropValue = UCharacter.getPropertyValueName(mk.fCharProperty, extraProp, UProperty.NameChoice.LONG);
                        appendToBuf(errorText, extraPropValue, 20);
                        String charName = UCharacter.getExtendedName(c);
                        appendToBuf(errorText, charName, 40);
                        errorText.append('\n');
                    }
                }
                if (ci == testText.length() && ci != -1) {
                    errorText.append("<>");
                }
                errorText.append("</data>\n");
                // Output the error
                errln(name + " break monkey test error.  " + (expectedBreaks[i] ? "Break expected but not found." : "Break found but not expected.") + "\nOperation = " + errorType + "; random seed = " + seed + ";  buf Idx = " + i + "\n" + errorText);
                break;
            }
        }
        loopCount++;
    }
}

Also used : List(java.util.List) ArrayList(java.util.ArrayList) UnicodeSet(android.icu.text.UnicodeSet)

Example 100 with UnicodeSet

Use of android.icu.text.UnicodeSet in project j2objc by Google.

In class BasicTest, method TestSkippable.

/**
 * Compares the four {@code NF*_Inert} sets built from runtime property data against the
 * sets produced by {@code initSkippables()}. For every pair that differs, an error is
 * reported followed by a dump of both set differences and the intersection.
 */
@Test
public void TestSkippable() {
    // One set per normalization form, listed in the order NFD, NFC, NFKD, NFKC.
    final UnicodeSet[] skipSets = {
        new UnicodeSet(), // NFD
        new UnicodeSet(), // NFC
        new UnicodeSet(), // NFKD
        new UnicodeSet()  // NFKC
    };
    UnicodeSet[] expectSets = {
        new UnicodeSet(), new UnicodeSet(), new UnicodeSet(), new UnicodeSet()
    };

    // build NF*Skippable sets from runtime data
    skipSets[D].applyPattern("[:NFD_Inert:]");
    skipSets[C].applyPattern("[:NFC_Inert:]");
    skipSets[KD].applyPattern("[:NFKD_Inert:]");
    skipSets[KC].applyPattern("[:NFKC_Inert:]");

    expectSets = initSkippables(expectSets);
    if (expectSets[D].contains(0x0350)) {
        errln("expectSets[D] contains 0x0350");
    }

    for (int i = 0; i < expectSets.length; ++i) {
        final UnicodeSet actual = skipSets[i];
        final UnicodeSet expected = expectSets[i];
        if (actual.equals(expected)) {
            continue;
        }
        errln("error: TestSkippable skipSets[" + i + "]!=expectedSets[" + i + "]\n");

        // Note: This used to depend on hardcoded UnicodeSet patterns generated by
        // Mark's unicodetools.com.ibm.text.UCD.NFSkippable, by
        // running com.ibm.text.UCD.Main with the option NFSkippable.
        // Since ICU 4.6/Unicode 6, we are generating the
        // expectSets ourselves in initSkippables().
        final String skipPattern = actual.toPattern(true);
        final String skipMinusExpect =
                ((UnicodeSet) actual.clone()).removeAll(expected).toPattern(true);
        final String expectMinusSkip =
                ((UnicodeSet) expected.clone()).removeAll(actual).toPattern(true);
        final UnicodeSet intersection = ((UnicodeSet) expected.clone()).retainAll(actual);
        final String commonPattern = intersection.toPattern(true);

        final StringBuilder report = new StringBuilder();
        report.append("\n\nskip=       ").append(skipPattern).append("\n\n");
        report.append("skip-expect=").append(skipMinusExpect);
        report.append("\n\nexpect-skip=").append(expectMinusSkip).append("\n\n");
        report.append("\n\nintersection(expect,skip)=").append(commonPattern);
        // Two trailing newlines, appended as single characters.
        report.append('\n').append('\n');
        errln(report.toString());
    }
}

Also used : UnicodeSet(android.icu.text.UnicodeSet) Test(org.junit.Test)

Aggregations

UnicodeSet (android.icu.text.UnicodeSet)158 Test (org.junit.Test)112 UnicodeSetIterator (android.icu.text.UnicodeSetIterator)25 Transliterator (android.icu.text.Transliterator)19 ReplaceableString (android.icu.text.ReplaceableString)14 ULocale (android.icu.util.ULocale)13 CaseInsensitiveString (android.icu.util.CaseInsensitiveString)9 Normalizer2 (android.icu.text.Normalizer2)7 RuleBasedCollator (android.icu.text.RuleBasedCollator)7 ArrayList (java.util.ArrayList)5 HashSet (java.util.HashSet)5 FilteredNormalizer2 (android.icu.text.FilteredNormalizer2)4 SpoofChecker (android.icu.text.SpoofChecker)4 TreeSet (java.util.TreeSet)4 UnicodeMap (android.icu.dev.util.UnicodeMap)3 AlphabeticIndex (android.icu.text.AlphabeticIndex)3 CollationKey (android.icu.text.CollationKey)3 RawCollationKey (android.icu.text.RawCollationKey)3 CheckResult (android.icu.text.SpoofChecker.CheckResult)3 SpanCondition (android.icu.text.UnicodeSet.SpanCondition)3