Examples with UnicodeSet - android.icu.text.UnicodeSet

Example 86 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class CollationMiscTest method TestImport.

/**
 * Builds one collator by concatenating the "vi" and "es" rule strings and
 * another from the rule string "[import vi][import es]", then warns if the
 * two disagree on their tailored sets or on the collation key of any string
 * in the concatenated collator's tailored set.
 */
@Test
public void TestImport() {
    try {
        RuleBasedCollator vietnamese = (RuleBasedCollator) Collator.getInstance(new ULocale("vi"));
        RuleBasedCollator spanish = (RuleBasedCollator) Collator.getInstance(new ULocale("es"));
        String concatenatedRules = vietnamese.getRules() + spanish.getRules();
        RuleBasedCollator concatenated = new RuleBasedCollator(concatenatedRules);
        RuleBasedCollator imported = new RuleBasedCollator("[import vi][import es]");

        UnicodeSet concatenatedTailoring = concatenated.getTailoredSet();
        UnicodeSet importedTailoring = imported.getTailoredSet();
        boolean sameTailoring = concatenatedTailoring.equals(importedTailoring);
        if (!sameTailoring) {
            warnln("Tailored set not equal");
        }

        // Compare keys string by string over everything the concatenated collator tailors.
        UnicodeSetIterator tailored = new UnicodeSetIterator(concatenatedTailoring);
        while (tailored.next()) {
            String text = tailored.getString();
            CollationKey expectedKey = concatenated.getCollationKey(text);
            CollationKey importedKey = imported.getCollationKey(text);
            if (!expectedKey.equals(importedKey)) {
                warnln("Collation key's not equal for " + text);
            }
        }
    } catch (Exception e) {
        // Android patch: Add --omitCollationRules to genrb.
        // Any failure above is only logged, not reported as a test error.
        logln("ERROR: in creation of rule based collator");
        // Android patch end.
    }
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) RuleBasedCollator(android.icu.text.RuleBasedCollator) ULocale(android.icu.util.ULocale) CollationKey(android.icu.text.CollationKey) RawCollationKey(android.icu.text.RawCollationKey) UnicodeSet(android.icu.text.UnicodeSet) Test(org.junit.Test)

Example 87 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class AlphabeticIndexTest method TestFirstCharacters.

/**
 * Checks the strings returned by AlphabeticIndex.getFirstCharactersInScripts()
 * for the English index: every script outside the excluded pseudo/symbol-only
 * scripts must be represented, and no script may be represented twice.
 */
@Test
public void TestFirstCharacters() {
    AlphabeticIndex englishIndex = new AlphabeticIndex(Locale.ENGLISH);
    RuleBasedCollator identicalCollator = englishIndex.getCollator();
    identicalCollator.setStrength(Collator.IDENTICAL);
    Collection<String> scriptFirsts = englishIndex.getFirstCharactersInScripts();

    // Verify that each script is represented exactly once.
    // Exclude pseudo-scripts like Common (no letters).
    // Exclude scripts like Braille and Sutton SignWriting
    // because they only have symbols, not letters.
    UnicodeSet unseenScripts = new UnicodeSet("[^[:inherited:][:unknown:][:common:][:Braille:][:SignWriting:]]");

    // NOTE(review): `last` is never reassigned inside the loop, so every entry is
    // only compared against the empty string. Confirm whether a running
    // "previous entry" comparison was intended.
    String last = "";
    for (String first : scriptFirsts) {
        if (identicalCollator.compare(last, first) >= 0) {
            errln("Characters not in order: " + last + " !< " + first);
        }
        int scriptCode = getFirstRealScript(first);
        if (scriptCode != UScript.UNKNOWN) {
            UnicodeSet scriptChars = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, scriptCode);
            // Nothing of this script is left in the unseen set: it has shown up before.
            if (unseenScripts.containsNone(scriptChars)) {
                errln("2nd character in script: " + first + "\t" + new UnicodeSet(unseenScripts).retainAll(scriptChars).toPattern(false));
            }
            unseenScripts.removeAll(scriptChars);
        }
    }

    if (unseenScripts.size() == 0) {
        return;
    }

    // Collect the short name of every script that still has characters left,
    // peeling one whole script off a working copy per iteration.
    StringBuilder scriptNames = new StringBuilder();
    UnicodeSet remaining = new UnicodeSet(unseenScripts);
    for (int c = remaining.charAt(0); c >= 0; c = remaining.charAt(0)) {
        int scriptCode = UScript.getScript(c);
        scriptNames.append(" ").append(UCharacter.getPropertyValueName(UProperty.SCRIPT, scriptCode, UProperty.NameChoice.SHORT));
        remaining.removeAll(new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, scriptCode));
    }
    errln("Missing character from:" + scriptNames + " -- " + unseenScripts);
}

Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) AlphabeticIndex(android.icu.text.AlphabeticIndex) UnicodeSet(android.icu.text.UnicodeSet) Test(org.junit.Test)

Example 88 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class UnicodeSetTest method TestComparison.

/**
 * Exercises the UnicodeSet comparison entry points.
 * <ol>
 *   <li>{@code compareTo(UnicodeSet)} and {@code compareTo(UnicodeSet, ComparisonStyle)},
 *       checked indirectly by sorting three frozen sets through a TreeSet and
 *       comparing the result with a hand-written expected order.</li>
 *   <li>{@code UnicodeSet.compare(int, CharSequence)} and
 *       {@code UnicodeSet.compare(CharSequence, int)}, checked against
 *       {@code String.compareTo} over strings built from boundary code units.</li>
 *   <li>A sanity check that two lists holding the same strings in opposite
 *       order produce equal TreeSets.</li>
 * </ol>
 */
@Test
public void TestComparison() {
    UnicodeSet set1 = new UnicodeSet("[a-b d-g {ch} {zh}]").freeze();
    UnicodeSet set2 = new UnicodeSet("[c-e {ch}]").freeze();
    UnicodeSet set3 = new UnicodeSet("[d m-n z {dh}]").freeze();
    // compareTo(UnicodeSet)
    // do indirectly, by sorting
    List<UnicodeSet> unsorted = Arrays.asList(set3, set2, set1);
    List<UnicodeSet> goalShortest = Arrays.asList(set2, set3, set1);
    List<UnicodeSet> goalLongest = Arrays.asList(set1, set3, set2);
    List<UnicodeSet> goalLex = Arrays.asList(set1, set2, set3);
    // Parameterized (not raw) ArrayList so no unchecked conversion is needed.
    List<UnicodeSet> sorted = new ArrayList<UnicodeSet>(new TreeSet<UnicodeSet>(unsorted));
    assertNotEquals("compareTo-shorter-first", unsorted, sorted);
    assertEquals("compareTo-shorter-first", goalShortest, sorted);
    TreeSet<UnicodeSet> sorted1 = new TreeSet<UnicodeSet>(new Comparator<UnicodeSet>() {

        @Override
        public int compare(UnicodeSet o1, UnicodeSet o2) {
            return o1.compareTo(o2, ComparisonStyle.LONGER_FIRST);
        }
    });
    sorted1.addAll(unsorted);
    sorted = new ArrayList<UnicodeSet>(sorted1);
    assertNotEquals("compareTo-longer-first", unsorted, sorted);
    assertEquals("compareTo-longer-first", goalLongest, sorted);
    sorted1 = new TreeSet<UnicodeSet>(new Comparator<UnicodeSet>() {

        @Override
        public int compare(UnicodeSet o1, UnicodeSet o2) {
            return o1.compareTo(o2, ComparisonStyle.LEXICOGRAPHIC);
        }
    });
    sorted1.addAll(unsorted);
    sorted = new ArrayList<UnicodeSet>(sorted1);
    assertNotEquals("compareTo-lex", unsorted, sorted);
    assertEquals("compareTo-lex", goalLex, sorted);
    // compare(String, int)
    // make a list of interesting combinations
    List<String> sources = Arrays.asList("\u0000", "a", "b", "\uD7FF", "\uD800", "\uDBFF", "\uDC00", "\uDFFF", "\uE000", "\uFFFD", "\uFFFF");
    // All strings of one, two and three source units.
    TreeSet<String> target = new TreeSet<String>();
    for (String s : sources) {
        target.add(s);
        for (String t : sources) {
            target.add(s + t);
            for (String u : sources) {
                target.add(s + t + u);
            }
        }
    }
    // now compare all the combinations. If any of them is a code point, use it.
    int maxErrorCount = 0;
    compare: for (String last : target) {
        for (String curr : target) {
            int lastCount = Character.codePointCount(last, 0, last.length());
            int currCount = Character.codePointCount(curr, 0, curr.length());
            int comparison;
            if (lastCount == 1) {
                comparison = UnicodeSet.compare(last.codePointAt(0), curr);
            } else if (currCount == 1) {
                comparison = UnicodeSet.compare(last, curr.codePointAt(0));
            } else {
                // Neither side is a single code point, so there is no overload to test.
                continue;
            }
            if (comparison != last.compareTo(curr)) {
                // repeat for debugging (gives a place to step into the failing call)
                if (lastCount == 1) {
                    comparison = UnicodeSet.compare(last.codePointAt(0), curr);
                } else if (currCount == 1) {
                    comparison = UnicodeSet.compare(last, curr.codePointAt(0));
                }
                // Cap the number of reported failures so one bug does not flood the log.
                if (maxErrorCount++ > 10) {
                    errln(maxErrorCount + " Failure in comparing " + last + " & " + curr + "\tOmitting others...");
                    break compare;
                }
                errln(maxErrorCount + " Failure in comparing " + last + " & " + curr);
            }
        }
    }
    // compare(Iterable<T>, Iterable<T>)
    int max = 10;
    List<String> test1 = new ArrayList<String>(max);
    List<String> test2 = new ArrayList<String>(max);
    for (int i = 0; i <= max; ++i) {
        test1.add("a" + i);
        // add in reverse order
        test2.add("a" + (max - i));
    }
    assertNotEquals("compare iterable test", test1, test2);
    TreeSet<CharSequence> sortedTest1 = new TreeSet<CharSequence>(test1);
    TreeSet<CharSequence> sortedTest2 = new TreeSet<CharSequence>(test2);
    assertEquals("compare iterable test", sortedTest1, sortedTest2);
}

Also used : ArrayList(java.util.ArrayList) UnicodeSet(android.icu.text.UnicodeSet) Comparator(java.util.Comparator) TreeSet(java.util.TreeSet) Test(org.junit.Test)

Example 89 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class UnicodeSetTest method TestPropertyAccess.

/**
 * For each binary and integer property, each name choice and each property
 * value, builds the set "[:name=value:]" and reports every code point in it
 * whose actual property value differs from the value the set was built for.
 * Failures to obtain a property name, a value name or the set itself are
 * reported and the combination is skipped.
 */
@Test
public void TestPropertyAccess() {
    int visited = 0;
    // test to see that all of the names work
    for (int property = UProperty.BINARY_START; property < UProperty.INT_LIMIT; ++property) {
        ++visited;
        // Skipping tests in the non-exhaustive mode to shorten the test time ticket#6475
        if (TestFmwk.getExhaustiveness() <= 5 && visited % 5 != 0) {
            continue;
        }
        boolean inGap = property >= UProperty.BINARY_LIMIT && property < UProperty.INT_START;
        if (inGap) {
            // skip the gap between the binary and the integer properties
            property = UProperty.INT_START;
        }
        for (int choice = UProperty.NameChoice.SHORT; choice <= UProperty.NameChoice.LONG; ++choice) {
            String propertyName;
            try {
                propertyName = UCharacter.getPropertyName(property, choice);
                if (propertyName == null) {
                    // allow non-existent short names
                    if (choice == UProperty.NameChoice.SHORT) {
                        continue;
                    }
                    // Reported by the catch clause below.
                    throw new NullPointerException();
                }
            } catch (RuntimeException ex) {
                errln("Can't get property name for: " + "Property (" + property + ")" + ", NameChoice: " + choice + ", " + ex.getClass().getName());
                continue;
            }
            logln("Property (" + property + "): " + propertyName);
            for (int expected = UCharacter.getIntPropertyMinValue(property); expected <= UCharacter.getIntPropertyMaxValue(property); ++expected) {
                String valueName;
                try {
                    valueName = UCharacter.getPropertyValueName(property, expected, choice);
                    if (valueName == null) {
                        // allow non-existent short names
                        if (choice == UProperty.NameChoice.SHORT) {
                            continue;
                        }
                        boolean combiningClassProperty = property == UProperty.CANONICAL_COMBINING_CLASS
                                || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
                                || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS;
                        if (combiningClassProperty && !isCccValue(expected)) {
                            // Combining-class values rejected by isCccValue are skipped:
                            // otherwise they are just integer values.
                            continue;
                        }
                        // Reported by the catch clause below.
                        throw new NullPointerException();
                    }
                } catch (RuntimeException ex) {
                    errln("Can't get property value name for: " + "Property (" + property + "): " + propertyName + ", " + "Value (" + expected + ") " + ", NameChoice: " + choice + ", " + ex.getClass().getName());
                    continue;
                }
                logln("Value (" + expected + "): " + valueName);
                UnicodeSet matching;
                try {
                    matching = new UnicodeSet("[:" + propertyName + "=" + valueName + ":]");
                } catch (RuntimeException ex) {
                    errln("Can't create UnicodeSet for: " + "Property (" + property + "): " + propertyName + ", " + "Value (" + expected + "): " + valueName + ", " + ex.getClass().getName());
                    continue;
                }
                // Gather every code point of the set whose real value disagrees.
                UnicodeSet mismatches = new UnicodeSet();
                UnicodeSetIterator members = new UnicodeSetIterator(matching);
                while (members.next()) {
                    int actual = UCharacter.getIntPropertyValue(members.codepoint, property);
                    if (actual != expected) {
                        mismatches.add(members.codepoint);
                    }
                }
                if (mismatches.size() != 0) {
                    errln("Property Value Differs: " + "Property (" + property + "): " + propertyName + ", " + "Value (" + expected + "): " + valueName + ", " + "Differing values: " + mismatches.toPattern(true));
                }
            }
        }
    }
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) UnicodeSet(android.icu.text.UnicodeSet) Test(org.junit.Test)

Example 90 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class UnicodeSetTest method testForSpanGaps.

/**
 * Runs checkSpan over one long test string for every UnicodeSet built from
 * getCombinations(items, i), once with SpanCondition.SIMPLE and once with
 * SpanCondition.CONTAINED, and logs the elapsed nanoseconds of each pass.
 * A pass stops at the first set for which checkSpan reports a problem index.
 */
@Test
public void testForSpanGaps() {
    String[] items = { "a", "b", "c", "{ab}", "{bc}", "{cd}", "{abc}", "{bcd}" };
    // One index per value in [1, 2^items.length); presumably a bitmask selecting
    // items -- verify against getCombinations.
    final int limit = 1 << items.length;

    // build long string for testing: "x" followed by each combination, braces removed
    StringBuilder haystack = new StringBuilder();
    for (int combo = 1; combo < limit; ++combo) {
        haystack.append("x").append(getCombinations(items, combo));
    }
    String longString = haystack.toString().replace("{", "").replace("}", "");

    SpanCondition[] conditions = { SpanCondition.SIMPLE, SpanCondition.CONTAINED };
    String[] timingLabels = { "Time for SIMPLE   :\t", "Time for CONTAINED:\t" };
    for (int pass = 0; pass < conditions.length; ++pass) {
        long startNanos = System.nanoTime();
        for (int combo = 1; combo < limit; ++combo) {
            UnicodeSet candidate = new UnicodeSet("[" + getCombinations(items, combo) + "]");
            int gapAt = checkSpan(longString, candidate, conditions[pass]);
            if (gapAt >= 0) {
                // Always fails here; used to report the offending index.
                assertEquals("Testing " + longString + ", found gap at", -1, gapAt);
                break;
            }
        }
        long endNanos = System.nanoTime();
        logln(timingLabels[pass] + (endNanos - startNanos));
    }
}

Also used : UnicodeSet(android.icu.text.UnicodeSet) Test(org.junit.Test)

Aggregations

UnicodeSet (android.icu.text.UnicodeSet): 158, Test (org.junit.Test): 112, UnicodeSetIterator (android.icu.text.UnicodeSetIterator): 25, Transliterator (android.icu.text.Transliterator): 19, ReplaceableString (android.icu.text.ReplaceableString): 14, ULocale (android.icu.util.ULocale): 13, CaseInsensitiveString (android.icu.util.CaseInsensitiveString): 9, Normalizer2 (android.icu.text.Normalizer2): 7, RuleBasedCollator (android.icu.text.RuleBasedCollator): 7, ArrayList (java.util.ArrayList): 5, HashSet (java.util.HashSet): 5, FilteredNormalizer2 (android.icu.text.FilteredNormalizer2): 4, SpoofChecker (android.icu.text.SpoofChecker): 4, TreeSet (java.util.TreeSet): 4, UnicodeMap (android.icu.dev.util.UnicodeMap): 3, AlphabeticIndex (android.icu.text.AlphabeticIndex): 3, CollationKey (android.icu.text.CollationKey): 3, RawCollationKey (android.icu.text.RawCollationKey): 3, CheckResult (android.icu.text.SpoofChecker.CheckResult): 3, SpanCondition (android.icu.text.UnicodeSet.SpanCondition): 3