Search in sources :

Example 66 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class LocaleDataTest method TestEnglishExemplarCharacters.

// Simple coverage check of the exemplar character types for English: every non-null
// exemplar set must contain the sample character listed for its type, and asking for
// a type equal to LocaleData.ES_COUNT must be rejected.
@Test
public void TestEnglishExemplarCharacters() {
    // One sample character per exemplar type, looked up by type; 0 disables the check.
    final char[] testChars = {
        0x61, // standard
        0xE1, // auxiliary
        0x41, // index
        0,    // filler for deprecated currency exemplar
        0x2D  // punctuation
    };
    LocaleData ld = LocaleData.getInstance(ULocale.ENGLISH);
    for (int type = 0; type < LocaleData.ES_COUNT; type++) {
        UnicodeSet exSet = ld.getExemplarSet(0, type);
        if (exSet == null) {
            // Nothing to verify for a type without an exemplar set.
            continue;
        }
        final char sample = testChars[type];
        final boolean missing = sample > 0 && !exSet.contains(sample);
        if (missing) {
            errln("Character '" + sample + "' is not included in exemplar type " + type);
        }
    }
    try {
        // out of bounds value
        ld.getExemplarSet(0, LocaleData.ES_COUNT);
        throw new ICUException("Test failure; should throw exception");
    } catch (IllegalArgumentException expected) {
        // The rejection is expected to wrap the underlying array index failure.
        final String causeName = expected.getCause().getClass().getName();
        assertEquals("", "java.lang.ArrayIndexOutOfBoundsException", causeName);
    }
}
Also used : LocaleData(android.icu.util.LocaleData) ICUException(android.icu.util.ICUException) UnicodeSet(android.icu.text.UnicodeSet) Test(org.junit.Test)

Example 67 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class LocaleDataTest method TestExemplarSetTypes.

// Covers every exemplar set type of the static LocaleData.getExemplarSet method for a
// handful of locales. A null or empty result is only logged; an exception is an error.
// See #9785, #9794 and #9795
@Test
public void TestExemplarSetTypes() {
    final String[] testLocales = {
        "am", // No auxiliary / index exemplars as of ICU 50
        "en",
        "th", // #9785
        "foo" // Bogus locale
    };
    final int[] testTypes = {
        LocaleData.ES_STANDARD,
        LocaleData.ES_AUXILIARY,
        LocaleData.ES_INDEX,
        LocaleData.ES_CURRENCY,
        LocaleData.ES_PUNCTUATION
    };
    // Display names, parallel to testTypes.
    final String[] testTypeNames = {
        "ES_STANDARD",
        "ES_AUXILIARY",
        "ES_INDEX",
        "ES_CURRENCY",
        "ES_PUNCTUATION"
    };
    for (int localeIdx = 0; localeIdx < testLocales.length; localeIdx++) {
        final ULocale loc = new ULocale(testLocales[localeIdx]);
        for (int typeIdx = 0; typeIdx < testTypes.length; typeIdx++) {
            // Common "locale(TYPE)" prefix of every message for this combination.
            final String label = loc + "(" + testTypeNames[typeIdx] + ")";
            try {
                final UnicodeSet exemplars = LocaleData.getExemplarSet(loc, 0, testTypes[typeIdx]);
                if (exemplars == null) {
                    // Not sure null is really OK (#9795)
                    logln(label + " returned null");
                } else if (exemplars.isEmpty()) {
                    // This is probably reasonable when data is absent
                    logln(label + " returned an empty set");
                }
            } catch (Exception e) {
                errln(label + " Exception:" + e.getMessage());
            }
        }
    }
}
Also used : ULocale(android.icu.util.ULocale) UnicodeSet(android.icu.text.UnicodeSet) ICUException(android.icu.util.ICUException) Test(org.junit.Test)

Example 68 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class CollationDataBuilder method setDigitTags.

// For every code point in [:Nd:] whose trie value is neither FALLBACK_CE32 nor
// UNASSIGNED_CE32, stores the old CE32 via addCE32 and replaces the trie value with a
// DIGIT_TAG CE32 built from the returned index and the code point's digit value.
protected void setDigitTags() {
    UnicodeSetIterator digitIter = new UnicodeSetIterator(new UnicodeSet("[:Nd:]"));
    while (digitIter.next()) {
        assert (digitIter.codepoint != UnicodeSetIterator.IS_STRING);
        final int c = digitIter.codepoint;
        final int oldCE32 = trie.get(c);
        if (oldCE32 == Collation.FALLBACK_CE32 || oldCE32 == Collation.UNASSIGNED_CE32) {
            // Leave fallback/unassigned entries as they are.
            continue;
        }
        final int index = addCE32(oldCE32);
        if (index > Collation.MAX_INDEX) {
            // BufferOverflowException is a better fit
            // but cannot be constructed with a message string.
            throw new IndexOutOfBoundsException("too many mappings");
        }
        // UCharacter.digit(c) stands in for u_charDigitValue(c)
        final int digitValue = UCharacter.digit(c);
        final int digitCE32 =
                Collation.makeCE32FromTagIndexAndLength(Collation.DIGIT_TAG, index, digitValue);
        trie.set(c, digitCE32);
    }
}
Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) UnicodeSet(android.icu.text.UnicodeSet)

Example 69 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class CollationBuilder method addTailComposites.

// Looks at the composites whose decomposition starts with the last starter of nfdString
// and, for each one that can be merged into the string, adds a mapping (plus closure)
// when the resulting CEs differ from what is already there.
private void addTailComposites(CharSequence nfdPrefix, CharSequence nfdString) {
    // Scan backwards for the last starter (combining class 0) in the NFD string.
    int indexAfterLastStarter = nfdString.length();
    int lastStarter;
    while (true) {
        if (indexAfterLastStarter == 0) {
            // no starter at all
            return;
        }
        lastStarter = Character.codePointBefore(nfdString, indexAfterLastStarter);
        if (nfd.getCombiningClass(lastStarter) == 0) {
            break;
        }
        indexAfterLastStarter -= Character.charCount(lastStarter);
    }
    // No closure to Hangul syllables since we decompose them on the fly.
    if (Hangul.isJamoL(lastStarter)) {
        return;
    }

    // Are there any composites whose decomposition starts with the lastStarter?
    // Note: Normalizer2Impl does not currently return start sets for NFC_QC=Maybe characters.
    // We might find some more equivalent mappings here if it did.
    UnicodeSet composites = new UnicodeSet();
    final boolean hasComposites = nfcImpl.getCanonStartSet(lastStarter, composites);
    if (!hasComposites) {
        return;
    }

    // Scratch buffers handed to the helpers for every composite.
    StringBuilder newNFDString = new StringBuilder();
    StringBuilder newString = new StringBuilder();
    long[] newCEs = new long[Collation.MAX_EXPANSION_LENGTH];
    for (UnicodeSetIterator compositeIter = new UnicodeSetIterator(composites); compositeIter.next(); ) {
        assert (compositeIter.codepoint != UnicodeSetIterator.IS_STRING);
        final int composite = compositeIter.codepoint;
        final String decomp = nfd.getDecomposition(composite);
        final boolean merged = mergeCompositeIntoString(
                nfdString, indexAfterLastStarter, composite, decomp, newNFDString, newString);
        if (!merged) {
            continue;
        }
        final int newCEsLength = dataBuilder.getCEs(nfdPrefix, newNFDString, newCEs, 0);
        if (newCEsLength > Collation.MAX_EXPANSION_LENGTH) {
            // Ignore mappings that we cannot store.
            continue;
        }
        // Note: It is possible that the newCEs do not make use of the mapping
        // for which we are adding the tail composites, in which case we might be adding
        // unnecessary mappings.
        // For example, when we add tail composites for ae^ (^=combining circumflex),
        // UCA discontiguous-contraction matching does not find any matches
        // for ae_^ (_=any combining diacritic below) *unless* there is also
        // a contraction mapping for ae.
        // Thus, if there is no ae contraction, then the ae^ mapping is ignored
        // while fetching the newCEs for ae_^.
        // TODO: Try to detect this effectively.
        // (Alternatively, print a warning when prefix contractions are missing.)

        // We do not need an explicit mapping for the NFD strings.
        // It is fine if the NFD input collates like this via a sequence of mappings.
        // It also saves a little bit of space, and may reduce the set of characters with contractions.
        final int ce32 = addIfDifferent(
                nfdPrefix, newString, newCEs, newCEsLength, Collation.UNASSIGNED_CE32);
        if (ce32 == Collation.UNASSIGNED_CE32) {
            // was not different, nothing was added
            continue;
        }
        // was different, was added
        addOnlyClosure(nfdPrefix, newNFDString, newCEs, newCEsLength, ce32);
    }
}
Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) UnicodeSet(android.icu.text.UnicodeSet)

Example 70 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class TestBoilerplate method getSet.

/**
 * Collects the keys of {@code m} whose mapped value equals {@code value}.
 *
 * <p>Values are compared null-safely: a {@code null} map value matches only a
 * {@code null} {@code value}, instead of raising a {@link NullPointerException}.
 *
 * @param m     map from integer keys to values
 * @param value the value to look for; may be {@code null}
 * @return a new set containing {@code key.intValue()} for every matching entry;
 *         empty when nothing matches
 */
public static <T> UnicodeSet getSet(Map<Integer, T> m, T value) {
    UnicodeSet result = new UnicodeSet();
    // Walk the entries directly so each key needs only one lookup.
    for (Map.Entry<Integer, T> entry : m.entrySet()) {
        T val = entry.getValue();
        // Same direction as the original val.equals(value), but tolerant of null values.
        boolean matches = (val == null) ? (value == null) : val.equals(value);
        if (matches) {
            result.add(entry.getKey().intValue());
        }
    }
    return result;
}
Also used : UnicodeSet(android.icu.text.UnicodeSet)

Aggregations

UnicodeSet (android.icu.text.UnicodeSet): 158; Test (org.junit.Test): 112; UnicodeSetIterator (android.icu.text.UnicodeSetIterator): 25; Transliterator (android.icu.text.Transliterator): 19; ReplaceableString (android.icu.text.ReplaceableString): 14; ULocale (android.icu.util.ULocale): 13; CaseInsensitiveString (android.icu.util.CaseInsensitiveString): 9; Normalizer2 (android.icu.text.Normalizer2): 7; RuleBasedCollator (android.icu.text.RuleBasedCollator): 7; ArrayList (java.util.ArrayList): 5; HashSet (java.util.HashSet): 5; FilteredNormalizer2 (android.icu.text.FilteredNormalizer2): 4; SpoofChecker (android.icu.text.SpoofChecker): 4; TreeSet (java.util.TreeSet): 4; UnicodeMap (android.icu.dev.util.UnicodeMap): 3; AlphabeticIndex (android.icu.text.AlphabeticIndex): 3; CollationKey (android.icu.text.CollationKey): 3; RawCollationKey (android.icu.text.RawCollationKey): 3; CheckResult (android.icu.text.SpoofChecker.CheckResult): 3; SpanCondition (android.icu.text.UnicodeSet.SpanCondition): 3