Search in sources :

Example 16 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class TransliteratorTest method TestToRules.

/**
 * Checks that rules and set patterns survive a round trip through
 * {@code Transliterator.toRules} and {@code UnicodeSet.toPattern}.
 *
 * The test data is a flat array of triples: a kind marker (RBT or SET),
 * the input rules/pattern, and the expected escaped output.
 */
@Test
public void TestToRules() {
    String RBT = "rbt";
    String SET = "set";
    // Each row is one triple: kind, input, expected (escaped) output.
    String[] DATA = {
        RBT, "$a=\\u4E61; [$a] > A;", "[\\u4E61] > A;",
        RBT, "$white=[[:Zs:][:Zl:]]; $white{a} > A;", "[[:Zs:][:Zl:]]{a} > A;",
        SET, "[[:Zs:][:Zl:]]", "[[:Zs:][:Zl:]]",
        SET, "[:Ps:]", "[:Ps:]",
        SET, "[:L:]", "[:L:]",
        SET, "[[:L:]-[A]]", "[[:L:]-[A]]",
        SET, "[~[:Lu:][:Ll:]]", "[~[:Lu:][:Ll:]]",
        SET, "[~[a-z]]", "[~[a-z]]",
        RBT, "$white=[:Zs:]; $black=[^$white]; $black{a} > A;", "[^[:Zs:]]{a} > A;",
        RBT, "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;", "[[a-z]-[:Zs:]]{a} > A;",
        RBT, "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;", "[[:Zs:]&[a-z]]{a} > A;",
        RBT, "$a=[:Zs:]; $b=[x$a]; $b{a} > A;", "[x[:Zs:]]{a} > A;",
        RBT, "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;" + "$macron = \\u0304 ;" + "$evowel = [aeiouyAEIOUY] ;" + "$iotasub = \\u0345 ;" + "($evowel $macron $accentMinus *) i > | $1 $iotasub ;", "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
        RBT, "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;"
    };
    for (int d = 0; d < DATA.length; d += 3) {
        // Compare by value, not by reference, so the dispatch does not depend
        // on the array holding the very same String object as RBT.
        if (RBT.equals(DATA[d])) {
            // Transliterator test
            Transliterator t = Transliterator.createFromRules("ID", DATA[d + 1], Transliterator.FORWARD);
            if (t == null) {
                errln("FAIL: createFromRules failed");
                return;
            }
            String rules, escapedRules;
            rules = t.toRules(false);
            escapedRules = t.toRules(true);
            // The expected data is stored escaped; unescape it for the
            // comparison against the unescaped rules.
            String expRules = Utility.unescape(DATA[d + 2]);
            String expEscapedRules = DATA[d + 2];
            if (rules.equals(expRules)) {
                logln("Ok: " + DATA[d + 1] + " => " + Utility.escape(rules));
            } else {
                errln("FAIL: " + DATA[d + 1] + " => " + Utility.escape(rules + ", exp " + expRules));
            }
            if (escapedRules.equals(expEscapedRules)) {
                logln("Ok: " + DATA[d + 1] + " => " + escapedRules);
            } else {
                errln("FAIL: " + DATA[d + 1] + " => " + escapedRules + ", exp " + expEscapedRules);
            }
        } else {
            // UnicodeSet test
            String pat = DATA[d + 1];
            String expToPat = DATA[d + 2];
            UnicodeSet set = new UnicodeSet(pat);
            // Adjust spacing etc. as necessary.
            String toPat;
            toPat = set.toPattern(true);
            if (expToPat.equals(toPat)) {
                logln("Ok: " + pat + " => " + toPat);
            } else {
                // Report the expected pattern, not the input pattern.
                errln("FAIL: " + pat + " => " + Utility.escape(toPat) + ", exp " + Utility.escape(expToPat));
            }
        }
    }
}
Also used : CaseInsensitiveString(android.icu.util.CaseInsensitiveString) ReplaceableString(android.icu.text.ReplaceableString) UnicodeSet(android.icu.text.UnicodeSet) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Example 17 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class RoundTripTest method getRepresentativeBoundaryHangul.

/**
 * Builds a set of two-character strings obtained by composing sample
 * sequences of Hangul jamo and keeping only those whose composed form has
 * length 2.
 *
 * @return the accumulated set of composed two-character samples
 */
private static UnicodeSet getRepresentativeBoundaryHangul() {
    UnicodeSet collected = new UnicodeSet();

    // Jamo classes selected by the Hangul_Syllable_Type property.
    UnicodeSet leadingJamo = new UnicodeSet("[:hst=L:]");
    UnicodeSet vowelJamo = new UnicodeSet("[:hst=V:]");
    UnicodeSet trailingJamo = new UnicodeSet("[:hst=T:]");

    // U+1100 HANGUL CHOSEONG KIYEOK followed by U+1161 HANGUL JUNGSEONG A
    String kiyeokA = "\u1100\u1161";
    // U+1100 HANGUL CHOSEONG KIYEOK
    String kiyeok = "\u1100";
    // U+1161 HANGUL JUNGSEONG A
    String jungseongA = "\u1161";
    // U+110B HANGUL CHOSEONG IEUNG
    String ieung = "\u110B";
    UnicodeSet kiyeokOrIeung = new UnicodeSet("[\u1100\u110B]");

    // First pass: (KIYEOK | IEUNG) + V + IEUNG + V
    UnicodeSetIterator leadIt = new UnicodeSetIterator(kiyeokOrIeung);
    while (leadIt.next()) {
        String lead = leadIt.getString();
        UnicodeSetIterator firstVowelIt = new UnicodeSetIterator(vowelJamo);
        while (firstVowelIt.next()) {
            String head = lead + firstVowelIt.getString() + ieung;
            UnicodeSetIterator secondVowelIt = new UnicodeSetIterator(vowelJamo);
            while (secondVowelIt.next()) {
                String composed = Normalizer.compose(head + secondVowelIt.getString(), false);
                if (composed.length() == 2) {
                    collected.add(composed);
                }
            }
        }
    }

    // Second pass: for every leading jamo L, use L + JUNGSEONG A as the tail.
    UnicodeSetIterator leadingIt = new UnicodeSetIterator(leadingJamo);
    while (leadingIt.next()) {
        final String tail = leadingIt.getString() + jungseongA;

        // KIYEOK + V + L + JUNGSEONG A
        UnicodeSetIterator vowelIt = new UnicodeSetIterator(vowelJamo);
        while (vowelIt.next()) {
            String composed = Normalizer.compose(kiyeok + vowelIt.getString() + tail, false);
            if (composed.length() == 2) {
                collected.add(composed);
            }
        }

        // KIYEOK + JUNGSEONG A + T + L + JUNGSEONG A
        UnicodeSetIterator trailingIt = new UnicodeSetIterator(trailingJamo);
        while (trailingIt.next()) {
            String composed = Normalizer.compose(kiyeokA + trailingIt.getString() + tail, false);
            if (composed.length() == 2) {
                collected.add(composed);
            }
        }
    }
    return collected;
}
Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) UnicodeSet(android.icu.text.UnicodeSet)

Example 18 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class TransliteratorTest method TestCompoundFilter.

/**
 * Compound filter semantics were originally not implemented
 * correctly.  Originally, each component filter f(i) was replaced by
 * f'(i) = f(i) && g, where g is the filter for the compound
 * transliterator.
 *
 * From Mark:
 *
 * Suppose I have a transliterator X. Internally X is
 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
 *
 * The compound should convert all Greek characters (through Latin) to
 * Cyrillic, then lowercase the result. The filter should say "don't
 * touch 'A' in the original". But because an intermediate result
 * happens to go through "A", the Greek Alpha gets hung up.
 */
@Test
public void TestCompoundFilter() {
    final String compoundId = "Greek-Latin; Latin-Greek; Lower";
    Transliterator compound = Transliterator.getInstance(compoundId, Transliterator.FORWARD);

    // The filter excludes only Latin 'A' from the compound as a whole.
    UnicodeSet everythingButA = new UnicodeSet("[^A]");
    compound.setFilter(everythingButA);

    // Only the 'A' at index 1 should remain unchanged
    expect(compound,
            CharsToUnicodeString("BA\\u039A\\u0391"),
            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
}
Also used : UnicodeSet(android.icu.text.UnicodeSet) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Example 19 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class TransliteratorTest method checkRegistry.

/**
 * Verifies that setting a filter on one instance obtained for {@code id}
 * does not show up on a second instance obtained for the same ID.
 *
 * @param id the transliterator ID to look up
 */
private void checkRegistry(String id) {
    // Install a distinctive filter on the first instance.
    Transliterator firstInstance = Transliterator.getInstance(id);
    final UnicodeSet markerFilter = new UnicodeSet("[a-z5]");
    firstInstance.setFilter(markerFilter);

    // A second lookup must not carry the marker filter.
    Transliterator secondInstance = Transliterator.getInstance(id);
    UnicodeFilter observedFilter = secondInstance.getFilter();
    boolean registryWasMutated = markerFilter.equals(observedFilter);
    if (registryWasMutated) {
        errln("Changed what is in registry for " + id);
    }
}
Also used : UnicodeFilter(android.icu.text.UnicodeFilter) UnicodeSet(android.icu.text.UnicodeSet) Transliterator(android.icu.text.Transliterator)

Example 20 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class TransliteratorTest method TestGurmukhiDevanagari.

/**
 * Test Gurmukhi-Devanagari Tippi and Bindi
 */
@Test
public void TestGurmukhiDevanagari() {
    // The rule under test:
    //   U+0902 preceded by a vowel      maps to U+0A02
    //   U+0902 preceded by a consonant  maps to U+0A70
    UnicodeSet vowel = new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]");
    UnicodeSet non_vowel = new UnicodeSet("[\u0915-\u0928\u092A-\u0930]");
    Transliterator devaToGuru = Transliterator.getInstance("Devanagari-Gurmukhi");

    // Two-character buffers; position 0 is overwritten for every sample.
    StringBuffer input = new StringBuffer(" \u0902");
    StringBuffer expected = new StringBuffer(" \u0A02");

    // Vowel case: the expected first character is the source code point
    // plus 0x0100, and the second character stays U+0A02.
    for (UnicodeSetIterator it = new UnicodeSetIterator(vowel); it.next(); ) {
        int cp = it.codepoint;
        input.setCharAt(0, (char) cp);
        expected.setCharAt(0, (char) (cp + 0x0100));
        expect(devaToGuru, input.toString(), expected.toString());
    }

    // Consonant case: the second expected character becomes U+0A70.
    expected.setCharAt(1, '\u0A70');
    for (UnicodeSetIterator it = new UnicodeSetIterator(non_vowel); it.next(); ) {
        int cp = it.codepoint;
        input.setCharAt(0, (char) cp);
        expected.setCharAt(0, (char) (cp + 0x0100));
        expect(devaToGuru, input.toString(), expected.toString());
    }
}
Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) UnicodeSet(android.icu.text.UnicodeSet) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Aggregations

UnicodeSet (android.icu.text.UnicodeSet): 158, Test (org.junit.Test): 112, UnicodeSetIterator (android.icu.text.UnicodeSetIterator): 25, Transliterator (android.icu.text.Transliterator): 19, ReplaceableString (android.icu.text.ReplaceableString): 14, ULocale (android.icu.util.ULocale): 13, CaseInsensitiveString (android.icu.util.CaseInsensitiveString): 9, Normalizer2 (android.icu.text.Normalizer2): 7, RuleBasedCollator (android.icu.text.RuleBasedCollator): 7, ArrayList (java.util.ArrayList): 5, HashSet (java.util.HashSet): 5, FilteredNormalizer2 (android.icu.text.FilteredNormalizer2): 4, SpoofChecker (android.icu.text.SpoofChecker): 4, TreeSet (java.util.TreeSet): 4, UnicodeMap (android.icu.dev.util.UnicodeMap): 3, AlphabeticIndex (android.icu.text.AlphabeticIndex): 3, CollationKey (android.icu.text.CollationKey): 3, RawCollationKey (android.icu.text.RawCollationKey): 3, CheckResult (android.icu.text.SpoofChecker.CheckResult): 3, SpanCondition (android.icu.text.UnicodeSet.SpanCondition): 3