Search in sources :

Example 66 with Transliterator

use of android.icu.text.Transliterator in project j2objc by google.

the class TransliteratorTest method TestPositionHandling.

/**
 * Confirm that the contextStart, contextLimit, start, and limit
 * behave correctly.
 */
@Test
public void TestPositionHandling() {
    // Array of 3n items
    // Each item is <rules>, <input>, <expected output>
    String[] DATA = { "a{t} > SS ; {t}b > UU ; {t} > TT ;", // pos 0,9,0,9
    "xtat txtb", "xTTaSS TTxUUb", "a{t} > SS ; {t}b > UU ; {t} > TT ;", // pos 2,9,3,8
    "xtat txtb", "xtaSS TTxUUb", "a{t} > SS ; {t}b > UU ; {t} > TT ;", // pos 3,8,3,8
    "xtat txtb", "xtaTT TTxTTb" };
    // Array of 4n positions -- these go with the DATA array
    // They are: contextStart, contextLimit, start, limit
    int[] POS = { 0, 9, 0, 9, 2, 9, 3, 8, 3, 8, 3, 8 };
    int n = DATA.length / 3;
    for (int i = 0; i < n; i++) {
        Transliterator t = Transliterator.createFromRules("<ID>", DATA[3 * i], Transliterator.FORWARD);
        Transliterator.Position pos = new Transliterator.Position(POS[4 * i], POS[4 * i + 1], POS[4 * i + 2], POS[4 * i + 3]);
        ReplaceableString rsource = new ReplaceableString(DATA[3 * i + 1]);
        t.transliterate(rsource, pos);
        t.finishTransliteration(rsource, pos);
        String result = rsource.toString();
        String exp = DATA[3 * i + 2];
        expectAux(Utility.escape(DATA[3 * i]), DATA[3 * i + 1], result, result.equals(exp), exp);
    }
}
Also used : ReplaceableString(android.icu.text.ReplaceableString) CaseInsensitiveString(android.icu.util.CaseInsensitiveString) ReplaceableString(android.icu.text.ReplaceableString) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Example 67 with Transliterator

use of android.icu.text.Transliterator in project j2objc by google.

the class TransliteratorTest method TestThai.

/**
 *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
 *              TODO: confirm that the expected results are correct.
 *              For now, test just confirms that C++ and Java give identical results.
 */
@Test
public void TestThai() {
    Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
    String thaiText = "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" + "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" + "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" + "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. \u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" + "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" + "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" + "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" + "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" + "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" + "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" + "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" + "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" + "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" + "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" + "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" + "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" + "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" + "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" + "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" + "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" + "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" + "\u0e01\u0e25\u0e38\u0e48\u0e21. \u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" + "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" + "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" + " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" + "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" + "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" + " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" + "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" + "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b.";
    String latinText = "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" + "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" + "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" + "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" + "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" + " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " + "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" + "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. m\u1ecb\u0300m" + "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" + "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" + "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" + "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" + " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" + "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" + " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" + "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" + "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" + "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb.";
    expect(tr, thaiText, latinText);
}
Also used : CaseInsensitiveString(android.icu.util.CaseInsensitiveString) ReplaceableString(android.icu.text.ReplaceableString) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Example 68 with Transliterator

use of android.icu.text.Transliterator in project j2objc by google.

the class TransliteratorTest method TestKeyboard2.

/**
 * Basic test of keyboard with cursor.
 */
@Test
public void TestKeyboard2() {
    Transliterator t = Transliterator.createFromRules("<ID>", "ych>Y;" + "ps>|y;" + "ch>x;" + "a>A;", Transliterator.FORWARD);
    String[] DATA = { // insertion, buffer
    "a", "A", "p", "Ap", // modified for rollback - "Ay",
    "s", // modified for rollback - "Ay",
    "Aps", // modified for rollback - "Ayc",
    "c", // modified for rollback - "Ayc",
    "Apsc", "a", "AycA", "p", "AycAp", // modified for rollback - "AycAy",
    "s", // modified for rollback - "AycAy",
    "AycAps", // modified for rollback - "AycAyc",
    "c", // modified for rollback - "AycAyc",
    "AycApsc", "h", "AycAY", // null means finishKeyboardTransliteration
    null, // null means finishKeyboardTransliteration
    "AycAY" };
    keyboardAux(t, DATA);
}
Also used : CaseInsensitiveString(android.icu.util.CaseInsensitiveString) ReplaceableString(android.icu.text.ReplaceableString) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Example 69 with Transliterator

use of android.icu.text.Transliterator in project j2objc by google.

the class TransliteratorTest method TestHalfwidthFullwidth.

/**
 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
 */
@Test
public void TestHalfwidthFullwidth() {
    Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth");
    Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth");
    // Array of 3n items
    // Each item is
    // "hf"|"fh"|"both",
    // <Halfwidth>,
    // <Fullwidth>
    String[] DATA = { "both", "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020", "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000" };
    for (int i = 0; i < DATA.length; i += 3) {
        switch(DATA[i].charAt(0)) {
            case // Halfwidth-Fullwidth only
            'h':
                expect(hf, DATA[i + 1], DATA[i + 2]);
                break;
            case // Fullwidth-Halfwidth only
            'f':
                expect(fh, DATA[i + 2], DATA[i + 1]);
                break;
            case // both directions
            'b':
                expect(hf, DATA[i + 1], DATA[i + 2]);
                expect(fh, DATA[i + 2], DATA[i + 1]);
                break;
        }
    }
}
Also used : CaseInsensitiveString(android.icu.util.CaseInsensitiveString) ReplaceableString(android.icu.text.ReplaceableString) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Example 70 with Transliterator

use of android.icu.text.Transliterator in project j2objc by google.

the class TransliteratorTest method TestAny.

/**
 * Test Any-X transliterators with sample letters from all scripts.
 */
@Test
public void TestAny() {
    UnicodeSet alphabetic = (UnicodeSet) new UnicodeSet("[:alphabetic:]").freeze();
    StringBuffer testString = new StringBuffer();
    for (int i = 0; i < UScript.CODE_LIMIT; ++i) {
        UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic);
        int count = 5;
        for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next(); ) {
            testString.append(it.getString());
            if (--count < 0)
                break;
        }
    }
    logln("Sample set for Any-Latin: " + testString);
    Transliterator anyLatin = Transliterator.getInstance("any-Latn");
    String result = anyLatin.transliterate(testString.toString());
    logln("Sample result for Any-Latin: " + result);
}
Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) CaseInsensitiveString(android.icu.util.CaseInsensitiveString) ReplaceableString(android.icu.text.ReplaceableString) UnicodeSet(android.icu.text.UnicodeSet) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Aggregations

Transliterator (android.icu.text.Transliterator)97 Test (org.junit.Test)88 ReplaceableString (android.icu.text.ReplaceableString)66 CaseInsensitiveString (android.icu.util.CaseInsensitiveString)57 UnicodeSet (android.icu.text.UnicodeSet)19 UnicodeSetIterator (android.icu.text.UnicodeSetIterator)5 ULocale (android.icu.util.ULocale)3 Enumeration (java.util.Enumeration)3 UnicodeFilter (android.icu.text.UnicodeFilter)2 File (java.io.File)2 FileOutputStream (java.io.FileOutputStream)2 OutputStreamWriter (java.io.OutputStreamWriter)2 PrintWriter (java.io.PrintWriter)2 ArrayList (java.util.ArrayList)2 UnicodeMap (android.icu.dev.util.UnicodeMap)1 CanonicalIterator (android.icu.text.CanonicalIterator)1 Normalizer2 (android.icu.text.Normalizer2)1 Replaceable (android.icu.text.Replaceable)1 BufferedWriter (java.io.BufferedWriter)1 HashSet (java.util.HashSet)1