Search in sources :

Example 31 with Transliterator

use of android.icu.text.Transliterator in project j2objc by google.

the class TransliteratorTest method TestT5160.

/**
 * Regression test for the threading problem in NormalizationTransliterator
 * reported by ticket #5160.
 *
 * <p>A single "NFC" transliterator instance is shared by one
 * {@code NormTranslitTask} per input string. The tasks are handed to
 * {@code TestUtil.runUntilDone} (presumably executed concurrently -- confirm
 * against TestUtil), and afterwards every task that reports an error message
 * is treated as a test failure.
 *
 * @throws AssertionError if at least one task reports an error message
 */
@Test
public void TestT5160() {
    final String[] testData = { "a", "b", "\u09BE", "A\u0301" };
    final String[] expected = { "a", "b", "\u09BE", "\u00C1" };
    // The same transliterator instance is deliberately given to every task.
    Transliterator translit = Transliterator.getInstance("NFC");
    NormTranslitTask[] tasks = new NormTranslitTask[testData.length];
    for (int i = 0; i < tasks.length; i++) {
        tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]);
    }
    TestUtil.runUntilDone(tasks);

    // Gather every failure instead of printing only the first one to stdout,
    // which previously let the test pass even though a task had failed.
    StringBuilder failures = new StringBuilder();
    for (int i = 0; i < tasks.length; i++) {
        if (tasks[i].getErrorMessage() != null) {
            if (failures.length() > 0) {
                failures.append('\n');
            }
            failures.append("Fail: thread#").append(i).append(' ').append(tasks[i].getErrorMessage());
        }
    }
    if (failures.length() > 0) {
        throw new AssertionError(failures.toString());
    }
}
Also used : CaseInsensitiveString(android.icu.util.CaseInsensitiveString) ReplaceableString(android.icu.text.ReplaceableString) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Example 32 with Transliterator

use of android.icu.text.Transliterator in project j2objc by google.

the class TransliteratorTest method TestSpecialCases.

/**
 * Table-driven check of special-case transliterator IDs.
 *
 * <p>Each entry of {@code registerRules} (ID, rules) is compiled into a
 * forward transliterator and passed to {@code DummyFactory.add}. Every row of
 * {@code testCases} is then resolved by name and applied to its source text.
 * The expected output is the row's third element when present; otherwise it
 * is derived from the resolved ID: the matching {@code Normalizer} form for
 * NFD/NFC/NFKD/NFKC, or {@code UCharacter} case mapping with {@code Locale.US}
 * for Lower/Upper. If none of these apply the expected value stays
 * {@code null} and is passed to {@code expect} as-is.
 *
 * <p>The IDs registered here are always unregistered again, even when a
 * lookup or an expectation throws, so a failure cannot leak registrations
 * into other tests.
 */
@Test
public void TestSpecialCases() {
    // Count of rule sets successfully registered; cleanup touches only these.
    int registered = 0;
    try {
        for (int i = 0; i < registerRules.length; ++i) {
            Transliterator t = Transliterator.createFromRules(registerRules[i][0], registerRules[i][1], Transliterator.FORWARD);
            DummyFactory.add(registerRules[i][0], t);
            registered = i + 1;
        }
        for (int i = 0; i < testCases.length; ++i) {
            String name = testCases[i][0];
            Transliterator t = Transliterator.getInstance(name);
            String id = t.getID();
            String source = testCases[i][1];
            String target = null;
            if (testCases[i].length > 2) {
                // An explicit expected value in the table wins over any derived one.
                target = testCases[i][2];
            } else if (id.equalsIgnoreCase("NFD")) {
                target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFD);
            } else if (id.equalsIgnoreCase("NFC")) {
                target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFC);
            } else if (id.equalsIgnoreCase("NFKD")) {
                target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFKD);
            } else if (id.equalsIgnoreCase("NFKC")) {
                target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFKC);
            } else if (id.equalsIgnoreCase("Lower")) {
                target = UCharacter.toLowerCase(Locale.US, source);
            } else if (id.equalsIgnoreCase("Upper")) {
                target = UCharacter.toUpperCase(Locale.US, source);
            }
            expect(t, source, target);
        }
    } finally {
        // Runs on success and on failure alike.
        for (int i = 0; i < registered; ++i) {
            Transliterator.unregister(registerRules[i][0]);
        }
    }
}
Also used : CaseInsensitiveString(android.icu.util.CaseInsensitiveString) ReplaceableString(android.icu.text.ReplaceableString) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Example 33 with Transliterator

use of android.icu.text.Transliterator in project j2objc by google.

the class TransliteratorTest method TestCompoundFilter.

/**
 * Checks the filter semantics of a compound transliterator.
 *
 * <p>Compound filter semantics were originally not implemented correctly:
 * each component filter f(i) was replaced by f'(i) = f(i) &amp;&amp; g, where
 * g is the filter of the compound transliterator.
 *
 * <p>From Mark:
 *
 * <p>Suppose I have a transliterator X. Internally X is
 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
 *
 * <p>The compound should convert all Greek characters (through Latin) to
 * Cyrillic, then lowercase the result. The filter should say "don't touch
 * 'A' in the original". But because an intermediate result happens to go
 * through "A", the Greek Alpha gets hung up.
 *
 * <p>This test uses the compound "Greek-Latin; Latin-Greek; Lower" with the
 * filter [^A].
 */
@Test
public void TestCompoundFilter() {
    final String compoundId = "Greek-Latin; Latin-Greek; Lower";
    final UnicodeSet everythingButCapitalA = new UnicodeSet("[^A]");

    Transliterator t = Transliterator.getInstance(compoundId, Transliterator.FORWARD);
    t.setFilter(everythingButCapitalA);

    // Only the 'A' at index 1 should remain unchanged
    expect(
            t,
            CharsToUnicodeString("BA\\u039A\\u0391"),
            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
}
Also used : UnicodeSet(android.icu.text.UnicodeSet) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Example 34 with Transliterator

use of android.icu.text.Transliterator in project j2objc by google.

the class TransliteratorTest method TestJ277.

/**
 * Regression test for bugs found in Greek transliteration.
 *
 * <p>Two Greek inputs, sigma-upsilon-nu and sigma-alpha-upsilon-nu, are
 * expected to come out as "syn" and "saun" respectively. They are run first
 * through the compound "Greek-Latin; NFD; [:M:]Remove; NFC" transliterator
 * and then through a small hand-written rule set applied in the REVERSE
 * direction.
 */
@Test
public void TestJ277() {
    final char sigma = '\u03C3';
    final char upsilon = '\u03C5';
    final char nu = '\u03BD';
    final char alpha = '\u03B1';

    // sigma upsilon nu -> syn
    final String syn = new String(new char[] { sigma, upsilon, nu });
    // sigma alpha upsilon nu -> saun
    final String saun = new String(new char[] { sigma, alpha, upsilon, nu });

    Transliterator greekToLatin = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC");
    expect(greekToLatin, syn, "syn");
    expect(greekToLatin, saun, "saun");

    // Again, using a smaller rule set
    final String miniRules =
            "$alpha   = \u03B1;"
            + "$nu      = \u03BD;"
            + "$sigma   = \u03C3;"
            + "$ypsilon = \u03C5;"
            + "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
            + "s <>           $sigma;"
            + "a <>           $alpha;"
            + "u <>  $vowel { $ypsilon;"
            + "y <>           $ypsilon;"
            + "n <>           $nu;";
    Transliterator mini = Transliterator.createFromRules("mini", miniRules, Transliterator.REVERSE);
    expect(mini, syn, "syn");
    expect(mini, saun, "saun");

    // Not ported: a further section, previously kept here as commented-out
    // C++-style code, transliterated the Greek ("el") locale month names from
    // DateFormatSymbols and checked that a leading upper-case letter followed
    // by a lower-case letter kept that casing pattern in the output.
}
Also used : CaseInsensitiveString(android.icu.util.CaseInsensitiveString) ReplaceableString(android.icu.text.ReplaceableString) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Example 35 with Transliterator

use of android.icu.text.Transliterator in project j2objc by google.

the class TransliteratorTest method TestHangul.

/**
 * Exercises the "Latin-Hangul" transliterator and its inverse on a table of
 * syllable pairs.
 *
 * <p>Each table row holds the Hangul text followed by two Latin spellings,
 * passed to {@code assertTransform} in that order together with the forward
 * and inverse transliterators. The first check ("ch") uses the forward
 * transliterator only.
 */
@Test
public void TestHangul() {
    final String label = "Transform";
    Transliterator lh = Transliterator.getInstance("Latin-Hangul");
    Transliterator hl = lh.getInverse();

    // Forward-only check.
    assertTransform(label, "\uCE20", lh, "ch");

    // { Hangul, Latin spelling, second Latin spelling } -- presumably the
    // second spelling is what the inverse produces; confirm in assertTransform.
    final String[][] cases = {
        { "\uC544\uB530", "atta", "a-tta" },
        { "\uC544\uBE60", "appa", "a-ppa" },
        { "\uC544\uC9DC", "ajja", "a-jja" },
        { "\uC544\uAE4C", "akka", "a-kka" },
        { "\uC544\uC2F8", "assa", "a-ssa" },
        { "\uC544\uCC28", "acha", "a-cha" },
        { "\uC545\uC0AC", "agsa", "ag-sa" },
        { "\uC548\uC790", "anja", "an-ja" },
        { "\uC548\uD558", "anha", "an-ha" },
        { "\uC54C\uAC00", "alga", "al-ga" },
        { "\uC54C\uB9C8", "alma", "al-ma" },
        { "\uC54C\uBC14", "alba", "al-ba" },
        { "\uC54C\uC0AC", "alsa", "al-sa" },
        { "\uC54C\uD0C0", "alta", "al-ta" },
        { "\uC54C\uD30C", "alpa", "al-pa" },
        { "\uC54C\uD558", "alha", "al-ha" },
        { "\uC555\uC0AC", "absa", "ab-sa" },
        { "\uC548\uAC00", "anga", "an-ga" },
        { "\uC545\uC2F8", "agssa", "ag-ssa" },
        { "\uC548\uC9DC", "anjja", "an-jja" },
        { "\uC54C\uC2F8", "alssa", "al-ssa" },
        { "\uC54C\uB530", "altta", "al-tta" },
        { "\uC54C\uBE60", "alppa", "al-ppa" },
        { "\uC555\uC2F8", "abssa", "ab-ssa" },
        { "\uC546\uCE74", "akkka", "akk-ka" },
        { "\uC558\uC0AC", "asssa", "ass-sa" },
    };
    for (String[] row : cases) {
        assertTransform(label, row[0], lh, hl, row[1], row[2]);
    }
}
Also used : Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Aggregations

Transliterator (android.icu.text.Transliterator) 97, Test (org.junit.Test) 88, ReplaceableString (android.icu.text.ReplaceableString) 66, CaseInsensitiveString (android.icu.util.CaseInsensitiveString) 57, UnicodeSet (android.icu.text.UnicodeSet) 19, UnicodeSetIterator (android.icu.text.UnicodeSetIterator) 5, ULocale (android.icu.util.ULocale) 3, Enumeration (java.util.Enumeration) 3, UnicodeFilter (android.icu.text.UnicodeFilter) 2, File (java.io.File) 2, FileOutputStream (java.io.FileOutputStream) 2, OutputStreamWriter (java.io.OutputStreamWriter) 2, PrintWriter (java.io.PrintWriter) 2, ArrayList (java.util.ArrayList) 2, UnicodeMap (android.icu.dev.util.UnicodeMap) 1, CanonicalIterator (android.icu.text.CanonicalIterator) 1, Normalizer2 (android.icu.text.Normalizer2) 1, Replaceable (android.icu.text.Replaceable) 1, BufferedWriter (java.io.BufferedWriter) 1, HashSet (java.util.HashSet) 1