Search in sources :

Example 86 with Transliterator

use of android.icu.text.Transliterator in project j2objc by google.

the class TransliteratorTest method TestCompoundLatinRT.

/**
 * Feeds romanized words through the compound transliterator
 * "Latin-Devanagari;Devanagari-Latin" and Devanagari words through
 * "Devanagari-Latin;Latin-Devanagari", expecting each word to come back unchanged.
 *
 * Only the first 15 entries of each table are checked; the 16th entry of each
 * table is present in the data but never reached by the loop.
 */
@Test
public void TestCompoundLatinRT() {
    final int checkedPairs = 15;
    String[] latinWords = {
        "rmk\u1E63\u0113t",
        "\u015Br\u012Bmad",
        "bhagavadg\u012Bt\u0101",
        "adhy\u0101ya",
        "arjuna",
        "vi\u1E63\u0101da",
        "y\u014Dga",
        "dhr\u0325tar\u0101\u1E63\u1E6Dra",
        "uv\u0101cr\u0325",
        "dharmak\u1E63\u0113tr\u0113",
        "kuruk\u1E63\u0113tr\u0113",
        "samav\u0113t\u0101",
        "yuyutsava\u1E25",
        "m\u0101mak\u0101\u1E25",
        // disabled entry: "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
        "kimakurvata",
        "san\u0304java"
    };
    String[] devanagariWords = {
        "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
        "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
        "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
        "\u0905\u0927\u094d\u092f\u093e\u092f",
        "\u0905\u0930\u094d\u091c\u0941\u0928",
        "\u0935\u093f\u0937\u093e\u0926",
        "\u092f\u094b\u0917",
        "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
        "\u0909\u0935\u093E\u091A\u0943",
        "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
        "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
        "\u0938\u092e\u0935\u0947\u0924\u093e",
        "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
        "\u092e\u093e\u092e\u0915\u093e\u0903",
        // disabled entry: "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
        "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
        "\u0938\u0902\u091c\u0935"
    };

    Transliterator latinRoundTrip =
            Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD);
    Transliterator devanagariRoundTrip =
            Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD);

    // Input and expected output are the same string: the compound must be an identity.
    int k = 0;
    while (k < checkedPairs) {
        String latin = latinWords[k];
        String devanagari = devanagariWords[k];
        expect(latinRoundTrip, latin, latin);
        expect(devanagariRoundTrip, devanagari, devanagari);
        k++;
    }
}
Also used : CaseInsensitiveString(android.icu.util.CaseInsensitiveString) ReplaceableString(android.icu.text.ReplaceableString) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Example 87 with Transliterator

use of android.icu.text.Transliterator in project j2objc by google.

the class TransliteratorTest method TestSourceTargetSetFilter.

/**
 * Verifies getSourceSet() and getTargetSet() for transliterators whose ID or
 * rules carry an empty filter ("[]").
 *
 * The source set is always expected to equal UnicodeSet.EMPTY. The target set
 * is expected to equal the pattern listed for that direction in the case table,
 * or UnicodeSet.EMPTY when the row has no entry, a null entry, or an empty
 * string for that direction.
 */
@Test
public void TestSourceTargetSetFilter() {
    // Row layout: rules, expectedTarget-FORWARD, expectedTarget-REVERSE
    String[][] cases = {
        { "[] Latin-Greek", null, "[\']" },
        { "::[] ; ::NFD ; ::NFKC ; :: ([]) ;" },
        { "[] Any-Latin" },
        { "[] casefold" },
        { "[] NFKD;" },
        { "[] NFKC;" },
        { "[] hex" },
        { "[] lower" },
        { "[] null" },
        { "[] remove" },
        { "[] title" },
        { "[] upper" }
    };
    UnicodeSet emptySet = UnicodeSet.EMPTY;

    for (String[] row : cases) {
        String rules = row[0];

        // Try the text as an ID first; if that is rejected, treat it as a rule set.
        Transliterator forward;
        try {
            forward = Transliterator.getInstance(rules);
        } catch (Exception notAnId) {
            forward = Transliterator.createFromRules("temp", rules, Transliterator.FORWARD);
        }

        // Same fallback for the opposite direction when no inverse can be obtained.
        Transliterator reverse;
        try {
            reverse = forward.getInverse();
        } catch (Exception noInverse) {
            reverse = Transliterator.createFromRules("temp", rules, Transliterator.REVERSE);
        }

        Transliterator[] bothWays = { forward, reverse };
        // column is the index into the row: 1 for the first transliterator, 2 for the second.
        for (int column = 1; column <= bothWays.length; column++) {
            Transliterator current = bothWays[column - 1];
            UnicodeSet actualSource = current.getSourceSet();
            String label;
            if (current == forward) {
                label = "FORWARD\t";
            } else {
                label = "REVERSE\t";
            }

            String pattern = (column < row.length) ? row[column] : null;
            UnicodeSet wantedTarget;
            if (pattern == null || pattern.length() == 0) {
                wantedTarget = emptySet;
            } else {
                wantedTarget = new UnicodeSet(pattern);
            }

            boolean matched = assertEquals(label + "getSource\t\"" + rules + '"', emptySet, actualSource);
            if (!matched) {
                // debugging aid: repeat the call so it can be stepped into
                actualSource = current.getSourceSet();
            }

            UnicodeSet actualTarget = current.getTargetSet();
            matched = assertEquals(label + "getTarget\t\"" + rules + '"', wantedTarget, actualTarget);
            if (!matched) {
                // debugging aid: repeat the call so it can be stepped into
                actualTarget = current.getTargetSet();
            }
        }
    }
}
Also used : CaseInsensitiveString(android.icu.util.CaseInsensitiveString) ReplaceableString(android.icu.text.ReplaceableString) UnicodeSet(android.icu.text.UnicodeSet) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Example 88 with Transliterator

use of android.icu.text.Transliterator in project j2objc by google.

the class TransliteratorTest method expect.

/**
 * Runs one transliteration check through up to three API paths and reports
 * each outcome via expectAux, stopping at the first path that fails.
 *
 * 1. String API (only when pos is null): t.transliterate(source).
 * 2. Replaceable API: finishTransliteration over a ReplaceableString; the
 *    position must end with start == limit, otherwise ":UNFINISHED" is reported.
 * 3. Incremental API: with a null pos the source is fed one character at a
 *    time; with a non-null pos the whole source is inserted and transliterated
 *    once. finishTransliteration is then called and the final text compared.
 *
 * @param t              transliterator under test
 * @param source         input text
 * @param expectedResult text the transliteration is expected to produce
 * @param pos            position to copy for the Replaceable and incremental
 *                       paths, or null to cover the whole source
 */
static void expect(Transliterator t, String source, String expectedResult, Transliterator.Position pos) {
    final boolean noPosition = (pos == null);

    // --- Path 1: plain String transliteration ---
    if (noPosition) {
        String direct = t.transliterate(source);
        boolean directOk = expectAux(t.getID() + ":String", source, direct, expectedResult);
        if (!directOk) {
            return;
        }
    }

    // --- Path 2: Replaceable transliteration, finished in one call ---
    // A fresh Position is always built so the caller's pos is never modified.
    Transliterator.Position cursor = noPosition
            ? new Transliterator.Position(0, source.length(), 0, source.length())
            : new Transliterator.Position(pos.contextStart, pos.contextLimit, pos.start, pos.limit);
    ReplaceableString text = new ReplaceableString(source);
    t.finishTransliteration(text, cursor);
    if (cursor.start != cursor.limit) {
        expectAux(t.getID() + ":UNFINISHED", source,
                "start: " + cursor.start + ", limit: " + cursor.limit, false, expectedResult);
        return;
    }
    String outcome = text.toString();
    if (!expectAux(t.getID() + ":Replaceable", source, outcome, expectedResult)) {
        return;
    }

    // --- Path 3: incremental transliteration ---
    // The result after the final finishTransliteration call must match as well.
    cursor = noPosition
            ? new Transliterator.Position()
            : new Transliterator.Position(pos.contextStart, pos.contextLimit, pos.start, pos.limit);
    List<String> trace = new ArrayList<String>();
    trace.add(source);
    text.replace(0, text.length(), "");
    if (noPosition) {
        // Feed one character per call, recording the intermediate state after each.
        final int lastIndex = source.length() - 1;
        for (int i = 0; i <= lastIndex; ++i) {
            t.transliterate(text, cursor, source.charAt(i));
            String tail = (i < lastIndex) ? (" + '" + source.charAt(i + 1) + "' ->") : " =>";
            trace.add(UtilityExtensions.formatInput(text, cursor) + tail);
        }
    } else {
        text.replace(0, 0, source);
        trace.add(UtilityExtensions.formatInput(text, cursor));
        t.transliterate(text, cursor);
        trace.add(UtilityExtensions.formatInput(text, cursor));
    }

    // Final step of keyboard-style transliteration: flush any partial matches
    // that were still waiting for more input.
    t.finishTransliteration(text, cursor);
    outcome = text.toString();
    trace.add(outcome);
    String[] steps = trace.toArray(new String[trace.size()]);
    expectAux(t.getID() + ":Incremental", steps, outcome.equals(expectedResult), expectedResult);
}
Also used : ReplaceableString(android.icu.text.ReplaceableString) ArrayList(java.util.ArrayList) CaseInsensitiveString(android.icu.util.CaseInsensitiveString) Transliterator(android.icu.text.Transliterator)

Example 89 with Transliterator

use of android.icu.text.Transliterator in project j2objc by google.

the class TransliteratorTest method TestIDForms.

/**
 * Test ID form variants.
 *
 * The table holds triples: the ID passed to getInstance, the ID the instance
 * is expected to report (null means "same as the input"), and the ID its
 * inverse is expected to report (null means the input ID is expected to be
 * rejected with an IllegalArgumentException).
 */
@Test
public void TestIDForms() {
    String[] cases = {
        // input ID,            expected ID,          expected inverse ID
        "NFC",                  null,                 "NFD",
        "nfd",                  null,                 "NFC", // make sure case is ignored
        "Any-NFKD",             null,                 "Any-NFKC",
        "Null",                 null,                 "Null",
        "-nfkc",                "nfkc",               "NFKD",
        "-nfkc/",               "nfkc",               "NFKD",
        "Latin-Greek/UNGEGN",   null,                 "Greek-Latin/UNGEGN",
        "Greek/UNGEGN-Latin",   "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
        "Bengali-Devanagari/",  "Bengali-Devanagari", "Devanagari-Bengali",
        "Source-",              null,                 null,
        "Source/Variant-",      null,                 null,
        "Source-/Variant",      null,                 null,
        "/Variant",             null,                 null,
        "/Variant-",            null,                 null,
        "-/Variant",            null,                 null,
        "-/",                   null,                 null,
        "-",                    null,                 null,
        "/",                    null,                 null
    };

    for (int base = 0; base < cases.length; base += 3) {
        final String inputId = cases[base];
        final String wantedInverseId = cases[base + 2];
        final String wantedId = (cases[base + 1] != null) ? cases[base + 1] : inputId;
        final boolean shouldBeValid = (wantedInverseId != null);

        try {
            Transliterator made = Transliterator.getInstance(inputId);
            Transliterator inverse = made.getInverse();
            boolean idsMatch = made.getID().equals(wantedId) && inverse.getID().equals(wantedInverseId);
            if (!idsMatch) {
                errln("FAIL: getInstance(" + inputId + ") => " + made.getID() + " x getInverse() => " + inverse.getID() + ", expected " + wantedInverseId);
            } else {
                logln("Ok: " + inputId + ".getInverse() => " + wantedInverseId);
            }
        } catch (IllegalArgumentException rejected) {
            // Rejection is the passing outcome only for rows with no expected inverse.
            if (shouldBeValid) {
                errln("FAIL: getInstance(" + inputId + ") => " + rejected.getMessage());
            } else {
                logln("Ok: getInstance(" + inputId + ") => " + rejected.getMessage());
            }
        }
    }
}
Also used : CaseInsensitiveString(android.icu.util.CaseInsensitiveString) ReplaceableString(android.icu.text.ReplaceableString) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Example 90 with Transliterator

use of android.icu.text.Transliterator in project j2objc by google.

the class RoundTripTest method TestHan.

/**
 * Transliterates every character of the "zh" exemplar set with Han-Latin and
 * checks that:
 * - no Han characters remain in the result,
 * - the Latin-NumericPinyin form, after NFC, contains no marks, and
 * - applying the inverse of Latin-NumericPinyin reproduces the Han-Latin output.
 *
 * When the last check fails, both strings are written to
 * "numeric-pinyin.log.txt" in the working directory. If the locale data cannot
 * be loaded, the test only issues a warning.
 *
 * @throws UnsupportedEncodingException if the "UTF8" charset is unavailable for the log file
 * @throws FileNotFoundException if the log file cannot be opened for writing
 */
@Test
public void TestHan() throws UnsupportedEncodingException, FileNotFoundException {
    try {
        UnicodeSet exemplars = LocaleData.getExemplarSet(new ULocale("zh"), 0);
        // create string with all chars
        StringBuffer b = new StringBuffer();
        for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next(); ) {
            UTF16.append(b, it.codepoint);
        }
        String source = b.toString();
        // transform with Han translit
        Transliterator han = Transliterator.getInstance("Han-Latin");
        String target = han.transliterate(source);
        // now verify that there are no Han characters left
        UnicodeSet allHan = new UnicodeSet("[:han:]");
        assertFalse("No Han must be left after Han-Latin transliteration", allHan.containsSome(target));
        // check the pinyin translit
        Transliterator pn = Transliterator.getInstance("Latin-NumericPinyin");
        String target2 = pn.transliterate(target);
        // verify that there are no marks
        Transliterator nfc = Transliterator.getInstance("nfc");
        String nfced = nfc.transliterate(target2);
        UnicodeSet allMarks = new UnicodeSet("[:mark:]");
        assertFalse("NumericPinyin must contain no marks", allMarks.containsSome(nfced));
        // verify roundtrip
        // NOTE(review): the inverse is applied to target (the Han-Latin output), not to
        // target2 (the Latin-NumericPinyin output) -- confirm this is the intended input.
        Transliterator np = pn.getInverse();
        String target3 = np.transliterate(target);
        boolean roundtripOK = target3.equals(target);
        assertTrue("NumericPinyin must roundtrip", roundtripOK);
        if (!roundtripOK) {
            String filename = "numeric-pinyin.log.txt";
            File logFile = new File(filename).getAbsoluteFile();
            PrintWriter out = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename), "UTF8"), 4 * 1024));
            try {
                out.println("Pinyin:                " + target);
                // target2 is the Latin-NumericPinyin output computed above
                out.println("Pinyin-Numeric-Pinyin: " + target2);
            } finally {
                // Always flush and release the file handle, whatever happens while writing.
                out.close();
            }
            // Report only after the log is complete and closed, so the file is
            // intact even if error reporting aborts the test.
            errln("Creating log file " + logFile);
        }
    } catch (MissingResourceException ex) {
        warnln("Could not load the locale data for fetching the exemplar characters.");
    }
}
Also used : ULocale(android.icu.util.ULocale) MissingResourceException(java.util.MissingResourceException) UnicodeSet(android.icu.text.UnicodeSet) BufferedWriter(java.io.BufferedWriter) UnicodeSetIterator(android.icu.text.UnicodeSetIterator) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) File(java.io.File) Transliterator(android.icu.text.Transliterator) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Aggregations

Transliterator (android.icu.text.Transliterator): 97, Test (org.junit.Test): 88, ReplaceableString (android.icu.text.ReplaceableString): 66, CaseInsensitiveString (android.icu.util.CaseInsensitiveString): 57, UnicodeSet (android.icu.text.UnicodeSet): 19, UnicodeSetIterator (android.icu.text.UnicodeSetIterator): 5, ULocale (android.icu.util.ULocale): 3, Enumeration (java.util.Enumeration): 3, UnicodeFilter (android.icu.text.UnicodeFilter): 2, File (java.io.File): 2, FileOutputStream (java.io.FileOutputStream): 2, OutputStreamWriter (java.io.OutputStreamWriter): 2, PrintWriter (java.io.PrintWriter): 2, ArrayList (java.util.ArrayList): 2, UnicodeMap (android.icu.dev.util.UnicodeMap): 1, CanonicalIterator (android.icu.text.CanonicalIterator): 1, Normalizer2 (android.icu.text.Normalizer2): 1, Replaceable (android.icu.text.Replaceable): 1, BufferedWriter (java.io.BufferedWriter): 1, HashSet (java.util.HashSet): 1