use of android.icu.text.UnicodeSet in project j2objc by google.
the class TransliteratorTest method TestToRules.
/**
 * Checks the textual round trip of rule-based transliterators and Unicode sets.
 *
 * <p>The data table is a flat list of triples: a kind marker ({@code RBT} or
 * {@code SET}), the input text, and the expected output text.
 * <ul>
 *   <li>For {@code RBT} entries a transliterator is created from the input rules;
 *       {@code toRules(false)} must equal the unescaped expected text and
 *       {@code toRules(true)} must equal the expected text as written.</li>
 *   <li>For {@code SET} entries a {@code UnicodeSet} is built from the input pattern
 *       and {@code toPattern(true)} must equal the expected text.</li>
 * </ul>
 */
@Test
public void TestToRules() {
    String RBT = "rbt";
    String SET = "set";
    String[] DATA = {
        RBT, "$a=\\u4E61; [$a] > A;", "[\\u4E61] > A;",
        RBT, "$white=[[:Zs:][:Zl:]]; $white{a} > A;", "[[:Zs:][:Zl:]]{a} > A;",
        SET, "[[:Zs:][:Zl:]]", "[[:Zs:][:Zl:]]",
        SET, "[:Ps:]", "[:Ps:]",
        SET, "[:L:]", "[:L:]",
        SET, "[[:L:]-[A]]", "[[:L:]-[A]]",
        SET, "[~[:Lu:][:Ll:]]", "[~[:Lu:][:Ll:]]",
        SET, "[~[a-z]]", "[~[a-z]]",
        RBT, "$white=[:Zs:]; $black=[^$white]; $black{a} > A;", "[^[:Zs:]]{a} > A;",
        RBT, "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;", "[[a-z]-[:Zs:]]{a} > A;",
        RBT, "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;", "[[:Zs:]&[a-z]]{a} > A;",
        RBT, "$a=[:Zs:]; $b=[x$a]; $b{a} > A;", "[x[:Zs:]]{a} > A;",
        RBT, "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;" + "$macron = \\u0304 ;" + "$evowel = [aeiouyAEIOUY] ;" + "$iotasub = \\u0345 ;" + "($evowel $macron $accentMinus *) i > | $1 $iotasub ;", "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
        RBT, "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;"
    };
    // Each entry occupies three consecutive slots: kind, input, expected.
    for (int d = 0; d < DATA.length; d += 3) {
        // Compare by value rather than by reference so the dispatch does not
        // rely on both sides being the very same String object.
        if (RBT.equals(DATA[d])) {
            // Transliterator test
            Transliterator t = Transliterator.createFromRules("ID", DATA[d + 1], Transliterator.FORWARD);
            if (t == null) {
                errln("FAIL: createFromRules failed");
                return;
            }
            String rules = t.toRules(false);
            String escapedRules = t.toRules(true);
            // The table stores the escaped form; the unescaped expectation is derived from it.
            String expRules = Utility.unescape(DATA[d + 2]);
            String expEscapedRules = DATA[d + 2];
            if (rules.equals(expRules)) {
                logln("Ok: " + DATA[d + 1] + " => " + Utility.escape(rules));
            } else {
                errln("FAIL: " + DATA[d + 1] + " => " + Utility.escape(rules + ", exp " + expRules));
            }
            if (escapedRules.equals(expEscapedRules)) {
                logln("Ok: " + DATA[d + 1] + " => " + escapedRules);
            } else {
                errln("FAIL: " + DATA[d + 1] + " => " + escapedRules + ", exp " + expEscapedRules);
            }
        } else {
            // UnicodeSet test
            String pat = DATA[d + 1];
            String expToPat = DATA[d + 2];
            UnicodeSet set = new UnicodeSet(pat);
            String toPat = set.toPattern(true);
            if (expToPat.equals(toPat)) {
                logln("Ok: " + pat + " => " + toPat);
            } else {
                // Report the value that was actually compared against (expToPat),
                // not the input pattern.
                errln("FAIL: " + pat + " => " + Utility.escape(toPat) + ", exp " + Utility.escape(expToPat));
            }
        }
    }
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class RoundTripTest method getRepresentativeBoundaryHangul.
/**
 * Builds a set of sample strings made of conjoining-jamo sequences.
 *
 * <p>Three families of samples are generated. Each sample is passed through
 * {@code Normalizer.compose(sample, false)} and the composed string is added to
 * the result only when its length is exactly 2.
 *
 * @return a new set containing every composed sample of length 2
 */
private static UnicodeSet getRepresentativeBoundaryHangul() {
    final UnicodeSet collected = new UnicodeSet();

    final UnicodeSet leadingJamo = new UnicodeSet("[:hst=L:]");
    final UnicodeSet vowelJamo = new UnicodeSet("[:hst=V:]");
    final UnicodeSet trailingJamo = new UnicodeSet("[:hst=T:]");

    // U+1100 HANGUL CHOSEONG KIYEOK followed by U+1161 HANGUL JUNGSEONG A
    final String kiyeokPlusA = "\u1100\u1161";
    // U+1100 HANGUL CHOSEONG KIYEOK
    final String kiyeok = "\u1100";
    // U+1161 HANGUL JUNGSEONG A
    final String jungseongA = "\u1161";
    // U+110B HANGUL CHOSEONG IEUNG
    final String ieung = "\u110B";

    final UnicodeSet firstLeads = new UnicodeSet("[\u1100\u110B]");

    // Family 1: (KIYEOK or IEUNG) + V + IEUNG + V
    UnicodeSetIterator leadIter = new UnicodeSetIterator(firstLeads);
    while (leadIter.next()) {
        UnicodeSetIterator firstVowelIter = new UnicodeSetIterator(vowelJamo);
        while (firstVowelIter.next()) {
            final String head = leadIter.getString() + firstVowelIter.getString() + ieung;
            UnicodeSetIterator secondVowelIter = new UnicodeSetIterator(vowelJamo);
            while (secondVowelIter.next()) {
                final String composed = Normalizer.compose(head + secondVowelIter.getString(), false);
                if (composed.length() == 2) {
                    collected.add(composed);
                }
            }
        }
    }

    UnicodeSetIterator anyLeadIter = new UnicodeSetIterator(leadingJamo);
    while (anyLeadIter.next()) {
        // Shared tail for the two families below: L + JUNGSEONG A
        final String tail = anyLeadIter.getString() + jungseongA;

        // Family 2: KIYEOK + V + L + JUNGSEONG A
        UnicodeSetIterator vowelIter = new UnicodeSetIterator(vowelJamo);
        while (vowelIter.next()) {
            final String composed = Normalizer.compose(kiyeok + vowelIter.getString() + tail, false);
            if (composed.length() == 2) {
                collected.add(composed);
            }
        }

        // Family 3: KIYEOK + JUNGSEONG A + T + L + JUNGSEONG A
        UnicodeSetIterator trailIter = new UnicodeSetIterator(trailingJamo);
        while (trailIter.next()) {
            final String composed = Normalizer.compose(kiyeokPlusA + trailIter.getString() + tail, false);
            if (composed.length() == 2) {
                collected.add(composed);
            }
        }
    }
    return collected;
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class TransliteratorTest method TestCompoundFilter.
/**
 * Compound filter semantics were originally not implemented correctly.
 * Originally, each component filter f(i) was replaced by
 * f'(i) = f(i) &amp;&amp; g, where g is the filter for the compound
 * transliterator.
 *
 * From Mark:
 *
 * Suppose I have a transliterator X. Internally X is
 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
 *
 * The compound should convert all Greek characters (through Latin) to
 * Cyrillic, then lowercase the result. The filter should say "don't
 * touch 'A' in the original". But because an intermediate result
 * happens to go through "A", the Greek Alpha gets hung up.
 */
@Test
public void TestCompoundFilter() {
    Transliterator compound =
            Transliterator.getInstance("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD);
    compound.setFilter(new UnicodeSet("[^A]"));

    // Only the 'A' at index 1 should remain unchanged
    String input = CharsToUnicodeString("BA\\u039A\\u0391");
    String expected = CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1");
    expect(compound, input, expected);
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class TransliteratorTest method checkRegistry.
/**
 * Sets a filter on one instance obtained for {@code id}, then obtains a second
 * instance for the same ID and reports an error if the second instance's filter
 * equals the one that was just set on the first.
 *
 * @param id the transliterator ID to look up
 */
private void checkRegistry(String id) {
    Transliterator modifiedInstance = Transliterator.getInstance(id);
    final UnicodeSet probeFilter = new UnicodeSet("[a-z5]");
    modifiedInstance.setFilter(probeFilter);

    // A second lookup should not see the filter applied to the first instance.
    Transliterator freshInstance = Transliterator.getInstance(id);
    UnicodeFilter freshFilter = freshInstance.getFilter();
    boolean filterLeaked = probeFilter.equals(freshFilter);
    if (filterLeaked) {
        errln("Changed what is in registry for " + id);
    }
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class TransliteratorTest method TestGurmukhiDevanagari.
/**
 * Test Gurmukhi-Devanagari Tippi and Bindi.
 *
 * <p>Each source string is one Devanagari character followed by U+0902. The
 * expected output is that character shifted up by 0x0100, followed by U+0A02
 * when the first character comes from the vowel set, or by U+0A70 when it
 * comes from the non-vowel set.
 */
@Test
public void TestGurmukhiDevanagari() {
    UnicodeSet vowels = new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]");
    UnicodeSet nonVowels = new UnicodeSet("[\u0915-\u0928\u092A-\u0930]");
    UnicodeSetIterator vowelIter = new UnicodeSetIterator(vowels);
    UnicodeSetIterator nonVowelIter = new UnicodeSetIterator(nonVowels);
    Transliterator toGurmukhi = Transliterator.getInstance("Devanagari-Gurmukhi");

    // U+0902 preceded by a vowel ---> U+0A02
    while (vowelIter.next()) {
        char devanagari = (char) vowelIter.codepoint;
        char gurmukhi = (char) (vowelIter.codepoint + 0x0100);
        expect(toGurmukhi, devanagari + "\u0902", gurmukhi + "\u0A02");
    }

    // U+0902 preceded by a consonant ---> U+0A70
    while (nonVowelIter.next()) {
        char devanagari = (char) nonVowelIter.codepoint;
        char gurmukhi = (char) (nonVowelIter.codepoint + 0x0100);
        expect(toGurmukhi, devanagari + "\u0902", gurmukhi + "\u0A70");
    }
}
Aggregations