use of android.icu.text.Transliterator in project j2objc by google.
the class TransliteratorTest method TestSourceTargetSet2.
/**
 * Checks the source and target sets reported by a transliterator against sets
 * collected empirically by transforming a large pool of test strings.
 *
 * <p>The method has two phases:
 * <ol>
 *   <li>Build {@code disorderedMarks}, the pool of test strings. It is seeded from
 *       the NFD decompositions of all code points, from strings produced by a
 *       {@link CanonicalIterator} over those decompositions and over
 *       source-plus-trail combinations, and from every non-starter paired with
 *       U+0345 and U+0323. Finally every code point 0..0x10FFFF is added and the
 *       set is frozen.</li>
 *   <li>For each rule in {@code rules}, create a transliterator, transform every
 *       test string, and feed each changed (input, output) pair that passes
 *       {@code isAtomic} into {@code addSourceTarget}. The resulting empirical
 *       sets are then compared with {@code getSourceSet()} and
 *       {@code getTargetSet()}.</li>
 * </ol>
 */
@Test
public void TestSourceTargetSet2() {
Normalizer2 nfc = Normalizer2.getNFCInstance();
Normalizer2 nfd = Normalizer2.getNFDInstance();
// Disabled: NFKD source/target diagnostics.
// Normalizer2 nfkd = Normalizer2.getInstance(null, "nfkd", Mode.DECOMPOSE);
// UnicodeSet nfkdSource = new UnicodeSet();
// UnicodeSet nfkdTarget = new UnicodeSet();
// for (int i = 0; i <= 0x10FFFF; ++i) {
// if (nfkd.isInert(i)) {
// continue;
// }
// nfkdSource.add(i);
// String t = nfkd.getDecomposition(i);
// if (t != null) {
// nfkdTarget.addAll(t);
// } else {
// nfkdTarget.add(i);
// }
// }
// nfkdSource.freeze();
// nfkdTarget.freeze();
// logln("NFKD Source: " + nfkdSource.toPattern(false));
// logln("NFKD Target: " + nfkdTarget.toPattern(false));
// Keyed by the first code point of a decomposition: the characters that follow
// it (leadToTrail) and the code points that decompose to it (leadToSources).
// NOTE(review): both maps are created with the raw UnicodeMap type, which
// triggers an unchecked-conversion warning.
UnicodeMap<UnicodeSet> leadToTrail = new UnicodeMap();
UnicodeMap<UnicodeSet> leadToSources = new UnicodeMap();
// Every code point whose canonical combining class is not 0.
UnicodeSet nonStarters = new UnicodeSet("[:^ccc=0:]").freeze();
// One iterator instance, re-pointed at a new source string with setSource().
CanonicalIterator can = new CanonicalIterator("");
// The pool of test strings that the second phase transforms.
UnicodeSet disorderedMarks = new UnicodeSet();
// Phase 1a: walk every code point that has an NFD decomposition.
for (int i = 0; i <= 0x10FFFF; ++i) {
String s = nfd.getDecomposition(i);
if (s == null) {
continue;
}
// Add every string the iterator yields for this decomposition.
can.setSource(s);
for (String t = can.next(); t != null; t = can.next()) {
disorderedMarks.add(t);
}
// if s has two code points, (or more), add the lead/trail information
int first = s.codePointAt(0);
int firstCount = Character.charCount(first);
// Decomposition is a single code point: there is no trail to record.
if (s.length() == firstCount)
continue;
String trailString = s.substring(firstCount);
// add all the trail characters
// Only decompositions whose trail contains at least one non-starter are recorded.
if (!nonStarters.containsSome(trailString)) {
continue;
}
UnicodeSet trailSet = leadToTrail.get(first);
if (trailSet == null) {
leadToTrail.put(first, trailSet = new UnicodeSet());
}
// add remaining trails
trailSet.addAll(trailString);
// add the sources
UnicodeSet sourcesSet = leadToSources.get(first);
if (sourcesSet == null) {
leadToSources.put(first, sourcesSet = new UnicodeSet());
}
sourcesSet.add(i);
}
// Phase 1b: for each lead, combine every source with every recorded trail
// character and keep the iterator results that do not end with that trail.
for (Entry<String, UnicodeSet> x : leadToSources.entrySet()) {
String lead = x.getKey();
UnicodeSet sources = x.getValue();
UnicodeSet trailSet = leadToTrail.get(lead);
for (String source : sources) {
for (String trail : trailSet) {
can.setSource(source + trail);
for (String t = can.next(); t != null; t = can.next()) {
if (t.endsWith(trail))
continue;
disorderedMarks.add(t);
}
}
}
}
// Phase 1c: pair every non-starter with U+0345 (before it) and U+0323 (after it).
for (String s : nonStarters) {
disorderedMarks.add("\u0345" + s);
disorderedMarks.add(s + "\u0323");
// Diagnostic only: logs "??" when NFC of U+01EC followed by the mark no
// longer starts with U+01EC. Nothing is asserted here.
String xx = nfc.normalize("\u01EC" + s);
if (!xx.startsWith("\u01EC")) {
logln("??");
}
}
// Disabled: an earlier way of deriving additional test strings.
// for (int i = 0; i <= 0x10FFFF; ++i) {
// String s = nfkd.getDecomposition(i);
// if (s != null) {
// disorderedMarks.add(s);
// disorderedMarks.add(nfc.normalize(s));
// addDerivedStrings(nfc, disorderedMarks, s);
// }
// s = nfd.getDecomposition(i);
// if (s != null) {
// disorderedMarks.add(s);
// }
// if (!nfc.isInert(i)) {
// if (i == 0x00C0) {
// logln("\u00C0");
// }
// can.setSource(s+"\u0334");
// for (String t = can.next(); t != null; t = can.next()) {
// addDerivedStrings(nfc, disorderedMarks, t);
// }
// can.setSource(s+"\u0345");
// for (String t = can.next(); t != null; t = can.next()) {
// addDerivedStrings(nfc, disorderedMarks, t);
// }
// can.setSource(s+"\u0323");
// for (String t = can.next(); t != null; t = can.next()) {
// addDerivedStrings(nfc, disorderedMarks, t);
// }
// }
// }
// The logged size is taken before the full code point range is added below.
logln("Test cases: " + disorderedMarks.size());
disorderedMarks.addAll(0, 0x10FFFF).freeze();
logln("isInert \u0104 " + nfc.isInert('\u0104'));
// Each entry is { rule text, null }. The second slot is only referenced by the
// disabled code inside the loop below.
Object[][] rules = { { ":: [:sc=COMMON:] any-name;", null }, { ":: [:Greek:] hex-any/C;", null }, { ":: [:Greek:] any-hex/C;", null }, { ":: [[:Mn:][:Me:]] remove;", null }, { ":: [[:Mn:][:Me:]] null;", null }, { ":: lower;", null }, { ":: upper;", null }, { ":: title;", null }, { ":: CaseFold;", null }, { ":: NFD;", null }, { ":: NFC;", null }, { ":: NFKD;", null }, { ":: NFKC;", null }, { ":: [[:Mn:][:Me:]] NFKD;", null }, { ":: Latin-Greek;", null }, { ":: [:Latin:] NFKD;", null }, { ":: NFKD;", null }, { ":: NFKD;\n" + ":: [[:Mn:][:Me:]] remove;\n" + ":: NFC;", null } };
// Phase 2: compare the reported and the empirical sets for every rule.
for (Object[] rulex : rules) {
String rule = (String) rulex[0];
Transliterator trans = Transliterator.createFromRules("temp", rule, Transliterator.FORWARD);
UnicodeSet actualSource = trans.getSourceSet();
UnicodeSet actualTarget = trans.getTargetSet();
UnicodeSet empiricalSource = new UnicodeSet();
UnicodeSet empiricalTarget = new UnicodeSet();
// Multi-line rules are flattened for use in the assertion messages.
String ruleDisplay = rule.replace("\n", "\t\t");
UnicodeSet toTest = disorderedMarks;
// if (rulex[1] != null) {
// toTest = new UnicodeSet(disorderedMarks);
// toTest.addAll((UnicodeSet) rulex[1]);
// }
// NFD form of U+0104; it is logged when the loop below reaches it.
String test = nfd.normalize("\u0104");
boolean DEBUG = true;
// count is incremented below but never read.
@SuppressWarnings("unused") int // for debugging
count = 0;
for (String s : toTest) {
if (s.equals(test)) {
logln(test);
}
String t = trans.transform(s);
// Only strings that the transliterator actually changes are recorded.
if (!s.equals(t)) {
// isAtomic is defined outside this block. Pairs it rejects are skipped.
// NOTE(review): the second call repeats the same check and discards the
// result; it looks like a leftover debugger hook - confirm before removing.
if (!isAtomic(s, t, trans)) {
isAtomic(s, t, trans);
continue;
}
// }
// Counts inputs/outputs that fall outside the reported sets.
if (DEBUG) {
if (!actualSource.containsAll(s)) {
count++;
}
if (!actualTarget.containsAll(t)) {
count++;
}
}
// addSourceTarget is defined outside this block; it receives the pair
// together with the two empirical sets.
addSourceTarget(s, empiricalSource, t, empiricalTarget);
}
}
// NOTE(review): SetAssert.MISSING_OK presumably lets the reported set contain
// more than the empirical one - confirm against the SetAssert definition.
assertEquals("getSource(" + ruleDisplay + ")", empiricalSource, actualSource, SetAssert.MISSING_OK);
assertEquals("getTarget(" + ruleDisplay + ")", empiricalTarget, actualTarget, SetAssert.MISSING_OK);
}
}
use of android.icu.text.Transliterator in project j2objc by google.
the class TransliteratorTest method TestCompoundKana.
// Latin-Arabic has been temporarily removed until it can be
// done correctly.
// public void TestArabic() {
// String DATA[] = {
// "Arabic",
// "\u062a\u062a\u0645\u062a\u0639 "+
// "\u0627\u0644\u0644\u063a\u0629 "+
// "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+
// "\u0628\u0628\u0646\u0638\u0645 "+
// "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+
// "\u062c\u0645\u064a\u0644\u0629"
// };
// Transliterator t = Transliterator.getInstance("Latin-Arabic");
// for (int i=0; i<DATA.length; i+=2) {
// expect(t, DATA[i], DATA[i+1]);
// }
// }
/**
 * Chains the Latin-Katakana and Katakana-Latin transliterators into a single
 * compound transliterator and checks that a sample string passes through it
 * unchanged.
 */
@Test
public void TestCompoundKana() {
    // The same text serves as both the input and the expected output.
    final String roundTripText = "aaaaa";
    Transliterator kanaRoundTrip = Transliterator.getInstance("Latin-Katakana;Katakana-Latin");
    expect(kanaRoundTrip, roundTripText, roundTripText);
}
use of android.icu.text.Transliterator in project j2objc by google.
the class TransliteratorTest method TestNullInverse.
/**
 * The "Null" transliterator must be its own inverse: asking it for its inverse
 * has to yield a transliterator whose ID is again "Null". (J21)
 */
@Test
public void TestNullInverse() {
    final String nullId = "Null";
    Transliterator inverse = Transliterator.getInstance(nullId).getInverse();
    boolean inverseIsNull = inverse.getID().equals(nullId);
    if (inverseIsNull) {
        return;
    }
    errln("FAIL: Inverse of Null should be Null");
}
use of android.icu.text.Transliterator in project j2objc by google.
the class TransliteratorTest method TestCompoundInverse.
/**
 * Instantiates "Greek-Latin; Title()" in the reverse direction and checks that
 * the resulting transliterator reports the ID "(Title);Latin-Greek".
 */
@Test
public void TestCompoundInverse() {
    Transliterator inverse = Transliterator.getInstance("Greek-Latin; Title()", Transliterator.REVERSE);
    if (inverse == null) {
        errln("FAIL: createInstance");
        return;
    }
    final String expectedId = "(Title);Latin-Greek";
    final String actualId = inverse.getID();
    // Report the mismatch first; the success path only logs.
    if (!actualId.equals(expectedId)) {
        errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" + actualId + "\", expected \"" + expectedId + "\"");
        return;
    }
    logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" + actualId);
}
use of android.icu.text.Transliterator in project j2objc by google.
the class TransliteratorTest method TestFiltering.
/**
 * Do some basic tests of filtering.
 *
 * <p>Three cases are exercised:
 * <ul>
 *   <li>a rule-based transliterator whose filter {@code [a]} is installed with
 *       {@code setFilter};</li>
 *   <li>the same rules with the filter given inline as {@code ::[a];};</li>
 *   <li>the Any-Hex transliterator with a hand-written {@link UnicodeFilter}
 *       that accepts everything except {@code 'c'}.</li>
 * </ul>
 * In the first two cases "xa" must become "xb". In the third, every character
 * of "abcde" except 'c' must be replaced by its hex escape, and a mismatch is
 * reported as a test error.
 */
@Test
public void TestFiltering() {
    // Case 1: filter installed programmatically.
    Transliterator tempTrans = Transliterator.createFromRules("temp", "x > y; x{a} > b; ", Transliterator.FORWARD);
    tempTrans.setFilter(new UnicodeSet("[a]"));
    String tempResult = tempTrans.transform("xa");
    assertEquals("context should not be filtered ", "xb", tempResult);

    // Case 2: the same filter expressed inside the rule text.
    tempTrans = Transliterator.createFromRules("temp", "::[a]; x > y; x{a} > b; ", Transliterator.FORWARD);
    tempResult = tempTrans.transform("xa");
    assertEquals("context should not be filtered ", "xb", tempResult);

    // Case 3: custom filter that lets everything through except 'c'.
    Transliterator hex = Transliterator.getInstance("Any-Hex");
    hex.setFilter(new UnicodeFilter() {
        @Override
        public boolean contains(int c) {
            return c != 'c';
        }

        @Override
        public String toPattern(boolean escapeUnprintable) {
            return "";
        }

        @Override
        public boolean matchesIndexValue(int v) {
            return false;
        }

        @Override
        public void addMatchSetTo(UnicodeSet toUnionTo) {
        }
    });
    String s = "abcde";
    String out = hex.transliterate(s);
    String exp = "\\u0061\\u0062c\\u0064\\u0065";
    if (out.equals(exp)) {
        logln("Ok: \"" + exp + "\"");
    } else {
        // Previously sent to logln, which meant a wrong result could never fail the test.
        errln("FAIL: \"" + out + "\", wanted \"" + exp + "\"");
    }
}
Aggregations