use of android.icu.text.UnicodeSet in project j2objc by google.
the class TransliteratorTest method assertEquals.
/**
 * Compares a set reported by a transliterator ({@code actual}) with the set that was
 * observed empirically ({@code empirical}) and logs the outcome.
 *
 * <ul>
 *   <li>Elements of {@code empirical} that {@code actual} lacks are reported through
 *       {@code errln}.</li>
 *   <li>Elements of {@code actual} that {@code empirical} lacks are only logged as a
 *       warning through {@code logln}.</li>
 *   <li>When neither difference exists, an "OK" line is logged.</li>
 * </ul>
 *
 * @param message   label prepended to every log line
 * @param empirical set collected by running the transliterator
 * @param actual    set returned by the transliterator's getter
 * @param setAssert not consulted by this method
 */
void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) {
    boolean setsAgree = true;

    // Reported set is too small: this is the only case treated as an error.
    if (!actual.containsAll(empirical)) {
        UnicodeSet notReported = new UnicodeSet(empirical).removeAll(actual);
        errln(message + " \tgetXSet < empirical (" + notReported.size() + "): " + toPattern(notReported));
        setsAgree = false;
    }

    // Reported set is too large: logged as a warning only.
    if (!empirical.containsAll(actual)) {
        UnicodeSet notObserved = new UnicodeSet(actual).removeAll(empirical);
        logln("WARNING: " + message + " \tgetXSet > empirical (" + notObserved.size() + "): " + toPattern(notObserved));
        setsAgree = false;
    }

    if (setsAgree) {
        logln("OK " + message + ' ' + toPattern(empirical));
    }
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class TransliteratorTest method TestCoverage.
// ======================================================================
// These tests are not mirrored (yet) in icu4c at
// source/test/intltest/transtst.cpp
// ======================================================================
/**
 * Smoke test that touches a few Transliterator entry points: the "Null" transliterator,
 * and the source/target set getters on both a filtered built-in transliterator and a
 * rule-based one. The sets are only logged, not checked.
 */
@Test
public void TestCoverage() {
    // NullTransliterator: expect() is given the same string as input and expected output.
    Transliterator nullTranslit = Transliterator.getInstance("Null", Transliterator.FORWARD);
    expect(nullTranslit, "a", "a");

    // Source and target sets of a built-in transliterator with a filter applied.
    Transliterator latinGreek = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD);
    latinGreek.setFilter(new UnicodeSet("[A-Z]"));
    UnicodeSet filteredSource = latinGreek.getSourceSet();
    logln("source = " + filteredSource);
    UnicodeSet filteredTarget = latinGreek.getTargetSet();
    logln("target = " + filteredTarget);

    // Source and target sets of a transliterator built from rules.
    Transliterator fromRules =
            Transliterator.createFromRules("x", "(.) > &Any-Hex($1);", Transliterator.FORWARD);
    UnicodeSet ruleSource = fromRules.getSourceSet();
    logln("source = " + ruleSource);
    UnicodeSet ruleTarget = fromRules.getTargetSet();
    logln("target = " + ruleTarget);
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class TransliteratorTest method TestSourceTargetSet2.
/**
 * Checks getSourceSet() and getTargetSet() of several rule-built transliterators against
 * sets collected by actually running the transliterators.
 *
 * <p>Outline:
 * <ol>
 *   <li>Build a pool of test strings ({@code disorderedMarks}) from the CanonicalIterator
 *       results for every code point that has an NFD decomposition, from
 *       source-plus-trail combinations grouped by lead code point, and from each
 *       non-starter combined with U+0345 and U+0323. Finally every code point
 *       0..0x10FFFF is added and the set is frozen.</li>
 *   <li>For each rule string, create a transliterator, transform every test string, and
 *       pass changed input/output pairs for which {@code isAtomic} returns true to
 *       {@code addSourceTarget}.</li>
 *   <li>Compare the collected sets with the transliterator's own source and target sets
 *       via {@code assertEquals(String, UnicodeSet, UnicodeSet, SetAssert)}.</li>
 * </ol>
 */
@Test
public void TestSourceTargetSet2() {
Normalizer2 nfc = Normalizer2.getNFCInstance();
Normalizer2 nfd = Normalizer2.getNFDInstance();
// Normalizer2 nfkd = Normalizer2.getInstance(null, "nfkd", Mode.DECOMPOSE);
// UnicodeSet nfkdSource = new UnicodeSet();
// UnicodeSet nfkdTarget = new UnicodeSet();
// for (int i = 0; i <= 0x10FFFF; ++i) {
// if (nfkd.isInert(i)) {
// continue;
// }
// nfkdSource.add(i);
// String t = nfkd.getDecomposition(i);
// if (t != null) {
// nfkdTarget.addAll(t);
// } else {
// nfkdTarget.add(i);
// }
// }
// nfkdSource.freeze();
// nfkdTarget.freeze();
// logln("NFKD Source: " + nfkdSource.toPattern(false));
// logln("NFKD Target: " + nfkdTarget.toPattern(false));
// Both maps are keyed by the first code point of an NFD decomposition:
// leadToTrail collects the trailing part of those decompositions,
// leadToSources collects the code points that decompose to a string starting with that lead.
// NOTE(review): both maps are constructed with the raw type UnicodeMap.
UnicodeMap<UnicodeSet> leadToTrail = new UnicodeMap();
UnicodeMap<UnicodeSet> leadToSources = new UnicodeMap();
UnicodeSet nonStarters = new UnicodeSet("[:^ccc=0:]").freeze();
CanonicalIterator can = new CanonicalIterator("");
// Pool of test strings; despite the name it ends up holding every code point as well (see below).
UnicodeSet disorderedMarks = new UnicodeSet();
// Pass 1: walk every code point that has an NFD decomposition.
for (int i = 0; i <= 0x10FFFF; ++i) {
String s = nfd.getDecomposition(i);
if (s == null) {
continue;
}
// Add every string the CanonicalIterator yields for the decomposition.
can.setSource(s);
for (String t = can.next(); t != null; t = can.next()) {
disorderedMarks.add(t);
}
// if s has two code points, (or more), add the lead/trail information
int first = s.codePointAt(0);
int firstCount = Character.charCount(first);
// The decomposition is a single code point, so there is no trail to record.
if (s.length() == firstCount)
continue;
String trailString = s.substring(firstCount);
// add all the trail characters
// Only trails containing at least one member of nonStarters are recorded.
if (!nonStarters.containsSome(trailString)) {
continue;
}
UnicodeSet trailSet = leadToTrail.get(first);
if (trailSet == null) {
leadToTrail.put(first, trailSet = new UnicodeSet());
}
// add remaining trails
trailSet.addAll(trailString);
// add the sources
UnicodeSet sourcesSet = leadToSources.get(first);
if (sourcesSet == null) {
leadToSources.put(first, sourcesSet = new UnicodeSet());
}
sourcesSet.add(i);
}
// Pass 2: for each lead, append each recorded trail to each recorded source and add the
// CanonicalIterator results, except those that still end with the trail.
for (Entry<String, UnicodeSet> x : leadToSources.entrySet()) {
String lead = x.getKey();
UnicodeSet sources = x.getValue();
// Pass 1 creates an entry in leadToTrail whenever it creates one in leadToSources for the same lead.
UnicodeSet trailSet = leadToTrail.get(lead);
for (String source : sources) {
for (String trail : trailSet) {
can.setSource(source + trail);
for (String t = can.next(); t != null; t = can.next()) {
if (t.endsWith(trail))
continue;
disorderedMarks.add(t);
}
}
}
}
// Pass 3: combine each non-starter with U+0345 (prefixed) and U+0323 (suffixed).
for (String s : nonStarters) {
disorderedMarks.add("\u0345" + s);
disorderedMarks.add(s + "\u0323");
// Diagnostic only: logs "??" when NFC of U+01EC followed by s no longer starts with U+01EC.
String xx = nfc.normalize("\u01EC" + s);
if (!xx.startsWith("\u01EC")) {
logln("??");
}
}
// for (int i = 0; i <= 0x10FFFF; ++i) {
// String s = nfkd.getDecomposition(i);
// if (s != null) {
// disorderedMarks.add(s);
// disorderedMarks.add(nfc.normalize(s));
// addDerivedStrings(nfc, disorderedMarks, s);
// }
// s = nfd.getDecomposition(i);
// if (s != null) {
// disorderedMarks.add(s);
// }
// if (!nfc.isInert(i)) {
// if (i == 0x00C0) {
// logln("\u00C0");
// }
// can.setSource(s+"\u0334");
// for (String t = can.next(); t != null; t = can.next()) {
// addDerivedStrings(nfc, disorderedMarks, t);
// }
// can.setSource(s+"\u0345");
// for (String t = can.next(); t != null; t = can.next()) {
// addDerivedStrings(nfc, disorderedMarks, t);
// }
// can.setSource(s+"\u0323");
// for (String t = can.next(); t != null; t = can.next()) {
// addDerivedStrings(nfc, disorderedMarks, t);
// }
// }
// }
// The size is logged before the full code point range is added on the next line.
logln("Test cases: " + disorderedMarks.size());
disorderedMarks.addAll(0, 0x10FFFF).freeze();
logln("isInert \u0104 " + nfc.isInert('\u0104'));
// Each row is { rule text, extra test set }. The second element is null in every row and is
// only referenced by the commented-out code inside the loop below.
Object[][] rules = { { ":: [:sc=COMMON:] any-name;", null }, { ":: [:Greek:] hex-any/C;", null }, { ":: [:Greek:] any-hex/C;", null }, { ":: [[:Mn:][:Me:]] remove;", null }, { ":: [[:Mn:][:Me:]] null;", null }, { ":: lower;", null }, { ":: upper;", null }, { ":: title;", null }, { ":: CaseFold;", null }, { ":: NFD;", null }, { ":: NFC;", null }, { ":: NFKD;", null }, { ":: NFKC;", null }, { ":: [[:Mn:][:Me:]] NFKD;", null }, { ":: Latin-Greek;", null }, { ":: [:Latin:] NFKD;", null }, { ":: NFKD;", null }, { ":: NFKD;\n" + ":: [[:Mn:][:Me:]] remove;\n" + ":: NFC;", null } };
for (Object[] rulex : rules) {
String rule = (String) rulex[0];
Transliterator trans = Transliterator.createFromRules("temp", rule, Transliterator.FORWARD);
UnicodeSet actualSource = trans.getSourceSet();
UnicodeSet actualTarget = trans.getTargetSet();
UnicodeSet empiricalSource = new UnicodeSet();
UnicodeSet empiricalTarget = new UnicodeSet();
// Newlines in multi-line rules are replaced by tabs so the label stays on one log line.
String ruleDisplay = rule.replace("\n", "\t\t");
UnicodeSet toTest = disorderedMarks;
// if (rulex[1] != null) {
// toTest = new UnicodeSet(disorderedMarks);
// toTest.addAll((UnicodeSet) rulex[1]);
// }
// NFD form of U+0104; used only to log when that particular test string is reached.
String test = nfd.normalize("\u0104");
boolean DEBUG = true;
// count is incremented in the DEBUG section below and never read.
@SuppressWarnings("unused") int // for debugging
count = 0;
for (String s : toTest) {
if (s.equals(test)) {
logln(test);
}
String t = trans.transform(s);
// Only inputs that the transliterator actually changes contribute to the empirical sets.
if (!s.equals(t)) {
if (!isAtomic(s, t, trans)) {
// NOTE(review): second call with the result discarded; looks like a debugging leftover — confirm before removing.
isAtomic(s, t, trans);
continue;
}
// }
if (DEBUG) {
if (!actualSource.containsAll(s)) {
count++;
}
if (!actualTarget.containsAll(t)) {
count++;
}
}
addSourceTarget(s, empiricalSource, t, empiricalTarget);
}
}
assertEquals("getSource(" + ruleDisplay + ")", empiricalSource, actualSource, SetAssert.MISSING_OK);
assertEquals("getTarget(" + ruleDisplay + ")", empiricalTarget, actualTarget, SetAssert.MISSING_OK);
}
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class UnicodeMap method composeWith.
/**
 * Composes the contents of another map into this one, one value at a time.
 *
 * <p>For every value returned by {@code other.getAvailableValues()}, the keys that
 * {@code other} maps to that value are handed, together with the value and the composer,
 * to the three-argument {@code composeWith} overload.
 *
 * @param other    map whose values are composed into this map
 * @param composer passed through unchanged to the three-argument overload
 * @return this map, to allow chaining
 */
public UnicodeMap<T> composeWith(UnicodeMap<T> other, Composer<T> composer) {
    for (T otherValue : other.getAvailableValues()) {
        composeWith(other.keySet(otherValue), otherValue, composer);
    }
    return this;
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class UnicodeMap method toString.
/**
 * Renders this map as text, one {@code key=value} or {@code value=keys} entry per line.
 *
 * <p>With a {@code null} comparator the output is listed by range: each range with a
 * non-null value is written as {@code hex(start)[-hex(end)]=value}, followed by one line
 * per entry of {@code stringMap} when that map is present.
 *
 * <p>With a non-null comparator the output is grouped by value: the values are collected
 * into a {@code TreeSet} ordered by the comparator, and each is written as
 * {@code value=keySet(value)}.
 *
 * @param collected comparator used to order the values, or {@code null} for the range listing
 * @return the textual form described above
 */
public String toString(Comparator<T> collected) {
    StringBuilder text = new StringBuilder();

    // Grouped-by-value form.
    if (collected != null) {
        for (T value : values(new TreeSet<T>(collected))) {
            UnicodeSet keys = keySet(value);
            text.append(value).append("=").append(keys.toString()).append("\n");
        }
        return text.toString();
    }

    // Range form: range i runs from transitions[i] up to transitions[i + 1] - 1.
    for (int i = 0; i < length - 1; ++i) {
        T value = values[i];
        if (value != null) {
            int first = transitions[i];
            int last = transitions[i + 1] - 1;
            text.append(Utility.hex(first));
            if (first != last) {
                text.append("-").append(Utility.hex(last));
            }
            text.append("=").append(value.toString()).append("\n");
        }
    }

    // String keys are listed after the code point ranges.
    if (stringMap != null) {
        for (String key : stringMap.keySet()) {
            text.append(Utility.hex(key)).append("=").append(stringMap.get(key).toString()).append("\n");
        }
    }
    return text.toString();
}
Aggregations