use of android.icu.text.UnicodeSetIterator in project j2objc by google.
the class UnicodeSetTest method TestAPI.
/**
 * Exercises the core UnicodeSet API in sequence: emptiness and size, range
 * containment, the ported inversion-list set algebra, the string-based
 * mutators (complement/complementAll/removeAll/retainAll/retain/remove),
 * hashCode, and the Collection bridges addAll/addAllTo.
 *
 * The checks share the mutable sets {@code set} and {@code exp}, so the
 * order of the statements matters. Checks that later steps build on abort
 * the test with {@code return} after reporting the failure.
 */
@Test
public void TestAPI() {
    // default ct
    UnicodeSet set = new UnicodeSet();
    if (!set.isEmpty() || set.getRangeCount() != 0) {
        errln("FAIL, set should be empty but isn't: " + set);
    }

    // clear(), isEmpty()
    set.add('a');
    if (set.isEmpty()) {
        errln("FAIL, set shouldn't be empty but is: " + set);
    }
    set.clear();
    if (!set.isEmpty()) {
        errln("FAIL, set should be empty but isn't: " + set);
    }

    // size()
    set.clear();
    if (set.size() != 0) {
        errln("FAIL, size should be 0, but is " + set.size() + ": " + set);
    }
    set.add('a');
    if (set.size() != 1) {
        errln("FAIL, size should be 1, but is " + set.size() + ": " + set);
    }
    set.add('1', '9');
    if (set.size() != 10) {
        errln("FAIL, size should be 10, but is " + set.size() + ": " + set);
    }
    // Complementing the empty set must yield every code point.
    set.clear();
    set.complement();
    if (set.size() != 0x110000) {
        errln("FAIL, size should be 0x110000, but is " + set.size());
    }

    // contains(first, last)
    // Each stored range must be contained exactly: widening it by one code
    // point at either end must make contains() fail.
    set.clear();
    set.applyPattern("[A-Y 1-8 b-d l-y]");
    for (int i = 0; i < set.getRangeCount(); ++i) {
        int a = set.getRangeStart(i);
        int b = set.getRangeEnd(i);
        if (!set.contains(a, b)) {
            errln("FAIL, should contain " + (char) a + '-' + (char) b + " but doesn't: " + set);
        }
        if (set.contains((char) (a - 1), b)) {
            errln("FAIL, shouldn't contain " + (char) (a - 1) + '-' + (char) b + " but does: " + set);
        }
        if (set.contains(a, (char) (b + 1))) {
            errln("FAIL, shouldn't contain " + (char) a + '-' + (char) (b + 1) + " but does: " + set);
        }
    }

    // Ported InversionList test.
    UnicodeSet a = new UnicodeSet((char) 3, (char) 10);
    UnicodeSet b = new UnicodeSet((char) 7, (char) 15);
    UnicodeSet c = new UnicodeSet();
    logln("a [3-10]: " + a);
    logln("b [7-15]: " + b);
    // Union of [3-10] and [7-15] is [3-15].
    c.set(a);
    c.addAll(b);
    UnicodeSet exp = new UnicodeSet((char) 3, (char) 15);
    if (c.equals(exp)) {
        logln("c.set(a).add(b): " + c);
    } else {
        errln("FAIL: c.set(a).add(b) = " + c + ", expect " + exp);
    }
    // Complement of [3-15] is [0-2] plus [16-MAX_VALUE].
    c.complement();
    exp.set((char) 0, (char) 2);
    exp.add((char) 16, UnicodeSet.MAX_VALUE);
    if (c.equals(exp)) {
        logln("c.complement(): " + c);
    } else {
        errln(Utility.escape("FAIL: c.complement() = " + c + ", expect " + exp));
    }
    // Complementing again must restore [3-15].
    c.complement();
    exp.set((char) 3, (char) 15);
    if (c.equals(exp)) {
        logln("c.complement(): " + c);
    } else {
        errln("FAIL: c.complement() = " + c + ", expect " + exp);
    }
    // complementAll(b) keeps what is in exactly one of the two sets: [3-6] and [11-15].
    c.set(a);
    c.complementAll(b);
    exp.set((char) 3, (char) 6);
    exp.add((char) 11, (char) 15);
    if (c.equals(exp)) {
        logln("c.set(a).complement(b): " + c);
    } else {
        errln("FAIL: c.set(a).complement(b) = " + c + ", expect " + exp);
    }
    // A round trip through the setToBits/bitsToSet helpers must reproduce the set.
    exp.set(c);
    c = bitsToSet(setToBits(c));
    if (c.equals(exp)) {
        logln("bitsToSet(setToBits(c)): " + c);
    } else {
        errln("FAIL: bitsToSet(setToBits(c)) = " + c + ", expect " + exp);
    }

    // Additional tests for coverage JB#2118
    // UnicodeSet::complement(class UnicodeString const &)
    // UnicodeSet::complementAll(class UnicodeString const &)
    // UnicodeSet::containsNone(class UnicodeSet const &)
    // UnicodeSet::containsNone(long,long)
    // UnicodeSet::containsSome(class UnicodeSet const &)
    // UnicodeSet::containsSome(long,long)
    // UnicodeSet::removeAll(class UnicodeString const &)
    // UnicodeSet::retain(long)
    // UnicodeSet::retainAll(class UnicodeString const &)
    // UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &)
    // UnicodeSetIterator::getString(void)
    set.clear();
    set.complement("ab");
    exp.applyPattern("[{ab}]");
    if (!set.equals(exp)) {
        errln("FAIL: complement(\"ab\")");
        return;
    }

    // The only element is the string "ab", so the iterator must report IS_STRING.
    UnicodeSetIterator iset = new UnicodeSetIterator(set);
    if (!iset.next() || iset.codepoint != UnicodeSetIterator.IS_STRING) {
        errln("FAIL: UnicodeSetIterator.next/IS_STRING");
    } else if (!iset.string.equals("ab")) {
        errln("FAIL: UnicodeSetIterator.string");
    }

    set.add((char) 0x61, (char) 0x7A);
    set.complementAll("alan");
    exp.applyPattern("[{ab}b-kmo-z]");
    if (!set.equals(exp)) {
        errln("FAIL: complementAll(\"alan\")");
        return;
    }

    // set overlaps [a-z] ...
    exp.applyPattern("[a-z]");
    if (set.containsNone(exp)) {
        errln("FAIL: containsNone(UnicodeSet)");
    }
    if (!set.containsSome(exp)) {
        errln("FAIL: containsSome(UnicodeSet)");
    }
    // ... but shares nothing with [aln], the letters just complemented away.
    exp.applyPattern("[aln]");
    if (!set.containsNone(exp)) {
        errln("FAIL: containsNone(UnicodeSet)");
    }
    if (set.containsSome(exp)) {
        errln("FAIL: containsSome(UnicodeSet)");
    }

    // Same checks through the (start, end) overloads: a-z overlaps, A-Z does not.
    if (set.containsNone((char) 0x61, (char) 0x7A)) {
        errln("FAIL: containsNone(char, char)");
    }
    if (!set.containsSome((char) 0x61, (char) 0x7A)) {
        errln("FAIL: containsSome(char, char)");
    }
    if (!set.containsNone((char) 0x41, (char) 0x5A)) {
        errln("FAIL: containsNone(char, char)");
    }
    if (set.containsSome((char) 0x41, (char) 0x5A)) {
        errln("FAIL: containsSome(char, char)");
    }

    set.removeAll("liu");
    exp.applyPattern("[{ab}b-hj-kmo-tv-z]");
    if (!set.equals(exp)) {
        errln("FAIL: removeAll(\"liu\")");
        return;
    }

    set.retainAll("star");
    exp.applyPattern("[rst]");
    if (!set.equals(exp)) {
        errln("FAIL: retainAll(\"star\")");
        return;
    }

    set.retain((char) 0x73);
    exp.applyPattern("[s]");
    if (!set.equals(exp)) {
        errln("FAIL: retain('s')");
        return;
    }

    // ICU 2.6 coverage tests
    // public final UnicodeSet retain(String s);
    // public final UnicodeSet remove(int c);
    // public final UnicodeSet remove(String s);
    // public int hashCode();
    set.applyPattern("[a-z{ab}{cd}]");
    set.retain("cd");
    exp.applyPattern("[{cd}]");
    if (!set.equals(exp)) {
        errln("FAIL: retain(\"cd\")");
        return;
    }

    set.applyPattern("[a-z{ab}{cd}]");
    set.remove((char) 0x63);
    exp.applyPattern("[abd-z{ab}{cd}]");
    if (!set.equals(exp)) {
        errln("FAIL: remove('c')");
        return;
    }

    set.remove("cd");
    exp.applyPattern("[abd-z{ab}]");
    if (!set.equals(exp)) {
        errln("FAIL: remove(\"cd\")");
        return;
    }

    // Equal sets must hash equally; this set and the empty set are expected to differ.
    if (set.hashCode() != exp.hashCode()) {
        errln("FAIL: hashCode() unequal");
    }
    exp.clear();
    if (set.hashCode() == exp.hashCode()) {
        errln("FAIL: hashCode() equal");
    }

    {
        // Cover addAll(Collection) and addAllTo(Collection)
        // Seems that there is a bug in addAll(Collection) operation
        // Ram also add a similar test to UtilityTest.java
        logln("Testing addAll(Collection) ... ");
        String[] array = { "a", "b", "c", "de" };
        List<String> list = Arrays.asList(array);
        Set<String> aset = new HashSet<String>(list);
        logln(" *** The source set's size is: " + aset.size());

        set.clear();
        set.addAll(aset);
        if (set.size() != aset.size()) {
            errln("FAIL: After addAll, the UnicodeSet size expected " + aset.size() + ", " + set.size() + " seen instead!");
        } else {
            logln("OK: After addAll, the UnicodeSet size got " + set.size());
        }

        List<String> list2 = new ArrayList<String>();
        set.addAllTo(list2);

        // verify the result
        log(" *** The elements are: ");
        String s = set.toPattern(true);
        logln(s);
        for (String element : list2) {
            log(element + " ");
        }
        // a new line
        logln("");
    }
}
use of android.icu.text.UnicodeSetIterator in project j2objc by google.
the class UnicodeSetTest method copyWithIterator.
/**
 * Builds a copy of {@code s} by walking it with a UnicodeSetIterator and
 * adding every reported element to a fresh set.
 *
 * @param s the set to copy
 * @param withRange if true, advance with {@code nextRange()} and add whole
 *        code point ranges; if false, advance with {@code next()} and add
 *        one code point at a time
 * @return the newly built set
 */
UnicodeSet copyWithIterator(UnicodeSet s, boolean withRange) {
    final UnicodeSet copy = new UnicodeSet();
    final UnicodeSetIterator cursor = new UnicodeSetIterator(s);
    // One loop serves both modes; the mode picks how the cursor advances
    // and which add() overload receives the code point data.
    while (withRange ? cursor.nextRange() : cursor.next()) {
        if (cursor.codepoint == UnicodeSetIterator.IS_STRING) {
            // String elements are copied the same way in either mode.
            copy.add(cursor.string);
        } else if (withRange) {
            copy.add(cursor.codepoint, cursor.codepointEnd);
        } else {
            copy.add(cursor.codepoint);
        }
    }
    return copy;
}
use of android.icu.text.UnicodeSetIterator in project j2objc by google.
the class CollationDataBuilder method suppressContractions.
/**
 * Rewrites the trie entry of each code point in {@code set} so that it no
 * longer refers to context-dependent data, then marks the builder modified.
 * Does nothing (and does not set {@code modified}) when {@code set} is empty.
 *
 * The walk ends as soon as the iterator reports a string element
 * (IS_STRING); only code point elements seen before that are processed.
 *
 * @param set the code points to process
 */
void suppressContractions(UnicodeSet set) {
    if (set.isEmpty()) {
        return;
    }
    for (UnicodeSetIterator cursor = new UnicodeSetIterator(set);
            cursor.next() && cursor.codepoint != UnicodeSetIterator.IS_STRING; ) {
        final int c = cursor.codepoint;
        final int current = trie.get(c);
        if (current == Collation.FALLBACK_CE32) {
            // Entry falls back to the base data: copy it into this builder
            // (passing false to copyFromBaseCE32), but only when the base
            // value carries context.
            final int baseCe32 = base.getFinalCE32(base.getCE32(c));
            if (Collation.ce32HasContext(baseCe32)) {
                trie.set(c, copyFromBaseCE32(c, baseCe32, false));
            }
        } else if (isBuilderContextCE32(current)) {
            // Simply abandon the list of ConditionalCE32.
            // The caller will copy this builder in the end,
            // eliminating unreachable data.
            trie.set(c, getConditionalCE32ForCE32(current).ce32);
            contextChars.remove(c);
        }
    }
    modified = true;
}
use of android.icu.text.UnicodeSetIterator in project j2objc by google.
the class CollationDataBuilder method optimize.
/**
 * For each code point in {@code set} whose trie entry is still the fallback
 * marker, copies the base collator's value into this builder's trie
 * (passing true to copyFromBaseCE32), then marks the builder modified.
 * Does nothing (and does not set {@code modified}) when {@code set} is empty.
 *
 * The walk ends as soon as the iterator reports a string element
 * (IS_STRING); only code point elements seen before that are processed.
 *
 * @param set the code points to process
 */
void optimize(UnicodeSet set) {
    if (!set.isEmpty()) {
        for (UnicodeSetIterator cursor = new UnicodeSetIterator(set);
                cursor.next() && cursor.codepoint != UnicodeSetIterator.IS_STRING; ) {
            final int c = cursor.codepoint;
            if (trie.get(c) != Collation.FALLBACK_CE32) {
                // Already has its own entry in this builder; leave it alone.
                continue;
            }
            final int baseCe32 = base.getFinalCE32(base.getCE32(c));
            trie.set(c, copyFromBaseCE32(c, baseCe32, true));
        }
        modified = true;
    }
}
use of android.icu.text.UnicodeSetIterator in project j2objc by google.
the class WriteCharts method print.
/**
 * Writes an HTML chart of sample transliterations for one transliterator to
 * {@code transliteration/chart_<id>.html} (any '/' in the ID becomes '_').
 *
 * Two passes fill a sorted map of table cells:
 * - Round trip: each code point of the source set is transliterated and
 *   transliterated back with the inverse.
 * - Completeness: each target-set code point that was never produced as a
 *   single-code-point result (excluding U+0100..U+02FF) is run through the
 *   inverse only.
 *
 * The first char of every map key is a bit set that decides which section
 * and headings the entry is printed under:
 * 1 = target not in target set, 2 = return not in source set,
 * 4 = return differs from source, 8 = completeness return not in source set,
 * 16 = private use characters involved, 0x80 = completeness entry,
 * 0x100 = dual entry (no code in this method currently produces it).
 *
 * @param testSet UnicodeSet pattern of the characters to chart; if empty,
 *        the script named by the source half of the ID is used
 * @param rawId the transliterator ID to instantiate
 * @throws IOException if the output file cannot be resolved or opened
 */
public static void print(String testSet, String rawId) throws IOException {
    System.out.println("Processing " + rawId);
    Transliterator t = Transliterator.getInstance(rawId);
    String id = t.getID();

    // clean up IDs. Ought to be API for getting source, target, variant
    int minusPos = id.indexOf('-');
    String source = id.substring(0, minusPos);
    String target = id.substring(minusPos + 1);
    int slashPos = target.indexOf('/');
    if (slashPos >= 0) {
        target = target.substring(0, slashPos);
    }

    // check that the source is a script
    if (testSet.equals("")) {
        int[] scripts = UScript.getCode(source);
        if (scripts == null) {
            System.out.println("FAILED: " + Transliterator.getDisplayName(id) + " does not have a script as the source");
            return;
        } else {
            testSet = "[:" + source + ":]";
            if (source.equalsIgnoreCase("katakana")) {
                testSet = "[" + testSet + "\u30FC]";
                printSet(testSet);
            }
        }
    }
    UnicodeSet sourceSet = new UnicodeSet(testSet);

    // check that the target is a script
    int[] scripts = UScript.getCode(target);
    if (scripts == null) {
        target = "[:Latin:]";
    } else {
        target = "[:" + target + ":]";
    }
    UnicodeSet targetSet = new UnicodeSet(target);

    Transliterator inverse = t.getInverse();

    // iterate through script
    System.out.println("Transliterating " + sourceSet.toPattern(true) + " with " + Transliterator.getDisplayName(id));

    UnicodeSet leftOverSet = new UnicodeSet(targetSet);
    UnicodeSet privateUse = new UnicodeSet("[:private use:]");

    // Sorted so that entries come out grouped by their leading flag char.
    Map<String, String> map = new TreeMap<String, String>();

    UnicodeSet targetSetPlusAnyways = new UnicodeSet(targetSet);
    targetSetPlusAnyways.addAll(okAnyway);

    UnicodeSet sourceSetPlusAnyways = new UnicodeSet(sourceSet);
    sourceSetPlusAnyways.addAll(okAnyway);

    // Pass 1: round trip of every source code point.
    UnicodeSetIterator usi = new UnicodeSetIterator(sourceSet);
    while (usi.next()) {
        int j = usi.codepoint;
        if (j == UnicodeSetIterator.IS_STRING) {
            // The chart covers single characters only; a multi-character
            // string element has no code point to hand to UTF16.valueOf.
            continue;
        }

        String ss = UTF16.valueOf(j);
        String ts = t.transliterate(ss);
        char group = 0;
        if (!targetSetPlusAnyways.containsAll(ts)) {
            group |= 1;
        }
        // A target character that was produced is not a completeness gap.
        if (UTF16.countCodePoint(ts) == 1) {
            leftOverSet.remove(UTF16.charAt(ts, 0));
        }
        String rt = inverse.transliterate(ts);
        if (!sourceSetPlusAnyways.containsAll(rt)) {
            group |= 2;
        } else if (!ss.equals(rt)) {
            group |= 4;
        }

        if (!privateUse.containsNone(ts) || !privateUse.containsNone(rt)) {
            group |= 16;
        }

        map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.NFKD)) + "\u0000" + ss, "<td class='s'>" + ss + "<br><tt>" + hex(ss) + "</tt></td><td class='t'>" + ts + "<br><tt>" + hex(ts) + "</tt></td><td class='r'>" + rt + "<br><tt>" + hex(rt) + "</tt></td>");

        // NOTE: a pairwise "Duals" pass (group 0x100) used to be sketched
        // here but is disabled; the output loop below still knows how to
        // label such entries.
    }

    // Pass 2: completeness.
    // remove extended & IPA
    leftOverSet.remove(0x0100, 0x02FF);

    usi.reset(leftOverSet);
    while (usi.next()) {
        int j = usi.codepoint;

        String ts = UTF16.valueOf(j);
        String rt = inverse.transliterate(ts);
        char group = 0x80;

        if (!sourceSetPlusAnyways.containsAll(rt)) {
            group |= 8;
        }
        if (!privateUse.containsNone(rt)) {
            group |= 16;
        }

        map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ts, Normalizer.NFKD)) + ts, "<td class='s'>-</td><td class='t'>" + ts + "<br><tt>" + hex(ts) + "</tt></td><td class='r'>" + rt + "<br><tt>" + hex(rt) + "</tt></td>");
    }

    // make file name and open (no BOM is written)
    File f = new File("transliteration/chart_" + id.replace('/', '_') + ".html");
    String filename = f.getCanonicalFile().toString();
    PrintWriter out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(filename), "UTF-8"));
    System.out.println("Writing " + filename);

    try {
        out.println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">");
        out.println("<HTML><HEAD>");
        out.println("<META content=\"text/html; charset=utf-8\" http-equiv=Content-Type></HEAD>");
        out.println("<link rel='stylesheet' href='http://www.unicode.org/charts/uca/charts.css' type='text/css'>");

        out.println("<BODY>");
        out.println("<h1>Transliteration Samples for '" + Transliterator.getDisplayName(id) + "'</h1>");
        out.println("<p>This file illustrates the transliterations of " + Transliterator.getDisplayName(id) + ".");
        out.println("The samples are mechanically generated, and only include single characters");
        out.println("from the source set. Thus it will <i>not</i> contain examples where the transliteration");
        out.println("depends on the context around the character. For a more detailed -- and interactive -- example, see the");
        out.println("<a href='http://demo.icu-project.org/icu-bin/translit'>Transliteration Demo</a></p><hr>");

        // set up the headers
        // Each table row holds columnCount entries separated by a spacer
        // cell, so the header row needs a spacer between every pair of
        // header groups to line up with the data rows.
        int columnCount = 3;
        String headerBase = "<th>Source</th><th>Target</th><th>Return</th>";
        StringBuilder headerCells = new StringBuilder(headerBase);
        for (int i = 1; i < columnCount; ++i) {
            headerCells.append("<th> </th>").append(headerBase);
        }
        String headers = headerCells.toString();

        String tableHeader = "<p><table border='1'><tr>" + headers + "</tr>";
        String tableFooter = "</table></p>";
        out.println("<h2>Round Trip</h2>");
        out.println(tableHeader);

        char lastGroup = 0;
        int count = 0;
        int column = 0;
        for (Map.Entry<String, String> entry : map.entrySet()) {
            String key = entry.getKey();
            char group = key.charAt(0);
            // Start a new table when the flag group changes, or after a
            // run of entries in the same group, to keep tables short.
            if (group != lastGroup || count++ > 50) {
                lastGroup = group;
                count = 0;
                if (column != 0) {
                    out.println("</tr>");
                    column = 0;
                }
                out.println(tableFooter);

                if ((group & 0x100) != 0) {
                    out.println("<hr><h2>Duals</h2>");
                } else if ((group & 0x80) != 0) {
                    out.println("<hr><h2>Completeness</h2>");
                } else {
                    out.println("<hr><h2>Round Trip</h2>");
                }
                if ((group & 16) != 0) {
                    out.println("<h3>Errors: Contains Private Use Characters</h3>");
                }
                if ((group & 8) != 0) {
                    out.println("<h3>Possible Errors: Return not in Source Set</h3>");
                }
                if ((group & 4) != 0) {
                    out.println("<h3>One-Way Mapping: Return not equal to Source</h3>");
                }
                if ((group & 2) != 0) {
                    out.println("<h3>Errors: Return not in Source Set</h3>");
                }
                if ((group & 1) != 0) {
                    out.println("<h3>Errors: Target not in Target Set</h3>");
                }

                out.println(tableHeader);
                column = 0;
            }
            String value = entry.getValue();
            if (column++ == 0) {
                out.print("<tr>");
            } else {
                out.print("<th> </th>");
            }
            out.println(value);
            if (column == columnCount) {
                out.println("</tr>");
                column = 0;
            }
        }
        // Close a partially filled last row.
        if (column != 0) {
            out.println("</tr>");
        }
        out.println(tableFooter + "</BODY></HTML>");
    } finally {
        out.close();
    }
}
Aggregations