use of android.icu.text.UnicodeSet in project j2objc by google.
the class BasicTest method TestCompare.
/**
 * Cross-checks the comparison helpers against their reference implementations.
 *
 * <p>For every pair of unescaped entries in {@code strings} and every entry in {@code opt},
 * the sign of {@code norm_compare} must match the sign of {@code ref_norm_compare}; when the
 * option set includes {@code Normalizer.COMPARE_IGNORE_CASE}, the sign of
 * {@code UTF16.StringComparator.compare} must also match {@code ref_case_compare}.
 * The same checks are repeated for the characters collected from the canonical start sets of
 * U+0049, U+0069, U+0130 and U+0131, each compared with its decomposition.
 * Finally a few {@code getDecomposition}, {@code getRawDecomposition} and {@code composePair}
 * results are checked on the NFC instance and on a {@code FilteredNormalizer2} wrapping it.
 *
 * <p>All mismatches are reported through {@code errln}.
 */
@Test
public void TestCompare() {
    // Size the array from the fixture itself, so growing strings[] can never overflow it.
    final int count = strings.length;
    String[] s = new String[count];
    // create the UnicodeStrings
    for (int i = 0; i < count; ++i) {
        s[i] = Utility.unescape(strings[i]);
    }
    UTF16.StringComparator comp = new UTF16.StringComparator();
    // test them each with each other
    for (int i = 0; i < count; ++i) {
        for (int j = i; j < count; ++j) {
            for (int k = 0; k < opt.length; ++k) {
                // test Normalizer::compare
                int result = norm_compare(s[i], s[j], opt[k].options);
                int refResult = ref_norm_compare(s[i], s[j], opt[k].options);
                if (sign(result) != sign(refResult)) {
                    errln("Normalizer::compare( " + i + ", " + j + ", " + k + "( " + opt[k].name + "))=" + result + " should be same sign as " + refResult);
                }
                // test UnicodeString::caseCompare - same internal implementation function
                if ((opt[k].options & Normalizer.COMPARE_IGNORE_CASE) != 0) {
                    configureCaseComparator(comp,
                            (opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) != 0,
                            (opt[k].options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0);
                    result = comp.compare(s[i], s[j]);
                    refResult = ref_case_compare(s[i], s[j], opt[k].options);
                    if (sign(result) != sign(refResult)) {
                        // Label names the API actually under test here (the comparator, not Normalizer).
                        errln("UTF16.compare( " + i + ", " + j + ", " + k + "( " + opt[k].name + "))=" + result + " should be same sign as " + refResult);
                    }
                }
            }
        }
    }
    // test cases with i and I to make sure Turkic works
    char[] iI = new char[] { 0x49, 0x69, 0x130, 0x131 };
    UnicodeSet set = new UnicodeSet(), iSet = new UnicodeSet();
    Normalizer2Impl nfcImpl = Norm2AllModes.getNFCInstance().impl;
    nfcImpl.ensureCanonIterData();
    // collect all sets into one for contiguous output
    for (char turkicI : iI) {
        if (nfcImpl.getCanonStartSet(turkicI, iSet)) {
            set.addAll(iSet);
        }
    }
    // test all of these precomposed characters
    Normalizer2 nfcNorm2 = Normalizer2.getNFCInstance();
    UnicodeSetIterator it = new UnicodeSetIterator(set);
    int c;
    // Stop at the first string entry: only single code points are tested here.
    while (it.next() && (c = it.codepoint) != UnicodeSetIterator.IS_STRING) {
        String s1 = UTF16.valueOf(c);
        String s2 = nfcNorm2.getDecomposition(c);
        for (int k = 0; k < opt.length; ++k) {
            // test Normalizer::compare
            int result = norm_compare(s1, s2, opt[k].options);
            int refResult = ref_norm_compare(s1, s2, opt[k].options);
            if (sign(result) != sign(refResult)) {
                errln("Normalizer.compare(U+" + hex(c) + " with its NFD, " + opt[k].name + ")" + signString(result) + " should be " + signString(refResult));
            }
            // test UnicodeString::caseCompare - same internal implementation function
            if ((opt[k].options & Normalizer.COMPARE_IGNORE_CASE) != 0) {
                configureCaseComparator(comp,
                        (opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) != 0,
                        (opt[k].options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0);
                result = comp.compare(s1, s2);
                refResult = ref_case_compare(s1, s2, opt[k].options);
                if (sign(result) != sign(refResult)) {
                    errln("UTF16.compare(U+" + hex(c) + " with its NFD, " + opt[k].name + ")" + signString(result) + " should be " + signString(refResult));
                }
            }
        }
    }
    // test getDecomposition() for some characters that do not decompose
    if (nfcNorm2.getDecomposition(0x20) != null || nfcNorm2.getDecomposition(0x4e00) != null || nfcNorm2.getDecomposition(0x20002) != null) {
        errln("NFC.getDecomposition() returns TRUE for characters which do not have decompositions");
    }
    // test getRawDecomposition() for some characters that do not decompose
    if (nfcNorm2.getRawDecomposition(0x20) != null || nfcNorm2.getRawDecomposition(0x4e00) != null || nfcNorm2.getRawDecomposition(0x20002) != null) {
        errln("getRawDecomposition() returns TRUE for characters which do not have decompositions");
    }
    // test composePair() for some pairs of characters that do not compose
    if (nfcNorm2.composePair(0x20, 0x301) >= 0 || nfcNorm2.composePair(0x61, 0x305) >= 0 || nfcNorm2.composePair(0x1100, 0x1160) >= 0 || nfcNorm2.composePair(0xac00, 0x11a7) >= 0) {
        errln("NFC.composePair() incorrectly composes some pairs of characters");
    }
    // test FilteredNormalizer2.getDecomposition()
    UnicodeSet filter = new UnicodeSet("[^\u00a0-\u00ff]");
    FilteredNormalizer2 fn2 = new FilteredNormalizer2(nfcNorm2, filter);
    if (fn2.getDecomposition(0xe4) != null || !"A\u0304".equals(fn2.getDecomposition(0x100))) {
        errln("FilteredNormalizer2(NFC, ^A0-FF).getDecomposition() failed");
    }
    // test FilteredNormalizer2.getRawDecomposition()
    if (fn2.getRawDecomposition(0xe4) != null || !"A\u0304".equals(fn2.getRawDecomposition(0x100))) {
        errln("FilteredNormalizer2(NFC, ^A0-FF).getRawDecomposition() failed");
    }
    // test FilteredNormalizer2::composePair()
    if (0x100 != fn2.composePair(0x41, 0x304)
            // unfiltered result of the next pair: U+1E08
            || fn2.composePair(0xc7, 0x301) >= 0) {
        errln("FilteredNormalizer2(NFC, ^A0-FF).composePair() failed");
    }
}

/** Switches {@code comp} to case-insensitive mode with the given folding and ordering flags. */
private static void configureCaseComparator(UTF16.StringComparator comp, boolean excludeSpecialI, boolean codePointOrder) {
    comp.setIgnoreCase(true, excludeSpecialI
            ? UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I
            : UTF16.StringComparator.FOLD_CASE_DEFAULT);
    comp.setCodePointCompare(codePointOrder);
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class BasicTest method TestFilteredAppend.
/**
 * Checks that append(), normalizeSecondAndAppend() and normalize() on a
 * FilteredNormalizer2 wrapping the NFC instance all yield the same expected text
 * for the same two input pieces.
 */
@Test
public void TestFilteredAppend() {
    final String first = "a\u0313a";
    final String second = "\u0301\u0313";
    final String expected = "a\u0313á\u0313";

    FilteredNormalizer2 filteredNfc = new FilteredNormalizer2(
            Normalizer2.getNFCInstance(),
            new UnicodeSet("[^\u00a0-\u00ff\u0310-\u031f]"));

    // Append two strings that each contain a character outside the filter set.
    StringBuilder buffer = new StringBuilder(first);
    String appended = filteredNfc.append(buffer, second).toString();
    assertEquals("append()", expected, appended);

    // Same, and also normalize the second string (buffer is reset to the first piece).
    buffer.setLength(0);
    buffer.append(first);
    String normalizedAndAppended = filteredNfc.normalizeSecondAndAppend(buffer, second).toString();
    assertEquals("normalizeSecondAndAppend()", expected, normalizedAndAppended);

    // Normalizer2.normalize(String) uses spanQuickCheckYes() and normalizeSecondAndAppend().
    assertEquals("normalize()", expected, filteredNfc.normalize(first + second));
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class UnicodeSetTest method TestAPI.
/**
 * Walks through the basic UnicodeSet API and reports every mismatch via {@code errln}.
 *
 * <p>Covered in order: the default constructor, clear()/isEmpty(), size(),
 * contains(first, last), the set/addAll/complement/complementAll sequence ported from the
 * InversionList test, the bitsToSet(setToBits()) round trip, the string-based
 * complement/complementAll/removeAll/retainAll/retain/remove operations,
 * containsNone/containsSome, hashCode(), and addAll(Collection)/addAllTo(Collection).
 *
 * <p>Several of the later checks return immediately on failure, since the steps after them
 * build on the set contents produced so far.
 */
@Test
public void TestAPI() {
    // default ct
    UnicodeSet set = new UnicodeSet();
    if (!set.isEmpty() || set.getRangeCount() != 0) {
        errln("FAIL, set should be empty but isn't: " + set);
    }
    // clear(), isEmpty()
    set.add('a');
    if (set.isEmpty()) {
        errln("FAIL, set shouldn't be empty but is: " + set);
    }
    set.clear();
    if (!set.isEmpty()) {
        errln("FAIL, set should be empty but isn't: " + set);
    }
    // size()
    set.clear();
    if (set.size() != 0) {
        errln("FAIL, size should be 0, but is " + set.size() + ": " + set);
    }
    set.add('a');
    if (set.size() != 1) {
        errln("FAIL, size should be 1, but is " + set.size() + ": " + set);
    }
    set.add('1', '9');
    if (set.size() != 10) {
        errln("FAIL, size should be 10, but is " + set.size() + ": " + set);
    }
    set.clear();
    set.complement();
    if (set.size() != 0x110000) {
        errln("FAIL, size should be 0x110000, but is " + set.size());
    }
    // contains(first, last)
    set.clear();
    set.applyPattern("[A-Y 1-8 b-d l-y]");
    for (int i = 0; i < set.getRangeCount(); ++i) {
        int rangeStart = set.getRangeStart(i);
        int rangeEnd = set.getRangeEnd(i);
        if (!set.contains(rangeStart, rangeEnd)) {
            errln("FAIL, should contain " + (char) rangeStart + '-' + (char) rangeEnd + " but doesn't: " + set);
        }
        // Widening the range by one on either side must fall outside the set.
        if (set.contains((char) (rangeStart - 1), rangeEnd)) {
            errln("FAIL, shouldn't contain " + (char) (rangeStart - 1) + '-' + (char) rangeEnd + " but does: " + set);
        }
        if (set.contains(rangeStart, (char) (rangeEnd + 1))) {
            errln("FAIL, shouldn't contain " + (char) rangeStart + '-' + (char) (rangeEnd + 1) + " but does: " + set);
        }
    }
    // Ported InversionList test.
    UnicodeSet a = new UnicodeSet((char) 3, (char) 10);
    UnicodeSet b = new UnicodeSet((char) 7, (char) 15);
    UnicodeSet c = new UnicodeSet();
    logln("a [3-10]: " + a);
    logln("b [7-15]: " + b);
    c.set(a);
    c.addAll(b);
    UnicodeSet exp = new UnicodeSet((char) 3, (char) 15);
    if (c.equals(exp)) {
        logln("c.set(a).add(b): " + c);
    } else {
        errln("FAIL: c.set(a).add(b) = " + c + ", expect " + exp);
    }
    c.complement();
    exp.set((char) 0, (char) 2);
    exp.add((char) 16, UnicodeSet.MAX_VALUE);
    if (c.equals(exp)) {
        logln("c.complement(): " + c);
    } else {
        errln(Utility.escape("FAIL: c.complement() = " + c + ", expect " + exp));
    }
    // Complementing a second time must restore the previous contents.
    c.complement();
    exp.set((char) 3, (char) 15);
    if (c.equals(exp)) {
        logln("c.complement(): " + c);
    } else {
        errln("FAIL: c.complement() = " + c + ", expect " + exp);
    }
    c.set(a);
    c.complementAll(b);
    exp.set((char) 3, (char) 6);
    exp.add((char) 11, (char) 15);
    if (c.equals(exp)) {
        logln("c.set(a).complement(b): " + c);
    } else {
        errln("FAIL: c.set(a).complement(b) = " + c + ", expect " + exp);
    }
    exp.set(c);
    c = bitsToSet(setToBits(c));
    if (c.equals(exp)) {
        logln("bitsToSet(setToBits(c)): " + c);
    } else {
        errln("FAIL: bitsToSet(setToBits(c)) = " + c + ", expect " + exp);
    }
    // Additional tests for coverage JB#2118
    // UnicodeSet::complement(class UnicodeString const &)
    // UnicodeSet::complementAll(class UnicodeString const &)
    // UnicodeSet::containsNone(class UnicodeSet const &)
    // UnicodeSet::containsNone(long,long)
    // UnicodeSet::containsSome(class UnicodeSet const &)
    // UnicodeSet::containsSome(long,long)
    // UnicodeSet::removeAll(class UnicodeString const &)
    // UnicodeSet::retain(long)
    // UnicodeSet::retainAll(class UnicodeString const &)
    // UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &)
    // UnicodeSetIterator::getString(void)
    set.clear();
    set.complement("ab");
    exp.applyPattern("[{ab}]");
    if (!set.equals(exp)) {
        errln("FAIL: complement(\"ab\")");
        return;
    }
    UnicodeSetIterator iset = new UnicodeSetIterator(set);
    if (!iset.next() || iset.codepoint != UnicodeSetIterator.IS_STRING) {
        errln("FAIL: UnicodeSetIterator.next/IS_STRING");
    } else if (!iset.string.equals("ab")) {
        errln("FAIL: UnicodeSetIterator.string");
    }
    set.add((char) 0x61, (char) 0x7A);
    set.complementAll("alan");
    exp.applyPattern("[{ab}b-kmo-z]");
    if (!set.equals(exp)) {
        errln("FAIL: complementAll(\"alan\")");
        return;
    }
    exp.applyPattern("[a-z]");
    if (set.containsNone(exp)) {
        errln("FAIL: containsNone(UnicodeSet)");
    }
    if (!set.containsSome(exp)) {
        errln("FAIL: containsSome(UnicodeSet)");
    }
    exp.applyPattern("[aln]");
    if (!set.containsNone(exp)) {
        errln("FAIL: containsNone(UnicodeSet)");
    }
    if (set.containsSome(exp)) {
        errln("FAIL: containsSome(UnicodeSet)");
    }
    if (set.containsNone((char) 0x61, (char) 0x7A)) {
        errln("FAIL: containsNone(char, char)");
    }
    if (!set.containsSome((char) 0x61, (char) 0x7A)) {
        errln("FAIL: containsSome(char, char)");
    }
    if (!set.containsNone((char) 0x41, (char) 0x5A)) {
        errln("FAIL: containsNone(char, char)");
    }
    if (set.containsSome((char) 0x41, (char) 0x5A)) {
        errln("FAIL: containsSome(char, char)");
    }
    set.removeAll("liu");
    exp.applyPattern("[{ab}b-hj-kmo-tv-z]");
    if (!set.equals(exp)) {
        errln("FAIL: removeAll(\"liu\")");
        return;
    }
    set.retainAll("star");
    exp.applyPattern("[rst]");
    if (!set.equals(exp)) {
        errln("FAIL: retainAll(\"star\")");
        return;
    }
    set.retain((char) 0x73);
    exp.applyPattern("[s]");
    if (!set.equals(exp)) {
        errln("FAIL: retain('s')");
        return;
    }
    // ICU 2.6 coverage tests
    // public final UnicodeSet retain(String s);
    // public final UnicodeSet remove(int c);
    // public final UnicodeSet remove(String s);
    // public int hashCode();
    set.applyPattern("[a-z{ab}{cd}]");
    set.retain("cd");
    exp.applyPattern("[{cd}]");
    if (!set.equals(exp)) {
        errln("FAIL: retain(\"cd\")");
        return;
    }
    set.applyPattern("[a-z{ab}{cd}]");
    set.remove((char) 0x63);
    exp.applyPattern("[abd-z{ab}{cd}]");
    if (!set.equals(exp)) {
        errln("FAIL: remove('c')");
        return;
    }
    set.remove("cd");
    exp.applyPattern("[abd-z{ab}]");
    if (!set.equals(exp)) {
        errln("FAIL: remove(\"cd\")");
        return;
    }
    // set and exp are equal at this point, so their hash codes must agree.
    if (set.hashCode() != exp.hashCode()) {
        errln("FAIL: hashCode() unequal");
    }
    exp.clear();
    if (set.hashCode() == exp.hashCode()) {
        errln("FAIL: hashCode() equal");
    }
    {
        // Cover addAll(Collection) and addAllTo(Collection)
        // Seems that there is a bug in addAll(Collection) operation
        // Ram also add a similar test to UtilityTest.java
        logln("Testing addAll(Collection) ... ");
        String[] array = { "a", "b", "c", "de" };
        List<String> list = Arrays.asList(array);
        Set<String> aset = new HashSet<String>(list);
        logln(" *** The source set's size is: " + aset.size());
        set.clear();
        set.addAll(aset);
        if (set.size() != aset.size()) {
            errln("FAIL: After addAll, the UnicodeSet size expected " + aset.size() + ", " + set.size() + " seen instead!");
        } else {
            logln("OK: After addAll, the UnicodeSet size got " + set.size());
        }
        List<String> list2 = new ArrayList<String>();
        set.addAllTo(list2);
        // verify the result
        log(" *** The elements are: ");
        String s = set.toPattern(true);
        logln(s);
        for (String element : list2) {
            log(element + " ");
        }
        // a new line
        logln("");
    }
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class UnicodeSetTest method toPatternAux.
/**
 * Builds a set from the range {@code start..end} and passes it to {@code checkPat}
 * together with a hexadecimal label describing that range.
 *
 * @param start lower bound handed to {@code UnicodeSet.add(start, end)}
 * @param end upper bound handed to {@code UnicodeSet.add(start, end)}
 * @return whatever {@code checkPat} returns for the label and the set
 */
public boolean toPatternAux(int start, int end) {
    // Integer.toString is used because Utility.hex doesn't handle ints
    StringBuilder label = new StringBuilder("0x");
    label.append(Integer.toString(start, 16).toUpperCase());
    if (start != end) {
        // Only a real range gets the "..0x<end>" suffix.
        label.append("..0x").append(Integer.toString(end, 16).toUpperCase());
    }
    UnicodeSet range = new UnicodeSet();
    range.add(start, end);
    return checkPat(label.toString(), range);
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class UnicodeSetTest method checkRoundTrip.
/**
 * Basic consistency check for a set: copies made through the iterator (both modes of
 * {@code copyWithIterator}) and sets rebuilt from {@code toPattern(false)} and
 * {@code toPattern(true)} must each compare equal to the original via {@code checkEqual}.
 */
void checkRoundTrip(UnicodeSet s) {
    // Captured up front, before any iterator copy is made.
    final String plainPattern = s.toPattern(false);

    // iterator copy, flag off
    checkEqual(s, copyWithIterator(s, false), "iterator roundtrip");
    // try range
    checkEqual(s, copyWithIterator(s, true), "iterator roundtrip");

    // pattern round trips
    checkEqual(s, new UnicodeSet(plainPattern), "toPattern(false)");
    final String escapedPattern = s.toPattern(true);
    checkEqual(s, new UnicodeSet(escapedPattern), "toPattern(true)");
}
Aggregations