use of android.icu.text.Normalizer2 in project j2objc by google.
the class BasicTest method TestFilteredAppend.
/**
 * Exercises append(), normalizeSecondAndAppend() and normalize() on a
 * FilteredNormalizer2 that wraps NFC, using input in which both halves contain
 * a character that the filter pattern excludes.
 */
@Test
public void TestFilteredAppend() {
    final String first = "a\u0313a";
    final String second = "\u0301\u0313";
    // All three operations below are expected to produce this same string.
    final String expected = "a\u0313á\u0313";

    UnicodeSet filter = new UnicodeSet("[^\u00a0-\u00ff\u0310-\u031f]");
    FilteredNormalizer2 filteredNfc = new FilteredNormalizer2(Normalizer2.getNFCInstance(), filter);

    // Append two strings that each contain a character outside the filter set.
    StringBuilder buffer = new StringBuilder(first);
    String appended = filteredNfc.append(buffer, second).toString();
    assertEquals("append()", expected, appended);

    // Same again from a freshly reset buffer, and also normalize the second string.
    buffer.setLength(0);
    buffer.append(first);
    String normalizedAndAppended = filteredNfc.normalizeSecondAndAppend(buffer, second).toString();
    assertEquals("normalizeSecondAndAppend()", expected, normalizedAndAppended);

    // Normalizer2.normalize(String) uses spanQuickCheckYes() and normalizeSecondAndAppend().
    assertEquals("normalize()", expected, filteredNfc.normalize(first + second));
}
use of android.icu.text.Normalizer2 in project j2objc by google.
the class BasicTest method TestFCD.
/**
 * Coverage test for the FCD-mode Normalizer2 built from the "nfc" data:
 * checks hasBoundaryAfter(), isInert() and the implementation-level
 * getQuickCheck() for a space and for a-umlaut.
 */
@Test
public void TestFCD() {
    final char space = ' ';
    final char aUmlaut = 'ä';

    // Coverage tests.
    Normalizer2 fcdNormalizer = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.FCD);

    assertTrue("fcd.hasBoundaryAfter(space)", fcdNormalizer.hasBoundaryAfter(space));
    assertFalse("fcd.hasBoundaryAfter(ä)", fcdNormalizer.hasBoundaryAfter(aUmlaut));

    assertTrue("fcd.isInert(space)", fcdNormalizer.isInert(space));
    assertFalse("fcd.isInert(ä)", fcdNormalizer.isInert(aUmlaut));

    // This implementation method is unreachable via public API,
    // so the instance is cast down to its implementation class.
    Norm2AllModes.FCDNormalizer2 fcdImpl = (Norm2AllModes.FCDNormalizer2) fcdNormalizer;
    assertEquals("fcd impl.getQuickCheck(space)", 1, fcdImpl.getQuickCheck(space));
    assertEquals("fcd impl.getQuickCheck(ä)", 0, fcdImpl.getQuickCheck(aUmlaut));
}
use of android.icu.text.Normalizer2 in project j2objc by google.
the class BasicTest method TestGetEasyToUseInstance.
/**
 * Verifies that each convenience factory (NFC, NFD, NFKC, NFKD, NFKC_Casefold)
 * returns a normalizer of the advertised form by normalizing one fixed input
 * and comparing against the expected result for that form.
 */
@Test
public void TestGetEasyToUseInstance() {
    // Test input string:
    //   U+00A0 -> <noBreak> 0020
    //   U+00C7 0301 = 1E08 = 0043 0327 0301
    final String input = "\u00A0\u00C7\u0301";

    String nfc = Normalizer2.getNFCInstance().normalize(input);
    assertEquals(
            "getNFCInstance() did not return an NFC instance (normalizes to " + prettify(nfc) + ')',
            "\u00A0\u1E08",
            nfc);

    String nfd = Normalizer2.getNFDInstance().normalize(input);
    assertEquals(
            "getNFDInstance() did not return an NFD instance (normalizes to " + prettify(nfd) + ')',
            "\u00A0C\u0327\u0301",
            nfd);

    String nfkc = Normalizer2.getNFKCInstance().normalize(input);
    assertEquals(
            "getNFKCInstance() did not return an NFKC instance (normalizes to " + prettify(nfkc) + ')',
            " \u1E08",
            nfkc);

    String nfkd = Normalizer2.getNFKDInstance().normalize(input);
    assertEquals(
            "getNFKDInstance() did not return an NFKD instance (normalizes to " + prettify(nfkd) + ')',
            " C\u0327\u0301",
            nfkd);

    String nfkcCasefold = Normalizer2.getNFKCCasefoldInstance().normalize(input);
    assertEquals(
            "getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalizes to "
                    + prettify(nfkcCasefold) + ')',
            " \u1E09",
            nfkcCasefold);
}
use of android.icu.text.Normalizer2 in project j2objc by google.
the class BasicTest method TestGetDecomposition.
/**
 * Checks getDecomposition() on the COMPOSE_CONTIGUOUS ("fcc") normalizer built
 * from the "nfc" data: null is expected for a space, and the listed
 * decompositions for a-umlaut and for the Hangul syllable U+AC01.
 */
@Test
public void TestGetDecomposition() {
    Normalizer2 fcc = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE_CONTIGUOUS);

    // A space has no decomposition, so null is expected.
    assertEquals("fcc.getDecomposition(space) failed", null, fcc.getDecomposition(0x20));

    assertEquals("fcc.getDecomposition(a-umlaut) failed", "a\u0308", fcc.getDecomposition(0xe4));

    assertEquals(
            "fcc.getDecomposition(Hangul syllable U+AC01) failed",
            "\u1100\u1161\u11a8",
            fcc.getDecomposition(0xac01));
}
use of android.icu.text.Normalizer2 in project j2objc by google.
the class BasicTest method TestCompare.
/**
 * Cross-checks the comparison functions against their reference implementations
 * and then spot-checks a few Normalizer2 / FilteredNormalizer2 accessors.
 *
 * <ol>
 *   <li>Every pair of unescaped entries of {@code strings[]} is compared under every
 *       entry of {@code opt[]} with {@code norm_compare} and, for case-insensitive
 *       options, with {@code UTF16.StringComparator}; only the sign of each result
 *       is compared with the reference result.</li>
 *   <li>The same comparisons are run for every code point in the canonical start
 *       sets of I, i, U+0130 and U+0131 against its decomposition.</li>
 *   <li>getDecomposition(), getRawDecomposition() and composePair() are checked for
 *       inputs that must not decompose/compose, both on NFC and on a filtered NFC.</li>
 * </ol>
 *
 * Mismatches are reported through {@code errln}.
 */
@Test
public void TestCompare() {
    // Size the array from strings[] itself so it can never be too small.
    final int count = strings.length;
    String[] s = new String[count];
    // create the unescaped test strings
    for (int i = 0; i < count; ++i) {
        s[i] = Utility.unescape(strings[i]);
    }
    UTF16.StringComparator comp = new UTF16.StringComparator();
    int result, refResult;
    // test them each with each other
    for (int i = 0; i < count; ++i) {
        for (int j = i; j < count; ++j) {
            for (int k = 0; k < opt.length; ++k) {
                // test Normalizer::compare
                result = norm_compare(s[i], s[j], opt[k].options);
                refResult = ref_norm_compare(s[i], s[j], opt[k].options);
                if (sign(result) != sign(refResult)) {
                    errln("Normalizer::compare( " + i + ", " + j + ", " + k + "( " + opt[k].name + "))=" + result + " should be same sign as " + refResult);
                }
                // test the case-insensitive string comparator - same internal implementation function
                if ((opt[k].options & Normalizer.COMPARE_IGNORE_CASE) != 0) {
                    configureCaseComparator(comp, opt[k].options);
                    result = comp.compare(s[i], s[j]);
                    refResult = ref_case_compare(s[i], s[j], opt[k].options);
                    if (sign(result) != sign(refResult)) {
                        // Name the API that was actually exercised so a failure is attributed correctly.
                        errln("UTF16.StringComparator.compare( " + i + ", " + j + ", " + k + "( " + opt[k].name + "))=" + result + " should be same sign as " + refResult);
                    }
                }
            }
        }
    }
    // test cases with i and I to make sure Turkic works
    char[] iI = new char[] { 0x49, 0x69, 0x130, 0x131 };
    UnicodeSet set = new UnicodeSet(), iSet = new UnicodeSet();
    Normalizer2Impl nfcImpl = Norm2AllModes.getNFCInstance().impl;
    nfcImpl.ensureCanonIterData();
    // collect all sets into one for contiguous output
    for (int i = 0; i < iI.length; ++i) {
        if (nfcImpl.getCanonStartSet(iI[i], iSet)) {
            set.addAll(iSet);
        }
    }
    // test all of these precomposed characters
    Normalizer2 nfcNorm2 = Normalizer2.getNFCInstance();
    UnicodeSetIterator it = new UnicodeSetIterator(set);
    int c;
    // Stop at the first string element; only single code points are tested.
    while (it.next() && (c = it.codepoint) != UnicodeSetIterator.IS_STRING) {
        String s1 = UTF16.valueOf(c);
        String s2 = nfcNorm2.getDecomposition(c);
        for (int k = 0; k < opt.length; ++k) {
            // test Normalizer::compare
            result = norm_compare(s1, s2, opt[k].options);
            refResult = ref_norm_compare(s1, s2, opt[k].options);
            if (sign(result) != sign(refResult)) {
                errln("Normalizer.compare(U+" + hex(c) + " with its NFD, " + opt[k].name + ")" + signString(result) + " should be " + signString(refResult));
            }
            // test the case-insensitive string comparator - same internal implementation function
            if ((opt[k].options & Normalizer.COMPARE_IGNORE_CASE) != 0) {
                configureCaseComparator(comp, opt[k].options);
                result = comp.compare(s1, s2);
                refResult = ref_case_compare(s1, s2, opt[k].options);
                if (sign(result) != sign(refResult)) {
                    errln("UTF16.compare(U+" + hex(c) + " with its NFD, " + opt[k].name + ")" + signString(result) + " should be " + signString(refResult));
                }
            }
        }
    }
    // test getDecomposition() for some characters that do not decompose
    if (nfcNorm2.getDecomposition(0x20) != null || nfcNorm2.getDecomposition(0x4e00) != null || nfcNorm2.getDecomposition(0x20002) != null) {
        errln("NFC.getDecomposition() returns TRUE for characters which do not have decompositions");
    }
    // test getRawDecomposition() for some characters that do not decompose
    if (nfcNorm2.getRawDecomposition(0x20) != null || nfcNorm2.getRawDecomposition(0x4e00) != null || nfcNorm2.getRawDecomposition(0x20002) != null) {
        errln("getRawDecomposition() returns TRUE for characters which do not have decompositions");
    }
    // test composePair() for some pairs of characters that do not compose
    if (nfcNorm2.composePair(0x20, 0x301) >= 0 || nfcNorm2.composePair(0x61, 0x305) >= 0 || nfcNorm2.composePair(0x1100, 0x1160) >= 0 || nfcNorm2.composePair(0xac00, 0x11a7) >= 0) {
        errln("NFC.composePair() incorrectly composes some pairs of characters");
    }
    // test FilteredNormalizer2.getDecomposition()
    UnicodeSet filter = new UnicodeSet("[^\u00a0-\u00ff]");
    FilteredNormalizer2 fn2 = new FilteredNormalizer2(nfcNorm2, filter);
    if (fn2.getDecomposition(0xe4) != null || !"A\u0304".equals(fn2.getDecomposition(0x100))) {
        errln("FilteredNormalizer2(NFC, ^A0-FF).getDecomposition() failed");
    }
    // test FilteredNormalizer2.getRawDecomposition()
    if (fn2.getRawDecomposition(0xe4) != null || !"A\u0304".equals(fn2.getRawDecomposition(0x100))) {
        errln("FilteredNormalizer2(NFC, ^A0-FF).getRawDecomposition() failed");
    }
    // test FilteredNormalizer2::composePair()
    if (0x100 != fn2.composePair(0x41, 0x304)
            || fn2.composePair(0xc7, 0x301) >= 0) { // unfiltered result: U+1E08
        errln("FilteredNormalizer2(NFC, ^A0-FF).composePair() failed");
    }
}

/**
 * Sets up {@code comp} for a case-insensitive comparison matching the given
 * Normalizer compare option bits: chooses the case-folding variant from
 * FOLD_CASE_EXCLUDE_SPECIAL_I and code point ordering from COMPARE_CODE_POINT_ORDER.
 *
 * @param comp the comparator to configure; it is modified in place
 * @param options bit set of Normalizer compare options
 */
private static void configureCaseComparator(UTF16.StringComparator comp, int options) {
    int foldCaseOption = (options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0
            ? UTF16.StringComparator.FOLD_CASE_DEFAULT
            : UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I;
    comp.setIgnoreCase(true, foldCaseOption);
    comp.setCodePointCompare((options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0);
}
Aggregations