use of android.icu.text.CollationElementIterator in project j2objc by google.
the class CollationIteratorTest method TestUnicodeChar.
/**
 * Runs the CollationElementIterator back-and-forth check (next/previous) over
 * every char value from 1 up to 0xFFFF, feeding the iterator one chunk of
 * defined characters at a time.
 */
@Test
public void TestUnicodeChar() {
    RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
    StringBuffer text = new StringBuffer("\u0e4d\u0e4e\u0e4f");

    // Smoke test on a short fixed string before the full sweep.
    CollationTest.backAndForth(this, en_us.getCollationElementIterator(text.toString()));

    char c = 1;
    while (c < 0xFFFE) {
        text.setLength(0);

        // Gather the defined chars up to, but not including, the next multiple of 0xFF.
        for (; c % 0xFF != 0; c++) {
            if (UCharacter.isDefined(c)) {
                text.append(c);
            }
        }
        // The multiple of 0xFF itself closes the chunk.
        if (UCharacter.isDefined(c)) {
            text.append(c);
        }
        // 0xFFFF is the largest char value; stepping past it would wrap to 0.
        if (c != 0xFFFF) {
            c++;
        }

        CollationTest.backAndForth(this, en_us.getCollationElementIterator(text.toString()));
    }
}
use of android.icu.text.CollationElementIterator in project j2objc by google.
the class CollationIteratorTest method TestSearchCollatorElements.
/**
 * Walks a fixed text forwards and then backwards with a CollationElementIterator
 * and compares the offset reported before each step with a table of expected
 * offsets. The walk is repeated for the "root" collator and for
 * "root@collation=search", so it covers iterator behavior with normalization on
 * and jamo tailoring, among other things.
 *
 * Note: the expected offsets are sensitive to changes of the root collator,
 * for example whether the ae-ligature maps to three CEs (as in the DUCET)
 * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
 * They are also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
 * For example, the DUCET's artificial secondary CE in the ae-ligature
 * may map to two 32-bit iterator CEs (as it did until ICU 52).
 */
@Test
public void TestSearchCollatorElements() {
    String tsceText =
            " \uAC00" +              // simple LV Hangul
            " \uAC01" +              // simple LVT Hangul
            " \uAC0F" +              // LVTT, last jamo expands for search
            " \uAFFF" +              // LLVVVTT, every jamo expands for search
            " \u1100\u1161\u11A8" +  // 0xAC01 as conjoining jamo
            " \u3131\u314F\u3131" +  // 0xAC01 as compatibility jamo
            " \u1100\u1161\u11B6" +  // 0xAC0F as conjoining jamo; last expands for search
            " \u1101\u1170\u11B6" +  // 0xAFFF as conjoining jamo; all expand for search
            " \u00E6" +              // small letter ae, expands
            " \u1E4D" +              // small letter o with tilde and acute, decomposes
            " ";
    int[] rootStandardOffsets = { 0, 1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 6, 7, 8, 8, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
            26, 27, 28, 28, 28, 29 };
    int[] rootSearchOffsets = { 0, 1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 6, 6, 7, 8, 8, 8, 8, 8, 8, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20, 21, 22, 22, 23, 23, 23, 24, 24, 25, 26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
            26, 27, 28, 28, 28, 29 };

    // Parallel tables: locale name and the offsets expected for that locale.
    final String[] localeNames = { "root", "root@collation=search" };
    final int[][] expectedByLocale = { rootStandardOffsets, rootSearchOffsets };

    for (int caseIdx = 0; caseIdx < localeNames.length; caseIdx++) {
        String localeString = localeNames[caseIdx];
        int[] expected = expectedByLocale[caseIdx];
        String errPrefix = "Error: in locale " + localeString;

        ULocale uloc = new ULocale(localeString);
        RuleBasedCollator col;
        try {
            col = (RuleBasedCollator) Collator.getInstance(uloc);
        } catch (Exception e) {
            errln(errPrefix + ", err in Collator.getInstance");
            continue;
        }

        CollationElementIterator uce = col.getCollationElementIterator(tsceText);
        int expectedCount = expected.length;
        int idx = 0;
        int ce;

        // Forward pass: the terminating NULLORDER step is counted as an element too.
        do {
            int at = uce.getOffset();
            ce = uce.next();
            logln(String.format("(%s) offset=%2d ce=%08x\n", localeString, at, ce));
            if (ce == 0) {
                errln(errPrefix + ", CEIterator next() returned element 0");
            }
            if (idx >= expectedCount) {
                errln(errPrefix + ", CEIterator next() returned more elements than expected");
            } else {
                if (at != expected[idx]) {
                    errln(errPrefix + ", expected CEIterator next()->getOffset " + expected[idx] + ", got " + at);
                }
                idx++;
            }
        } while (ce != CollationElementIterator.NULLORDER);
        if (idx < expectedCount) {
            errln(errPrefix + ", CEIterator next() returned fewer elements than expected");
        }

        // Backward pass: start at the end of the text and consume the table in reverse.
        uce.setOffset(tsceText.length());
        idx = expectedCount;
        do {
            int at = uce.getOffset();
            ce = uce.previous();
            if (ce == 0) {
                errln(errPrefix + ", CEIterator previous() returned element 0");
            }
            if (idx <= 0) {
                errln(errPrefix + ", CEIterator previous() returned more elements than expected");
            } else {
                idx--;
                if (at != expected[idx]) {
                    errln(errPrefix + ", expected CEIterator previous()->getOffset " + expected[idx] + ", got " + at);
                }
            }
        } while (ce != CollationElementIterator.NULLORDER);
        if (idx > 0) {
            errln(errPrefix + ", CEIterator previous() returned fewer elements than expected");
        }
    }
}
use of android.icu.text.CollationElementIterator in project j2objc by google.
the class CollationIteratorTest method TestDiscontiguous.
/**
 * Tests discontiguous contractions.
 *
 * A collator is built from a rule string that defines the contractions AB, ABC,
 * X + U+0300 and X + U+0300 + U+0315. For each source string, the collation
 * elements produced by iterating the whole string must equal the concatenation
 * of the collation elements of the space-separated pieces of the matching
 * target string. After the comparison the source iterator is reset and run
 * through the generic back-and-forth check.
 *
 * The first mismatch is reported with errln and ends the test. Any exception
 * (including failure to build the collator) is reported with warnln.
 */
@Test
public void TestDiscontiguous() {
    String rulestr = "&z < AB < X\u0300 < ABC < X\u0300\u0315";
    String[] src = {
            // AB / ABC interrupted by D or U+0315
            "ADB", "ADBC", "A\u0315B", "A\u0315BC",
            // base character blocked
            "XD\u0300", "XD\u0300\u0315",
            // non blocking combining character
            "X\u0319\u0300", "X\u0319\u0300\u0315",
            // blocking combining character
            "X\u0314\u0300", "X\u0314\u0300\u0315",
            // contraction prefix
            "ABDC", "AB\u0315C", "X\u0300D\u0315", "X\u0300\u0319\u0315", "X\u0300\u031A\u0315",
            // ends not with a contraction character
            "X\u0319\u0300D", "X\u0319\u0300\u0315D", "X\u0300D\u0315D", "X\u0300\u0319\u0315D", "X\u0300\u031A\u0315D" };
    // Expected segmentation of each source string; pieces are separated by a single space.
    String[] tgt = {
            // AB / ABC interrupted by D or U+0315
            "A D B", "A D BC", "A \u0315 B", "A \u0315 BC",
            // base character blocked
            "X D \u0300", "X D \u0300\u0315",
            // non blocking combining character
            "X\u0300 \u0319", "X\u0300\u0315 \u0319",
            // blocking combining character
            "X \u0314 \u0300", "X \u0314 \u0300\u0315",
            // contraction prefix
            "AB DC", "AB \u0315 C", "X\u0300 D \u0315", "X\u0300\u0315 \u0319", "X\u0300 \u031A \u0315",
            // ends not with a contraction character
            "X\u0300 \u0319D", "X\u0300\u0315 \u0319D", "X\u0300 D\u0315D", "X\u0300\u0315 \u0319D", "X\u0300 \u031A\u0315D" };
    try {
        RuleBasedCollator coll = new RuleBasedCollator(rulestr);
        CollationElementIterator iter = coll.getCollationElementIterator("");
        CollationElementIterator resultiter = coll.getCollationElementIterator("");
        for (int count = 0; count < src.length; count++) {
            iter.setText(src[count]);
            int s = 0;
            while (s < tgt[count].length()) {
                // Cut the next space-delimited piece out of the target string.
                int e = tgt[count].indexOf(' ', s);
                if (e < 0) {
                    e = tgt[count].length();
                }
                String resultstr = tgt[count].substring(s, e);
                resultiter.setText(resultstr);
                // Every CE of the piece must be the next CE of the source iterator.
                int ce = resultiter.next();
                while (ce != CollationElementIterator.NULLORDER) {
                    if (ce != iter.next()) {
                        errln("Discontiguous contraction test mismatch at " + count);
                        return;
                    }
                    ce = resultiter.next();
                }
                s = e + 1;
            }
            iter.reset();
            CollationTest.backAndForth(this, iter);
        }
    } catch (Exception e) {
        warnln("Error running discontiguous tests " + e.toString());
    }
}
use of android.icu.text.CollationElementIterator in project j2objc by google.
the class CollationIteratorTest method TestPrevious.
/**
 * Test for CollationElementIterator.previous(): runs the back-and-forth check
 * against several collators (default en_US, contracting rules, expanding rules,
 * both combined, Thai, and Japanese).
 *
 * @bug 4108758 - Make sure it works with contracting characters
 */
@Test
public void TestPrevious() {
    RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
    // A basic test to see if it's working at all
    CollationTest.backAndForth(this, en_us.getCollationElementIterator(test1));

    // Contracting character sequence
    RuleBasedCollator contracting;
    try {
        contracting = new RuleBasedCollator("&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
    } catch (Exception e) {
        errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
        return;
    }
    CollationTest.backAndForth(this, contracting.getCollationElementIterator("abchdcba"));

    // Expanding character sequence
    RuleBasedCollator expanding;
    try {
        expanding = new RuleBasedCollator("&a < b < c/abd < d");
    } catch (Exception e) {
        errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
        return;
    }
    CollationTest.backAndForth(this, expanding.getCollationElementIterator("abcd"));

    // Expansion and contraction in the same rule set
    RuleBasedCollator mixed;
    try {
        mixed = new RuleBasedCollator("&a < b < c/aba < d < z < ch");
    } catch (Exception e) {
        errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
        return;
    }
    CollationTest.backAndForth(this, mixed.getCollationElementIterator("abcdbchdc"));

    // Thai locale collator
    Collator thai;
    try {
        thai = Collator.getInstance(new Locale("th", "TH", ""));
    } catch (Exception e) {
        errln("Couldn't create a collator");
        return;
    }
    CollationTest.backAndForth(this,
            ((RuleBasedCollator) thai).getCollationElementIterator("\u0e41\u0e02\u0e41\u0e02\u0e27abc"));

    // Japanese locale collator
    Collator japanese;
    try {
        japanese = Collator.getInstance(new Locale("ja", "JP", ""));
    } catch (Exception e) {
        errln("Couldn't create Japanese collator\n");
        return;
    }
    CollationTest.backAndForth(this,
            ((RuleBasedCollator) japanese).getCollationElementIterator("\u0061\u30CF\u3099\u30FC"));
}
use of android.icu.text.CollationElementIterator in project j2objc by google.
the class CollationAPITest method TestElemIter.
/**
 * This tests the CollationElementIterator related APIs.
 * - creation of a CollationElementIterator object (from a String, a
 *   CharacterIterator and a UCharacterIterator)
 * - equals() between iterators
 * - iterating forward
 * - resetting the iterator index
 * - requesting the order properties (primary, secondary or tertiary)
 */
@Test
public void TestElemIter() {
    RuleBasedCollator rbc = (RuleBasedCollator) Collator.getInstance(Locale.ENGLISH);
    String testString1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
    String testString2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";

    // Three iterators: two over testString1 (different text sources), one over testString2.
    CollationElementIterator fromString = rbc.getCollationElementIterator(testString1);
    CharacterIterator chariter = new StringCharacterIterator(testString1);
    CollationElementIterator fromCharIter = rbc.getCollationElementIterator(chariter);
    UCharacterIterator uchariter = UCharacterIterator.getInstance(testString2);
    CollationElementIterator fromUCharIter = rbc.getCollationElementIterator(uchariter);

    if (fromString.getOffset() != 0) {
        errln("Error in getOffset for collation element iterator");
        return;
    }
    fromString.setOffset(6);
    fromString.setOffset(0);

    // ---- first pass ----
    int ceA = fromString.next();
    doAssert(!fromString.equals(fromCharIter), "The first iterator advance failed");
    int ceB = fromCharIter.next();

    // Code coverage for dummy "not designed" hashCode() which does "assert false".
    try {
        // We don't expect any particular value.
        fromString.hashCode();
    } catch (AssertionError ignored) {
        // Expected to be thrown if assertions are enabled.
    }

    // In ICU 52 and earlier the two iterators compared equal here; since ICU 53 they
    // differ (String vs. CharacterIterator), so only the offsets are compared.
    doAssert(fromString.getOffset() == fromCharIter.getOffset(), "The second iterator advance failed");
    doAssert(ceA == ceB, "The order result should be the same");

    int ceC = fromUCharIter.next();
    doAssert(CollationElementIterator.primaryOrder(ceA) == CollationElementIterator.primaryOrder(ceC),
            "The primary orders should be the same");
    doAssert(CollationElementIterator.secondaryOrder(ceA) == CollationElementIterator.secondaryOrder(ceC),
            "The secondary orders should be the same");
    doAssert(CollationElementIterator.tertiaryOrder(ceA) == CollationElementIterator.tertiaryOrder(ceC),
            "The tertiary orders should be the same");

    ceA = fromString.next();
    ceC = fromUCharIter.next();
    doAssert(CollationElementIterator.primaryOrder(ceA) == CollationElementIterator.primaryOrder(ceC),
            "The primary orders should be identical");
    doAssert(CollationElementIterator.tertiaryOrder(ceA) != CollationElementIterator.tertiaryOrder(ceC),
            "The tertiary orders should be different");

    ceA = fromString.next();
    fromUCharIter.next();
    // A former secondary-order comparison at this position was invalid under UCA and is gone.
    doAssert(ceA != CollationElementIterator.NULLORDER, "Unexpected end of iterator reached");

    // ---- second pass, after reset ----
    fromString.reset();
    fromCharIter.reset();
    fromUCharIter.reset();

    ceA = fromString.next();
    doAssert(!fromString.equals(fromCharIter), "The first iterator advance failed");
    ceB = fromCharIter.next();
    // Same ICU 53 caveat as in the first pass: compare offsets, not iterator equality.
    doAssert(fromString.getOffset() == fromCharIter.getOffset(), "The second iterator advance failed");
    doAssert(ceA == ceB, "The order result should be the same");

    ceC = fromUCharIter.next();
    doAssert(CollationElementIterator.primaryOrder(ceA) == CollationElementIterator.primaryOrder(ceC),
            "The primary orders should be the same");
    doAssert(CollationElementIterator.secondaryOrder(ceA) == CollationElementIterator.secondaryOrder(ceC),
            "The secondary orders should be the same");
    doAssert(CollationElementIterator.tertiaryOrder(ceA) == CollationElementIterator.tertiaryOrder(ceC),
            "The tertiary orders should be the same");

    ceA = fromString.next();
    fromCharIter.next();
    ceC = fromUCharIter.next();
    doAssert(CollationElementIterator.primaryOrder(ceA) == CollationElementIterator.primaryOrder(ceC),
            "The primary orders should be identical");
    doAssert(CollationElementIterator.tertiaryOrder(ceA) != CollationElementIterator.tertiaryOrder(ceC),
            "The tertiary orders should be different");

    ceA = fromString.next();
    fromUCharIter.next();
    doAssert(ceA != CollationElementIterator.NULLORDER, "Unexpected end of iterator reached");
    doAssert(!fromCharIter.equals(fromUCharIter), "The iterators should be different");
    logln("testing CollationElementIterator ends...");
}
Aggregations