Use of android.icu.text.RuleBasedCollator in project j2objc by Google.
From class CollationThaiTest, method TestDictionary.
/**
 * Read the external dictionary file, which is already in proper
 * sorted order, and confirm that the collator never compares a word as
 * following the word on the next non-comment line.
 *
 * <p>Blank lines and lines starting with '#' are skipped. A pair fails only
 * when {@code compare(previous, current) > 0}; equal results are accepted.
 * At most {@code MAX_FAILURES_TO_SHOW} failures are reported in detail; a
 * negative limit means "show all".
 */
@Test
public void TestDictionary() {
    RuleBasedCollator coll = null;
    try {
        coll = getThaiCollator();
    } catch (Exception e) {
        warnln("could not construct Thai collator");
        return;
    }

    // Open the dictionary of Thai words. Opening is kept separate from reading so
    // that "could not open" is decided without a return inside a finally block,
    // which would silently discard whatever exception was in flight.
    BufferedReader in = null;
    try {
        String fileName = "riwords.txt";
        in = TestUtil.getDataReader(fileName, "UTF-8");
    } catch (IOException e) {
        errln("IOException " + e.getMessage());
    } catch (RuntimeException e) {
        // NOTE(review): the helper's unchecked failure modes are not visible here;
        // report the cause instead of swallowing it, then abort below.
        errln("Exception " + e);
    }
    if (in == null) {
        errln("Error: could not open test file. Aborting test.");
        return;
    }

    int line = 0;       // physical line number in the file, used in failure messages
    int failed = 0;     // number of adjacent pairs that compared out of order
    int wordCount = 0;  // number of real (non-comment, non-blank) words read
    try {
        //
        // Loop through each word in the dictionary and compare it to the previous
        // word. They should be in sorted order.
        //
        String lastWord = "";
        String word;
        while ((word = in.readLine()) != null) {
            line++;

            // Skip comments and blank lines
            if (word.length() == 0 || word.charAt(0) == '#') {
                continue;
            }

            // Show the first 8 words being compared, so we can see what's happening
            ++wordCount;
            if (wordCount <= 8) {
                logln("Word " + wordCount + ": " + word);
            }

            if (lastWord.length() > 0) {
                // CollationTest.doTest isn't really set up to handle situations where
                // the result can be equal or greater than the previous, so have to skip for now.
                // Not a big deal, since we're still testing to make sure everything sorts out
                // right, just not looking at the collation keys in detail...
                // CollationTest.doTest(this, coll, lastWord, word, -1);
                int result = coll.compare(lastWord, word);
                if (result > 0) {
                    failed++;
                    if (MAX_FAILURES_TO_SHOW < 0 || failed <= MAX_FAILURES_TO_SHOW) {
                        String msg = "--------------------------------------------\n" + line + " compare(" + lastWord + ", " + word + ") returned " + result + ", expected -1\n";
                        CollationKey k1 = coll.getCollationKey(lastWord);
                        CollationKey k2 = coll.getCollationKey(word);
                        msg += "key1: " + CollationTest.prettify(k1) + "\n" + "key2: " + CollationTest.prettify(k2);
                        errln(msg);
                    }
                }
            }
            lastWord = word;
        }
    } catch (IOException e) {
        errln("IOException " + e.getMessage());
    } finally {
        try {
            in.close();
        } catch (IOException ignored) {
            // Best effort: a failure to close the data file does not affect the verdict.
        }
    }

    if (failed != 0) {
        // A negative limit means every failure was shown, so there is nothing to truncate.
        if (MAX_FAILURES_TO_SHOW >= 0 && failed > MAX_FAILURES_TO_SHOW) {
            errln("Too many failures; only the first " + MAX_FAILURES_TO_SHOW + " failures were shown");
        }
        // Each word after the first is compared once with its predecessor, so the
        // number of comparisons is wordCount - 1 (skipped lines do not count).
        errln("Summary: " + failed + " of " + (wordCount - 1) + " comparisons failed");
    }
    logln("Words checked: " + wordCount);
}
Use of android.icu.text.RuleBasedCollator in project j2objc by Google.
From class CollationThaiTest, method TestCornerCases.
/**
 * Odd corner conditions taken from "How to Sort Thai Without Rewriting Sort",
 * by Doug Cooper, http://seasrc.th.net/paper/thaisort.zip
 *
 * <p>The expectations are written as (left, relation, right) triples, where the
 * relation is "<" or "=", and are passed to compareArray together with the
 * Thai collator. If the collator cannot be built the test only warns.
 */
@Test
public void TestCornerCases() {
    final RuleBasedCollator thai;
    try {
        thai = getThaiCollator();
    } catch (Exception e) {
        warnln("could not construct Thai collator");
        return;
    }

    final String[] cornerCases = {
        // Shorter words precede longer
        "\u0e01", "<", "\u0e01\u0e01",

        // Tone marks are considered after letters (i.e. are primary ignorable)
        "\u0e01\u0e32", "<", "\u0e01\u0e49\u0e32",

        // ditto for other over-marks
        "\u0e01\u0e32", "<", "\u0e01\u0e32\u0e4c",

        // In effect, marks are sorted after each syllable.
        "\u0e01\u0e32\u0e01\u0e49\u0e32", "<", "\u0e01\u0e48\u0e32\u0e01\u0e49\u0e32",

        // Hyphens and other punctuation follow whitespace but come before letters
        "\u0e01\u0e32", "=", "\u0e01\u0e32-",
        "\u0e01\u0e32-", "<", "\u0e01\u0e32\u0e01\u0e32",

        // Doubler follows an identical word without the doubler
        "\u0e01\u0e32", "=", "\u0e01\u0e32\u0e46",
        "\u0e01\u0e32\u0e46", "<", "\u0e01\u0e32\u0e01\u0e32",

        // TODO: beef up this case
        "\u0e24\u0e29\u0e35", "<", "\u0e24\u0e45\u0e29\u0e35",
        "\u0e26\u0e29\u0e35", "<", "\u0e26\u0e45\u0e29\u0e35",

        // Vowels reorder, should compare U+0E2D and U+0E34
        "\u0e40\u0e01\u0e2d", "<", "\u0e40\u0e01\u0e34",

        // Tones are compared after the rest of the word (e.g. primary ignorable)
        "\u0e01\u0e32\u0e01\u0e48\u0e32", "<", "\u0e01\u0e49\u0e32\u0e01\u0e32",

        // Periods are ignored entirely
        "\u0e01.\u0e01.", "<", "\u0e01\u0e32"
    };

    compareArray(thai, cornerCases);
}
Use of android.icu.text.RuleBasedCollator in project j2objc by Google.
From class SearchTest, method TestCollator.
/**
 * Swaps the collator on a live StringSearch and checks the search results
 * after each swap: first with m_en_us_ against COLLATOR[0], then with a
 * collator built from TESTCOLLATORRULE against COLLATOR[1], and finally with
 * m_en_us_ restored against COLLATOR[0] again.
 */
@Test
public void TestCollator() {
    // test collator that thinks "o" and "p" are the same thing
    final String haystack = COLLATOR[0].text;
    final String needle = COLLATOR[0].pattern;

    final StringSearch search;
    try {
        search = new StringSearch(needle, new StringCharacterIterator(haystack), m_en_us_, null);
    } catch (Exception e) {
        errln("Error opening string search ");
        return;
    }

    // Baseline: default collator must give the COLLATOR[0] results.
    if (assertEqualWithStringSearch(search, COLLATOR[0])) {
        final RuleBasedCollator custom;
        try {
            custom = new RuleBasedCollator(TESTCOLLATORRULE);
            custom.setStrength(COLLATOR[1].strength);
        } catch (Exception e) {
            errln("Error opening rule based collator ");
            return;
        }

        // Install the tailored collator and confirm the getter reflects it.
        search.setCollator(custom);
        boolean customInstalled = search.getCollator().equals(custom);
        if (!customInstalled) {
            errln("Error setting rule based collator");
        }
        search.reset();

        if (assertEqualWithStringSearch(search, COLLATOR[1])) {
            // Restore the original collator; results must match the baseline again.
            search.setCollator(m_en_us_);
            search.reset();
            boolean defaultRestored = search.getCollator().equals(m_en_us_);
            if (!defaultRestored) {
                errln("Error setting rule based collator");
            }
            if (!assertEqualWithStringSearch(search, COLLATOR[0])) {
                errln("Error searching collator test");
            }
        }
    }
}
Use of android.icu.text.RuleBasedCollator in project j2objc by Google.
From class SearchTest, method TestContraction.
/**
 * Runs every entry of CONTRACTION through a single StringSearch that uses a
 * collator built from CONTRACTIONRULE (tertiary strength, canonical
 * decomposition), re-targeting the search with each entry's text and pattern.
 *
 * <p>If either the collator or the StringSearch cannot be constructed the
 * test reports the error and stops, instead of continuing with a null
 * reference and failing later with a NullPointerException.
 */
@Test
public void TestContraction() {
    String rules = CONTRACTIONRULE;
    RuleBasedCollator collator = null;
    try {
        collator = new RuleBasedCollator(rules);
        collator.setStrength(TERTIARY);
        collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    } catch (Exception e) {
        errln("Error opening collator ");
        // Nothing below can work without a collator.
        return;
    }

    // Placeholder text and pattern; both are replaced for every test entry below.
    String text = "text";
    String pattern = "pattern";
    StringSearch strsrch = null;
    try {
        strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
    } catch (Exception e) {
        errln("Error opening string search ");
        // The loop below dereferences strsrch, so stop here.
        return;
    }

    for (int count = 0; count < CONTRACTION.length; count++) {
        text = CONTRACTION[count].text;
        pattern = CONTRACTION[count].pattern;
        strsrch.setTarget(new StringCharacterIterator(text));
        strsrch.setPattern(pattern);
        if (!assertEqualWithStringSearch(strsrch, CONTRACTION[count])) {
            errln("Error at test number " + count);
        }
    }
}
Use of android.icu.text.RuleBasedCollator in project j2objc by Google.
From class SearchTest, method TestUsingSearchCollator.
/**
 * Searches one fixed Korean/Latin sample text with the collators obtained for
 * "root", "root@collation=search" and "ko@collation=search", and checks that
 * each pattern is found at exactly the expected offsets, both when iterating
 * forward with next() and backward with previous().
 */
@Test
public void TestUsingSearchCollator() {
    /** A search pattern together with the match offsets expected for it. */
    class PatternAndOffsets {
        private final String pattern;
        private final int[] offsets;

        PatternAndOffsets(String pat, int[] offs) {
            this.pattern = pat;
            this.offsets = offs;
        }

        public String getPattern() {
            return this.pattern;
        }

        public int[] getOffsets() {
            return this.offsets;
        }
    }

    /** One scenario: a locale id, the text to search, and the per-pattern expectations. */
    class TUSCItem {
        private final String localeString;
        private final String text;
        private final PatternAndOffsets[] patternsAndOffsets;

        TUSCItem(String locStr, String txt, PatternAndOffsets[] patsAndOffs) {
            this.localeString = locStr;
            this.text = txt;
            this.patternsAndOffsets = patsAndOffs;
        }

        public String getLocaleString() {
            return this.localeString;
        }

        public String getText() {
            return this.text;
        }

        public PatternAndOffsets[] getPatternsAndOffsets() {
            return this.patternsAndOffsets;
        }
    }

    // Sample text. The trailing number on each row is the running length of the
    // text after that piece, i.e. the offset at which the next piece starts.
    final String sampleText = " " + /*01*/
            "\uAC00 " + /*03*/
            "\uAC01 " + /*05*/
            "\uAC0F " + /*07*/
            "\uAFFF " + /*09*/
            "\u1100\u1161\u11A8 " + /*13*/
            "\u1100\u1161\u1100 " + /*17*/
            "\u3131\u314F\u3131 " + /*21*/
            "\u1100\u1161\u11B6 " + /*25*/
            "\u1100\u1161\u1105\u1112 " + /*30*/
            "\u1101\u1170\u11B6 " + /*34*/
            "\u00E6 " + /*36*/
            "\u1E4D " + // small letter o with tilde and acute, decomposes
            "";

    // Patterns come in pairs: a precomposed syllable, then a jamo sequence.
    final String pat0 = "\uAC01";
    final String pat1 = "\u1100\u1161\u11A8";       // 0xAC01 as conjoining jamo
    final String pat2 = "\uAC0F";
    final String pat3 = "\u1100\u1161\u1105\u1112"; // 0xAC0F as basic conjoining jamo
    final String pat4 = "\uAFFF";
    final String pat5 = "\u1101\u1170\u11B6";       // 0xAFFF as conjoining jamo

    // Expected offsets with the search collation ...
    final int[] searchOffsets01 = { 3, 9, 13 };
    final int[] searchOffsets23 = { 5, 21, 25 };
    final int[] searchOffsets45 = { 7, 30 };
    // ... and with the standard collation.
    final int[] standardOffsets01 = { 3, 9 };
    final int[] standardOffsets2 = { 5, 21 };
    final int[] standardOffsets3 = { 25 };
    final int[] standardOffsets45 = { 7, 30 };

    final PatternAndOffsets[] searchExpectations = {
        new PatternAndOffsets(pat0, searchOffsets01),
        new PatternAndOffsets(pat1, searchOffsets01),
        new PatternAndOffsets(pat2, searchOffsets23),
        new PatternAndOffsets(pat3, searchOffsets23),
        new PatternAndOffsets(pat4, searchOffsets45),
        new PatternAndOffsets(pat5, searchOffsets45)
    };
    final PatternAndOffsets[] standardExpectations = {
        new PatternAndOffsets(pat0, standardOffsets01),
        new PatternAndOffsets(pat1, standardOffsets01),
        new PatternAndOffsets(pat2, standardOffsets2),
        new PatternAndOffsets(pat3, standardOffsets3),
        new PatternAndOffsets(pat4, standardOffsets45),
        new PatternAndOffsets(pat5, standardOffsets45)
    };

    final TUSCItem[] scenarios = {
        new TUSCItem("root", sampleText, standardExpectations),
        new TUSCItem("root@collation=search", sampleText, searchExpectations),
        new TUSCItem("ko@collation=search", sampleText, searchExpectations)
    };

    for (int s = 0; s < scenarios.length; s++) {
        final TUSCItem scenario = scenarios[s];
        final String localeString = scenario.getLocaleString();
        final ULocale locale = new ULocale(localeString);

        final RuleBasedCollator collator;
        try {
            collator = (RuleBasedCollator) Collator.getInstance(locale);
        } catch (Exception e) {
            errln("Error: in locale " + localeString + ", err in Collator.getInstance");
            continue;
        }

        // The initial pattern is only a placeholder; setPattern() replaces it below.
        final StringSearch search =
                new StringSearch("a", new StringCharacterIterator(scenario.getText()), collator);
        final String errorPrefix = "Error: in locale " + localeString + ", ";

        final PatternAndOffsets[] expectations = scenario.getPatternsAndOffsets();
        for (int p = 0; p < expectations.length; p++) {
            search.setPattern(expectations[p].getPattern());
            final int[] expected = expectations[p].getOffsets();
            final int expectedCount = expected.length;

            // Forward pass: walk the expected offsets from first to last.
            search.reset();
            int matched = 0;
            for (int pos = search.next(); pos != SearchIterator.DONE; pos = search.next()) {
                if (matched >= expectedCount) {
                    errln(errorPrefix + "SearchIterator.next() returned more matches than expected");
                    continue;
                }
                if (pos != expected[matched]) {
                    errln(errorPrefix + "expected SearchIterator.next() " + expected[matched] + ", got " + pos);
                }
                matched++;
            }
            if (matched < expectedCount) {
                errln(errorPrefix + "SearchIterator.next() returned fewer matches than expected");
            }

            // Backward pass: walk the expected offsets from last to first.
            search.reset();
            int remaining = expectedCount;
            for (int pos = search.previous(); pos != SearchIterator.DONE; pos = search.previous()) {
                if (remaining <= 0) {
                    errln(errorPrefix + "expected SearchIterator.previous() returned more matches than expected");
                    continue;
                }
                remaining--;
                if (pos != expected[remaining]) {
                    errln(errorPrefix + "expected SearchIterator.previous() " + expected[remaining] + ", got " + pos);
                }
            }
            if (remaining > 0) {
                errln(errorPrefix + "expected SearchIterator.previous() returned fewer matches than expected");
            }
        }
    }
}
Aggregations