Search in sources :

Example 6 with RuleBasedCollator

use of android.icu.text.RuleBasedCollator in project j2objc by google.

the class CollationThaiTest method TestDictionary.

/**
 * Read the external dictionary file, which is already in proper
 * sorted order, and confirm that the collator never compares a line as
 * following the line after it (a result of "equal" is tolerated).
 *
 * Blank lines and lines starting with '#' are skipped. At most
 * MAX_FAILURES_TO_SHOW failures are reported in detail; a negative
 * MAX_FAILURES_TO_SHOW means every failure is reported.
 */
@Test
public void TestDictionary() {
    RuleBasedCollator coll = null;
    try {
        coll = getThaiCollator();
    } catch (Exception e) {
        warnln("could not construct Thai collator");
        return;
    }
    // Read in a dictionary of Thai words
    int line = 0;
    int failed = 0;
    int wordCount = 0;
    BufferedReader in = null;
    try {
        String fileName = "riwords.txt";
        in = TestUtil.getDataReader(fileName, "UTF-8");
        //
        // Loop through each word in the dictionary and compare it to the previous
        // word. They should be in sorted order.
        //
        String lastWord = "";
        for (String word = in.readLine(); word != null; word = in.readLine()) {
            line++;
            // Skip comments and blank lines
            if (word.length() == 0 || word.charAt(0) == '#') {
                continue;
            }
            // Show the first 8 words being compared, so we can see what's happening
            ++wordCount;
            if (wordCount <= 8) {
                logln("Word " + wordCount + ": " + word);
            }
            if (lastWord.length() > 0) {
                // CollationTest.doTest isn't really set up to handle situations where
                // the result can be equal or greater than the previous, so have to skip for now.
                // Not a big deal, since we're still testing to make sure everything sorts out
                // right, just not looking at the collation keys in detail...
                // (skipped call: CollationTest.doTest(this, coll, lastWord, word, -1))
                int result = coll.compare(lastWord, word);
                if (result > 0) {
                    failed++;
                    // A negative limit means "show every failure".
                    if (MAX_FAILURES_TO_SHOW < 0 || failed <= MAX_FAILURES_TO_SHOW) {
                        String msg = "--------------------------------------------\n" + line + " compare(" + lastWord + ", " + word + ") returned " + result + ", expected -1\n";
                        CollationKey k1 = coll.getCollationKey(lastWord);
                        CollationKey k2 = coll.getCollationKey(word);
                        msg += "key1: " + CollationTest.prettify(k1) + "\n" + "key2: " + CollationTest.prettify(k2);
                        errln(msg);
                    }
                }
            }
            lastWord = word;
        }
    } catch (IOException e) {
        errln("IOException " + e.getMessage());
    } finally {
        if (in == null) {
            // NOTE: returning from a finally block discards any exception that is
            // still propagating out of the try block, so every failure to obtain the
            // reader ends up reported through this message.
            errln("Error: could not open test file. Aborting test.");
            return;
        } else {
            try {
                in.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing the reader fails.
            }
        }
    }
    if (failed != 0) {
        // Only claim truncation when a (non-negative) limit was actually in force;
        // with a negative limit every failure has already been shown above.
        if (MAX_FAILURES_TO_SHOW >= 0 && failed > MAX_FAILURES_TO_SHOW) {
            errln("Too many failures; only the first " + MAX_FAILURES_TO_SHOW + " failures were shown");
        }
        // One comparison is made per word after the first; skipped comment and
        // blank lines are counted in 'line' but are never compared.
        errln("Summary: " + failed + " of " + (wordCount - 1) + " comparisons failed");
    }
    logln("Words checked: " + wordCount);
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) CollationKey(android.icu.text.CollationKey) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) IOException(java.io.IOException) Test(org.junit.Test)

Example 7 with RuleBasedCollator

use of android.icu.text.RuleBasedCollator in project j2objc by google.

the class CollationThaiTest method TestCornerCases.

/**
 * Odd corner conditions taken from "How to Sort Thai Without Rewriting Sort",
 * by Doug Cooper, http://seasrc.th.net/paper/thaisort.zip
 *
 * The table below is a flat list of (left, relation, right) triples that is
 * handed to compareArray together with the Thai collator.
 */
@Test
public void TestCornerCases() {
    final RuleBasedCollator thaiCollator;
    try {
        thaiCollator = getThaiCollator();
    } catch (Exception e) {
        warnln("could not construct Thai collator");
        return;
    }
    final String[] cornerCases = {
        // Shorter words precede longer
        "\u0e01", "<", "\u0e01\u0e01",
        // Tone marks are considered after letters (i.e. are primary ignorable)
        "\u0e01\u0e32", "<", "\u0e01\u0e49\u0e32",
        // ditto for other over-marks
        "\u0e01\u0e32", "<", "\u0e01\u0e32\u0e4c",
        // In effect, marks are sorted after each syllable.
        "\u0e01\u0e32\u0e01\u0e49\u0e32", "<", "\u0e01\u0e48\u0e32\u0e01\u0e49\u0e32",
        // Hyphens and other punctuation follow whitespace but come before letters
        "\u0e01\u0e32", "=", "\u0e01\u0e32-",
        "\u0e01\u0e32-", "<", "\u0e01\u0e32\u0e01\u0e32",
        // Doubler follows an identical word without the doubler
        "\u0e01\u0e32", "=", "\u0e01\u0e32\u0e46",
        "\u0e01\u0e32\u0e46", "<", "\u0e01\u0e32\u0e01\u0e32",
        // TODO: beef up this case
        "\u0e24\u0e29\u0e35", "<", "\u0e24\u0e45\u0e29\u0e35",
        "\u0e26\u0e29\u0e35", "<", "\u0e26\u0e45\u0e29\u0e35",
        // Vowels reorder, should compare U+0E2D and U+0E34
        "\u0e40\u0e01\u0e2d", "<", "\u0e40\u0e01\u0e34",
        // Tones are compared after the rest of the word (e.g. primary ignorable)
        "\u0e01\u0e32\u0e01\u0e48\u0e32", "<", "\u0e01\u0e49\u0e32\u0e01\u0e32",
        // Periods are ignored entirely
        "\u0e01.\u0e01.", "<", "\u0e01\u0e32"
    };
    compareArray(thaiCollator, cornerCases);
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) IOException(java.io.IOException) Test(org.junit.Test)

Example 8 with RuleBasedCollator

use of android.icu.text.RuleBasedCollator in project j2objc by google.

the class SearchTest method TestCollator.

/**
 * Checks that a StringSearch follows a change of collator: search with
 * m_en_us_, switch to a collator built from TESTCOLLATORRULE, then switch
 * back, comparing the matches against COLLATOR[0] / COLLATOR[1] each time.
 */
@Test
public void TestCollator() {
    // test collator that thinks "o" and "p" are the same thing
    final String searchText = COLLATOR[0].text;
    final String searchPattern = COLLATOR[0].pattern;
    final StringSearch search;
    try {
        search = new StringSearch(searchPattern, new StringCharacterIterator(searchText), m_en_us_, null);
    } catch (Exception e) {
        errln("Error opening string search ");
        return;
    }
    // Baseline: matches with the default collator.
    if (!assertEqualWithStringSearch(search, COLLATOR[0])) {
        return;
    }

    // Build the tailored collator at the strength required by the second case.
    final RuleBasedCollator customCollator;
    try {
        customCollator = new RuleBasedCollator(TESTCOLLATORRULE);
        customCollator.setStrength(COLLATOR[1].strength);
    } catch (Exception e) {
        errln("Error opening rule based collator ");
        return;
    }

    // Install the tailored collator and verify the search reports it back.
    search.setCollator(customCollator);
    final boolean tailoredInstalled = search.getCollator().equals(customCollator);
    if (!tailoredInstalled) {
        errln("Error setting rule based collator");
    }
    search.reset();
    if (!assertEqualWithStringSearch(search, COLLATOR[1])) {
        return;
    }

    // Restore the original collator; the baseline expectations must hold again.
    search.setCollator(m_en_us_);
    search.reset();
    final boolean originalRestored = search.getCollator().equals(m_en_us_);
    if (!originalRestored) {
        errln("Error setting rule based collator");
    }
    if (!assertEqualWithStringSearch(search, COLLATOR[0])) {
        errln("Error searching collator test");
    }
}
Also used : StringCharacterIterator(java.text.StringCharacterIterator) RuleBasedCollator(android.icu.text.RuleBasedCollator) StringSearch(android.icu.text.StringSearch) Test(org.junit.Test)

Example 9 with RuleBasedCollator

use of android.icu.text.RuleBasedCollator in project j2objc by google.

the class SearchTest method TestContraction.

/**
 * Runs every entry of CONTRACTION through a StringSearch that uses a
 * collator built from CONTRACTIONRULE (tertiary strength, canonical
 * decomposition) and reports each entry whose matches differ from the
 * expected ones.
 */
@Test
public void TestContraction() {
    String rules = CONTRACTIONRULE;
    RuleBasedCollator collator = null;
    try {
        collator = new RuleBasedCollator(rules);
        collator.setStrength(TERTIARY);
        collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    } catch (Exception e) {
        errln("Error opening collator ");
        // Without a usable collator nothing below can be tested.
        return;
    }
    // Placeholder text and pattern; both are replaced for every test case below.
    String text = "text";
    String pattern = "pattern";
    StringSearch strsrch = null;
    try {
        strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
    } catch (Exception e) {
        errln("Error opening string search ");
        // strsrch is still null here; continuing would only dereference it in the loop.
        return;
    }
    for (int count = 0; count < CONTRACTION.length; count++) {
        text = CONTRACTION[count].text;
        pattern = CONTRACTION[count].pattern;
        strsrch.setTarget(new StringCharacterIterator(text));
        strsrch.setPattern(pattern);
        if (!assertEqualWithStringSearch(strsrch, CONTRACTION[count])) {
            errln("Error at test number " + count);
        }
    }
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) StringCharacterIterator(java.text.StringCharacterIterator) StringSearch(android.icu.text.StringSearch) Test(org.junit.Test)

Example 10 with RuleBasedCollator

use of android.icu.text.RuleBasedCollator in project j2objc by google.

the class SearchTest method TestUsingSearchCollator.

/**
 * For each locale string in the table ("root", "root@collation=search",
 * "ko@collation=search") searches the same Korean sample text for a set of
 * patterns, first forwards with next() and then backwards with previous(),
 * and checks the reported match offsets against the expected offsets.
 */
@Test
public void TestUsingSearchCollator() {
    // Sample text; the trailing number on each piece is the offset at which it starts.
    String scKoText = " " // 0
        + "\uAC00 " // 1
        + "\uAC01 " // 3
        + "\uAC0F " // 5
        + "\uAFFF " // 7
        + "\u1100\u1161\u11A8 " // 9
        + "\u1100\u1161\u1100 " // 13
        + "\u3131\u314F\u3131 " // 17
        + "\u1100\u1161\u11B6 " // 21
        + "\u1100\u1161\u1105\u1112 " // 25
        + "\u1101\u1170\u11B6 " // 30
        + "\u00E6 " // 34
        + "\u1E4D " // 36: small letter o with tilde and acute, decomposes
        + "";

    String scKoPat0 = "\uAC01";
    // 0xAC01 as conjoining jamo
    String scKoPat1 = "\u1100\u1161\u11A8";
    String scKoPat2 = "\uAC0F";
    // 0xAC0F as basic conjoining jamo
    String scKoPat3 = "\u1100\u1161\u1105\u1112";
    String scKoPat4 = "\uAFFF";
    // 0xAFFF as conjoining jamo
    String scKoPat5 = "\u1101\u1170\u11B6";

    // Expected match offsets: "Srch" tables are used with the search collators,
    // "Stnd" tables with the plain root collator.
    int[] scKoSrchOff01 = { 3, 9, 13 };
    int[] scKoSrchOff23 = { 5, 21, 25 };
    int[] scKoSrchOff45 = { 7, 30 };
    int[] scKoStndOff01 = { 3, 9 };
    int[] scKoStndOff2 = { 5, 21 };
    int[] scKoStndOff3 = { 25 };
    int[] scKoStndOff45 = { 7, 30 };

    // A pattern together with the offsets at which it is expected to match.
    class PatternAndOffsets {

        private final String pattern;

        private final int[] offsets;

        PatternAndOffsets(String pat, int[] offs) {
            this.pattern = pat;
            this.offsets = offs;
        }

        public String getPattern() {
            return pattern;
        }

        public int[] getOffsets() {
            return offsets;
        }
    }

    final PatternAndOffsets[] scKoSrchPatternsOffsets = {
        new PatternAndOffsets(scKoPat0, scKoSrchOff01),
        new PatternAndOffsets(scKoPat1, scKoSrchOff01),
        new PatternAndOffsets(scKoPat2, scKoSrchOff23),
        new PatternAndOffsets(scKoPat3, scKoSrchOff23),
        new PatternAndOffsets(scKoPat4, scKoSrchOff45),
        new PatternAndOffsets(scKoPat5, scKoSrchOff45)
    };
    final PatternAndOffsets[] scKoStndPatternsOffsets = {
        new PatternAndOffsets(scKoPat0, scKoStndOff01),
        new PatternAndOffsets(scKoPat1, scKoStndOff01),
        new PatternAndOffsets(scKoPat2, scKoStndOff2),
        new PatternAndOffsets(scKoPat3, scKoStndOff3),
        new PatternAndOffsets(scKoPat4, scKoStndOff45),
        new PatternAndOffsets(scKoPat5, scKoStndOff45)
    };

    // One table row: a locale string, the text to search, and the patterns to look for.
    class TUSCItem {

        private final String localeString;

        private final String text;

        private final PatternAndOffsets[] patternsAndOffsets;

        TUSCItem(String locStr, String txt, PatternAndOffsets[] patsAndOffs) {
            this.localeString = locStr;
            this.text = txt;
            this.patternsAndOffsets = patsAndOffs;
        }

        public String getLocaleString() {
            return localeString;
        }

        public String getText() {
            return text;
        }

        public PatternAndOffsets[] getPatternsAndOffsets() {
            return patternsAndOffsets;
        }
    }

    final TUSCItem[] tuscItems = {
        new TUSCItem("root", scKoText, scKoStndPatternsOffsets),
        new TUSCItem("root@collation=search", scKoText, scKoSrchPatternsOffsets),
        new TUSCItem("ko@collation=search", scKoText, scKoSrchPatternsOffsets)
    };

    // Placeholder pattern used only to construct the StringSearch; replaced before every search.
    String dummyPat = "a";
    for (TUSCItem item : tuscItems) {
        String localeString = item.getLocaleString();
        ULocale locale = new ULocale(localeString);
        RuleBasedCollator collator;
        try {
            collator = (RuleBasedCollator) Collator.getInstance(locale);
        } catch (Exception e) {
            errln("Error: in locale " + localeString + ", err in Collator.getInstance");
            continue;
        }
        StringCharacterIterator textIter = new StringCharacterIterator(item.getText());
        StringSearch search = new StringSearch(dummyPat, textIter, collator);
        for (PatternAndOffsets expectation : item.getPatternsAndOffsets()) {
            search.setPattern(expectation.getPattern());
            int[] expected = expectation.getOffsets();
            int expectedCount = expected.length;

            // Forward pass: matches must arrive in the order listed in 'expected'.
            search.reset();
            int index = 0;
            for (int offset = search.next(); offset != SearchIterator.DONE; offset = search.next()) {
                if (index >= expectedCount) {
                    errln("Error: in locale " + localeString + ", SearchIterator.next() returned more matches than expected");
                    continue;
                }
                if (offset != expected[index]) {
                    errln("Error: in locale " + localeString + ", expected SearchIterator.next() " + expected[index] + ", got " + offset);
                }
                index++;
            }
            if (index < expectedCount) {
                errln("Error: in locale " + localeString + ", SearchIterator.next() returned fewer matches than expected");
            }

            // Backward pass: the same offsets are expected in reverse order.
            search.reset();
            int remaining = expectedCount;
            for (int offset = search.previous(); offset != SearchIterator.DONE; offset = search.previous()) {
                if (remaining <= 0) {
                    errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned more matches than expected");
                    continue;
                }
                remaining--;
                if (offset != expected[remaining]) {
                    errln("Error: in locale " + localeString + ", expected SearchIterator.previous() " + expected[remaining] + ", got " + offset);
                }
            }
            if (remaining > 0) {
                errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned fewer matches than expected");
            }
        }
    }
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) StringCharacterIterator(java.text.StringCharacterIterator) ULocale(android.icu.util.ULocale) StringSearch(android.icu.text.StringSearch) Test(org.junit.Test)

Aggregations

RuleBasedCollator (android.icu.text.RuleBasedCollator)140 Test (org.junit.Test)124 Collator (android.icu.text.Collator)42 ULocale (android.icu.util.ULocale)26 CollationElementIterator (android.icu.text.CollationElementIterator)25 Locale (java.util.Locale)22 CollationKey (android.icu.text.CollationKey)17 StringCharacterIterator (java.text.StringCharacterIterator)16 StringSearch (android.icu.text.StringSearch)14 RawCollationKey (android.icu.text.RawCollationKey)11 ParseException (java.text.ParseException)10 UnicodeSet (android.icu.text.UnicodeSet)8 AlphabeticIndex (android.icu.text.AlphabeticIndex)6 BreakIterator (android.icu.text.BreakIterator)6 MissingResourceException (java.util.MissingResourceException)5 IOException (java.io.IOException)4 UnicodeSetIterator (android.icu.text.UnicodeSetIterator)3 UCharacterIterator (android.icu.text.UCharacterIterator)2 CharacterIterator (java.text.CharacterIterator)2 ArrayList (java.util.ArrayList)2