Search in sources :

Example 66 with RuleBasedCollator

Use of android.icu.text.RuleBasedCollator in project j2objc by Google.

From class CollationRegressionTest, method Test4133509.

// @bug 4133509
//
// Sorting with java.text.CollationKey did not yield the exact expected order.
// The data is laid out as (left, relation, right) triples; every triple pairs
// an identifier with a longer identifier that it prefixes, joined by "<".
// NOTE(review): the triples are presumably interpreted by compareArray,
// which is defined outside this snippet -- confirm there.
//
@Test
public void Test4133509() /* char* par */
{
    final String[] prefixTriples = {
        // "Exception" < "ExceptionInInitializerError"
        "\u0045\u0078\u0063\u0065\u0070\u0074\u0069\u006f\u006e",
        "\u003c",
        "\u0045\u0078\u0063\u0065\u0070\u0074\u0069\u006f\u006e\u0049\u006e\u0049\u006e\u0069\u0074\u0069\u0061\u006c\u0069\u007a\u0065\u0072\u0045\u0072\u0072\u006f\u0072",
        // "Graphics" < "GraphicsEnvironment"
        "\u0047\u0072\u0061\u0070\u0068\u0069\u0063\u0073",
        "\u003c",
        "\u0047\u0072\u0061\u0070\u0068\u0069\u0063\u0073\u0045\u006e\u0076\u0069\u0072\u006f\u006e\u006d\u0065\u006e\u0074",
        // "String" < "StringBuffer"
        "\u0053\u0074\u0072\u0069\u006e\u0067",
        "\u003c",
        "\u0053\u0074\u0072\u0069\u006e\u0067\u0042\u0075\u0066\u0066\u0065\u0072"
    };
    final RuleBasedCollator usCollator = (RuleBasedCollator) Collator.getInstance(Locale.US);
    compareArray(usCollator, prefixTriples);
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) Test(org.junit.Test)

Example 67 with RuleBasedCollator

Use of android.icu.text.RuleBasedCollator in project j2objc by Google.

From class CollationRegressionTest, method Test4078588.

// @bug 4078588
//
// RuleBasedCollator breaks on "< a < bb" rule
//
// Builds a collator from a rule that orders "a" before "bb" and checks that
// compare("a", "bb") reports a negative result.
//
@Test
public void Test4078588() /* char *par */
{
    RuleBasedCollator rbc;
    try {
        rbc = new RuleBasedCollator("&9 < a < bb");
    } catch (Exception e) {
        // Report the underlying cause too; without it a rule-parsing failure
        // cannot be told apart from any other construction problem.
        errln("Failed to create RuleBasedCollator: " + e);
        return;
    }
    int result = rbc.compare("a", "bb");
    // Any negative value means "a" sorts before "bb"; zero or positive is a failure.
    if (result >= 0) {
        errln("Compare(a,bb) returned " + result + "; expected -1");
    }
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) ParseException(java.text.ParseException) Test(org.junit.Test)

Example 68 with RuleBasedCollator

Use of android.icu.text.RuleBasedCollator in project j2objc by Google.

From class CollationRegressionTest, method Test4114077.

// @bug 4114077
//
// Collation with decomposition off doesn't work for Europe
//
@Test
public void Test4114077() /* char* par */
{
    // The results with decomposition switched off are expected to match
    // the results with it switched on.
    final RuleBasedCollator collator = (RuleBasedCollator) Collator.getInstance(Locale.US);
    collator.setStrength(Collator.TERTIARY);

    // Data is laid out as (left, relation, right) triples, one triple per line.
    final String[] noDecompositionCases = {
        // Should be equivalent
        "\u00C0", "\u003d", "\u0041\u0300",
        "\u0070\u00ea\u0063\u0068\u0065", "\u003e", "\u0070\u00e9\u0063\u0068\u00e9",
        "\u0204", "\u003d", "\u0045\u030F",
        // a-ring-acute -> a-ring, acute
        "\u01fa", "\u003d", "\u0041\u030a\u0301",
        // No reordering --> unequal
        "\u0041\u0300\u0316", "\u003c", "\u0041\u0316\u0300"
    };
    final String[] canonicalDecompositionCases = {
        // Reordering --> equal
        "\u0041\u0300\u0316", "\u003d", "\u0041\u0316\u0300"
    };

    collator.setDecomposition(Collator.NO_DECOMPOSITION);
    compareArray(collator, noDecompositionCases);

    collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    compareArray(collator, canonicalDecompositionCases);
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) Test(org.junit.Test)

Example 69 with RuleBasedCollator

Use of android.icu.text.RuleBasedCollator in project j2objc by Google.

From class CollationIteratorTest, method TestUnicodeChar.

/**
 * Exercises CollationElementIterator previous/next over the BMP code units.
 *
 * <p>The code units are walked in batches: each batch collects every defined
 * code unit up to and including the next multiple of 0xFF, and the resulting
 * string is then handed to {@code CollationTest.backAndForth}.
 */
@Test
public void TestUnicodeChar() {
    final RuleBasedCollator usCollator = (RuleBasedCollator) Collator.getInstance(Locale.US);
    final StringBuffer batch = new StringBuffer();

    // Smoke test on a short fixed string before sweeping the whole range.
    batch.append("\u0e4d\u0e4e\u0e4f");
    CollationElementIterator elements = usCollator.getCollationElementIterator(batch.toString());
    CollationTest.backAndForth(this, elements);

    char unit = 1;
    while (unit < 0xFFFE) {
        batch.setLength(0);

        // Gather defined code units until the next multiple of 0xFF is reached.
        for (; unit % 0xFF != 0; unit++) {
            if (UCharacter.isDefined(unit)) {
                batch.append(unit);
            }
        }
        // The boundary value itself also belongs to this batch.
        if (UCharacter.isDefined(unit)) {
            batch.append(unit);
        }
        // Stop at 0xFFFF instead of letting the char wrap around to 0.
        if (unit != 0xFFFF) {
            unit++;
        }

        elements = usCollator.getCollationElementIterator(batch.toString());
        CollationTest.backAndForth(this, elements);
    }
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) CollationElementIterator(android.icu.text.CollationElementIterator) Test(org.junit.Test)

Example 70 with RuleBasedCollator

Use of android.icu.text.RuleBasedCollator in project j2objc by Google.

From class CollationIteratorTest, method TestSearchCollatorElements.

/**
 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
 * normalization on AND jamo tailoring, among other things.
 *
 * <p>For each locale under test, the iterator is walked forwards over the sample text
 * and the offset read before every {@code next()} call is compared with a table of
 * expected offsets; the same table is then checked in reverse using {@code previous()}.
 *
 * Note: This test is sensitive to changes of the root collator,
 * for example whether the ae-ligature maps to three CEs (as in the DUCET)
 * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
 * For example, the DUCET's artificial secondary CE in the ae-ligature
 * may map to two 32-bit iterator CEs (as it did until ICU 52).
 */
@Test
public void TestSearchCollatorElements() {
    String tsceText =
        " \uAC00" + // simple LV Hangul
        " \uAC01" + // simple LVT Hangul
        " \uAC0F" + // LVTT, last jamo expands for search
        " \uAFFF" + // LLVVVTT, every jamo expands for search
        " \u1100\u1161\u11A8" + // 0xAC01 as conjoining jamo
        " \u3131\u314F\u3131" + // 0xAC01 as compatibility jamo
        " \u1100\u1161\u11B6" + // 0xAC0F as conjoining jamo; last expands for search
        " \u1101\u1170\u11B6" + // 0xAFFF as conjoining jamo; all expand for search
        " \u00E6" + // small letter ae, expands
        " \u1E4D" + // small letter o with tilde and acute, decomposes
        " ";
    int[] rootStandardOffsets = { 0, 1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 6, 7, 8, 8, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
    26, 27, 28, 28, 28, 29 };
    int[] rootSearchOffsets = { 0, 1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 6, 6, 7, 8, 8, 8, 8, 8, 8, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20, 21, 22, 22, 23, 23, 23, 24, 24, 25, 26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
    26, 27, 28, 28, 28, 29 };
    // Pairs a locale identifier with the offsets expected from its collator's iterator.
    class TSCEItem {

        private final String localeString;

        private final int[] offsets;

        TSCEItem(String locStr, int[] offs) {
            localeString = locStr;
            offsets = offs;
        }

        public String getLocaleString() {
            return localeString;
        }

        public int[] getOffsets() {
            return offsets;
        }
    }
    final TSCEItem[] tsceItems = { new TSCEItem("root", rootStandardOffsets), new TSCEItem("root@collation=search", rootSearchOffsets) };
    for (TSCEItem tsceItem : tsceItems) {
        String localeString = tsceItem.getLocaleString();
        ULocale uloc = new ULocale(localeString);
        RuleBasedCollator col;
        try {
            col = (RuleBasedCollator) Collator.getInstance(uloc);
        } catch (Exception e) {
            // Include the cause so a lookup failure can be told apart from a failed cast.
            errln("Error: in locale " + localeString + ", err in Collator.getInstance: " + e);
            continue;
        }
        CollationElementIterator uce = col.getCollationElementIterator(tsceText);
        int[] offsets = tsceItem.getOffsets();
        int ioff, noff = offsets.length;
        int offset, element;

        // forwards test: the offset is sampled BEFORE each next() call, and the
        // terminating NULLORDER step is checked against the table as well
        ioff = 0;
        do {
            offset = uce.getOffset();
            element = uce.next();
            logln(String.format("(%s) offset=%2d  ce=%08x\n", localeString, offset, element));
            if (element == 0) {
                errln("Error: in locale " + localeString + ", CEIterator next() returned element 0");
            }
            if (ioff < noff) {
                if (offset != offsets[ioff]) {
                    // Deliberately keep going so every mismatch is reported.
                    errln("Error: in locale " + localeString + ", expected CEIterator next()->getOffset " + offsets[ioff] + ", got " + offset);
                }
                ioff++;
            } else {
                errln("Error: in locale " + localeString + ", CEIterator next() returned more elements than expected");
            }
        } while (element != CollationElementIterator.NULLORDER);
        if (ioff < noff) {
            errln("Error: in locale " + localeString + ", CEIterator next() returned fewer elements than expected");
        }

        // backwards test: start at the end of the text and consume the table in reverse
        uce.setOffset(tsceText.length());
        ioff = noff;
        do {
            offset = uce.getOffset();
            element = uce.previous();
            if (element == 0) {
                errln("Error: in locale " + localeString + ", CEIterator previous() returned element 0");
            }
            if (ioff > 0) {
                ioff--;
                if (offset != offsets[ioff]) {
                    // Deliberately keep going so every mismatch is reported.
                    errln("Error: in locale " + localeString + ", expected CEIterator previous()->getOffset " + offsets[ioff] + ", got " + offset);
                }
            } else {
                errln("Error: in locale " + localeString + ", CEIterator previous() returned more elements than expected");
            }
        } while (element != CollationElementIterator.NULLORDER);
        if (ioff > 0) {
            errln("Error: in locale " + localeString + ", CEIterator previous() returned fewer elements than expected");
        }
    }
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) ULocale(android.icu.util.ULocale) CollationElementIterator(android.icu.text.CollationElementIterator) Test(org.junit.Test)

Aggregations

RuleBasedCollator (android.icu.text.RuleBasedCollator): 140, Test (org.junit.Test): 124, Collator (android.icu.text.Collator): 42, ULocale (android.icu.util.ULocale): 26, CollationElementIterator (android.icu.text.CollationElementIterator): 25, Locale (java.util.Locale): 22, CollationKey (android.icu.text.CollationKey): 17, StringCharacterIterator (java.text.StringCharacterIterator): 16, StringSearch (android.icu.text.StringSearch): 14, RawCollationKey (android.icu.text.RawCollationKey): 11, ParseException (java.text.ParseException): 10, UnicodeSet (android.icu.text.UnicodeSet): 8, AlphabeticIndex (android.icu.text.AlphabeticIndex): 6, BreakIterator (android.icu.text.BreakIterator): 6, MissingResourceException (java.util.MissingResourceException): 5, IOException (java.io.IOException): 4, UnicodeSetIterator (android.icu.text.UnicodeSetIterator): 3, UCharacterIterator (android.icu.text.UCharacterIterator): 2, CharacterIterator (java.text.CharacterIterator): 2, ArrayList (java.util.ArrayList): 2