Search in sources :

Example 6 with CollationElementIterator

use of android.icu.text.CollationElementIterator in project j2objc by google.

the class CollationIteratorTest method TestUnicodeChar.

/**
 * Test for CollationElementIterator previous and next for the whole set of
 * unicode characters.
 *
 * <p>After a short fixed sanity string, the test walks the 16-bit {@code char} range starting
 * at 1. It collects every value for which {@code UCharacter.isDefined} is true into chunks that
 * each end on a multiple of 0xFF, and hands an iterator over each chunk to
 * {@code CollationTest.backAndForth}.
 */
@Test
public void TestUnicodeChar() {
    RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
    // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
    StringBuilder source = new StringBuilder("\u0e4d\u0e4e\u0e4f");
    CollationElementIterator iter = en_us.getCollationElementIterator(source.toString());
    // A basic test to see if it's working at all
    CollationTest.backAndForth(this, iter);

    char codepoint = 1;
    while (codepoint < 0xFFFE) {
        source.setLength(0);
        // Collect the defined values up to, but not including, the next multiple of 0xFF ...
        for (; codepoint % 0xFF != 0; codepoint++) {
            if (UCharacter.isDefined(codepoint)) {
                source.append(codepoint);
            }
        }
        // ... then the multiple of 0xFF itself, which closes the chunk.
        if (UCharacter.isDefined(codepoint)) {
            source.append(codepoint);
        }
        // 0xFFFF is itself a multiple of 0xFF. Incrementing it would wrap the char back to 0
        // and restart the walk, so the last chunk leaves codepoint at 0xFFFF to end the loop.
        if (codepoint != 0xFFFF) {
            codepoint++;
        }
        iter = en_us.getCollationElementIterator(source.toString());
        // Same forward/backward consistency check, now on this chunk
        CollationTest.backAndForth(this, iter);
    }
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) CollationElementIterator(android.icu.text.CollationElementIterator) Test(org.junit.Test)

Example 7 with CollationElementIterator

use of android.icu.text.CollationElementIterator in project j2objc by google.

the class CollationIteratorTest method TestSearchCollatorElements.

/**
 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
 * normalization on AND jamo tailoring, among other things.
 *
 * <p>For each locale under test, the text is walked once with {@code next()} and once with
 * {@code previous()}. The value of {@code getOffset()} read immediately before every step is
 * compared with a table of expected offsets; the table includes the entry for the final step
 * that returns {@code NULLORDER}. Mismatches, a collation element of 0, and too many or too
 * few elements are reported through {@code errln}.
 *
 * Note: This test is sensitive to changes of the root collator,
 * for example whether the ae-ligature maps to three CEs (as in the DUCET)
 * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
 * For example, the DUCET's artificial secondary CE in the ae-ligature
 * may map to two 32-bit iterator CEs (as it did until ICU 52).
 */
@Test
public void TestSearchCollatorElements() {
    // Every sample is preceded by a single space; one more space ends the text.
    String tsceText =
            // simple LV Hangul
            " \uAC00"
            // simple LVT Hangul
            + " \uAC01"
            // LVTT, last jamo expands for search
            + " \uAC0F"
            // LLVVVTT, every jamo expands for search
            + " \uAFFF"
            // 0xAC01 as conjoining jamo
            + " \u1100\u1161\u11A8"
            // 0xAC01 as compatibility jamo
            + " \u3131\u314F\u3131"
            // 0xAC0F as conjoining jamo; last expands for search
            + " \u1100\u1161\u11B6"
            // 0xAFFF as conjoining jamo; all expand for search
            + " \u1101\u1170\u11B6"
            // small letter ae, expands
            + " \u00E6"
            // small letter o with tilde and acute, decomposes
            + " \u1E4D"
            + " ";
    int[] rootStandardOffsets = {
        0, 1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 6, 7, 8, 8, 8,
        9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
        /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
        26, 27, 28, 28, 28, 29
    };
    int[] rootSearchOffsets = {
        0, 1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 6, 6, 7, 8, 8, 8, 8, 8, 8, 8,
        9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20, 21, 22, 22, 23, 23, 23, 24, 24, 25, 26,
        /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
        26, 27, 28, 28, 28, 29
    };
    // Pairs a locale ID with the offsets expected when iterating tsceText in that locale.
    class TSCEItem {

        private final String localeString;

        private final int[] offsets;

        TSCEItem(String locStr, int[] offs) {
            localeString = locStr;
            offsets = offs;
        }

        public String getLocaleString() {
            return localeString;
        }

        public int[] getOffsets() {
            return offsets;
        }
    }
    final TSCEItem[] tsceItems = {
        new TSCEItem("root", rootStandardOffsets),
        new TSCEItem("root@collation=search", rootSearchOffsets)
    };
    for (TSCEItem tsceItem : tsceItems) {
        String localeString = tsceItem.getLocaleString();
        ULocale uloc = new ULocale(localeString);
        RuleBasedCollator col;
        try {
            col = (RuleBasedCollator) Collator.getInstance(uloc);
        } catch (Exception e) {
            // Report the cause as well, then move on to the next locale.
            errln("Error: in locale " + localeString + ", err in Collator.getInstance: " + e);
            continue;
        }
        CollationElementIterator uce = col.getCollationElementIterator(tsceText);
        int[] offsets = tsceItem.getOffsets();
        int noff = offsets.length;
        int offset, element;

        // forwards test
        int ioff = 0;
        do {
            // The offset is sampled before the step, so the table holds "offset before next()".
            offset = uce.getOffset();
            element = uce.next();
            logln(String.format("(%s) offset=%2d  ce=%08x\n", localeString, offset, element));
            if (element == 0) {
                errln("Error: in locale " + localeString + ", CEIterator next() returned element 0");
            }
            if (ioff < noff) {
                if (offset != offsets[ioff]) {
                    // Keep going after a mismatch so every bad offset is reported.
                    errln("Error: in locale " + localeString + ", expected CEIterator next()->getOffset " + offsets[ioff] + ", got " + offset);
                }
                ioff++;
            } else {
                errln("Error: in locale " + localeString + ", CEIterator next() returned more elements than expected");
            }
        } while (element != CollationElementIterator.NULLORDER);
        if (ioff < noff) {
            errln("Error: in locale " + localeString + ", CEIterator next() returned fewer elements than expected");
        }

        // backwards test: start at the end of the text and consume the table in reverse
        uce.setOffset(tsceText.length());
        ioff = noff;
        do {
            offset = uce.getOffset();
            element = uce.previous();
            if (element == 0) {
                errln("Error: in locale " + localeString + ", CEIterator previous() returned element 0");
            }
            if (ioff > 0) {
                ioff--;
                if (offset != offsets[ioff]) {
                    // Keep going after a mismatch so every bad offset is reported.
                    errln("Error: in locale " + localeString + ", expected CEIterator previous()->getOffset " + offsets[ioff] + ", got " + offset);
                }
            } else {
                errln("Error: in locale " + localeString + ", CEIterator previous() returned more elements than expected");
            }
        } while (element != CollationElementIterator.NULLORDER);
        if (ioff > 0) {
            errln("Error: in locale " + localeString + ", CEIterator previous() returned fewer elements than expected");
        }
    }
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) ULocale(android.icu.util.ULocale) CollationElementIterator(android.icu.text.CollationElementIterator) Test(org.junit.Test)

Example 8 with CollationElementIterator

use of android.icu.text.CollationElementIterator in project j2objc by google.

the class CollationIteratorTest method TestDiscontiguous.

/**
 * Testing the discontiguous contractions
 *
 * <p>A collator is built from rules that define the contractions AB, ABC, X + U+0300 and
 * X + U+0300 + U+0315. Each {@code src} string is iterated with that collator. The matching
 * {@code tgt} entry lists, separated by single spaces, the pieces whose collation elements are
 * expected to appear, in that order, when iterating the source. The test stops at the first
 * mismatching element. Each source is then reset and run through
 * {@code CollationTest.backAndForth}.
 */
@Test
public void TestDiscontiguous() {
    String rulestr = "&z < AB < X\u0300 < ABC < X\u0300\u0315";
    String[] src = {
        "ADB", "ADBC", "A\u0315B", "A\u0315BC",
        // base character blocked
        "XD\u0300", "XD\u0300\u0315",
        // non blocking combining character
        "X\u0319\u0300", "X\u0319\u0300\u0315",
        // blocking combining character
        "X\u0314\u0300", "X\u0314\u0300\u0315",
        // contraction prefix
        "ABDC", "AB\u0315C", "X\u0300D\u0315", "X\u0300\u0319\u0315", "X\u0300\u031A\u0315",
        // ends not with a contraction character
        "X\u0319\u0300D", "X\u0319\u0300\u0315D", "X\u0300D\u0315D", "X\u0300\u0319\u0315D", "X\u0300\u031A\u0315D"
    };
    // Expected segmentation of the src entry at the same index.
    String[] tgt = {
        // A and B separated by another character: no AB contraction expected
        "A D B", "A D BC", "A \u0315 B", "A \u0315 BC",
        // base character blocked
        "X D \u0300", "X D \u0300\u0315",
        // non blocking combining character
        "X\u0300 \u0319", "X\u0300\u0315 \u0319",
        // blocking combining character
        "X \u0314 \u0300", "X \u0314 \u0300\u0315",
        // contraction prefix
        "AB DC", "AB \u0315 C", "X\u0300 D \u0315", "X\u0300\u0315 \u0319", "X\u0300 \u031A \u0315",
        // ends not with a contraction character
        "X\u0300 \u0319D", "X\u0300\u0315 \u0319D", "X\u0300 D\u0315D", "X\u0300\u0315 \u0319D", "X\u0300 \u031A\u0315D"
    };
    try {
        RuleBasedCollator coll = new RuleBasedCollator(rulestr);
        CollationElementIterator iter = coll.getCollationElementIterator("");
        CollationElementIterator resultiter = coll.getCollationElementIterator("");
        for (int count = 0; count < src.length; count++) {
            iter.setText(src[count]);
            String expected = tgt[count];
            int s = 0;
            while (s < expected.length()) {
                // Each space-delimited piece is iterated on its own; the last piece has no
                // trailing space and runs to the end of the string.
                int e = expected.indexOf(' ', s);
                if (e < 0) {
                    e = expected.length();
                }
                resultiter.setText(expected.substring(s, e));
                int ce = resultiter.next();
                while (ce != CollationElementIterator.NULLORDER) {
                    // The source iterator must produce the same elements in the same order.
                    if (ce != iter.next()) {
                        errln("Discontiguous contraction test mismatch at " + count);
                        return;
                    }
                    ce = resultiter.next();
                }
                s = e + 1;
            }
            iter.reset();
            CollationTest.backAndForth(this, iter);
        }
    } catch (Exception e) {
        // Any exception is downgraded to a warning rather than a failure, as in the original.
        warnln("Error running discontiguous tests " + e.toString());
    }
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) CollationElementIterator(android.icu.text.CollationElementIterator) Test(org.junit.Test)

Example 9 with CollationElementIterator

use of android.icu.text.CollationElementIterator in project j2objc by google.

the class CollationIteratorTest method TestPrevious.

/**
 * Test for CollationElementIterator.previous()
 *
 * <p>Runs {@code CollationTest.backAndForth} on iterators obtained from, in order: the US
 * English collator, a rule set with a contracting sequence, one with an expanding sequence,
 * one with both, the Thai collator and the Japanese collator. If a collator cannot be created
 * the problem is reported through {@code errln} and the remaining cases are skipped.
 *
 * @bug 4108758 - Make sure it works with contracting characters
 */
@Test
public void TestPrevious() {
    // A basic test to see if it's working at all
    RuleBasedCollator english = (RuleBasedCollator) Collator.getInstance(Locale.US);
    CollationTest.backAndForth(this, english.getCollationElementIterator(test1));

    // Contracting character sequence
    RuleBasedCollator contracting;
    try {
        contracting = new RuleBasedCollator("&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
    } catch (Exception e) {
        errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
        return;
    }
    CollationTest.backAndForth(this, contracting.getCollationElementIterator("abchdcba"));

    // Expanding character sequence
    RuleBasedCollator expanding;
    try {
        expanding = new RuleBasedCollator("&a < b < c/abd < d");
    } catch (Exception e) {
        errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
        return;
    }
    CollationTest.backAndForth(this, expanding.getCollationElementIterator("abcd"));

    // Expansion and contraction together
    RuleBasedCollator combined;
    try {
        combined = new RuleBasedCollator("&a < b < c/aba < d < z < ch");
    } catch (Exception e) {
        errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
        return;
    }
    CollationTest.backAndForth(this, combined.getCollationElementIterator("abcdbchdc"));

    // Thai collator on Thai text followed by ASCII letters
    Collator thai;
    try {
        thai = Collator.getInstance(new Locale("th", "TH", ""));
    } catch (Exception e) {
        errln("Couldn't create a collator");
        return;
    }
    // The cast stays outside the try block, so a ClassCastException is not reported as a
    // creation failure.
    CollationElementIterator thaiIter =
            ((RuleBasedCollator) thai).getCollationElementIterator("\u0e41\u0e02\u0e41\u0e02\u0e27abc");
    CollationTest.backAndForth(this, thaiIter);

    // Japanese collator on 'a' followed by three code points from the kana blocks
    Collator japanese;
    try {
        japanese = Collator.getInstance(new Locale("ja", "JP", ""));
    } catch (Exception e) {
        errln("Couldn't create Japanese collator\n");
        return;
    }
    CollationElementIterator japaneseIter =
            ((RuleBasedCollator) japanese).getCollationElementIterator("\u0061\u30CF\u3099\u30FC");
    CollationTest.backAndForth(this, japaneseIter);
}
Also used : Locale(java.util.Locale) ULocale(android.icu.util.ULocale) RuleBasedCollator(android.icu.text.RuleBasedCollator) CollationElementIterator(android.icu.text.CollationElementIterator) RuleBasedCollator(android.icu.text.RuleBasedCollator) Collator(android.icu.text.Collator) Test(org.junit.Test)

Example 10 with CollationElementIterator

use of android.icu.text.CollationElementIterator in project j2objc by google.

the class CollationAPITest method TestElemIter.

/**
 * This tests the CollationElementIterator related APIs.
 * - creation of a CollationElementIterator object (from a String, a CharacterIterator
 *   and a UCharacterIterator)
 * - equals() comparisons between iterators
 * - iterating forward
 * - resetting the iterator index
 * - requesting the order properties (primary, secondary or tertiary)
 *
 * The same sequence of next() calls and assertions is run twice: once on the fresh
 * iterators and once more after reset() has been called on all three.
 */
@Test
public void TestElemIter() {
    // logln("testing sortkey begins...");
    Collator col = Collator.getInstance(Locale.ENGLISH);
    // The two texts differ in their first few characters ("XFILE" vs "Xf_ile") and in two later words.
    String testString1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
    String testString2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
    // logln("Constructors and comparison testing....");
    // iterator1: created directly from the String testString1
    CollationElementIterator iterator1 = ((RuleBasedCollator) col).getCollationElementIterator(testString1);
    CharacterIterator chariter = new StringCharacterIterator(testString1);
    // iterator2: created from a java.text.CharacterIterator over the same text as iterator1
    // (the original comment here read "copy ctor")
    CollationElementIterator iterator2 = ((RuleBasedCollator) col).getCollationElementIterator(chariter);
    UCharacterIterator uchariter = UCharacterIterator.getInstance(testString2);
    // iterator3: created from a UCharacterIterator over the second text
    CollationElementIterator iterator3 = ((RuleBasedCollator) col).getCollationElementIterator(uchariter);
    int offset = 0;
    // A freshly created iterator must report offset 0; otherwise the test is abandoned.
    offset = iterator1.getOffset();
    if (offset != 0) {
        errln("Error in getOffset for collation element iterator");
        return;
    }
    // Move away from the start and back again; nothing is checked in between.
    iterator1.setOffset(6);
    iterator1.setOffset(0);
    int order1, order2, order3;
    // iterator1 is now one step ahead of iterator2, so the two must not compare equal.
    order1 = iterator1.next();
    doAssert(!(iterator1.equals(iterator2)), "The first iterator advance failed");
    order2 = iterator2.next();
    // Code coverage for dummy "not designed" hashCode() which does "assert false".
    try {
        // We don't expect any particular value.
        iterator1.hashCode();
    } catch (AssertionError ignored) {
    // Expected to be thrown if assertions are enabled.
    }
    // In ICU 52 and earlier we had iterator1.equals(iterator2)
    // but in ICU 53 this fails because the iterators differ (String vs. CharacterIterator).
    // doAssert((iterator1.equals(iterator2)), "The second iterator advance failed");
    doAssert(iterator1.getOffset() == iterator2.getOffset(), "The second iterator advance failed");
    doAssert((order1 == order2), "The order result should be the same");
    // First element of each text: all three strengths are asserted to match.
    order3 = iterator3.next();
    doAssert((CollationElementIterator.primaryOrder(order1) == CollationElementIterator.primaryOrder(order3)), "The primary orders should be the same");
    doAssert((CollationElementIterator.secondaryOrder(order1) == CollationElementIterator.secondaryOrder(order3)), "The secondary orders should be the same");
    doAssert((CollationElementIterator.tertiaryOrder(order1) == CollationElementIterator.tertiaryOrder(order3)), "The tertiary orders should be the same");
    // Second element (presumably 'F' vs 'f'): asserted to share a primary order but differ in tertiary order.
    order1 = iterator1.next();
    order3 = iterator3.next();
    doAssert((CollationElementIterator.primaryOrder(order1) == CollationElementIterator.primaryOrder(order3)), "The primary orders should be identical");
    doAssert((CollationElementIterator.tertiaryOrder(order1) != CollationElementIterator.tertiaryOrder(order3)), "The tertiary orders should be different");
    // Third element: only checked for not being the end-of-iteration marker.
    order1 = iterator1.next();
    order3 = iterator3.next();
    // invalid test wrong in UCA
    // doAssert((CollationElementIterator.secondaryOrder(order1) !=
    // CollationElementIterator.secondaryOrder(order3)), "The secondary orders should not be the same");
    doAssert((order1 != CollationElementIterator.NULLORDER), "Unexpected end of iterator reached");
    // Second pass: reset all three iterators and repeat the checks above.
    iterator1.reset();
    iterator2.reset();
    iterator3.reset();
    order1 = iterator1.next();
    doAssert(!(iterator1.equals(iterator2)), "The first iterator advance failed");
    order2 = iterator2.next();
    // In ICU 52 and earlier we had iterator1.equals(iterator2)
    // but in ICU 53 this fails because the iterators differ (String vs. CharacterIterator).
    // doAssert((iterator1.equals(iterator2)), "The second iterator advance failed");
    doAssert(iterator1.getOffset() == iterator2.getOffset(), "The second iterator advance failed");
    doAssert((order1 == order2), "The order result should be the same");
    order3 = iterator3.next();
    doAssert((CollationElementIterator.primaryOrder(order1) == CollationElementIterator.primaryOrder(order3)), "The primary orders should be the same");
    doAssert((CollationElementIterator.secondaryOrder(order1) == CollationElementIterator.secondaryOrder(order3)), "The secondary orders should be the same");
    doAssert((CollationElementIterator.tertiaryOrder(order1) == CollationElementIterator.tertiaryOrder(order3)), "The tertiary orders should be the same");
    order1 = iterator1.next();
    // NOTE(review): unlike the first pass, iterator2 is advanced here as well; the value stored
    // in order2 is not read again in this method.
    order2 = iterator2.next();
    order3 = iterator3.next();
    doAssert((CollationElementIterator.primaryOrder(order1) == CollationElementIterator.primaryOrder(order3)), "The primary orders should be identical");
    doAssert((CollationElementIterator.tertiaryOrder(order1) != CollationElementIterator.tertiaryOrder(order3)), "The tertiary orders should be different");
    order1 = iterator1.next();
    order3 = iterator3.next();
    // obsolete invalid test, removed
    // doAssert((CollationElementIterator.secondaryOrder(order1) !=
    // CollationElementIterator.secondaryOrder(order3)), "The secondary orders should not be the same");
    doAssert((order1 != CollationElementIterator.NULLORDER), "Unexpected end of iterator reached");
    // iterator2 and iterator3 were created over different texts and must not compare equal.
    doAssert(!(iterator2.equals(iterator3)), "The iterators should be different");
    logln("testing CollationElementIterator ends...");
}
Also used : RuleBasedCollator(android.icu.text.RuleBasedCollator) StringCharacterIterator(java.text.StringCharacterIterator) UCharacterIterator(android.icu.text.UCharacterIterator) CharacterIterator(java.text.CharacterIterator) StringCharacterIterator(java.text.StringCharacterIterator) UCharacterIterator(android.icu.text.UCharacterIterator) CollationElementIterator(android.icu.text.CollationElementIterator) Collator(android.icu.text.Collator) RuleBasedCollator(android.icu.text.RuleBasedCollator) Test(org.junit.Test)

Aggregations

CollationElementIterator (android.icu.text.CollationElementIterator): 26, RuleBasedCollator (android.icu.text.RuleBasedCollator): 25, Test (org.junit.Test): 25, ULocale (android.icu.util.ULocale): 4, Collator (android.icu.text.Collator): 3, Locale (java.util.Locale): 3, CollationKey (android.icu.text.CollationKey): 2, RawCollationKey (android.icu.text.RawCollationKey): 2, UCharacterIterator (android.icu.text.UCharacterIterator): 2, CharacterIterator (java.text.CharacterIterator): 2, StringCharacterIterator (java.text.StringCharacterIterator): 2, IOException (java.io.IOException): 1, ParseException (java.text.ParseException): 1, ArrayList (java.util.ArrayList): 1, List (java.util.List): 1, MissingResourceException (java.util.MissingResourceException): 1