Search in sources :

Example 6 with Normalizer2Impl

use of android.icu.impl.Normalizer2Impl in project j2objc by google.

the class Normalizer method cmpEquivFold.

/**
 * Compares two character sequences code unit by code unit, optionally treating
 * case-folded and/or canonically decomposed forms as equal.
 *
 * <p>Internal function; package visibility for use by UTF16.StringComparator.
 *
 * <p>The strings are not transformed up front. Code units are compared as they
 * are, and only when two units differ is the code point on either side looked up
 * for a case folding (if {@code COMPARE_IGNORE_CASE} is set) or a decomposition
 * (if {@code COMPARE_EQUIV} is set). A replacement found that way is "pushed" as
 * a new level: the current sequence and read position are saved in a per-string
 * stack, and reading continues from the replacement text until it is exhausted,
 * at which point the saved level is popped. Level 0 is the original string, case
 * folding is only attempted at level 0, and decomposition is only attempted
 * below level 2.
 *
 * @param cs1 first string; the parameter variable is reassigned while levels are
 *            pushed and popped
 * @param cs2 second string; reassigned in the same way
 * @param options bit set tested against {@code COMPARE_EQUIV},
 *            {@code COMPARE_IGNORE_CASE} and {@code COMPARE_CODE_POINT_ORDER};
 *            also passed through unchanged to {@code toFullFolding}
 * @return 0 if both strings end without a difference, -1 if string 1 ends first,
 *         1 if string 2 ends first, otherwise the difference {@code c1 - c2} of
 *         the first differing code units (after the optional code point order
 *         fix-up)
 */
/*package*/
static int cmpEquivFold(CharSequence cs1, CharSequence cs2, int options) {
    Normalizer2Impl nfcImpl;
    UCaseProps csp;
    /* current-level start/limit - s1/s2 as current */
    int s1, s2, limit1, limit2;
    /* decomposition and case folding variables */
    int length;
    /* stacks of previous-level start/current/limit; allocated lazily on first push */
    CmpEquivLevel[] stack1 = null, stack2 = null;
    /* buffers for algorithmic decompositions */
    String decomp1, decomp2;
    /* case folding buffers, only use current-level start/limit */
    StringBuilder fold1, fold2;
    /* track which is the current level per string */
    int level1, level2;
    /* current code units, and code points for lookups */
    int c1, c2, cp1, cp2;
    /* normalization/properties data loaded? */
    /* nfcImpl stays null unless COMPARE_EQUIV is set; every later use is guarded by the same flag */
    if ((options & COMPARE_EQUIV) != 0) {
        nfcImpl = Norm2AllModes.getNFCInstance().impl;
    } else {
        nfcImpl = null;
    }
    /* likewise csp/fold1/fold2 are only non-null when COMPARE_IGNORE_CASE is set */
    if ((options & COMPARE_IGNORE_CASE) != 0) {
        csp = UCaseProps.INSTANCE;
        fold1 = new StringBuilder();
        fold2 = new StringBuilder();
    } else {
        csp = null;
        fold1 = fold2 = null;
    }
    /* initialize */
    s1 = 0;
    limit1 = cs1.length();
    s2 = 0;
    limit2 = cs2.length();
    level1 = level2 = 0;
    /* a negative c1/c2 means "fetch the next code unit for this string" */
    c1 = c2 = -1;
    /* comparison loop */
    for (; ; ) {
        if (c1 < 0) {
            /* get next code unit from string 1, post-increment */
            for (; ; ) {
                if (s1 == limit1) {
                    if (level1 == 0) {
                        /* end of the original string: leave c1 at -1 as the end marker */
                        c1 = -1;
                        break;
                    }
                } else {
                    c1 = cs1.charAt(s1++);
                    break;
                }
                /* reached end of level buffer, pop one level */
                /* entries whose cs is null are skipped-over placeholder levels (see the decomposition push) */
                do {
                    --level1;
                    cs1 = stack1[level1].cs;
                } while (cs1 == null);
                s1 = stack1[level1].s;
                limit1 = cs1.length();
            }
        }
        if (c2 < 0) {
            /* get next code unit from string 2, post-increment */
            for (; ; ) {
                if (s2 == limit2) {
                    if (level2 == 0) {
                        /* end of the original string: leave c2 at -1 as the end marker */
                        c2 = -1;
                        break;
                    }
                } else {
                    c2 = cs2.charAt(s2++);
                    break;
                }
                /* reached end of level buffer, pop one level */
                /* entries whose cs is null are skipped-over placeholder levels (see the decomposition push) */
                do {
                    --level2;
                    cs2 = stack2[level2].cs;
                } while (cs2 == null);
                s2 = stack2[level2].s;
                limit2 = cs2.length();
            }
        }
        /*
             * compare c1 and c2
             * either variable c1, c2 is -1 only if the corresponding string is finished
             */
        if (c1 == c2) {
            if (c1 < 0) {
                return 0;
            /* c1==c2==-1 indicating end of strings */
            }
            c1 = c2 = -1;
            /* make us fetch new code units */
            continue;
        } else if (c1 < 0) {
            return -1;
        /* string 1 ends before string 2 */
        } else if (c2 < 0) {
            return 1;
        /* string 2 ends before string 1 */
        }
        /* c1!=c2 && c1>=0 && c2>=0 */
        /* get complete code points for c1, c2 for lookups if either is a surrogate */
        /* s1/s2 already point one past c1/c2, so the unit after a lead is at s and the unit before a trail is at s-2 */
        cp1 = c1;
        if (UTF16.isSurrogate((char) c1)) {
            char c;
            if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
                if (s1 != limit1 && Character.isLowSurrogate(c = cs1.charAt(s1))) {
                    /* advance ++s1; only below if cp1 decomposes/case-folds */
                    cp1 = Character.toCodePoint((char) c1, c);
                }
            } else /* isTrail(c1) */
            {
                if (0 <= (s1 - 2) && Character.isHighSurrogate(c = cs1.charAt(s1 - 2))) {
                    cp1 = Character.toCodePoint(c, (char) c1);
                }
            }
        }
        cp2 = c2;
        if (UTF16.isSurrogate((char) c2)) {
            char c;
            if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
                if (s2 != limit2 && Character.isLowSurrogate(c = cs2.charAt(s2))) {
                    /* advance ++s2; only below if cp2 decomposes/case-folds */
                    cp2 = Character.toCodePoint((char) c2, c);
                }
            } else /* isTrail(c2) */
            {
                if (0 <= (s2 - 2) && Character.isHighSurrogate(c = cs2.charAt(s2 - 2))) {
                    cp2 = Character.toCodePoint(c, (char) c2);
                }
            }
        }
        /*
             * try case folding for string 1; only at level 0, i.e. only on original text
             * a negative result from toFullFolding is treated as "no folding applies"
             */
        if (level1 == 0 && (options & COMPARE_IGNORE_CASE) != 0 && (length = csp.toFullFolding(cp1, fold1, options)) >= 0) {
            /* cp1 case-folds to the code point "length" or to p[length] */
            if (UTF16.isSurrogate((char) c1)) {
                if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
                    /* advance beyond source surrogate pair if it case-folds */
                    ++s1;
                } else /* isTrail(c1) */
                {
                    /*
                         * we got a supplementary code point when hitting its trail surrogate,
                         * therefore the lead surrogate must have been the same as in the other string;
                         * compare this decomposition with the lead surrogate in the other string
                         * remember that this simulates bulk text replacement:
                         * the decomposition would replace the entire code point
                         */
                    /* back string 2 up by one unit so that its lead surrogate becomes the current c2 again */
                    --s2;
                    c2 = cs2.charAt(s2 - 1);
                }
            }
            /* push current level pointers */
            /* NOTE(review): createCmpEquivLevelStack() is not visible here; presumably it returns an array of pre-allocated entries, since they are written to without a null check */
            if (stack1 == null) {
                stack1 = createCmpEquivLevelStack();
            }
            /* level1 is known to be 0 here, hence the fixed slot 0 */
            stack1[0].cs = cs1;
            stack1[0].s = s1;
            ++level1;
            /* Java: the buffer was probably not empty, remove the old contents */
            /*
                 * length <= MAX_STRING_LENGTH: length is a string length and only the last
                 * "length" chars of fold1 are kept (presumably the text toFullFolding just appended);
                 * otherwise length is itself the folded code point and becomes the buffer contents
                 */
            if (length <= UCaseProps.MAX_STRING_LENGTH) {
                fold1.delete(0, fold1.length() - length);
            } else {
                fold1.setLength(0);
                fold1.appendCodePoint(length);
            }
            /* set next level pointers to case folding */
            cs1 = fold1;
            s1 = 0;
            limit1 = fold1.length();
            /* get ready to read from decomposition, continue with loop */
            c1 = -1;
            continue;
        }
        /* same case folding step for string 2, mirrored */
        if (level2 == 0 && (options & COMPARE_IGNORE_CASE) != 0 && (length = csp.toFullFolding(cp2, fold2, options)) >= 0) {
            /* cp2 case-folds to the code point "length" or to p[length] */
            if (UTF16.isSurrogate((char) c2)) {
                if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
                    /* advance beyond source surrogate pair if it case-folds */
                    ++s2;
                } else /* isTrail(c2) */
                {
                    /*
                         * we got a supplementary code point when hitting its trail surrogate,
                         * therefore the lead surrogate must have been the same as in the other string;
                         * compare this decomposition with the lead surrogate in the other string
                         * remember that this simulates bulk text replacement:
                         * the decomposition would replace the entire code point
                         */
                    /* back string 1 up by one unit so that its lead surrogate becomes the current c1 again */
                    --s1;
                    c1 = cs1.charAt(s1 - 1);
                }
            }
            /* push current level pointers */
            if (stack2 == null) {
                stack2 = createCmpEquivLevelStack();
            }
            /* level2 is known to be 0 here, hence the fixed slot 0 */
            stack2[0].cs = cs2;
            stack2[0].s = s2;
            ++level2;
            /* Java: the buffer was probably not empty, remove the old contents */
            /* same interpretation of "length" as for fold1: string length vs. single folded code point */
            if (length <= UCaseProps.MAX_STRING_LENGTH) {
                fold2.delete(0, fold2.length() - length);
            } else {
                fold2.setLength(0);
                fold2.appendCodePoint(length);
            }
            /* set next level pointers to case folding */
            cs2 = fold2;
            s2 = 0;
            limit2 = fold2.length();
            /* get ready to read from decomposition, continue with loop */
            c2 = -1;
            continue;
        }
        /*
             * try decomposition for string 1; allowed at level 0 (original text) and
             * level 1 (case folding result), but not on text that is itself a decomposition
             */
        if (level1 < 2 && (options & COMPARE_EQUIV) != 0 && (decomp1 = nfcImpl.getDecomposition(cp1)) != null) {
            /* cp1 decomposes into p[length] */
            if (UTF16.isSurrogate((char) c1)) {
                if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
                    /* advance beyond source surrogate pair if it decomposes */
                    ++s1;
                } else /* isTrail(c1) */
                {
                    /*
                         * we got a supplementary code point when hitting its trail surrogate,
                         * therefore the lead surrogate must have been the same as in the other string;
                         * compare this decomposition with the lead surrogate in the other string
                         * remember that this simulates bulk text replacement:
                         * the decomposition would replace the entire code point
                         */
                    /* back string 2 up by one unit so that its lead surrogate becomes the current c2 again */
                    --s2;
                    c2 = cs2.charAt(s2 - 1);
                }
            }
            /* push current level pointers */
            if (stack1 == null) {
                stack1 = createCmpEquivLevelStack();
            }
            stack1[level1].cs = cs1;
            stack1[level1].s = s1;
            ++level1;
            /* set empty intermediate level if skipped */
            /* coming from level 0, slot 1 is marked null so that the decomposition always sits at level 2; the pop loop skips null slots */
            if (level1 < 2) {
                stack1[level1++].cs = null;
            }
            /* set next level pointers to decomposition */
            cs1 = decomp1;
            s1 = 0;
            limit1 = decomp1.length();
            /* get ready to read from decomposition, continue with loop */
            c1 = -1;
            continue;
        }
        /* same decomposition step for string 2, mirrored */
        if (level2 < 2 && (options & COMPARE_EQUIV) != 0 && (decomp2 = nfcImpl.getDecomposition(cp2)) != null) {
            /* cp2 decomposes into p[length] */
            if (UTF16.isSurrogate((char) c2)) {
                if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
                    /* advance beyond source surrogate pair if it decomposes */
                    ++s2;
                } else /* isTrail(c2) */
                {
                    /*
                         * we got a supplementary code point when hitting its trail surrogate,
                         * therefore the lead surrogate must have been the same as in the other string;
                         * compare this decomposition with the lead surrogate in the other string
                         * remember that this simulates bulk text replacement:
                         * the decomposition would replace the entire code point
                         */
                    /* back string 1 up by one unit so that its lead surrogate becomes the current c1 again */
                    --s1;
                    c1 = cs1.charAt(s1 - 1);
                }
            }
            /* push current level pointers */
            if (stack2 == null) {
                stack2 = createCmpEquivLevelStack();
            }
            stack2[level2].cs = cs2;
            stack2[level2].s = s2;
            ++level2;
            /* set empty intermediate level if skipped */
            /* coming from level 0, slot 1 is marked null so that the decomposition always sits at level 2; the pop loop skips null slots */
            if (level2 < 2) {
                stack2[level2++].cs = null;
            }
            /* set next level pointers to decomposition */
            cs2 = decomp2;
            s2 = 0;
            limit2 = decomp2.length();
            /* get ready to read from decomposition, continue with loop */
            c2 = -1;
            continue;
        }
        /*
             * no more replacements apply: c1 and c2 are a real difference
             * with COMPARE_CODE_POINT_ORDER, units >= 0xd800 that are not half of a
             * surrogate pair are shifted below 0xd800 so that paired surrogates compare higher
             */
        if (c1 >= 0xd800 && c2 >= 0xd800 && (options & COMPARE_CODE_POINT_ORDER) != 0) {
            /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
            /* "0 != (s1 - 1)" keeps the s1 - 2 lookup in range: s1 is already one past c1 */
            if ((c1 <= 0xdbff && s1 != limit1 && Character.isLowSurrogate(cs1.charAt(s1))) || (Character.isLowSurrogate((char) c1) && 0 != (s1 - 1) && Character.isHighSurrogate(cs1.charAt(s1 - 2)))) {
            /* part of a surrogate pair, leave >=d800 */
            } else {
                /* BMP code point - may be surrogate code point - make <d800 */
                c1 -= 0x2800;
            }
            if ((c2 <= 0xdbff && s2 != limit2 && Character.isLowSurrogate(cs2.charAt(s2))) || (Character.isLowSurrogate((char) c2) && 0 != (s2 - 1) && Character.isHighSurrogate(cs2.charAt(s2 - 2)))) {
            /* part of a surrogate pair, leave >=d800 */
            } else {
                /* BMP code point - may be surrogate code point - make <d800 */
                c2 -= 0x2800;
            }
        }
        /* both values are at most 0xffff here, so the subtraction cannot overflow */
        return c1 - c2;
    }
}
Also used : Normalizer2Impl(android.icu.impl.Normalizer2Impl) UCaseProps(android.icu.impl.UCaseProps)

Example 7 with Normalizer2Impl

use of android.icu.impl.Normalizer2Impl in project j2objc by google.

the class BasicTest method TestGetsFromImpl.

/*
     * Smoke test for two accessors of Normalizer2Impl: getNormTrie() and
     * getFCD16FromBelow180(int). Each result is only compared against null;
     * the returned values themselves are not inspected.
     */
@Test
public void TestGetsFromImpl() {
    /* the NFC singleton exposes its underlying implementation object via the impl field */
    final Normalizer2Impl impl = Norm2AllModes.getNFCInstance().impl;

    assertNotEquals(
            "getNormTrie() returns null",
            null,
            impl.getNormTrie());

    assertNotEquals(
            "getFCD16FromBelow180() returns null",
            null,
            impl.getFCD16FromBelow180(0));
}
Also used : Normalizer2Impl(android.icu.impl.Normalizer2Impl) Test(org.junit.Test)

Aggregations

Normalizer2Impl (android.icu.impl.Normalizer2Impl) 7, Test (org.junit.Test) 3, UCaseProps (android.icu.impl.UCaseProps) 2, UnicodeSet (android.icu.text.UnicodeSet) 2, CollationSettings (android.icu.impl.coll.CollationSettings) 1, FilteredNormalizer2 (android.icu.text.FilteredNormalizer2) 1, Normalizer2 (android.icu.text.Normalizer2) 1, UTF16 (android.icu.text.UTF16) 1, UnicodeSetIterator (android.icu.text.UnicodeSetIterator) 1