Examples with UCaseProps - android.icu.impl.UCaseProps

Example 1 with UCaseProps

use of android.icu.impl.UCaseProps in project j2objc by google.

The closeOver method of the class UnicodeSet.

/**
 * Closes this set over the requested attribute(s) and returns this set.
 *
 * <p>When the CASE bit is set, the set is modified so that:
 *
 * 1. For every character or string 'a' in this set, every string 'b'
 * with foldCase(a) == foldCase(b) is added. (For most single-character
 * 'a', 'b' is a single character as well.)
 *
 * 2. Every string 'e' in the resulting set with e != foldCase(e) is
 * removed.
 *
 * Example: [aq\u00DF{Bc}{bC}{Fi}] =&gt; [aAqQ\u00DF\uFB01{ss}{bc}{fi}]
 *
 * (foldCase(x) stands for UCharacter.foldCase(x, true), and "==" means
 * a.equals(b), not reference identity.)
 *
 * <p>When ADD_CASE_MAPPINGS is set without CASE, the existing contents
 * are kept and the root-locale lower-, title- and upper-case mappings
 * plus the case folding of every code point and every string are added.
 *
 * <p>checkFrozen() is invoked before anything else. If neither CASE nor
 * ADD_CASE_MAPPINGS is present in {@code attribute}, the set is left
 * untouched.
 *
 * @param attribute bitmask of the attributes to close over; bits other
 * than CASE and ADD_CASE_MAPPINGS are ignored.
 * @return a reference to this set.
 */
public UnicodeSet closeOver(int attribute) {
    checkFrozen();
    if ((attribute & (CASE | ADD_CASE_MAPPINGS)) == 0) {
        // none of the supported bits requested: nothing to do
        return this;
    }

    final boolean fullClosure = (attribute & CASE) != 0;
    final UCaseProps caseProps = UCaseProps.INSTANCE;
    // Work on a copy so that this set's ranges and strings can be iterated safely.
    final UnicodeSet closure = new UnicodeSet(this);
    final ULocale rootLocale = ULocale.ROOT;
    if (fullClosure) {
        // Full closure re-adds strings in folded form only, so begin without any.
        closure.strings.clear();
    }

    final int rangeCount = getRangeCount();
    final StringBuilder mapping = new StringBuilder();
    for (int r = 0; r < rangeCount; ++r) {
        for (int cp = getRangeStart(r), last = getRangeEnd(r); cp <= last; ++cp) {
            if (fullClosure) {
                caseProps.addCaseClosure(cp, closure);
            } else {
                // Plain case mappings only
                // (does not add long s for regular s, or Kelvin for k, for example)
                int mapped = caseProps.toFullLower(cp, null, mapping, UCaseProps.LOC_ROOT);
                addCaseMapping(closure, mapped, mapping);
                mapped = caseProps.toFullTitle(cp, null, mapping, UCaseProps.LOC_ROOT);
                addCaseMapping(closure, mapped, mapping);
                mapped = caseProps.toFullUpper(cp, null, mapping, UCaseProps.LOC_ROOT);
                addCaseMapping(closure, mapped, mapping);
                mapped = caseProps.toFullFolding(cp, mapping, 0);
                addCaseMapping(closure, mapped, mapping);
            }
        }
    }

    if (!strings.isEmpty()) {
        if (fullClosure) {
            for (String original : strings) {
                String folded = UCharacter.foldCase(original, 0);
                boolean mappedToCodePoints = caseProps.addStringCaseClosure(folded, closure);
                if (!mappedToCodePoints) {
                    // no code point closure for this string: keep the folded string itself
                    closure.add(folded);
                }
            }
        } else {
            BreakIterator wordBreaks = BreakIterator.getWordInstance(rootLocale);
            for (String original : strings) {
                // TODO: call lower-level functions
                closure.add(UCharacter.toLowerCase(rootLocale, original));
                closure.add(UCharacter.toTitleCase(rootLocale, original, wordBreaks));
                closure.add(UCharacter.toUpperCase(rootLocale, original));
                closure.add(UCharacter.foldCase(original, 0));
            }
        }
    }

    set(closure);
    return this;
}

Also used : ULocale(android.icu.util.ULocale) UCaseProps(android.icu.impl.UCaseProps)

Example 2 with UCaseProps

use of android.icu.impl.UCaseProps in project j2objc by google.

The getFC_NFKC_Closure method of the class Normalizer.

/**
 * Returns the FC_NFKC_Closure value of a code point.
 * @param c The code point whose closure value is to be retrieved
 * @return String representation of the closure value; "" if there is none
 * @deprecated ICU 56
 * @hide original deprecated declaration
 */
@Deprecated
public static String getFC_NFKC_Closure(int c) {
    // The value is derived on the fly rather than stored:
    // the API exists for complete coverage of the Unicode properties, even
    // though this value is not useful by itself.
    // (A custom normalization table combining case folding and NFKC could
    // be useful instead.)
    // See Unicode's DerivedNormalizationProps.txt for the derivation.
    final Normalizer2 nfkc = NFKCModeImpl.INSTANCE.normalizer2;
    final UCaseProps caseProps = UCaseProps.INSTANCE;

    // Step 1: b = NFKC(Fold(a))
    final StringBuilder foldBuffer = new StringBuilder();
    final int foldResult = caseProps.toFullFolding(c, foldBuffer, 0);
    if (foldResult >= 0) {
        // A result above MAX_STRING_LENGTH is a single folded code point that
        // still has to be written into the buffer.
        if (foldResult > UCaseProps.MAX_STRING_LENGTH) {
            foldBuffer.appendCodePoint(foldResult);
        }
    } else {
        // No case folding for c; check whether NFKC could change it at all.
        final Normalizer2Impl nfkcImpl = ((Norm2AllModes.Normalizer2WithImpl) nfkc).impl;
        if (nfkcImpl.getCompQuickCheck(nfkcImpl.getNorm16(c)) != 0) {
            // c does not change at all under CaseFolding+NFKC
            return "";
        }
        foldBuffer.appendCodePoint(c);
    }
    final String firstPass = nfkc.normalize(foldBuffer);

    // Step 2: c = NFKC(Fold(b))
    final String secondPass = nfkc.normalize(UCharacter.foldCase(firstPass, 0));

    // Only when the second pass differs from the first is there a mapping from a to c.
    return firstPass.equals(secondPass) ? "" : secondPass;
}

Also used : UCaseProps(android.icu.impl.UCaseProps) Normalizer2Impl(android.icu.impl.Normalizer2Impl)

Example 3 with UCaseProps

use of android.icu.impl.UCaseProps in project j2objc by google.

The cmpEquivFold method of the class Normalizer.

/*
 * Compares two character sequences code unit by code unit, optionally
 * replacing code points on the fly with their case folding and/or their
 * decomposition before comparing.
 *
 * Bits of "options" tested here:
 *   COMPARE_EQUIV            - use nfcImpl.getDecomposition() on mismatching code points
 *   COMPARE_IGNORE_CASE      - use csp.toFullFolding() on mismatching code points
 *                              (options is also passed through to toFullFolding)
 *   COMPARE_CODE_POINT_ORDER - adjust the final code unit difference so that
 *                              BMP code units >= 0xd800 sort below surrogate pairs
 *
 * Each string has a small stack of "levels": the current source (cs1/cs2),
 * read position (s1/s2) and limit are pushed when a code point is replaced
 * by its folding or decomposition, and popped again when the replacement
 * text has been consumed. Case folding is only applied while at level 0,
 * decomposition only while the level is below 2.
 *
 * Returns 0 if both strings end at the same time without a difference,
 * -1 if string 1 ends first, 1 if string 2 ends first, and otherwise the
 * difference c1 - c2 of the first differing (possibly adjusted) code units.
 *
 * internal function; package visibility for use by UTF16.StringComparator
 */
/*package*/
static int cmpEquivFold(CharSequence cs1, CharSequence cs2, int options) {
    Normalizer2Impl nfcImpl;
    UCaseProps csp;
    /* current-level start/limit - s1/s2 as current */
    int s1, s2, limit1, limit2;
    /* decomposition and case folding variables */
    int length;
    /* stacks of previous-level start/current/limit; allocated lazily on first push */
    CmpEquivLevel[] stack1 = null, stack2 = null;
    /* buffers for algorithmic decompositions */
    String decomp1, decomp2;
    /* case folding buffers, only use current-level start/limit */
    StringBuilder fold1, fold2;
    /* track which is the current level per string */
    int level1, level2;
    /* current code units, and code points for lookups */
    int c1, c2, cp1, cp2;
    /* normalization/properties data loaded? */
    if ((options & COMPARE_EQUIV) != 0) {
        nfcImpl = Norm2AllModes.getNFCInstance().impl;
    } else {
        /* never dereferenced: every use below is guarded by the same COMPARE_EQUIV test */
        nfcImpl = null;
    }
    if ((options & COMPARE_IGNORE_CASE) != 0) {
        csp = UCaseProps.INSTANCE;
        fold1 = new StringBuilder();
        fold2 = new StringBuilder();
    } else {
        /* never dereferenced: every use below is guarded by the same COMPARE_IGNORE_CASE test */
        csp = null;
        fold1 = fold2 = null;
    }
    /* initialize */
    s1 = 0;
    limit1 = cs1.length();
    s2 = 0;
    limit2 = cs2.length();
    level1 = level2 = 0;
    /* a negative c1/c2 means "fetch the next code unit for this string" */
    c1 = c2 = -1;
    /* comparison loop */
    for (; ; ) {
        if (c1 < 0) {
            /* get next code unit from string 1, post-increment */
            for (; ; ) {
                if (s1 == limit1) {
                    if (level1 == 0) {
                        /* end of the original string 1 */
                        c1 = -1;
                        break;
                    }
                } else {
                    c1 = cs1.charAt(s1++);
                    break;
                }
                /* reached end of level buffer, pop one level */
                /* (levels whose cs is null are the empty placeholders set below; skip them) */
                do {
                    --level1;
                    cs1 = stack1[level1].cs;
                } while (cs1 == null);
                s1 = stack1[level1].s;
                limit1 = cs1.length();
            }
        }
        if (c2 < 0) {
            /* get next code unit from string 2, post-increment */
            for (; ; ) {
                if (s2 == limit2) {
                    if (level2 == 0) {
                        /* end of the original string 2 */
                        c2 = -1;
                        break;
                    }
                } else {
                    c2 = cs2.charAt(s2++);
                    break;
                }
                /* reached end of level buffer, pop one level */
                /* (levels whose cs is null are the empty placeholders set below; skip them) */
                do {
                    --level2;
                    cs2 = stack2[level2].cs;
                } while (cs2 == null);
                s2 = stack2[level2].s;
                limit2 = cs2.length();
            }
        }
        /*
         * compare c1 and c2
         * either variable c1, c2 is -1 only if the corresponding string is finished
         */
        if (c1 == c2) {
            if (c1 < 0) {
                /* c1==c2==-1 indicating end of strings */
                return 0;
            }
            /* make us fetch new code units */
            c1 = c2 = -1;
            continue;
        } else if (c1 < 0) {
            /* string 1 ends before string 2 */
            return -1;
        } else if (c2 < 0) {
            /* string 2 ends before string 1 */
            return 1;
        }
        /* c1!=c2 && c1>=0 && c2>=0 */
        /* get complete code points for c1, c2 for lookups if either is a surrogate */
        cp1 = c1;
        if (UTF16.isSurrogate((char) c1)) {
            char c;
            if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
                if (s1 != limit1 && Character.isLowSurrogate(c = cs1.charAt(s1))) {
                    /* advance ++s1; only below if cp1 decomposes/case-folds */
                    cp1 = Character.toCodePoint((char) c1, c);
                }
            } else /* isTrail(c1) */
            {
                /* s1 was already post-incremented, so the preceding code unit is at s1 - 2 */
                if (0 <= (s1 - 2) && Character.isHighSurrogate(c = cs1.charAt(s1 - 2))) {
                    cp1 = Character.toCodePoint(c, (char) c1);
                }
            }
        }
        cp2 = c2;
        if (UTF16.isSurrogate((char) c2)) {
            char c;
            if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
                if (s2 != limit2 && Character.isLowSurrogate(c = cs2.charAt(s2))) {
                    /* advance ++s2; only below if cp2 decomposes/case-folds */
                    cp2 = Character.toCodePoint((char) c2, c);
                }
            } else /* isTrail(c2) */
            {
                /* s2 was already post-incremented, so the preceding code unit is at s2 - 2 */
                if (0 <= (s2 - 2) && Character.isHighSurrogate(c = cs2.charAt(s2 - 2))) {
                    cp2 = Character.toCodePoint(c, (char) c2);
                }
            }
        }
        /* try case folding cp1: only on the original text (level 0); a negative result means no folding */
        if (level1 == 0 && (options & COMPARE_IGNORE_CASE) != 0 && (length = csp.toFullFolding(cp1, fold1, options)) >= 0) {
            /* cp1 case-folds to the code point "length" or to p[length] */
            if (UTF16.isSurrogate((char) c1)) {
                if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
                    /* advance beyond source surrogate pair if it case-folds */
                    ++s1;
                } else /* isTrail(c1) */
                {
                    /*
                     * we got a supplementary code point when hitting its trail surrogate,
                     * therefore the lead surrogate must have been the same as in the other string;
                     * compare this decomposition with the lead surrogate in the other string
                     * remember that this simulates bulk text replacement:
                     * the decomposition would replace the entire code point
                     */
                    --s2;
                    c2 = cs2.charAt(s2 - 1);
                }
            }
            /* push current level pointers */
            if (stack1 == null) {
                stack1 = createCmpEquivLevelStack();
            }
            stack1[0].cs = cs1;
            stack1[0].s = s1;
            ++level1;
            /* Java: the buffer was probably not empty, remove the old contents */
            if (length <= UCaseProps.MAX_STRING_LENGTH) {
                /* string result: keep only the last "length" chars of the buffer */
                /* NOTE(review): presumably toFullFolding appended them to fold1 - confirm in UCaseProps */
                fold1.delete(0, fold1.length() - length);
            } else {
                /* single code point result: "length" is the folded code point itself */
                fold1.setLength(0);
                fold1.appendCodePoint(length);
            }
            /* set next level pointers to case folding */
            cs1 = fold1;
            s1 = 0;
            limit1 = fold1.length();
            /* get ready to read from decomposition, continue with loop */
            c1 = -1;
            continue;
        }
        /* same for cp2 */
        if (level2 == 0 && (options & COMPARE_IGNORE_CASE) != 0 && (length = csp.toFullFolding(cp2, fold2, options)) >= 0) {
            /* cp2 case-folds to the code point "length" or to p[length] */
            if (UTF16.isSurrogate((char) c2)) {
                if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
                    /* advance beyond source surrogate pair if it case-folds */
                    ++s2;
                } else /* isTrail(c2) */
                {
                    /*
                     * we got a supplementary code point when hitting its trail surrogate,
                     * therefore the lead surrogate must have been the same as in the other string;
                     * compare this decomposition with the lead surrogate in the other string
                     * remember that this simulates bulk text replacement:
                     * the decomposition would replace the entire code point
                     */
                    --s1;
                    c1 = cs1.charAt(s1 - 1);
                }
            }
            /* push current level pointers */
            if (stack2 == null) {
                stack2 = createCmpEquivLevelStack();
            }
            stack2[0].cs = cs2;
            stack2[0].s = s2;
            ++level2;
            /* Java: the buffer was probably not empty, remove the old contents */
            if (length <= UCaseProps.MAX_STRING_LENGTH) {
                /* string result: keep only the last "length" chars of the buffer */
                /* NOTE(review): presumably toFullFolding appended them to fold2 - confirm in UCaseProps */
                fold2.delete(0, fold2.length() - length);
            } else {
                /* single code point result: "length" is the folded code point itself */
                fold2.setLength(0);
                fold2.appendCodePoint(length);
            }
            /* set next level pointers to case folding */
            cs2 = fold2;
            s2 = 0;
            limit2 = fold2.length();
            /* get ready to read from decomposition, continue with loop */
            c2 = -1;
            continue;
        }
        /* try decomposing cp1: allowed on the original text and on case-folded text (level < 2) */
        if (level1 < 2 && (options & COMPARE_EQUIV) != 0 && (decomp1 = nfcImpl.getDecomposition(cp1)) != null) {
            /* cp1 decomposes into p[length] */
            if (UTF16.isSurrogate((char) c1)) {
                if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
                    /* advance beyond source surrogate pair if it decomposes */
                    ++s1;
                } else /* isTrail(c1) */
                {
                    /*
                     * we got a supplementary code point when hitting its trail surrogate,
                     * therefore the lead surrogate must have been the same as in the other string;
                     * compare this decomposition with the lead surrogate in the other string
                     * remember that this simulates bulk text replacement:
                     * the decomposition would replace the entire code point
                     */
                    --s2;
                    c2 = cs2.charAt(s2 - 1);
                }
            }
            /* push current level pointers */
            if (stack1 == null) {
                stack1 = createCmpEquivLevelStack();
            }
            stack1[level1].cs = cs1;
            stack1[level1].s = s1;
            ++level1;
            /* set empty intermediate level if skipped */
            /* (pushed from level 0: mark level 1 with a null cs so that level ends at 2 and the pop loop skips it) */
            if (level1 < 2) {
                stack1[level1++].cs = null;
            }
            /* set next level pointers to decomposition */
            cs1 = decomp1;
            s1 = 0;
            limit1 = decomp1.length();
            /* get ready to read from decomposition, continue with loop */
            c1 = -1;
            continue;
        }
        /* same for cp2 */
        if (level2 < 2 && (options & COMPARE_EQUIV) != 0 && (decomp2 = nfcImpl.getDecomposition(cp2)) != null) {
            /* cp2 decomposes into p[length] */
            if (UTF16.isSurrogate((char) c2)) {
                if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
                    /* advance beyond source surrogate pair if it decomposes */
                    ++s2;
                } else /* isTrail(c2) */
                {
                    /*
                     * we got a supplementary code point when hitting its trail surrogate,
                     * therefore the lead surrogate must have been the same as in the other string;
                     * compare this decomposition with the lead surrogate in the other string
                     * remember that this simulates bulk text replacement:
                     * the decomposition would replace the entire code point
                     */
                    --s1;
                    c1 = cs1.charAt(s1 - 1);
                }
            }
            /* push current level pointers */
            if (stack2 == null) {
                stack2 = createCmpEquivLevelStack();
            }
            stack2[level2].cs = cs2;
            stack2[level2].s = s2;
            ++level2;
            /* set empty intermediate level if skipped */
            /* (pushed from level 0: mark level 1 with a null cs so that level ends at 2 and the pop loop skips it) */
            if (level2 < 2) {
                stack2[level2++].cs = null;
            }
            /* set next level pointers to decomposition */
            cs2 = decomp2;
            s2 = 0;
            limit2 = decomp2.length();
            /* get ready to read from decomposition, continue with loop */
            c2 = -1;
            continue;
        }
        /* no replacement applies any more: c1 and c2 really differ; fix up the order if requested */
        if (c1 >= 0xd800 && c2 >= 0xd800 && (options & COMPARE_CODE_POINT_ORDER) != 0) {
            /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
            if ((c1 <= 0xdbff && s1 != limit1 && Character.isLowSurrogate(cs1.charAt(s1))) || (Character.isLowSurrogate((char) c1) && 0 != (s1 - 1) && Character.isHighSurrogate(cs1.charAt(s1 - 2)))) {
            /* part of a surrogate pair, leave >=d800 */
            } else {
                /* BMP code point - may be surrogate code point - make <d800 */
                c1 -= 0x2800;
            }
            if ((c2 <= 0xdbff && s2 != limit2 && Character.isLowSurrogate(cs2.charAt(s2))) || (Character.isLowSurrogate((char) c2) && 0 != (s2 - 1) && Character.isHighSurrogate(cs2.charAt(s2 - 2)))) {
            /* part of a surrogate pair, leave >=d800 */
            } else {
                /* BMP code point - may be surrogate code point - make <d800 */
                c2 -= 0x2800;
            }
        }
        /* both values are non-negative and at most 0xffff here, so the subtraction cannot overflow */
        return c1 - c2;
    }
}

Also used : Normalizer2Impl(android.icu.impl.Normalizer2Impl) UCaseProps(android.icu.impl.UCaseProps)

Aggregations

UCaseProps (android.icu.impl.UCaseProps): 3, Normalizer2Impl (android.icu.impl.Normalizer2Impl): 2, ULocale (android.icu.util.ULocale): 1