Use of android.icu.impl.UCaseProps in project j2objc by Google.
Class UnicodeSet, method closeOver.
/**
 * Closes this set over the requested case attribute, modifying it in place.
 *
 * <p>When the CASE bit is set, the set is replaced by its case closure:
 *
 * 1. For every character or string 'a' in this set, every string 'b' with
 *    foldCase(a).equals(foldCase(b)) is added. (For most single-character
 *    'a', 'b' will also have b.length() == 1.)
 *
 * 2. Every string 'e' in the result for which !e.equals(foldCase(e)) is
 *    removed.
 *
 * Example: [aq\u00DF{Bc}{bC}{Fi}] => [aAqQ\u00DF\uFB01{ss}{bc}{fi}]
 *
 * (foldCase(x) is documented upstream as UCharacter.foldCase(x, true); this
 * implementation calls UCharacter.foldCase(x, 0). Equality always means
 * equals(), never reference identity.)
 *
 * <p>When only the ADD_CASE_MAPPINGS bit is set, the existing contents are
 * kept and the lower-, title- and upper-case mappings plus the case folding
 * of every code point and every string are added.
 *
 * <p>When neither bit is set the set is left untouched.
 *
 * @param attribute bitmask of attributes to close over; only the CASE and
 * ADD_CASE_MAPPINGS bits are examined, all other bits are ignored.
 * @return a reference to this set.
 */
public UnicodeSet closeOver(int attribute) {
    checkFrozen();
    if ((attribute & (CASE | ADD_CASE_MAPPINGS)) == 0) {
        // Neither of the bits handled here is set: nothing to do.
        return this;
    }

    final boolean fullClosure = (attribute & CASE) != 0;
    UCaseProps caseProps = UCaseProps.INSTANCE;
    // Work on a copy seeded with the current contents so that everything
    // already in this set stays included.
    UnicodeSet closure = new UnicodeSet(this);
    ULocale rootLocale = ULocale.ROOT;
    if (fullClosure) {
        // For CASE the strings are re-added below in folded form only,
        // so the copy starts out without any strings.
        closure.strings.clear();
    }

    int rangeCount = getRangeCount();
    // Scratch buffer that receives multi-character mapping results.
    StringBuilder mapped = new StringBuilder();
    for (int r = 0; r < rangeCount; ++r) {
        int first = getRangeStart(r);
        int last = getRangeEnd(r);
        for (int cp = first; cp <= last; ++cp) {
            if (fullClosure) {
                // full case closure
                caseProps.addCaseClosure(cp, closure);
            } else {
                // Add the individual case mappings only
                // (does not add long s for regular s, or Kelvin for k, for example).
                int lower = caseProps.toFullLower(cp, null, mapped, UCaseProps.LOC_ROOT);
                addCaseMapping(closure, lower, mapped);
                int title = caseProps.toFullTitle(cp, null, mapped, UCaseProps.LOC_ROOT);
                addCaseMapping(closure, title, mapped);
                int upper = caseProps.toFullUpper(cp, null, mapped, UCaseProps.LOC_ROOT);
                addCaseMapping(closure, upper, mapped);
                int folding = caseProps.toFullFolding(cp, mapped, 0);
                addCaseMapping(closure, folding, mapped);
            }
        }
    }

    if (!strings.isEmpty()) {
        if (fullClosure) {
            for (String member : strings) {
                String folded = UCharacter.foldCase(member, 0);
                boolean mapsToCodePoints = caseProps.addStringCaseClosure(folded, closure);
                if (!mapsToCodePoints) {
                    // does not map to code points: add the folded string itself
                    closure.add(folded);
                }
            }
        } else {
            BreakIterator wordBreaks = BreakIterator.getWordInstance(rootLocale);
            for (String member : strings) {
                // TODO: call lower-level functions
                closure.add(UCharacter.toLowerCase(rootLocale, member));
                closure.add(UCharacter.toTitleCase(rootLocale, member, wordBreaks));
                closure.add(UCharacter.toUpperCase(rootLocale, member));
                closure.add(UCharacter.foldCase(member, 0));
            }
        }
    }

    // Replace the contents of this set with the computed closure.
    set(closure);
    return this;
}
Use of android.icu.impl.UCaseProps in project j2objc by Google.
Class Normalizer, method getFC_NFKC_Closure.
/**
 * Returns the FC_NFKC closure value of a code point.
 *
 * <p>The value is derived on the fly as described in Unicode's
 * DerivedNormalizationProps.txt: with b = NFKC(Fold(a)) and
 * c = NFKC(Fold(b)), the closure of a is c when c differs from b.
 *
 * @param c The code point whose closure value is to be retrieved
 * @return String representation of the closure value; "" if there is none
 * @deprecated ICU 56
 * @hide original deprecated declaration
 */
@Deprecated
public static String getFC_NFKC_Closure(int c) {
    // The property is computed rather than stored: the API exists for complete
    // coverage of Unicode properties even though this value is not useful on
    // its own. (A custom normalization table combining case folding and NFKC
    // could be.)
    Normalizer2 nfkc = NFKCModeImpl.INSTANCE.normalizer2;
    UCaseProps caseProps = UCaseProps.INSTANCE;

    // Step 1: b = NFKC(Fold(a))
    StringBuilder foldedInput = new StringBuilder();
    int foldResult = caseProps.toFullFolding(c, foldedInput, 0);
    if (foldResult >= 0) {
        // A result above MAX_STRING_LENGTH is itself the folded code point and
        // has to be appended here; otherwise nothing is appended and the
        // buffer is used as filled in by toFullFolding.
        if (foldResult > UCaseProps.MAX_STRING_LENGTH) {
            foldedInput.appendCodePoint(foldResult);
        }
    } else {
        // Negative result: continue with c itself, unless the NFKC
        // quick check shows that nothing can change.
        Normalizer2Impl nfkcImpl = ((Norm2AllModes.Normalizer2WithImpl) nfkc).impl;
        int norm16 = nfkcImpl.getNorm16(c);
        if (nfkcImpl.getCompQuickCheck(norm16) != 0) {
            // c does not change at all under CaseFolding+NFKC
            return "";
        }
        foldedInput.appendCodePoint(c);
    }
    String firstPass = nfkc.normalize(foldedInput);

    // Step 2: c = NFKC(Fold(b))
    String refolded = UCharacter.foldCase(firstPass, 0);
    String secondPass = nfkc.normalize(refolded);

    // Only a second pass that still changes the text yields a closure value.
    return firstPass.equals(secondPass) ? "" : secondPass;
}
Use of android.icu.impl.UCaseProps in project j2objc by Google.
Class Normalizer, method cmpEquivFold.
/**
 * Compares two character sequences code unit by code unit, optionally treating
 * them as equal under case folding and/or decomposition.
 * Internal function; package visibility for use by UTF16.StringComparator.
 *
 * <p>Each string is read through a small stack of "levels": level 0 is the
 * caller's sequence. When two code units differ, the code point on either side
 * may be replaced by its case folding (only while that side is at level 0) or
 * by its decomposition (while that side is below level 2). The replacement text
 * becomes the current level and reading resumes from the previous level once it
 * is exhausted. Only when neither side can be replaced any further is the
 * difference returned.
 *
 * @param cs1 first sequence to compare
 * @param cs2 second sequence to compare
 * @param options bit set; this method tests COMPARE_EQUIV (use decompositions
 * from the NFC implementation), COMPARE_IGNORE_CASE (use case foldings) and
 * COMPARE_CODE_POINT_ORDER (order supplementary code points above BMP code
 * points); the whole value is also passed on to UCaseProps.toFullFolding
 * @return 0 if both sequences end together without a difference, -1 if
 * string 1 ends before string 2, 1 if string 2 ends before string 1, otherwise
 * the difference c1 - c2 of the first differing code units (after the optional
 * code point order adjustment)
 */
/*package*/
static int cmpEquivFold(CharSequence cs1, CharSequence cs2, int options) {
Normalizer2Impl nfcImpl;
UCaseProps csp;
/* current-level start/limit - s1/s2 as current */
int s1, s2, limit1, limit2;
/* decomposition and case folding variables */
int length;
/* stacks of previous-level start/current/limit; allocated lazily on first push */
CmpEquivLevel[] stack1 = null, stack2 = null;
/* buffers for algorithmic decompositions */
String decomp1, decomp2;
/* case folding buffers, only use current-level start/limit */
StringBuilder fold1, fold2;
/* track which is the current level per string */
int level1, level2;
/* current code units, and code points for lookups */
int c1, c2, cp1, cp2;
/* normalization/properties data loaded? */
/* nfcImpl stays null unless COMPARE_EQUIV is set; every use below is guarded by the same flag */
if ((options & COMPARE_EQUIV) != 0) {
nfcImpl = Norm2AllModes.getNFCInstance().impl;
} else {
nfcImpl = null;
}
/* csp and the fold buffers stay null unless COMPARE_IGNORE_CASE is set; uses below are guarded by the same flag */
if ((options & COMPARE_IGNORE_CASE) != 0) {
csp = UCaseProps.INSTANCE;
fold1 = new StringBuilder();
fold2 = new StringBuilder();
} else {
csp = null;
fold1 = fold2 = null;
}
/* initialize */
s1 = 0;
limit1 = cs1.length();
s2 = 0;
limit2 = cs2.length();
level1 = level2 = 0;
/* -1 means "no code unit pending": forces a fetch at the top of the loop */
c1 = c2 = -1;
/* comparison loop */
for (; ; ) {
if (c1 < 0) {
/* get next code unit from string 1, post-increment */
for (; ; ) {
if (s1 == limit1) {
if (level1 == 0) {
/* original string 1 is exhausted */
c1 = -1;
break;
}
} else {
c1 = cs1.charAt(s1++);
break;
}
/* reached end of level buffer, pop one level */
/* levels whose cs is null are empty placeholders and are skipped */
do {
--level1;
cs1 = stack1[level1].cs;
} while (cs1 == null);
s1 = stack1[level1].s;
limit1 = cs1.length();
}
}
if (c2 < 0) {
/* get next code unit from string 2, post-increment */
for (; ; ) {
if (s2 == limit2) {
if (level2 == 0) {
/* original string 2 is exhausted */
c2 = -1;
break;
}
} else {
c2 = cs2.charAt(s2++);
break;
}
/* reached end of level buffer, pop one level */
/* levels whose cs is null are empty placeholders and are skipped */
do {
--level2;
cs2 = stack2[level2].cs;
} while (cs2 == null);
s2 = stack2[level2].s;
limit2 = cs2.length();
}
}
/*
* compare c1 and c2
* either variable c1, c2 is -1 only if the corresponding string is finished
*/
if (c1 == c2) {
if (c1 < 0) {
return 0;
/* c1==c2==-1 indicating end of strings */
}
c1 = c2 = -1;
/* make us fetch new code units */
continue;
} else if (c1 < 0) {
return -1;
/* string 1 ends before string 2 */
} else if (c2 < 0) {
return 1;
/* string 2 ends before string 1 */
}
/* c1!=c2 && c1>=0 && c2>=0 */
/* get complete code points for c1, c2 for lookups if either is a surrogate */
/* note: s1/s2 were post-incremented, so c1 sits at index s1-1 and c2 at index s2-1 */
cp1 = c1;
if (UTF16.isSurrogate((char) c1)) {
char c;
if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
if (s1 != limit1 && Character.isLowSurrogate(c = cs1.charAt(s1))) {
/* advance ++s1; only below if cp1 decomposes/case-folds */
cp1 = Character.toCodePoint((char) c1, c);
}
} else /* isTrail(c1) */
{
/* look back at the unit before c1 (index s1-2) for a matching lead surrogate */
if (0 <= (s1 - 2) && Character.isHighSurrogate(c = cs1.charAt(s1 - 2))) {
cp1 = Character.toCodePoint(c, (char) c1);
}
}
}
cp2 = c2;
if (UTF16.isSurrogate((char) c2)) {
char c;
if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
if (s2 != limit2 && Character.isLowSurrogate(c = cs2.charAt(s2))) {
/* advance ++s2; only below if cp2 decomposes/case-folds */
cp2 = Character.toCodePoint((char) c2, c);
}
} else /* isTrail(c2) */
{
/* look back at the unit before c2 (index s2-2) for a matching lead surrogate */
if (0 <= (s2 - 2) && Character.isHighSurrogate(c = cs2.charAt(s2 - 2))) {
cp2 = Character.toCodePoint(c, (char) c2);
}
}
}
/*
* go down one level for each string;
* continue with the main loop as soon as one side has been replaced
*/
/* case folding of cp1: only attempted while string 1 is still at level 0 */
if (level1 == 0 && (options & COMPARE_IGNORE_CASE) != 0 && (length = csp.toFullFolding(cp1, fold1, options)) >= 0) {
/* cp1 case-folds: "length" is either the folded code point itself (> MAX_STRING_LENGTH) or the length of the folding text at the end of fold1 */
if (UTF16.isSurrogate((char) c1)) {
if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
/* advance beyond source surrogate pair if it case-folds */
++s1;
} else /* isTrail(c1) */
{
/*
* we got a supplementary code point when hitting its trail surrogate,
* therefore the lead surrogate must have been the same as in the other string;
* compare this decomposition with the lead surrogate in the other string
* remember that this simulates bulk text replacement:
* the decomposition would replace the entire code point
*/
--s2;
c2 = cs2.charAt(s2 - 1);
}
}
/* push current level pointers */
if (stack1 == null) {
stack1 = createCmpEquivLevelStack();
}
/* level1 is 0 here (checked above), hence the fixed slot 0 */
stack1[0].cs = cs1;
stack1[0].s = s1;
++level1;
/* Java: the buffer was probably not empty, remove the old contents */
if (length <= UCaseProps.MAX_STRING_LENGTH) {
/* keep only the last "length" chars of fold1 */
fold1.delete(0, fold1.length() - length);
} else {
/* single code point result: replace the buffer contents with it */
fold1.setLength(0);
fold1.appendCodePoint(length);
}
/* set next level pointers to case folding */
cs1 = fold1;
s1 = 0;
limit1 = fold1.length();
/* get ready to read from the case folding, continue with loop */
c1 = -1;
continue;
}
/* case folding of cp2: mirror image of the block above for string 2 */
if (level2 == 0 && (options & COMPARE_IGNORE_CASE) != 0 && (length = csp.toFullFolding(cp2, fold2, options)) >= 0) {
/* cp2 case-folds: "length" is either the folded code point itself (> MAX_STRING_LENGTH) or the length of the folding text at the end of fold2 */
if (UTF16.isSurrogate((char) c2)) {
if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
/* advance beyond source surrogate pair if it case-folds */
++s2;
} else /* isTrail(c2) */
{
/*
* we got a supplementary code point when hitting its trail surrogate,
* therefore the lead surrogate must have been the same as in the other string;
* compare this decomposition with the lead surrogate in the other string
* remember that this simulates bulk text replacement:
* the decomposition would replace the entire code point
*/
--s1;
c1 = cs1.charAt(s1 - 1);
}
}
/* push current level pointers */
if (stack2 == null) {
stack2 = createCmpEquivLevelStack();
}
/* level2 is 0 here (checked above), hence the fixed slot 0 */
stack2[0].cs = cs2;
stack2[0].s = s2;
++level2;
/* Java: the buffer was probably not empty, remove the old contents */
if (length <= UCaseProps.MAX_STRING_LENGTH) {
/* keep only the last "length" chars of fold2 */
fold2.delete(0, fold2.length() - length);
} else {
/* single code point result: replace the buffer contents with it */
fold2.setLength(0);
fold2.appendCodePoint(length);
}
/* set next level pointers to case folding */
cs2 = fold2;
s2 = 0;
limit2 = fold2.length();
/* get ready to read from the case folding, continue with loop */
c2 = -1;
continue;
}
/* decomposition of cp1: attempted at level 0 (original) or level 1 (case folding), never inside a decomposition */
if (level1 < 2 && (options & COMPARE_EQUIV) != 0 && (decomp1 = nfcImpl.getDecomposition(cp1)) != null) {
/* cp1 decomposes into the string decomp1 */
if (UTF16.isSurrogate((char) c1)) {
if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
/* advance beyond source surrogate pair if it decomposes */
++s1;
} else /* isTrail(c1) */
{
/*
* we got a supplementary code point when hitting its trail surrogate,
* therefore the lead surrogate must have been the same as in the other string;
* compare this decomposition with the lead surrogate in the other string
* remember that this simulates bulk text replacement:
* the decomposition would replace the entire code point
*/
--s2;
c2 = cs2.charAt(s2 - 1);
}
}
/* push current level pointers */
if (stack1 == null) {
stack1 = createCmpEquivLevelStack();
}
stack1[level1].cs = cs1;
stack1[level1].s = s1;
++level1;
/* set empty intermediate level if skipped */
/* (so a decomposition always ends up at level 2; the null entry is skipped when popping) */
if (level1 < 2) {
stack1[level1++].cs = null;
}
/* set next level pointers to decomposition */
cs1 = decomp1;
s1 = 0;
limit1 = decomp1.length();
/* get ready to read from decomposition, continue with loop */
c1 = -1;
continue;
}
/* decomposition of cp2: mirror image of the block above for string 2 */
if (level2 < 2 && (options & COMPARE_EQUIV) != 0 && (decomp2 = nfcImpl.getDecomposition(cp2)) != null) {
/* cp2 decomposes into the string decomp2 */
if (UTF16.isSurrogate((char) c2)) {
if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
/* advance beyond source surrogate pair if it decomposes */
++s2;
} else /* isTrail(c2) */
{
/*
* we got a supplementary code point when hitting its trail surrogate,
* therefore the lead surrogate must have been the same as in the other string;
* compare this decomposition with the lead surrogate in the other string
* remember that this simulates bulk text replacement:
* the decomposition would replace the entire code point
*/
--s1;
c1 = cs1.charAt(s1 - 1);
}
}
/* push current level pointers */
if (stack2 == null) {
stack2 = createCmpEquivLevelStack();
}
stack2[level2].cs = cs2;
stack2[level2].s = s2;
++level2;
/* set empty intermediate level if skipped */
/* (so a decomposition always ends up at level 2; the null entry is skipped when popping) */
if (level2 < 2) {
stack2[level2++].cs = null;
}
/* set next level pointers to decomposition */
cs2 = decomp2;
s2 = 0;
limit2 = decomp2.length();
/* get ready to read from decomposition, continue with loop */
c2 = -1;
continue;
}
/*
* neither side was case-folded or decomposed in this iteration:
* the differing code units decide the result
*/
if (c1 >= 0xd800 && c2 >= 0xd800 && (options & COMPARE_CODE_POINT_ORDER) != 0) {
/* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
/* c1 sits at index s1-1, so "0 != (s1 - 1)" means c1 is not the first unit of the current buffer */
if ((c1 <= 0xdbff && s1 != limit1 && Character.isLowSurrogate(cs1.charAt(s1))) || (Character.isLowSurrogate((char) c1) && 0 != (s1 - 1) && Character.isHighSurrogate(cs1.charAt(s1 - 2)))) {
/* part of a surrogate pair, leave >=d800 */
} else {
/* BMP code point - may be surrogate code point - make <d800 */
c1 -= 0x2800;
}
/* same test for c2, which sits at index s2-1 */
if ((c2 <= 0xdbff && s2 != limit2 && Character.isLowSurrogate(cs2.charAt(s2))) || (Character.isLowSurrogate((char) c2) && 0 != (s2 - 1) && Character.isHighSurrogate(cs2.charAt(s2 - 2)))) {
/* part of a surrogate pair, leave >=d800 */
} else {
/* BMP code point - may be surrogate code point - make <d800 */
c2 -= 0x2800;
}
}
/* negative if string 1 sorts first, positive if string 2 sorts first */
return c1 - c2;
}
}
Aggregations