Use of android.icu.impl.Normalizer2Impl in project j2objc by Google.
Class Normalizer, method cmpEquivFold.
/**
 * Compares two character sequences code unit by code unit and, whenever the
 * current code units differ, tries to replace the differing code point on
 * either side by its case folding and/or its decomposition before deciding
 * on a result.
 *
 * Internal function; package visibility for use by UTF16.StringComparator.
 * No argument checking is performed here.
 *
 * Option bits read by this method:
 *   COMPARE_EQUIV            - on a mismatch, a code point for which
 *                              nfcImpl.getDecomposition() returns non-null is
 *                              replaced by that string (NFC data from
 *                              Norm2AllModes.getNFCInstance().impl).
 *   COMPARE_IGNORE_CASE      - on a mismatch, a code point for which
 *                              csp.toFullFolding() returns a non-negative value
 *                              is replaced by its case folding; options is also
 *                              passed through to toFullFolding().
 *   COMPARE_CODE_POINT_ORDER - the final difference is fixed up so that code
 *                              units belonging to a surrogate pair compare
 *                              greater than all other code units.
 *
 * Each string is read from a "level": level 0 is the original sequence,
 * deeper levels are replacement text. Case folding is only attempted at
 * level 0, decomposition only while the level is below 2. When a replacement
 * is exhausted, reading resumes at the saved position of the level below.
 *
 * @param cs1 first sequence to compare
 * @param cs2 second sequence to compare
 * @param options bit set of the COMPARE_* flags described above
 * @return 0 if both inputs are exhausted at the same time; -1 if the first
 *         input ends first; 1 if the second input ends first; otherwise the
 *         difference c1 - c2 of the first code units that still differ after
 *         all applicable replacements (with the optional code point order
 *         fix-up applied)
 */
/*package*/
static int cmpEquivFold(CharSequence cs1, CharSequence cs2, int options) {
/* normalization data; non-null only if COMPARE_EQUIV is set */
Normalizer2Impl nfcImpl;
/* case properties; non-null only if COMPARE_IGNORE_CASE is set */
UCaseProps csp;
/* current-level start/limit - s1/s2 as current */
int s1, s2, limit1, limit2;
/* decomposition and case folding variables */
int length;
/* stacks of previous-level start/current/limit; allocated lazily on first push */
CmpEquivLevel[] stack1 = null, stack2 = null;
/* buffers for algorithmic decompositions */
String decomp1, decomp2;
/* case folding buffers, only use current-level start/limit */
StringBuilder fold1, fold2;
/* track which is the current level per string */
int level1, level2;
/* current code units, and code points for lookups */
int c1, c2, cp1, cp2;
/* normalization/properties data loaded? */
if ((options & COMPARE_EQUIV) != 0) {
nfcImpl = Norm2AllModes.getNFCInstance().impl;
} else {
nfcImpl = null;
}
if ((options & COMPARE_IGNORE_CASE) != 0) {
csp = UCaseProps.INSTANCE;
fold1 = new StringBuilder();
fold2 = new StringBuilder();
} else {
csp = null;
fold1 = fold2 = null;
}
/* initialize: both strings start at index 0 of their original text (level 0) */
s1 = 0;
limit1 = cs1.length();
s2 = 0;
limit2 = cs2.length();
level1 = level2 = 0;
/* -1 requests a fetch for both sides on the first iteration */
c1 = c2 = -1;
/* comparison loop */
for (; ; ) {
/*
 * here a code unit value of -1 means "get another code unit";
 * after the two fetch blocks it means "this source is finished"
 */
if (c1 < 0) {
/* get next code unit from string 1, post-increment */
for (; ; ) {
if (s1 == limit1) {
if (level1 == 0) {
/* end of the original string: string 1 is finished */
c1 = -1;
break;
}
} else {
c1 = cs1.charAt(s1++);
break;
}
/* reached end of level buffer, pop one level; null entries are skipped placeholder levels */
do {
--level1;
cs1 = stack1[level1].cs;
} while (cs1 == null);
s1 = stack1[level1].s;
limit1 = cs1.length();
}
}
if (c2 < 0) {
/* get next code unit from string 2, post-increment */
for (; ; ) {
if (s2 == limit2) {
if (level2 == 0) {
/* end of the original string: string 2 is finished */
c2 = -1;
break;
}
} else {
c2 = cs2.charAt(s2++);
break;
}
/* reached end of level buffer, pop one level; null entries are skipped placeholder levels */
do {
--level2;
cs2 = stack2[level2].cs;
} while (cs2 == null);
s2 = stack2[level2].s;
limit2 = cs2.length();
}
}
/*
 * compare c1 and c2
 * either variable c1, c2 is -1 only if the corresponding string is finished
 */
if (c1 == c2) {
if (c1 < 0) {
return 0;
/* c1==c2==-1 indicating end of strings */
}
c1 = c2 = -1;
/* make us fetch new code units */
continue;
} else if (c1 < 0) {
return -1;
/* string 1 ends before string 2 */
} else if (c2 < 0) {
return 1;
/* string 2 ends before string 1 */
}
/* c1!=c2 && c1>=0 && c2>=0 */
/* get complete code points for c1, c2 for lookups if either is a surrogate */
cp1 = c1;
if (UTF16.isSurrogate((char) c1)) {
char c;
if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
/* s1 already points past c1, so cs1.charAt(s1) is the following code unit */
if (s1 != limit1 && Character.isLowSurrogate(c = cs1.charAt(s1))) {
/* advance ++s1; only below if cp1 decomposes/case-folds */
cp1 = Character.toCodePoint((char) c1, c);
}
} else /* isTrail(c1) */
{
/* s1-1 is the index of c1, so s1-2 is the preceding code unit (if any) */
if (0 <= (s1 - 2) && Character.isHighSurrogate(c = cs1.charAt(s1 - 2))) {
cp1 = Character.toCodePoint(c, (char) c1);
}
}
}
cp2 = c2;
if (UTF16.isSurrogate((char) c2)) {
char c;
if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
/* s2 already points past c2, so cs2.charAt(s2) is the following code unit */
if (s2 != limit2 && Character.isLowSurrogate(c = cs2.charAt(s2))) {
/* advance ++s2; only below if cp2 decomposes/case-folds */
cp2 = Character.toCodePoint((char) c2, c);
}
} else /* isTrail(c2) */
{
/* s2-1 is the index of c2, so s2-2 is the preceding code unit (if any) */
if (0 <= (s2 - 2) && Character.isHighSurrogate(c = cs2.charAt(s2 - 2))) {
cp2 = Character.toCodePoint(c, (char) c2);
}
}
}
/*
 * go down one level for each string;
 * continue with the main loop as soon as one side has been replaced
 */
/* case folding of cp1: only while string 1 is still reading its original text */
if (level1 == 0 && (options & COMPARE_IGNORE_CASE) != 0 && (length = csp.toFullFolding(cp1, fold1, options)) >= 0) {
/* cp1 case-folds: "length" is either the folded code point itself or the length of the folding string in fold1 */
if (UTF16.isSurrogate((char) c1)) {
if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
/* advance beyond source surrogate pair if it case-folds */
++s1;
} else /* isTrail(c1) */
{
/*
 * we got a supplementary code point when hitting its trail surrogate,
 * therefore the lead surrogate must have been the same as in the other string;
 * compare this case folding with the lead surrogate in the other string
 * remember that this simulates bulk text replacement:
 * the case folding would replace the entire code point
 */
/* step string 2 back one code unit and re-read the unit before it as c2 */
--s2;
c2 = cs2.charAt(s2 - 1);
}
}
/* push current level pointers (level1 is 0 here, so slot 0 is used) */
if (stack1 == null) {
stack1 = createCmpEquivLevelStack();
}
stack1[0].cs = cs1;
stack1[0].s = s1;
++level1;
/* Java: the buffer was probably not empty, remove the old contents */
if (length <= UCaseProps.MAX_STRING_LENGTH) {
/* string result: keep only the trailing "length" chars of the buffer */
fold1.delete(0, fold1.length() - length);
} else {
/* single code point result: "length" is the folded code point */
fold1.setLength(0);
fold1.appendCodePoint(length);
}
/* set next level pointers to case folding */
cs1 = fold1;
s1 = 0;
limit1 = fold1.length();
/* get ready to read from the case folding, continue with loop */
c1 = -1;
continue;
}
/* case folding of cp2: only while string 2 is still reading its original text */
if (level2 == 0 && (options & COMPARE_IGNORE_CASE) != 0 && (length = csp.toFullFolding(cp2, fold2, options)) >= 0) {
/* cp2 case-folds: "length" is either the folded code point itself or the length of the folding string in fold2 */
if (UTF16.isSurrogate((char) c2)) {
if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
/* advance beyond source surrogate pair if it case-folds */
++s2;
} else /* isTrail(c2) */
{
/*
 * we got a supplementary code point when hitting its trail surrogate,
 * therefore the lead surrogate must have been the same as in the other string;
 * compare this case folding with the lead surrogate in the other string
 * remember that this simulates bulk text replacement:
 * the case folding would replace the entire code point
 */
/* step string 1 back one code unit and re-read the unit before it as c1 */
--s1;
c1 = cs1.charAt(s1 - 1);
}
}
/* push current level pointers (level2 is 0 here, so slot 0 is used) */
if (stack2 == null) {
stack2 = createCmpEquivLevelStack();
}
stack2[0].cs = cs2;
stack2[0].s = s2;
++level2;
/* Java: the buffer was probably not empty, remove the old contents */
if (length <= UCaseProps.MAX_STRING_LENGTH) {
/* string result: keep only the trailing "length" chars of the buffer */
fold2.delete(0, fold2.length() - length);
} else {
/* single code point result: "length" is the folded code point */
fold2.setLength(0);
fold2.appendCodePoint(length);
}
/* set next level pointers to case folding */
cs2 = fold2;
s2 = 0;
limit2 = fold2.length();
/* get ready to read from the case folding, continue with loop */
c2 = -1;
continue;
}
/* decomposition of cp1: allowed from the original text (level 0) or from a case folding (level 1) */
if (level1 < 2 && (options & COMPARE_EQUIV) != 0 && (decomp1 = nfcImpl.getDecomposition(cp1)) != null) {
/* cp1 decomposes into decomp1 */
if (UTF16.isSurrogate((char) c1)) {
if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
/* advance beyond source surrogate pair if it decomposes */
++s1;
} else /* isTrail(c1) */
{
/*
 * we got a supplementary code point when hitting its trail surrogate,
 * therefore the lead surrogate must have been the same as in the other string;
 * compare this decomposition with the lead surrogate in the other string
 * remember that this simulates bulk text replacement:
 * the decomposition would replace the entire code point
 */
/* step string 2 back one code unit and re-read the unit before it as c2 */
--s2;
c2 = cs2.charAt(s2 - 1);
}
}
/* push current level pointers */
if (stack1 == null) {
stack1 = createCmpEquivLevelStack();
}
stack1[level1].cs = cs1;
stack1[level1].s = s1;
++level1;
/* set empty intermediate level if skipped, so that the decomposition always sits at level 2 */
if (level1 < 2) {
stack1[level1++].cs = null;
}
/* set next level pointers to decomposition */
cs1 = decomp1;
s1 = 0;
limit1 = decomp1.length();
/* get ready to read from decomposition, continue with loop */
c1 = -1;
continue;
}
/* decomposition of cp2: allowed from the original text (level 0) or from a case folding (level 1) */
if (level2 < 2 && (options & COMPARE_EQUIV) != 0 && (decomp2 = nfcImpl.getDecomposition(cp2)) != null) {
/* cp2 decomposes into decomp2 */
if (UTF16.isSurrogate((char) c2)) {
if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
/* advance beyond source surrogate pair if it decomposes */
++s2;
} else /* isTrail(c2) */
{
/*
 * we got a supplementary code point when hitting its trail surrogate,
 * therefore the lead surrogate must have been the same as in the other string;
 * compare this decomposition with the lead surrogate in the other string
 * remember that this simulates bulk text replacement:
 * the decomposition would replace the entire code point
 */
/* step string 1 back one code unit and re-read the unit before it as c1 */
--s1;
c1 = cs1.charAt(s1 - 1);
}
}
/* push current level pointers */
if (stack2 == null) {
stack2 = createCmpEquivLevelStack();
}
stack2[level2].cs = cs2;
stack2[level2].s = s2;
++level2;
/* set empty intermediate level if skipped, so that the decomposition always sits at level 2 */
if (level2 < 2) {
stack2[level2++].cs = null;
}
/* set next level pointers to decomposition */
cs2 = decomp2;
s2 = 0;
limit2 = decomp2.length();
/* get ready to read from decomposition, continue with loop */
c2 = -1;
continue;
}
/*
 * no further case folding/decomposition applies to either side:
 * return the difference of the code units
 *
 * for code point order, a plain code unit difference would sort surrogate
 * pairs (d800..dfff) below the BMP code units e000..ffff; the fix-up below
 * moves every code unit >= d800 that is not part of a pair down by 0x2800,
 * i.e. below d800
 */
if (c1 >= 0xd800 && c2 >= 0xd800 && (options & COMPARE_CODE_POINT_ORDER) != 0) {
/* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
if ((c1 <= 0xdbff && s1 != limit1 && Character.isLowSurrogate(cs1.charAt(s1))) || (Character.isLowSurrogate((char) c1) && 0 != (s1 - 1) && Character.isHighSurrogate(cs1.charAt(s1 - 2)))) {
/* part of a surrogate pair, leave >=d800 */
} else {
/* BMP code point - may be surrogate code point - make <d800 */
c1 -= 0x2800;
}
if ((c2 <= 0xdbff && s2 != limit2 && Character.isLowSurrogate(cs2.charAt(s2))) || (Character.isLowSurrogate((char) c2) && 0 != (s2 - 1) && Character.isHighSurrogate(cs2.charAt(s2 - 2)))) {
/* part of a surrogate pair, leave >=d800 */
} else {
/* BMP code point - may be surrogate code point - make <d800 */
c2 -= 0x2800;
}
}
return c1 - c2;
}
}
Use of android.icu.impl.Normalizer2Impl in project j2objc by Google.
Class BasicTest, method TestGetsFromImpl.
/*
 * Smoke test for two accessors of Normalizer2Impl, using the implementation
 * object of the NFC instance: getNormTrie() and getFCD16FromBelow180(int).
 * Each result is only checked for being different from null; the returned
 * values themselves are not inspected.
 * NOTE(review): if getFCD16FromBelow180() returns a primitive, its boxed value
 * can never be null and the second check cannot fail - confirm.
 */
@Test
public void TestGetsFromImpl() {
    final Normalizer2Impl impl = Norm2AllModes.getNFCInstance().impl;

    /* first accessor: the normalization trie */
    final Object normTrie = impl.getNormTrie();
    assertNotEquals("getNormTrie() returns null", null, normTrie);

    /* second accessor: FCD16 lookup for code point 0 */
    final Object fcd16 = impl.getFCD16FromBelow180(0);
    assertNotEquals("getFCD16FromBelow180() returns null", null, fcd16);
}
Aggregations