use of android.icu.text.UnicodeSetIterator in project j2objc by google.
the class CollationBuilder method closeOverComposites.
/**
 * Walks every code point in {@code COMPOSITES}, fetches the CEs for its NFD
 * decomposition, and hands the composite to {@code addIfDifferent} with an empty prefix.
 *
 * <p>A composite whose decomposition yields more than
 * {@code Collation.MAX_EXPANSION_LENGTH} CEs is skipped.
 */
private void closeOverComposites() {
    final String noPrefix = "";
    for (UnicodeSetIterator compositeIter = new UnicodeSetIterator(COMPOSITES); compositeIter.next(); ) {
        assert (compositeIter.codepoint != UnicodeSetIterator.IS_STRING);
        String decomposition = nfd.getDecomposition(compositeIter.codepoint);
        // ces and cesLength are declared outside this method and are overwritten on each pass.
        cesLength = dataBuilder.getCEs(decomposition, ces, 0);
        if (cesLength <= Collation.MAX_EXPANSION_LENGTH) {
            addIfDifferent(noPrefix, compositeIter.getString(), ces, cesLength, Collation.UNASSIGNED_CE32);
        }
        // Otherwise the decomposition produced too many CEs and the composite is ignored.
        // This can only really happen in contrived cases.
    }
}
use of android.icu.text.UnicodeSetIterator in project j2objc by google.
the class CollationBuilder method addTailComposites.
/**
 * Adds mappings for composites whose decomposition starts with the last starter
 * (combining class 0 code point) of {@code nfdString}.
 *
 * <p>Returns without adding anything when {@code nfdString} contains no starter, when
 * {@code Hangul.isJamoL} accepts the last starter, or when
 * {@code nfcImpl.getCanonStartSet} reports no composites for it.
 *
 * @param nfdPrefix prefix passed through unchanged to the CE lookup and to the add calls
 * @param nfdString string whose tail, from the last starter on, is merged with each composite
 */
private void addTailComposites(CharSequence nfdPrefix, CharSequence nfdString) {
    // Scan backwards for the last starter in the NFD string.
    int starter = -1;
    int limitOfStarter = nfdString.length();
    while (limitOfStarter != 0) {
        starter = Character.codePointBefore(nfdString, limitOfStarter);
        if (nfd.getCombiningClass(starter) == 0) {
            break;
        }
        limitOfStarter -= Character.charCount(starter);
    }
    if (limitOfStarter == 0) {
        // The scan ran off the front of the string: no starter at all.
        return;
    }
    // No closure to Hangul syllables since we decompose them on the fly.
    if (Hangul.isJamoL(starter)) {
        return;
    }
    // Are there any composites whose decomposition starts with the last starter?
    // Note: Normalizer2Impl does not currently return start sets for NFC_QC=Maybe characters.
    // We might find some more equivalent mappings here if it did.
    UnicodeSet candidates = new UnicodeSet();
    if (!nfcImpl.getCanonStartSet(starter, candidates)) {
        return;
    }
    // Scratch buffers reused for every candidate composite.
    StringBuilder mergedNFD = new StringBuilder();
    StringBuilder mergedString = new StringBuilder();
    long[] mergedCEs = new long[Collation.MAX_EXPANSION_LENGTH];
    for (UnicodeSetIterator candidateIter = new UnicodeSetIterator(candidates); candidateIter.next(); ) {
        assert (candidateIter.codepoint != UnicodeSetIterator.IS_STRING);
        int composite = candidateIter.codepoint;
        String decomposition = nfd.getDecomposition(composite);
        boolean merged = mergeCompositeIntoString(
                nfdString, limitOfStarter, composite, decomposition, mergedNFD, mergedString);
        if (!merged) {
            continue;
        }
        int mergedCEsLength = dataBuilder.getCEs(nfdPrefix, mergedNFD, mergedCEs, 0);
        if (mergedCEsLength > Collation.MAX_EXPANSION_LENGTH) {
            // Ignore mappings that we cannot store.
            continue;
        }
        // Note: It is possible that the fetched CEs do not make use of the mapping
        // for which we are adding the tail composites, in which case we might be adding
        // unnecessary mappings.
        // For example, when we add tail composites for ae^ (^=combining circumflex),
        // UCA discontiguous-contraction matching does not find any matches
        // for ae_^ (_=any combining diacritic below) *unless* there is also
        // a contraction mapping for ae.
        // Thus, if there is no ae contraction, then the ae^ mapping is ignored
        // while fetching the CEs for ae_^.
        // TODO: Try to detect this effectively.
        // (Alternatively, print a warning when prefix contractions are missing.)
        //
        // We do not need an explicit mapping for the NFD strings.
        // It is fine if the NFD input collates like this via a sequence of mappings.
        // It also saves a little bit of space, and may reduce the set of characters
        // with contractions.
        int addedCE32 = addIfDifferent(
                nfdPrefix, mergedString, mergedCEs, mergedCEsLength, Collation.UNASSIGNED_CE32);
        if (addedCE32 != Collation.UNASSIGNED_CE32) {
            // The mapping was different and was added; add the NFD form for closure only.
            addOnlyClosure(nfdPrefix, mergedNFD, mergedCEs, mergedCEsLength, addedCE32);
        }
    }
}
use of android.icu.text.UnicodeSetIterator in project j2objc by google.
the class CollationMiscTest method TestImportWithType.
/**
 * Builds one collator from the concatenated rules of the "vi" and "de-u-co-phonebk"
 * collators and another from the rule string
 * {@code [import vi][import de-u-co-phonebk]}, then compares their tailored sets and the
 * collation keys they produce for every element of the first tailored set.
 *
 * <p>Mismatches are reported with {@code warnln}; any exception is only logged.
 */
@Test
public void TestImportWithType() {
    try {
        RuleBasedCollator viCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("vi"));
        RuleBasedCollator dePhonebookCollator =
                (RuleBasedCollator) Collator.getInstance(ULocale.forLanguageTag("de-u-co-phonebk"));
        String concatenatedRules = viCollator.getRules() + dePhonebookCollator.getRules();
        RuleBasedCollator concatenated = new RuleBasedCollator(concatenatedRules);
        RuleBasedCollator imported = new RuleBasedCollator("[import vi][import de-u-co-phonebk]");

        UnicodeSet concatenatedTailored = concatenated.getTailoredSet();
        UnicodeSet importedTailored = imported.getTailoredSet();
        if (!concatenatedTailored.equals(importedTailored)) {
            warnln("Tailored set not equal");
        }

        UnicodeSetIterator elements = new UnicodeSetIterator(concatenatedTailored);
        while (elements.next()) {
            String element = elements.getString();
            CollationKey concatenatedKey = concatenated.getCollationKey(element);
            CollationKey importedKey = imported.getCollationKey(element);
            if (!concatenatedKey.equals(importedKey)) {
                warnln("Collation key's not equal for " + element);
            }
        }
    } catch (Exception e) {
        // Android patch: Add --omitCollationRules to genrb.
        logln("ERROR: in creation of rule based collator");
        // Android patch end.
    }
}
use of android.icu.text.UnicodeSetIterator in project j2objc by google.
the class RoundTripTest method getRepresentativeBoundaryHangul.
/**
 * Collects Hangul strings produced by composing a range of Jamo sequences.
 *
 * <p>Each sample sequence is run through {@code Normalizer.compose(sample, false)};
 * the composed string is added to the result only when its length is exactly 2.
 *
 * @return a new set holding every composed string of length 2
 */
private static UnicodeSet getRepresentativeBoundaryHangul() {
    UnicodeSet collected = new UnicodeSet();
    UnicodeSet leadingJamo = new UnicodeSet("[:hst=L:]");
    UnicodeSet vowelJamo = new UnicodeSet("[:hst=V:]");
    UnicodeSet trailingJamo = new UnicodeSet("[:hst=T:]");
    // U+1100 HANGUL CHOSEONG KIYEOK followed by U+1161 HANGUL JUNGSEONG A
    String kiyeokA = "\u1100\u1161";
    String kiyeok = "\u1100";
    String jungseongA = "\u1161";
    // U+110B HANGUL CHOSEONG IEUNG
    String ieung = "\u110B";
    UnicodeSet kiyeokOrIeung = new UnicodeSet("[\u1100\u110B]");

    // All combinations of (KIYEOK | IEUNG) + V + IEUNG + V.
    UnicodeSetIterator firstIter = new UnicodeSetIterator(kiyeokOrIeung);
    while (firstIter.next()) {
        UnicodeSetIterator vowelIter = new UnicodeSetIterator(vowelJamo);
        while (vowelIter.next()) {
            // The first three pieces do not change while the last vowel varies.
            String head = firstIter.getString() + vowelIter.getString() + ieung;
            UnicodeSetIterator lastVowelIter = new UnicodeSetIterator(vowelJamo);
            while (lastVowelIter.next()) {
                String composed = Normalizer.compose(head + lastVowelIter.getString(), false);
                if (composed.length() == 2) {
                    collected.add(composed);
                }
            }
        }
    }

    UnicodeSetIterator leadIter = new UnicodeSetIterator(leadingJamo);
    while (leadIter.next()) {
        final String tail = leadIter.getString() + jungseongA;

        // All combinations of KIYEOK + V + L + A.
        UnicodeSetIterator vowelIter = new UnicodeSetIterator(vowelJamo);
        while (vowelIter.next()) {
            String composed = Normalizer.compose(kiyeok + vowelIter.getString() + tail, false);
            if (composed.length() == 2) {
                collected.add(composed);
            }
        }

        // All combinations of KIYEOK A + T + L + A.
        UnicodeSetIterator trailIter = new UnicodeSetIterator(trailingJamo);
        while (trailIter.next()) {
            String composed = Normalizer.compose(kiyeokA + trailIter.getString() + tail, false);
            if (composed.length() == 2) {
                collected.add(composed);
            }
        }
    }
    return collected;
}
use of android.icu.text.UnicodeSetIterator in project j2objc by google.
the class TransliteratorTest method TestGurmukhiDevanagari.
/**
 * Test Gurmukhi-Devanagari Tippi and Bindi.
 *
 * <p>Each two-character source string is a Devanagari letter followed by U+0902; the
 * expected output is that letter shifted by 0x0100 followed by U+0A02 (vowel set) or
 * U+0A70 (non-vowel set).
 */
@Test
public void TestGurmukhiDevanagari() {
    // The rule says:
    //   U+0902 when preceded by a vowel     ---> U+0A02
    //   U+0902 when preceded by a consonant ---> U+0A70
    UnicodeSet vowel = new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]");
    UnicodeSet non_vowel = new UnicodeSet("[\u0915-\u0928\u092A-\u0930]");
    Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi");

    // Index 0 is a placeholder that is overwritten on every iteration.
    StringBuilder source = new StringBuilder(" \u0902");
    StringBuilder expected = new StringBuilder(" \u0A02");

    for (UnicodeSetIterator vowels = new UnicodeSetIterator(vowel); vowels.next(); ) {
        int cp = vowels.codepoint;
        source.setCharAt(0, (char) cp);
        expected.setCharAt(0, (char) (cp + 0x0100));
        expect(trans, source.toString(), expected.toString());
    }

    // From here on the second expected character is U+0A70 instead of U+0A02.
    expected.setCharAt(1, '\u0A70');
    for (UnicodeSetIterator nonVowels = new UnicodeSetIterator(non_vowel); nonVowels.next(); ) {
        int cp = nonVowels.codepoint;
        source.setCharAt(0, (char) cp);
        expected.setCharAt(0, (char) (cp + 0x0100));
        expect(trans, source.toString(), expected.toString());
    }
}
Aggregations