Use of android.icu.text.UnicodeSetIterator in project j2objc by Google.
Class LocaleDataTest, method TestExemplarSet2.
/**
 * For every entry of {@code availableLocales}, fetches four exemplar sets from
 * {@code LocaleData} (standard / auxiliary, each with option {@code 0} and with
 * {@code UnicodeSet.CASE}) and verifies that:
 * <ul>
 *   <li>a standard exemplar set has at least one range or string that is fully
 *       contained in one of the script sets built from {@code UScript.getCode(locale)};</li>
 *   <li>each {@code UnicodeSet.CASE} set contains all of its option-0 counterpart;</li>
 *   <li>across all locales, the two are equal at least once but not every time.</li>
 * </ul>
 * Each distinct {@code ExemplarGroup} is checked against the scripts only once.
 */
@Test
public void TestExemplarSet2() {
    int equalCount = 0;
    // Typed instead of raw so the compiler checks what goes into the set.
    HashSet<ExemplarGroup> testedExemplars = new HashSet<ExemplarGroup>();
    for (ULocale locale : availableLocales) {
        LocaleData ld = LocaleData.getInstance(locale);
        int[] scriptCodes = UScript.getCode(locale);
        if (scriptCodes == null) {
            // NOTE(review): this is a substring match on the locale ID; presumably it is
            // meant to exempt the legacy Indonesian code "in" - confirm.
            if (locale.toString().indexOf("in") < 0) {
                errln("UScript.getCode returned null for locale: " + locale);
            }
            continue;
        }
        // Slot layout: [0] standard, [1] auxiliary, [2] CASE standard, [3] CASE auxiliary.
        UnicodeSet[] exemplarSets = new UnicodeSet[4];
        for (int k = 0; k < 2; ++k) {
            // for casing option in (normal, uncased)
            int option = (k == 0) ? 0 : UnicodeSet.CASE;
            for (int h = 0; h < 2; ++h) {
                int type = (h == 0) ? LocaleData.ES_STANDARD : LocaleData.ES_AUXILIARY;
                UnicodeSet exemplarSet = ld.getExemplarSet(option, type);
                exemplarSets[k * 2 + h] = exemplarSet;
                ExemplarGroup exGrp = new ExemplarGroup(exemplarSet, scriptCodes);
                // add() returns false when an equal group was already recorded,
                // so a single call both tests membership and registers the group.
                if (!testedExemplars.add(exGrp)) {
                    continue;
                }
                // create the UnicodeSets for the scripts of this locale
                UnicodeSet[] sets = new UnicodeSet[scriptCodes.length];
                for (int j = 0; j < scriptCodes.length; j++) {
                    sets[j] = new UnicodeSet("[:" + UScript.getShortName(scriptCodes[j]) + ":]");
                }
                boolean existsInScript = false;
                UnicodeSetIterator iter = new UnicodeSetIterator(exemplarSet);
                // iterate over the ranges and strings of the exemplar set until one
                // of them is found in a script set
                while (!existsInScript && iter.nextRange()) {
                    if (iter.codepoint != UnicodeSetIterator.IS_STRING) {
                        for (int j = 0; j < sets.length; j++) {
                            if (sets[j].contains(iter.codepoint, iter.codepointEnd)) {
                                existsInScript = true;
                                break;
                            }
                        }
                    } else {
                        for (int j = 0; j < sets.length; j++) {
                            if (sets[j].contains(iter.string)) {
                                existsInScript = true;
                                break;
                            }
                        }
                    }
                }
                // TODO: How to verify LocaleData.ES_AUXILIARY ???
                // Only the standard set (h == 0) is reported on failure.
                if (!existsInScript && h == 0) {
                    errln("ExemplarSet containment failed for locale,option,type : " + locale + ", " + option + ", " + type);
                }
            }
        }
        // This is expensive, so only do it if it will be visible
        if (isVerbose()) {
            logln(locale.toString() + " exemplar(ES_STANDARD)" + exemplarSets[0]);
            logln(locale.toString() + " exemplar(ES_AUXILIARY) " + exemplarSets[1]);
            logln(locale.toString() + " exemplar(case-folded,ES_STANDARD) " + exemplarSets[2]);
            logln(locale.toString() + " exemplar(case-folded,ES_AUXILIARY) " + exemplarSets[3]);
        }
        assertTrue(locale.toString() + " case-folded is a superset", exemplarSets[2].containsAll(exemplarSets[0]));
        assertTrue(locale.toString() + " case-folded is a superset", exemplarSets[3].containsAll(exemplarSets[1]));
        if (exemplarSets[2].equals(exemplarSets[0])) {
            ++equalCount;
        }
        if (exemplarSets[3].equals(exemplarSets[1])) {
            ++equalCount;
        }
    }
    // Note: The case-folded set should sometimes be a strict superset
    // and sometimes be equal.
    assertTrue("case-folded is sometimes a strict superset, and sometimes equal", equalCount > 0 && equalCount < availableLocales.length * 2);
}
Use of android.icu.text.UnicodeSetIterator in project j2objc by Google.
Class LocaleDataTest, method TestExemplarSet.
/**
 * For every entry of {@code availableLocales}, fetches the exemplar set via the
 * static {@code LocaleData.getExemplarSet(locale, option)} with option {@code 0}
 * and with {@code UnicodeSet.CASE}, and verifies that:
 * <ul>
 *   <li>each set has at least one range or string that is fully contained in one
 *       of the script sets built from {@code UScript.getCode(locale)};</li>
 *   <li>the {@code UnicodeSet.CASE} set contains all of the option-0 set;</li>
 *   <li>across all locales, the two are equal at least once but not every time.</li>
 * </ul>
 * Each distinct {@code ExemplarGroup} is checked against the scripts only once.
 */
@Test
public void TestExemplarSet() {
    // Typed instead of raw so the compiler checks what goes into the set.
    HashSet<ExemplarGroup> testedExemplars = new HashSet<ExemplarGroup>();
    int equalCount = 0;
    for (ULocale locale : availableLocales) {
        int[] scriptCodes = UScript.getCode(locale);
        if (scriptCodes == null) {
            // so in effect I can never test the script code for Indonesian :(
            // NOTE(review): the check is a substring match on the whole locale ID - confirm
            // that matching "in" anywhere in the ID is intended.
            if (locale.toString().indexOf("in") < 0) {
                errln("UScript.getCode returned null for locale: " + locale);
            }
            continue;
        }
        // Slot layout: [0] option 0, [1] UnicodeSet.CASE.
        UnicodeSet[] exemplarSets = new UnicodeSet[2];
        for (int k = 0; k < 2; ++k) {
            // for casing option in (normal, caseInsensitive)
            int option = (k == 0) ? 0 : UnicodeSet.CASE;
            UnicodeSet exemplarSet = LocaleData.getExemplarSet(locale, option);
            exemplarSets[k] = exemplarSet;
            ExemplarGroup exGrp = new ExemplarGroup(exemplarSet, scriptCodes);
            // add() returns false when an equal group was already recorded,
            // so a single call both tests membership and registers the group.
            if (!testedExemplars.add(exGrp)) {
                continue;
            }
            // create the UnicodeSets for the scripts of this locale
            UnicodeSet[] sets = new UnicodeSet[scriptCodes.length];
            for (int j = 0; j < scriptCodes.length; j++) {
                sets[j] = new UnicodeSet("[:" + UScript.getShortName(scriptCodes[j]) + ":]");
            }
            boolean existsInScript = false;
            UnicodeSetIterator iter = new UnicodeSetIterator(exemplarSet);
            // iterate over the ranges and strings of the exemplar set until one
            // of them is found in a script set
            while (!existsInScript && iter.nextRange()) {
                if (iter.codepoint != UnicodeSetIterator.IS_STRING) {
                    for (int j = 0; j < sets.length; j++) {
                        if (sets[j].contains(iter.codepoint, iter.codepointEnd)) {
                            existsInScript = true;
                            break;
                        }
                    }
                } else {
                    for (int j = 0; j < sets.length; j++) {
                        if (sets[j].contains(iter.string)) {
                            existsInScript = true;
                            break;
                        }
                    }
                }
            }
            if (!existsInScript) {
                errln("ExemplarSet containment failed for locale : " + locale);
            }
        }
        // This is expensive, so only do it if it will be visible
        if (isVerbose()) {
            logln(locale.toString() + " exemplar " + exemplarSets[0]);
            logln(locale.toString() + " exemplar(case-folded) " + exemplarSets[1]);
        }
        assertTrue(locale.toString() + " case-folded is a superset", exemplarSets[1].containsAll(exemplarSets[0]));
        if (exemplarSets[1].equals(exemplarSets[0])) {
            ++equalCount;
        }
    }
    // Note: The case-folded set should sometimes be a strict superset
    // and sometimes be equal.
    assertTrue("case-folded is sometimes a strict superset, and sometimes equal", equalCount > 0 && equalCount < availableLocales.length);
}
Use of android.icu.text.UnicodeSetIterator in project j2objc by Google.
Class CollationTest, method TestTailoredElements.
/**
 * Walks "root" followed by every locale returned by
 * {@code Collator.getAvailableULocales()}, and for each collation keyword value
 * of that locale obtains a collator, then checks every collation element of
 * every string in the collator's tailored set with {@code isValidCE}.
 *
 * <p>A collator whose actual locale name has already been seen is skipped, as is
 * any collator that is not a {@code RuleBasedCollator}.
 */
@Test
public void TestTailoredElements() {
    CollationData root = CollationRoot.getData();
    CollationRootElements rootElements = new CollationRootElements(root.rootElements);

    Set<String> prevLocales = new HashSet<String>();
    prevLocales.add("");
    prevLocales.add("root");
    prevLocales.add("root@collation=standard");

    ULocale[] locales = Collator.getAvailableULocales();
    // Index -1 stands for "root"; indices 0..length-1 are the available locales.
    // The previous loop advanced localeID in the for-update clause and then tested
    // locIdx < locales.length, which exited before the last locale was ever used
    // (and skipped "root" entirely when the array was empty).
    for (int locIdx = -1; locIdx < locales.length; ++locIdx) {
        String localeID = (locIdx < 0) ? "root" : locales[locIdx].getName();
        ULocale locale = new ULocale(localeID);
        String[] types = Collator.getKeywordValuesForLocale("collation", locale, false);
        for (int typeIdx = 0; typeIdx < types.length; ++typeIdx) {
            // first: default type
            String type = types[typeIdx];
            if (type.startsWith("private-")) {
                errln("Collator.getKeywordValuesForLocale(" + localeID + ") returns private collation keyword: " + type);
            }
            ULocale localeWithType = locale.setKeywordValue("collation", type);
            Collator coll = Collator.getInstance(localeWithType);
            ULocale actual = coll.getLocale(ULocale.ACTUAL_LOCALE);
            // add() returns false when this actual locale was already handled.
            if (!prevLocales.add(actual.getName())) {
                continue;
            }
            logln("TestTailoredElements(): requested " + localeWithType.getName() + " -> actual " + actual.getName());
            if (!(coll instanceof RuleBasedCollator)) {
                continue;
            }
            RuleBasedCollator rbc = (RuleBasedCollator) coll;
            // Note: It would be better to get tailored strings such that we can
            // identify the prefix, and only get the CEs for the prefix+string,
            // not also for the prefix.
            // There is currently no API for that.
            // It would help in an unusual case where a contraction starting in the prefix
            // extends past its end, and we do not see the intended mapping.
            // For example, for a mapping p|st, if there is also a contraction ps,
            // then we get CEs(ps)+CEs(t), rather than CEs(p|st).
            UnicodeSet tailored = coll.getTailoredSet();
            UnicodeSetIterator iter = new UnicodeSetIterator(tailored);
            while (iter.next()) {
                String s = iter.getString();
                long[] ces = rbc.internalGetCEs(s);
                for (int i = 0; i < ces.length; ++i) {
                    long ce = ces[i];
                    if (!isValidCE(rootElements, root, ce)) {
                        // The offending string is logged before the error is reported.
                        logln(prettify(s));
                        errln("invalid tailored CE 0x" + Utility.hex(ce, 16) + " at CE index " + i + " from string:");
                    }
                }
            }
        }
    }
}
Use of android.icu.text.UnicodeSetIterator in project j2objc by Google.
Class CollationTest, method TestImplicits.
/**
 * Iterates over core Han, other Han and unassigned/surrogate/private-use code
 * points (in that order) and checks, using a {@code UTF16CollationIterator} over
 * the root collation data, that each one yields exactly one CE with common
 * secondary/tertiary weights. For code points in the {@code inOrder} subset the
 * primary weight must also be strictly greater than the previous one.
 *
 * See http://www.unicode.org/reports/tr10/#Implicit_Weights
 */
@Test
public void TestImplicits() {
    CollationData rootData = CollationRoot.getData();

    // core Han Unified Ideographs
    UnicodeSet coreHan = new UnicodeSet(
            "[\\p{unified_ideograph}&"
            + "[\\p{Block=CJK_Unified_Ideographs}"
            + "\\p{Block=CJK_Compatibility_Ideographs}]]");
    // all other Unified Han ideographs
    UnicodeSet otherHan = new UnicodeSet(
            "[\\p{unified ideograph}-"
            + "[\\p{Block=CJK_Unified_Ideographs}"
            + "\\p{Block=CJK_Compatibility_Ideographs}]]");
    UnicodeSet unassigned = new UnicodeSet("[[:Cn:][:Cs:][:Co:]]");
    // U+FFFE and U+FFFF have special CLDR root mappings, so leave them out.
    unassigned.remove(0xfffe, 0xffff);

    // Starting with CLDR 26/ICU 54, the root Han order may instead be
    // the Unihan radical-stroke order.
    // To pass either way, ordering is only asserted for a small set of Han
    // characters whose radical-stroke order equals their code point order.
    UnicodeSet someHanInCPOrder = new UnicodeSet(
            "[\\u4E00-\\u4E16\\u4E18-\\u4E2B\\u4E2D-\\u4E3C\\u4E3E-\\u4E48"
            + "\\u4E4A-\\u4E60\\u4E63-\\u4E8F\\u4E91-\\u4F63\\u4F65-\\u50F1\\u50F3-\\u50F6]");
    UnicodeSet inOrder = new UnicodeSet(someHanInCPOrder);
    inOrder.addAll(unassigned).freeze();

    UnicodeSet[] groups = { coreHan, otherHan, unassigned };
    int lastCodePoint = 0;
    long lastPrimary = 0;
    UTF16CollationIterator ceIter = new UTF16CollationIterator(rootData, false, "", 0);
    for (UnicodeSet group : groups) {
        UnicodeSetIterator members = new UnicodeSetIterator(group);
        while (members.next()) {
            String text = members.getString();
            int cp = text.codePointAt(0);
            ceIter.setText(false, text, 0);
            long firstCE = ceIter.nextCE();
            long secondCE = ceIter.nextCE();
            long lower32 = firstCE & 0xffffffffL;
            if (firstCE == Collation.NO_CE || secondCE != Collation.NO_CE) {
                errln("CollationIterator.nextCE(0x" + Utility.hex(cp) + ") did not yield exactly one CE");
            } else if (lower32 != Collation.COMMON_SEC_AND_TER_CE) {
                errln("CollationIterator.nextCE(U+" + Utility.hex(cp, 4) + ") has non-common sec/ter weights: 0x" + Utility.hex(lower32, 8));
            } else {
                // The upper 32 bits of the CE hold the primary weight.
                long primary = firstCE >>> 32;
                if (primary <= lastPrimary && inOrder.contains(cp) && inOrder.contains(lastCodePoint)) {
                    errln("CE(U+" + Utility.hex(cp) + ")=0x" + Utility.hex(primary) + ".. not greater than CE(U+" + Utility.hex(lastCodePoint) + ")=0x" + Utility.hex(lastPrimary) + "..");
                }
                // Only a well-formed CE becomes the new comparison baseline.
                lastCodePoint = cp;
                lastPrimary = primary;
            }
        }
    }
}
Use of android.icu.text.UnicodeSetIterator in project j2objc by Google.
Class TransliteratorTest, method TestAny.
/**
 * Runs the "any-Latn" transliterator over a string assembled from a handful of
 * alphabetic sample characters of every script code below
 * {@code UScript.CODE_LIMIT}. The input and the result are only logged; nothing
 * is asserted about the output.
 */
@Test
public void TestAny() {
    UnicodeSet alphabetic = (UnicodeSet) new UnicodeSet("[:alphabetic:]").freeze();
    // At most this many entries are taken from each script's sample set.
    final int maxSamplesPerScript = 6;
    StringBuilder samples = new StringBuilder();
    for (int script = 0; script < UScript.CODE_LIMIT; ++script) {
        UnicodeSet scriptLetters = new UnicodeSet()
                .applyPropertyAlias("script", UScript.getShortName(script))
                .retainAll(alphabetic);
        UnicodeSetIterator it = new UnicodeSetIterator(scriptLetters);
        int taken = 0;
        // The count is tested first so the iterator is not advanced once the quota is reached.
        while (taken < maxSamplesPerScript && it.next()) {
            samples.append(it.getString());
            ++taken;
        }
    }
    logln("Sample set for Any-Latin: " + samples);
    Transliterator anyLatin = Transliterator.getInstance("any-Latn");
    String result = anyLatin.transliterate(samples.toString());
    logln("Sample result for Any-Latin: " + result);
}
Aggregations