use of android.icu.text.UnicodeSet in project j2objc by google.
the class LocaleDataTest method TestEnglishExemplarCharacters.
// Simple test case for checking exemplar character type coverage.
// For every exemplar set type below LocaleData.ES_COUNT, the English locale
// data is queried and, when a set is returned, a sample character for that
// type is expected to be a member of it.
@Test
public void TestEnglishExemplarCharacters() {
    // One sample character per exemplar set type, indexed by the type value.
    final char[] testChars = {
        0x61, // standard
        0xE1, // auxiliary
        0x41, // index
        0, // filler for deprecated currency exemplar
        0x2D // punctuation
    };
    LocaleData englishData = LocaleData.getInstance(ULocale.ENGLISH);
    for (int type = 0; type < LocaleData.ES_COUNT; type++) {
        UnicodeSet exemplars = englishData.getExemplarSet(0, type);
        if (exemplars == null) {
            // No set for this type: nothing to verify.
            continue;
        }
        char sample = testChars[type];
        // A sample of 0 is a filler entry and is never checked.
        if (sample > 0 && !exemplars.contains(sample)) {
            errln("Character '" + sample + "' is not included in exemplar type " + type);
        }
    }
    try {
        // out of bounds value
        englishData.getExemplarSet(0, LocaleData.ES_COUNT);
        throw new ICUException("Test failure; should throw exception");
    } catch (IllegalArgumentException e) {
        // The rejected type is expected to surface as an IllegalArgumentException
        // whose cause is an ArrayIndexOutOfBoundsException.
        String causeName = e.getCause().getClass().getName();
        assertEquals("", "java.lang.ArrayIndexOutOfBoundsException", causeName);
    }
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class LocaleDataTest method TestExemplarSetTypes.
// Test case created for checking type coverage of static getExemplarSet method.
// See #9785, #9794 and #9795
// Every locale/type combination is queried; a null or empty result is only
// logged, while any exception is reported as an error.
@Test
public void TestExemplarSetTypes() {
    final String[] testLocales = {
        "am", // No auxiliary / index exemplars as of ICU 50
        "en",
        "th", // #9785
        "foo" // Bogus locale
    };
    // testTypes and testTypeNames are parallel arrays: same order, same length.
    final int[] testTypes = {
        LocaleData.ES_STANDARD,
        LocaleData.ES_AUXILIARY,
        LocaleData.ES_INDEX,
        LocaleData.ES_CURRENCY,
        LocaleData.ES_PUNCTUATION
    };
    final String[] testTypeNames = {
        "ES_STANDARD", "ES_AUXILIARY", "ES_INDEX", "ES_CURRENCY", "ES_PUNCTUATION"
    };
    for (String localeId : testLocales) {
        ULocale locale = new ULocale(localeId);
        for (int t = 0; t < testTypes.length; t++) {
            String typeName = testTypeNames[t];
            try {
                UnicodeSet exemplars = LocaleData.getExemplarSet(locale, 0, testTypes[t]);
                if (exemplars == null) {
                    // Not sure null is really OK (#9795)
                    logln(locale + "(" + typeName + ") returned null");
                    continue;
                }
                if (exemplars.isEmpty()) {
                    // This is probably reasonable when data is absent
                    logln(locale + "(" + typeName + ") returned an empty set");
                }
            } catch (Exception e) {
                errln(locale + "(" + typeName + ") Exception:" + e.getMessage());
            }
        }
    }
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class CollationDataBuilder method setDigitTags.
/**
 * Re-tags the mappings of all characters in the {@code [:Nd:]} set.
 *
 * <p>For each such character whose current CE32 in the trie is neither the
 * fallback nor the unassigned value, the current CE32 is stored via
 * {@code addCE32} and the trie entry is replaced with a DIGIT_TAG CE32 built
 * from the stored index and {@code UCharacter.digit(c)}.
 *
 * @throws IndexOutOfBoundsException if the index returned by {@code addCE32}
 *     exceeds {@code Collation.MAX_INDEX}
 */
protected void setDigitTags() {
    UnicodeSetIterator digitIter = new UnicodeSetIterator(new UnicodeSet("[:Nd:]"));
    while (digitIter.next()) {
        assert (digitIter.codepoint != UnicodeSetIterator.IS_STRING);
        int digit = digitIter.codepoint;
        int currentCE32 = trie.get(digit);
        if (currentCE32 == Collation.FALLBACK_CE32 || currentCE32 == Collation.UNASSIGNED_CE32) {
            // Nothing mapped for this digit: leave the trie entry untouched.
            continue;
        }
        int index = addCE32(currentCE32);
        if (index > Collation.MAX_INDEX) {
            // BufferOverflowException is a better fit
            // but cannot be constructed with a message string.
            throw new IndexOutOfBoundsException("too many mappings");
        }
        int digitCE32 = Collation.makeCE32FromTagIndexAndLength(
                Collation.DIGIT_TAG,
                index,
                UCharacter.digit(digit)); // u_charDigitValue(c)
        trie.set(digit, digitCE32);
    }
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class CollationBuilder method addTailComposites.
/**
 * Adds mappings for composites whose decomposition starts with the last
 * starter (combining class 0) of {@code nfdString}.
 *
 * <p>Returns without doing anything when the string has no starter, when the
 * last starter is a Hangul Jamo L, or when no canonical start set exists for it.
 *
 * @param nfdPrefix the prefix passed through to the CE lookup and mapping helpers
 * @param nfdString the string whose tail, from its last starter, is merged with
 *     each candidate composite
 */
private void addTailComposites(CharSequence nfdPrefix, CharSequence nfdString) {
    // Look for the last starter in the NFD string, scanning backwards
    // one code point at a time.
    int starterLimit = nfdString.length();
    int lastStarter;
    while (true) {
        if (starterLimit == 0) {
            // no starter at all
            return;
        }
        lastStarter = Character.codePointBefore(nfdString, starterLimit);
        if (nfd.getCombiningClass(lastStarter) == 0) {
            break;
        }
        starterLimit -= Character.charCount(lastStarter);
    }
    // No closure to Hangul syllables since we decompose them on the fly.
    if (Hangul.isJamoL(lastStarter)) {
        return;
    }

    // Are there any composites whose decomposition starts with the lastStarter?
    // Note: Normalizer2Impl does not currently return start sets for NFC_QC=Maybe characters.
    // We might find some more equivalent mappings here if it did.
    UnicodeSet startSet = new UnicodeSet();
    if (!nfcImpl.getCanonStartSet(lastStarter, startSet)) {
        return;
    }

    StringBuilder newNFDString = new StringBuilder();
    StringBuilder newString = new StringBuilder();
    long[] ceBuffer = new long[Collation.MAX_EXPANSION_LENGTH];
    UnicodeSetIterator compositeIter = new UnicodeSetIterator(startSet);
    while (compositeIter.next()) {
        assert (compositeIter.codepoint != UnicodeSetIterator.IS_STRING);
        int composite = compositeIter.codepoint;
        String decomposition = nfd.getDecomposition(composite);
        boolean merged = mergeCompositeIntoString(
                nfdString, starterLimit, composite, decomposition, newNFDString, newString);
        if (!merged) {
            continue;
        }
        int ceCount = dataBuilder.getCEs(nfdPrefix, newNFDString, ceBuffer, 0);
        if (ceCount > Collation.MAX_EXPANSION_LENGTH) {
            // Ignore mappings that we cannot store.
            continue;
        }
        // Note: It is possible that the newCEs do not make use of the mapping
        // for which we are adding the tail composites, in which case we might be adding
        // unnecessary mappings.
        // For example, when we add tail composites for ae^ (^=combining circumflex),
        // UCA discontiguous-contraction matching does not find any matches
        // for ae_^ (_=any combining diacritic below) *unless* there is also
        // a contraction mapping for ae.
        // Thus, if there is no ae contraction, then the ae^ mapping is ignored
        // while fetching the newCEs for ae_^.
        // TODO: Try to detect this effectively.
        // (Alternatively, print a warning when prefix contractions are missing.)

        // We do not need an explicit mapping for the NFD strings.
        // It is fine if the NFD input collates like this via a sequence of mappings.
        // It also saves a little bit of space, and may reduce the set of characters with contractions.
        int addedCE32 = addIfDifferent(
                nfdPrefix, newString, ceBuffer, ceCount, Collation.UNASSIGNED_CE32);
        if (addedCE32 == Collation.UNASSIGNED_CE32) {
            // was not different, nothing was added
            continue;
        }
        // was different, was added
        addOnlyClosure(nfdPrefix, newNFDString, ceBuffer, ceCount, addedCE32);
    }
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class TestBoilerplate method getSet.
/**
 * Collects the keys of a map whose associated value equals a given value.
 *
 * <p>The comparison is null-safe: a {@code null} entry value no longer causes
 * a {@code NullPointerException}, and a {@code null} {@code value} matches
 * exactly those keys that are mapped to {@code null}.
 *
 * @param m the map to scan; its Integer keys are added to the result via
 *     {@code UnicodeSet.add(int)}
 * @param value the value to look for; may be {@code null}
 * @return a new UnicodeSet containing every key of {@code m} whose value
 *     equals {@code value}
 */
public static <T> UnicodeSet getSet(Map<Integer, T> m, T value) {
    UnicodeSet result = new UnicodeSet();
    // Iterate entries directly instead of looking each key up again with get().
    for (Map.Entry<Integer, T> entry : m.entrySet()) {
        // Objects.equals still calls entryValue.equals(value) for non-null entry
        // values, so the original comparison direction is preserved.
        if (java.util.Objects.equals(entry.getValue(), value)) {
            result.add(entry.getKey().intValue());
        }
    }
    return result;
}
Aggregations