use of android.icu.text.UnicodeSet in project j2objc by google.
the class UnicodeSetTest method CheckRangeSpeed.
/**
 * Compares the average cost of copy-constructing a {@link UnicodeSet} with the average cost of
 * constructing an equal set from its pattern string.
 *
 * <p>The result is logged when the copy constructor takes less than twice as long as the
 * pattern constructor, and reported as an error otherwise.
 *
 * @param iterations number of constructor calls made in each of the two timed loops
 * @param testSet set that is copied, and whose {@code toString()} pattern is re-parsed
 */
private void CheckRangeSpeed(int iterations, UnicodeSet testSet) {
    testSet.complement().complement();
    String testPattern = testSet.toString();

    // Sanity check before timing: a copy must be equal to its source.
    UnicodeSet fromRange = new UnicodeSet(testSet);
    assertEquals("from range vs pattern", testSet, fromRange);

    // System.nanoTime() is used instead of currentTimeMillis(): with millisecond resolution
    // both loops can finish within the same tick, which makes both averages 0 and the
    // comparison below fail spuriously (0 < 2 * 0 is false). Differences are taken on the
    // long values so that no precision is lost before converting to double.
    final double nanosPerMilli = 1000000.0;
    long start = System.nanoTime();
    for (int i = 0; i < iterations; ++i) {
        fromRange = new UnicodeSet(testSet);
    }
    long middle = System.nanoTime();
    for (int i = 0; i < iterations; ++i) {
        new UnicodeSet(testPattern);
    }
    long end = System.nanoTime();

    // Average time per construction, in milliseconds.
    double rangeConstructorTime = (middle - start) / nanosPerMilli / iterations;
    double patternConstructorTime = (end - middle) / nanosPerMilli / iterations;
    String message = "Range constructor:\t" + rangeConstructorTime + ";\tPattern constructor:\t" + patternConstructorTime + "\t\t" + percent.format(rangeConstructorTime / patternConstructorTime - 1);
    if (rangeConstructorTime < 2 * patternConstructorTime) {
        logln(message);
    } else {
        errln(message);
    }
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class BasicTest method initSkippables.
/**
 * Fills the four given sets (indexed by D, C, KD and KC) from property patterns and then
 * prunes the C and KC sets of every character that stops being NFC-normalized when one of the
 * back-combining characters is appended to it.
 *
 * @param skipSets array holding the sets to fill; its elements are modified in place
 * @return the same array that was passed in
 */
private static UnicodeSet[] initSkippables(UnicodeSet[] skipSets) {
    skipSets[D].applyPattern("[[:NFD_QC=Yes:]&[:ccc=0:]]", false);
    skipSets[C].applyPattern("[[:NFC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]", false);
    skipSets[KD].applyPattern("[[:NFKD_QC=Yes:]&[:ccc=0:]]", false);
    skipSets[KC].applyPattern("[[:NFKC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]", false);

    // Collect every back-combining character together with its combining class, so the
    // nested loop below can walk plain arrays instead of re-iterating the set.
    UnicodeSet backCombiners = new UnicodeSet("[:NFC_QC=Maybe:]");
    final int backCount = backCombiners.size();
    int[] backChars = new int[backCount];
    int[] backClasses = new int[backCount];
    UnicodeSetIterator iter = new UnicodeSetIterator(backCombiners);
    for (int k = 0; k < backCount; ++k) {
        iter.next();
        backChars[k] = iter.codepoint;
        backClasses[k] = UCharacter.getCombiningClass(iter.codepoint);
    }

    // Control codes, Han characters and Hangul LVT syllables do not combine forward, so they
    // need no probing. LV syllables were already excluded by the patterns above.
    UnicodeSet notInteresting = new UnicodeSet("[[:C:][:Unified_Ideograph:][:HST=LVT:]]");
    UnicodeSet candidates = (UnicodeSet) skipSets[C].clone();
    candidates.removeAll(notInteresting);

    // Probe each remaining candidate: append one back-combining character at a time and see
    // whether the resulting string is still normalized.
    Normalizer2 nfc = Normalizer2.getNFCInstance();
    StringBuilder probe = new StringBuilder();
    iter.reset(candidates);
    while (iter.next()) {
        final int base = iter.codepoint;
        probe.setLength(0);
        probe.appendCodePoint(base);
        final int baseLength = probe.length();
        final int trailCc =
                UCharacter.getIntPropertyValue(base, UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
        for (int k = 0; k < backCount; ++k) {
            // If the base's decomposition ends with a non-zero combining class, only a
            // combiner that itself has a non-zero combining class can change it.
            if (trailCc != 0 && backClasses[k] == 0) {
                continue;
            }
            probe.appendCodePoint(backChars[k]);
            if (!nfc.isNormalized(probe)) {
                skipSets[C].remove(base);
                skipSets[KC].remove(base);
                break;
            }
            // Drop the combiner again so the next probe starts from the bare base character.
            probe.setLength(baseLength);
        }
    }
    return skipSets;
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class BasicTest method TestSerializedSet.
/**
 * Loads a hand-written serialized set into a {@code USerializedSet}, rebuilds its ranges as a
 * {@code UnicodeSet}, and checks that {@code USerializedSet.contains} accepts every code point
 * in those ranges.
 */
@Test
public void TestSerializedSet() {
    char[] serialized = {
        0x8007, // length
        3, // bmpLength
        0xc0, 0xfe, 0xfffc, 1, 9, 0x10, 0xfffc
    };
    USerializedSet sset = new USerializedSet();
    sset.getSet(serialized, 0);

    // Collect all ranges into one set for contiguous output.
    UnicodeSet set = new UnicodeSet();
    int[] startEnd = new int[2];
    final int rangeCount = sset.countRanges();
    for (int rangeIndex = 0; rangeIndex < rangeCount; ++rangeIndex) {
        sset.getRange(rangeIndex, startEnd);
        set.add(startEnd[0], startEnd[1]);
    }

    // Test every character of every range; stop at the first string element.
    UnicodeSetIterator it = new UnicodeSetIterator(set);
    while (it.nextRange()) {
        if (it.codepoint == UnicodeSetIterator.IS_STRING) {
            break;
        }
        final int last = it.codepointEnd;
        for (int cp = it.codepoint; cp <= last; ++cp) {
            if (!sset.contains(cp)) {
                errln("USerializedSet.contains failed for " + Utility.hex(cp, 8));
            }
        }
    }
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class RBBITestMonkey method RunMonkey.
/**
 * Run a RBBI monkey test. Common routine, for all break iterator types.
 *
 * <p>Each iteration builds a random test string from the character classes supplied by
 * {@code mk}, computes the expected break positions with {@code mk.next()}, and compares them
 * with the positions reported by {@code bi} through forward iteration, reverse iteration,
 * {@code isBoundary()}, {@code following()} and {@code preceding()}. The first mismatch in an
 * iteration is reported with a dump of the surrounding characters.
 *
 * @param bi the break iterator to use
 * @param mk MonkeyKind, abstraction for obtaining expected results
 * @param name name of test (char, word, etc.) for use in error messages
 * @param seed seed for starting random number generator (parameter from user)
 * @param numIterations number of random strings to test; -1 loops without end and prints a
 *     periodic progress dot
 */
void RunMonkey(BreakIterator bi, RBBIMonkeyKind mk, String name, int seed, int numIterations) {
    int TESTSTRINGLEN = 500;
    StringBuffer testText = new StringBuffer();
    int numCharClasses;
    List chClasses;
    // The arrays below are indexed by position in testText. They are sized 2 * TESTSTRINGLEN + 1,
    // presumably to leave room for two UTF-16 units per generated code point plus the end position.
    int[] expected = new int[TESTSTRINGLEN * 2 + 1];
    int expectedCount = 0;
    boolean[] expectedBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
    boolean[] forwardBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
    boolean[] reverseBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
    boolean[] isBoundaryBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
    boolean[] followingBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
    boolean[] precedingBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
    int i;
    int loopCount = 0;
    boolean printTestData = false;
    boolean printBreaksFromBI = false;
    m_seed = seed;
    numCharClasses = mk.charClasses().size();
    chClasses = mk.charClasses();
    // Verify that the character classes all have at least one member.
    for (i = 0; i < numCharClasses; i++) {
        UnicodeSet s = (UnicodeSet) chClasses.get(i);
        if (s == null || s.size() == 0) {
            errln("Character Class " + i + " is null or of zero size.");
            return;
        }
    }
    // --------------------------------------------------------------------------------------------
    //
    // Debugging settings. Comment out everything in the following block for normal operation
    //
    // --------------------------------------------------------------------------------------------
    // numIterations = -1;
    // numIterations = 10000; // Same as exhaustive.
    // RuleBasedBreakIterator_New.fTrace = true;
    // m_seed = 859056465;
    // TESTSTRINGLEN = 50;
    // printTestData = true;
    // printBreaksFromBI = true;
    // ((RuleBasedBreakIterator_New)bi).dump();
    // --------------------------------------------------------------------------------------------
    //
    // End of Debugging settings.
    //
    // --------------------------------------------------------------------------------------------
    int dotsOnLine = 0;
    while (loopCount < numIterations || numIterations == -1) {
        if (numIterations == -1 && loopCount % 10 == 0) {
            // If test is running in an infinite loop, display a periodic tic so
            // we can tell that it is making progress.
            System.out.print(".");
            if (dotsOnLine++ >= 80) {
                System.out.println();
                dotsOnLine = 0;
            }
        }
        // Save current random number seed, so that we can recreate the random numbers
        // for this loop iteration in event of an error.
        seed = m_seed;
        testText.setLength(0);
        // Populate a test string with data.
        if (printTestData) {
            System.out.println("Test Data string ...");
        }
        for (i = 0; i < TESTSTRINGLEN; i++) {
            int aClassNum = m_rand() % numCharClasses;
            UnicodeSet classSet = (UnicodeSet) chClasses.get(aClassNum);
            int charIdx = m_rand() % classSet.size();
            int c = classSet.charAt(charIdx);
            if (c < 0) {
                // TODO: deal with sets containing strings.
                errln("c < 0");
            }
            UTF16.appendCodePoint(testText, c);
            if (printTestData) {
                System.out.print(Integer.toHexString(c) + " ");
            }
        }
        if (printTestData) {
            System.out.println();
        }
        // Clear the results left over from the previous iteration.
        Arrays.fill(expected, 0);
        Arrays.fill(expectedBreaks, false);
        Arrays.fill(forwardBreaks, false);
        Arrays.fill(reverseBreaks, false);
        Arrays.fill(isBoundaryBreaks, false);
        Arrays.fill(followingBreaks, false);
        Arrays.fill(precedingBreaks, false);
        // Calculate the expected results for this test string.
        mk.setText(testText);
        expectedCount = 0;
        expectedBreaks[0] = true;
        expected[expectedCount++] = 0;
        int breakPos = 0;
        int lastBreakPos = -1;
        for (; ; ) {
            lastBreakPos = breakPos;
            breakPos = mk.next(breakPos);
            if (breakPos == -1) {
                break;
            }
            if (breakPos > testText.length()) {
                errln("breakPos > testText.length()");
            }
            if (lastBreakPos >= breakPos) {
                errln("Next() not increasing.");
                // break;
            }
            expectedBreaks[breakPos] = true;
            expected[expectedCount++] = breakPos;
        }
        // Find the break positions using forward iteration
        if (printBreaksFromBI) {
            System.out.println("Breaks from BI...");
        }
        bi.setText(testText.toString());
        for (i = bi.first(); i != BreakIterator.DONE; i = bi.next()) {
            if (i < 0 || i > testText.length()) {
                errln(name + " break monkey test: Out of range value returned by breakIterator::next()");
                break;
            }
            if (printBreaksFromBI) {
                System.out.print(Integer.toHexString(i) + " ");
            }
            forwardBreaks[i] = true;
        }
        if (printBreaksFromBI) {
            System.out.println();
        }
        // Find the break positions using reverse iteration
        for (i = bi.last(); i != BreakIterator.DONE; i = bi.previous()) {
            if (i < 0 || i > testText.length()) {
                // This loop advances with previous(), so the message names previous().
                errln(name + " break monkey test: Out of range value returned by breakIterator.previous()");
                break;
            }
            reverseBreaks[i] = true;
        }
        // Find the break positions using isBoundary() tests.
        for (i = 0; i <= testText.length(); i++) {
            isBoundaryBreaks[i] = bi.isBoundary(i);
        }
        // Find the break positions using the following() function.
        lastBreakPos = 0;
        followingBreaks[0] = true;
        for (i = 0; i < testText.length(); i++) {
            breakPos = bi.following(i);
            if (breakPos <= i || breakPos < lastBreakPos || breakPos > testText.length() || breakPos > lastBreakPos && lastBreakPos > i) {
                errln(name + " break monkey test: " + "Out of range value returned by BreakIterator::following().\n" + "index=" + i + "following returned=" + breakPos + "lastBreak=" + lastBreakPos);
                // Forces an error. The mismatch is recorded in followingBreaks so that the
                // comparison below attributes it to following() and the preceding() pass
                // cannot overwrite it.
                followingBreaks[i] = !expectedBreaks[i];
            } else {
                followingBreaks[breakPos] = true;
                lastBreakPos = breakPos;
            }
        }
        // Find the break positions using the preceding() function.
        lastBreakPos = testText.length();
        precedingBreaks[testText.length()] = true;
        for (i = testText.length(); i > 0; i--) {
            breakPos = bi.preceding(i);
            if (breakPos >= i || breakPos > lastBreakPos || breakPos < 0 || breakPos < lastBreakPos && lastBreakPos < i) {
                errln(name + " break monkey test: " + "Out of range value returned by BreakIterator::preceding().\n" + "index=" + i + "preceding returned=" + breakPos + "lastBreak=" + lastBreakPos);
                // Forces an error.
                precedingBreaks[i] = !expectedBreaks[i];
            } else {
                precedingBreaks[breakPos] = true;
                lastBreakPos = breakPos;
            }
        }
        // Compare the expected and actual results.
        for (i = 0; i <= testText.length(); i++) {
            String errorType = null;
            if (forwardBreaks[i] != expectedBreaks[i]) {
                errorType = "next()";
            } else if (reverseBreaks[i] != forwardBreaks[i]) {
                errorType = "previous()";
            } else if (isBoundaryBreaks[i] != expectedBreaks[i]) {
                errorType = "isBoundary()";
            } else if (followingBreaks[i] != expectedBreaks[i]) {
                errorType = "following()";
            } else if (precedingBreaks[i] != expectedBreaks[i]) {
                errorType = "preceding()";
            }
            if (errorType != null) {
                // Format a range of the test text that includes the failure as
                // a data item that can be included in the rbbi test data file.
                // Start of the range is the last point where expected and actual results
                // both agreed that there was a break position.
                int startContext = i;
                int count = 0;
                for (; ; ) {
                    if (startContext == 0) {
                        break;
                    }
                    startContext--;
                    if (expectedBreaks[startContext]) {
                        if (count == 2)
                            break;
                        count++;
                    }
                }
                // End of range is two expected breaks past the start position.
                int endContext = i + 1;
                int ci;
                for (ci = 0; ci < 2; ci++) {
                    // Number of items to include in error text.
                    for (; ; ) {
                        if (endContext >= testText.length()) {
                            break;
                        }
                        if (expectedBreaks[endContext - 1]) {
                            if (count == 0)
                                break;
                            count--;
                        }
                        endContext++;
                    }
                }
                // Format looks like "<data><>\uabcd\uabcd<>\U0001abcd...</data>"
                StringBuffer errorText = new StringBuffer();
                // Char from test data
                int c;
                for (ci = startContext; ci <= endContext && ci != -1; ci = nextCP(testText, ci)) {
                    if (ci == i) {
                        // This is the location of the error.
                        errorText.append("<?>---------------------------------\n");
                    } else if (expectedBreaks[ci]) {
                        // This a non-error expected break position.
                        errorText.append("------------------------------------\n");
                    }
                    if (ci < testText.length()) {
                        // One row per character: code point, general category, the property
                        // selected by mk.fCharProperty, and the character name.
                        c = UTF16.charAt(testText, ci);
                        appendCharToBuf(errorText, c, 11);
                        String gc = UCharacter.getPropertyValueName(UProperty.GENERAL_CATEGORY, UCharacter.getType(c), UProperty.NameChoice.SHORT);
                        appendToBuf(errorText, gc, 8);
                        int extraProp = UCharacter.getIntPropertyValue(c, mk.fCharProperty);
                        String extraPropValue = UCharacter.getPropertyValueName(mk.fCharProperty, extraProp, UProperty.NameChoice.LONG);
                        appendToBuf(errorText, extraPropValue, 20);
                        String charName = UCharacter.getExtendedName(c);
                        appendToBuf(errorText, charName, 40);
                        errorText.append('\n');
                    }
                }
                if (ci == testText.length() && ci != -1) {
                    errorText.append("<>");
                }
                errorText.append("</data>\n");
                // Output the error
                errln(name + " break monkey test error. " + (expectedBreaks[i] ? "Break expected but not found." : "Break found but not expected.") + "\nOperation = " + errorType + "; random seed = " + seed + "; buf Idx = " + i + "\n" + errorText);
                // Only the first mismatch of each test string is reported.
                break;
            }
        }
        loopCount++;
    }
}
use of android.icu.text.UnicodeSet in project j2objc by google.
the class BasicTest method TestSkippable.
/**
 * Checks that the sets built from the {@code NF*_Inert} property patterns equal the sets
 * computed by {@code initSkippables()}, and reports the differences for any form that does not
 * match.
 */
@Test
public void TestSkippable() {
    UnicodeSet[] skipSets = {
        new UnicodeSet(), // NFD
        new UnicodeSet(), // NFC
        new UnicodeSet(), // NFKD
        new UnicodeSet() // NFKC
    };
    UnicodeSet[] expectSets = {
        new UnicodeSet(), new UnicodeSet(), new UnicodeSet(), new UnicodeSet()
    };

    // Build NF*Skippable sets from runtime data.
    skipSets[D].applyPattern("[:NFD_Inert:]");
    skipSets[C].applyPattern("[:NFC_Inert:]");
    skipSets[KD].applyPattern("[:NFKD_Inert:]");
    skipSets[KC].applyPattern("[:NFKC_Inert:]");

    expectSets = initSkippables(expectSets);
    if (expectSets[D].contains(0x0350)) {
        errln("expectSets[D] contains 0x0350");
    }

    for (int i = 0; i < expectSets.length; ++i) {
        if (skipSets[i].equals(expectSets[i])) {
            continue;
        }
        errln("error: TestSkippable skipSets[" + i + "]!=expectedSets[" + i + "]\n");
        // Note: This used to depend on hardcoded UnicodeSet patterns generated by
        // Mark's unicodetools.com.ibm.text.UCD.NFSkippable, by
        // running com.ibm.text.UCD.Main with the option NFSkippable.
        // Since ICU 4.6/Unicode 6, we are generating the
        // expectSets ourselves in initSkippables().
        StringBuilder report = new StringBuilder();
        report.append("\n\nskip= ").append(skipSets[i].toPattern(true)).append("\n\n");

        // Elements only in the runtime-data set.
        UnicodeSet skipOnly = (UnicodeSet) skipSets[i].clone();
        skipOnly.removeAll(expectSets[i]);
        report.append("skip-expect=").append(skipOnly.toPattern(true));

        // Elements only in the computed set.
        UnicodeSet expectOnly = (UnicodeSet) expectSets[i].clone();
        expectOnly.removeAll(skipSets[i]);
        report.append("\n\nexpect-skip=").append(expectOnly.toPattern(true)).append("\n\n");

        // Elements the two sets have in common.
        UnicodeSet common = (UnicodeSet) expectSets[i].clone();
        common.retainAll(skipSets[i]);
        report.append("\n\nintersection(expect,skip)=").append(common.toPattern(true));

        // Special: test coverage for append(char).
        report.append('\n');
        report.append('\n');
        errln(report.toString());
    }
}
Aggregations