Search in sources :

Example 1 with UnicodeSetIterator

Use of com.ibm.icu.text.UnicodeSetIterator in the Apache lucene-solr project.

Shown in method expandSingleRule of class GenerateUTR30DataFiles.

/**
 * Expands one rule into mapping lines and appends them to {@code builder}.
 *
 * <p>{@code leftHandSide} is parsed as a {@link UnicodeSet} pattern (whitespace ignored) and
 * walked range by range:
 * <ul>
 *   <li>When {@code rightHandSide} matches {@code NUMERIC_VALUE_PATTERN}, every code point of
 *       the range gets its own line: the code point in hex, {@code '>'}, the hex value of
 *       {@code 0x30 + UCharacter.getNumericValue(cp)}, then {@code "   # "} and the character
 *       name.</li>
 *   <li>Otherwise one line is written per range: the first code point in hex, optionally
 *       {@code ".."} and the last code point when the range holds more than one code point,
 *       then {@code '>'} and {@code rightHandSide} verbatim.</li>
 * </ul>
 *
 * <p>If the set contains a multi-character string, an error is printed to {@code System.err}
 * and the JVM is terminated with exit status 1.
 *
 * @param builder       receives the generated lines, each terminated by {@code "\n"}
 * @param leftHandSide  UnicodeSet pattern selecting the source code points
 * @param rightHandSide mapping target; copied verbatim unless it matches
 *                      {@code NUMERIC_VALUE_PATTERN}
 * @throws IllegalArgumentException declared by the signature; presumably raised when
 *                                  {@code leftHandSide} is not a valid UnicodeSet pattern
 *                                  (TODO confirm against the ICU documentation)
 */
private static void expandSingleRule(StringBuilder builder, String leftHandSide, String rightHandSide) throws IllegalArgumentException {
    final UnicodeSet sourceSet = new UnicodeSet(leftHandSide, UnicodeSet.IGNORE_SPACE);
    final boolean perCodePoint = NUMERIC_VALUE_PATTERN.matcher(rightHandSide).matches();
    final UnicodeSetIterator ranges = new UnicodeSetIterator(sourceSet);

    while (ranges.nextRange()) {
        // Multi-character strings cannot be expressed as a code point range: report and abort.
        if (ranges.codepoint == UnicodeSetIterator.IS_STRING) {
            System.err.println("ERROR: String '" + ranges.getString() + "' found in UnicodeSet");
            System.exit(1);
            continue;
        }

        final int first = ranges.codepoint;
        final int last = ranges.codepointEnd;

        if (!perCodePoint) {
            // One line for the whole range: "XXXX>rhs" or "XXXX..YYYY>rhs".
            builder.append(String.format(Locale.ROOT, "%04X", first));
            if (first < last) {
                builder.append("..");
                builder.append(String.format(Locale.ROOT, "%04X", last));
            }
            builder.append('>');
            builder.append(rightHandSide);
            builder.append("\n");
            continue;
        }

        // One line per code point; the target is 0x30 plus the code point's numeric value.
        int cp = first;
        while (cp <= last) {
            builder.append(String.format(Locale.ROOT, "%04X", cp))
                   .append('>')
                   .append(String.format(Locale.ROOT, "%04X", 0x30 + UCharacter.getNumericValue(cp)))
                   .append("   # ")
                   .append(UCharacter.getName(cp))
                   .append("\n");
            ++cp;
        }
    }
}
Also used : UnicodeSetIterator(com.ibm.icu.text.UnicodeSetIterator) UnicodeSet(com.ibm.icu.text.UnicodeSet)

Aggregations

UnicodeSet (com.ibm.icu.text.UnicodeSet)1 UnicodeSetIterator (com.ibm.icu.text.UnicodeSetIterator)1