Examples with UnicodeSet - android.icu.text.UnicodeSet

Example 21 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class TransliteratorTest method assertEquals.

void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) {
    boolean haveError = false;
    if (!actual.containsAll(empirical)) {
        UnicodeSet missing = new UnicodeSet(empirical).removeAll(actual);
        errln(message + " \tgetXSet < empirical (" + missing.size() + "): " + toPattern(missing));
        haveError = true;
    }
    if (!empirical.containsAll(actual)) {
        UnicodeSet extra = new UnicodeSet(actual).removeAll(empirical);
        logln("WARNING: " + message + " \tgetXSet > empirical (" + extra.size() + "): " + toPattern(extra));
        haveError = true;
    }
    if (!haveError) {
        logln("OK " + message + ' ' + toPattern(empirical));
    }
}

Also used : UnicodeSet(android.icu.text.UnicodeSet)

Example 22 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class TransliteratorTest method TestCoverage.

// ======================================================================
// These tests are not mirrored (yet) in icu4c at
// source/test/intltest/transtst.cpp
// ======================================================================
/**
 * Improve code coverage.
 */
@Test
public void TestCoverage() {
    // NullTransliterator
    Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD);
    expect(t, "a", "a");
    // Source, target set
    t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD);
    t.setFilter(new UnicodeSet("[A-Z]"));
    logln("source = " + t.getSourceSet());
    logln("target = " + t.getTargetSet());
    t = Transliterator.createFromRules("x", "(.) > &Any-Hex($1);", Transliterator.FORWARD);
    logln("source = " + t.getSourceSet());
    logln("target = " + t.getTargetSet());
}

Also used : UnicodeSet(android.icu.text.UnicodeSet) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Example 23 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class TransliteratorTest method TestSourceTargetSet2.

@Test
public void TestSourceTargetSet2() {
    Normalizer2 nfc = Normalizer2.getNFCInstance();
    Normalizer2 nfd = Normalizer2.getNFDInstance();
    // Normalizer2 nfkd = Normalizer2.getInstance(null, "nfkd", Mode.DECOMPOSE);
    // UnicodeSet nfkdSource = new UnicodeSet();
    // UnicodeSet nfkdTarget = new UnicodeSet();
    // for (int i = 0; i <= 0x10FFFF; ++i) {
    // if (nfkd.isInert(i)) {
    // continue;
    // }
    // nfkdSource.add(i);
    // String t = nfkd.getDecomposition(i);
    // if (t != null) {
    // nfkdTarget.addAll(t);
    // } else {
    // nfkdTarget.add(i);
    // }
    // }
    // nfkdSource.freeze();
    // nfkdTarget.freeze();
    // logln("NFKD Source: " + nfkdSource.toPattern(false));
    // logln("NFKD Target: " + nfkdTarget.toPattern(false));
    UnicodeMap<UnicodeSet> leadToTrail = new UnicodeMap();
    UnicodeMap<UnicodeSet> leadToSources = new UnicodeMap();
    UnicodeSet nonStarters = new UnicodeSet("[:^ccc=0:]").freeze();
    CanonicalIterator can = new CanonicalIterator("");
    UnicodeSet disorderedMarks = new UnicodeSet();
    for (int i = 0; i <= 0x10FFFF; ++i) {
        String s = nfd.getDecomposition(i);
        if (s == null) {
            continue;
        }
        can.setSource(s);
        for (String t = can.next(); t != null; t = can.next()) {
            disorderedMarks.add(t);
        }
        // if s has two code points, (or more), add the lead/trail information
        int first = s.codePointAt(0);
        int firstCount = Character.charCount(first);
        if (s.length() == firstCount)
            continue;
        String trailString = s.substring(firstCount);
        // add all the trail characters
        if (!nonStarters.containsSome(trailString)) {
            continue;
        }
        UnicodeSet trailSet = leadToTrail.get(first);
        if (trailSet == null) {
            leadToTrail.put(first, trailSet = new UnicodeSet());
        }
        // add remaining trails
        trailSet.addAll(trailString);
        // add the sources
        UnicodeSet sourcesSet = leadToSources.get(first);
        if (sourcesSet == null) {
            leadToSources.put(first, sourcesSet = new UnicodeSet());
        }
        sourcesSet.add(i);
    }
    for (Entry<String, UnicodeSet> x : leadToSources.entrySet()) {
        String lead = x.getKey();
        UnicodeSet sources = x.getValue();
        UnicodeSet trailSet = leadToTrail.get(lead);
        for (String source : sources) {
            for (String trail : trailSet) {
                can.setSource(source + trail);
                for (String t = can.next(); t != null; t = can.next()) {
                    if (t.endsWith(trail))
                        continue;
                    disorderedMarks.add(t);
                }
            }
        }
    }
    for (String s : nonStarters) {
        disorderedMarks.add("\u0345" + s);
        disorderedMarks.add(s + "\u0323");
        String xx = nfc.normalize("\u01EC" + s);
        if (!xx.startsWith("\u01EC")) {
            logln("??");
        }
    }
    // for (int i = 0; i <= 0x10FFFF; ++i) {
    // String s = nfkd.getDecomposition(i);
    // if (s != null) {
    // disorderedMarks.add(s);
    // disorderedMarks.add(nfc.normalize(s));
    // addDerivedStrings(nfc, disorderedMarks, s);
    // }
    // s = nfd.getDecomposition(i);
    // if (s != null) {
    // disorderedMarks.add(s);
    // }
    // if (!nfc.isInert(i)) {
    // if (i == 0x00C0) {
    // logln("\u00C0");
    // }
    // can.setSource(s+"\u0334");
    // for (String t = can.next(); t != null; t = can.next()) {
    // addDerivedStrings(nfc, disorderedMarks, t);
    // }
    // can.setSource(s+"\u0345");
    // for (String t = can.next(); t != null; t = can.next()) {
    // addDerivedStrings(nfc, disorderedMarks, t);
    // }
    // can.setSource(s+"\u0323");
    // for (String t = can.next(); t != null; t = can.next()) {
    // addDerivedStrings(nfc, disorderedMarks, t);
    // }
    // }
    // }
    logln("Test cases: " + disorderedMarks.size());
    disorderedMarks.addAll(0, 0x10FFFF).freeze();
    logln("isInert \u0104 " + nfc.isInert('\u0104'));
    Object[][] rules = { { ":: [:sc=COMMON:] any-name;", null }, { ":: [:Greek:] hex-any/C;", null }, { ":: [:Greek:] any-hex/C;", null }, { ":: [[:Mn:][:Me:]] remove;", null }, { ":: [[:Mn:][:Me:]] null;", null }, { ":: lower;", null }, { ":: upper;", null }, { ":: title;", null }, { ":: CaseFold;", null }, { ":: NFD;", null }, { ":: NFC;", null }, { ":: NFKD;", null }, { ":: NFKC;", null }, { ":: [[:Mn:][:Me:]] NFKD;", null }, { ":: Latin-Greek;", null }, { ":: [:Latin:] NFKD;", null }, { ":: NFKD;", null }, { ":: NFKD;\n" + ":: [[:Mn:][:Me:]] remove;\n" + ":: NFC;", null } };
    for (Object[] rulex : rules) {
        String rule = (String) rulex[0];
        Transliterator trans = Transliterator.createFromRules("temp", rule, Transliterator.FORWARD);
        UnicodeSet actualSource = trans.getSourceSet();
        UnicodeSet actualTarget = trans.getTargetSet();
        UnicodeSet empiricalSource = new UnicodeSet();
        UnicodeSet empiricalTarget = new UnicodeSet();
        String ruleDisplay = rule.replace("\n", "\t\t");
        UnicodeSet toTest = disorderedMarks;
        // if (rulex[1] != null) {
        // toTest = new UnicodeSet(disorderedMarks);
        // toTest.addAll((UnicodeSet) rulex[1]);
        // }
        String test = nfd.normalize("\u0104");
        boolean DEBUG = true;
        @SuppressWarnings("unused") int // for debugging
        count = 0;
        for (String s : toTest) {
            if (s.equals(test)) {
                logln(test);
            }
            String t = trans.transform(s);
            if (!s.equals(t)) {
                if (!isAtomic(s, t, trans)) {
                    isAtomic(s, t, trans);
                    continue;
                }
                // }
                if (DEBUG) {
                    if (!actualSource.containsAll(s)) {
                        count++;
                    }
                    if (!actualTarget.containsAll(t)) {
                        count++;
                    }
                }
                addSourceTarget(s, empiricalSource, t, empiricalTarget);
            }
        }
        assertEquals("getSource(" + ruleDisplay + ")", empiricalSource, actualSource, SetAssert.MISSING_OK);
        assertEquals("getTarget(" + ruleDisplay + ")", empiricalTarget, actualTarget, SetAssert.MISSING_OK);
    }
}

Also used : Normalizer2(android.icu.text.Normalizer2) CaseInsensitiveString(android.icu.util.CaseInsensitiveString) ReplaceableString(android.icu.text.ReplaceableString) UnicodeSet(android.icu.text.UnicodeSet) CanonicalIterator(android.icu.text.CanonicalIterator) UnicodeMap(android.icu.dev.util.UnicodeMap) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Example 24 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class UnicodeMap method composeWith.

public UnicodeMap<T> composeWith(UnicodeMap<T> other, Composer<T> composer) {
    for (T value : other.getAvailableValues()) {
        UnicodeSet set = other.keySet(value);
        composeWith(set, value, composer);
    }
    return this;
}

Also used : UnicodeSet(android.icu.text.UnicodeSet)

Example 25 with UnicodeSet

use of android.icu.text.UnicodeSet in project j2objc by google.

the class UnicodeMap method toString.

public String toString(Comparator<T> collected) {
    StringBuffer result = new StringBuffer();
    if (collected == null) {
        for (int i = 0; i < length - 1; ++i) {
            T value = values[i];
            if (value == null)
                continue;
            int start = transitions[i];
            int end = transitions[i + 1] - 1;
            result.append(Utility.hex(start));
            if (start != end)
                result.append("-").append(Utility.hex(end));
            result.append("=").append(value.toString()).append("\n");
        }
        if (stringMap != null) {
            for (String s : stringMap.keySet()) {
                result.append(Utility.hex(s)).append("=").append(stringMap.get(s).toString()).append("\n");
            }
        }
    } else {
        Set<T> set = values(new TreeSet<T>(collected));
        for (Iterator<T> it = set.iterator(); it.hasNext(); ) {
            T value = it.next();
            UnicodeSet s = keySet(value);
            result.append(value).append("=").append(s.toString()).append("\n");
        }
    }
    return result.toString();
}

Also used : UnicodeSet(android.icu.text.UnicodeSet)

Aggregations

UnicodeSet (android.icu.text.UnicodeSet)158 Test (org.junit.Test)112 UnicodeSetIterator (android.icu.text.UnicodeSetIterator)25 Transliterator (android.icu.text.Transliterator)19 ReplaceableString (android.icu.text.ReplaceableString)14 ULocale (android.icu.util.ULocale)13 CaseInsensitiveString (android.icu.util.CaseInsensitiveString)9 Normalizer2 (android.icu.text.Normalizer2)7 RuleBasedCollator (android.icu.text.RuleBasedCollator)7 ArrayList (java.util.ArrayList)5 HashSet (java.util.HashSet)5 FilteredNormalizer2 (android.icu.text.FilteredNormalizer2)4 SpoofChecker (android.icu.text.SpoofChecker)4 TreeSet (java.util.TreeSet)4 UnicodeMap (android.icu.dev.util.UnicodeMap)3 AlphabeticIndex (android.icu.text.AlphabeticIndex)3 CollationKey (android.icu.text.CollationKey)3 RawCollationKey (android.icu.text.RawCollationKey)3 CheckResult (android.icu.text.SpoofChecker.CheckResult)3 SpanCondition (android.icu.text.UnicodeSet.SpanCondition)3