Examples with UnicodeSetIterator - android.icu.text.UnicodeSetIterator

Example 1 with UnicodeSetIterator

Use of android.icu.text.UnicodeSetIterator in project j2objc by Google.

From the class CollationBuilder, method closeOverComposites.

/**
 * Iterates over every entry of COMPOSITES, fetches the CEs for its NFD
 * decomposition, and hands the composite to addIfDifferent with an empty
 * prefix whenever those CEs fit within Collation.MAX_EXPANSION_LENGTH.
 *
 * Writes to ces and cesLength, which are declared outside this method.
 */
private void closeOverComposites() {
    final String emptyPrefix = "";
    for (UnicodeSetIterator compositeIter = new UnicodeSetIterator(COMPOSITES); compositeIter.next(); ) {
        // The iterator is expected to yield single code points only, never strings.
        assert (compositeIter.codepoint != UnicodeSetIterator.IS_STRING);
        String decomposition = nfd.getDecomposition(compositeIter.codepoint);
        cesLength = dataBuilder.getCEs(decomposition, ces, 0);
        // An over-long expansion is skipped;
        // this can only really happen in contrived cases.
        if (cesLength <= Collation.MAX_EXPANSION_LENGTH) {
            addIfDifferent(emptyPrefix, compositeIter.getString(), ces, cesLength, Collation.UNASSIGNED_CE32);
        }
    }
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator)

Example 2 with UnicodeSetIterator

Use of android.icu.text.UnicodeSetIterator in project j2objc by Google.

From the class CollationBuilder, method addTailComposites.

/**
 * Adds mappings for composites whose decomposition begins with the last
 * starter (combining class 0 code point) of {@code nfdString}.
 *
 * Returns without doing anything when the string has no starter, when the
 * last starter is a Hangul Jamo L, or when nfcImpl reports no canonical
 * start set for it.
 *
 * @param nfdPrefix prefix passed through to the CE lookup and to the add calls
 * @param nfdString string whose last starter is searched for and merged with
 *                  each candidate composite
 */
private void addTailComposites(CharSequence nfdPrefix, CharSequence nfdString) {
    // Walk backwards, one code point at a time, until a starter is found.
    int indexAfterLastStarter = nfdString.length();
    int lastStarter;
    while (true) {
        if (indexAfterLastStarter == 0) {
            // Reached the beginning: no starter at all.
            return;
        }
        lastStarter = Character.codePointBefore(nfdString, indexAfterLastStarter);
        if (nfd.getCombiningClass(lastStarter) == 0) {
            break;
        }
        indexAfterLastStarter -= Character.charCount(lastStarter);
    }

    // Hangul syllables are decomposed on the fly, so they get no closure.
    if (Hangul.isJamoL(lastStarter)) {
        return;
    }

    // Collect the composites whose decomposition starts with lastStarter.
    // Note: Normalizer2Impl does not currently return start sets for
    // NFC_QC=Maybe characters; more equivalent mappings might be found if it did.
    UnicodeSet startSet = new UnicodeSet();
    boolean hasStartSet = nfcImpl.getCanonStartSet(lastStarter, startSet);
    if (!hasStartSet) {
        return;
    }

    StringBuilder mergedNFD = new StringBuilder();
    StringBuilder mergedString = new StringBuilder();
    long[] mergedCEs = new long[Collation.MAX_EXPANSION_LENGTH];
    for (UnicodeSetIterator startIter = new UnicodeSetIterator(startSet); startIter.next(); ) {
        // Only single code points are expected in the start set.
        assert (startIter.codepoint != UnicodeSetIterator.IS_STRING);
        int composite = startIter.codepoint;
        String decomp = nfd.getDecomposition(composite);
        boolean merged = mergeCompositeIntoString(
                nfdString, indexAfterLastStarter, composite, decomp, mergedNFD, mergedString);
        if (merged) {
            int mergedCEsLength = dataBuilder.getCEs(nfdPrefix, mergedNFD, mergedCEs, 0);
            // Mappings that cannot be stored are ignored.
            if (mergedCEsLength <= Collation.MAX_EXPANSION_LENGTH) {
                // Caveat: the fetched CEs may not actually use the mapping whose tail
                // composites are being added, so some added mappings may be unnecessary.
                // Example: for ae^ (^ = combining circumflex), UCA discontiguous-contraction
                // matching finds no match for ae_^ (_ = any combining diacritic below)
                // *unless* a contraction mapping for ae also exists; without it, the ae^
                // mapping is ignored while fetching the CEs for ae_^.
                // TODO: Try to detect this effectively.
                // (Alternatively, print a warning when prefix contractions are missing.)
                //
                // No explicit mapping is needed for the NFD strings themselves: it is fine
                // for NFD input to collate this way via a sequence of mappings, which also
                // saves a little space and may reduce the set of characters with contractions.
                int ce32 = addIfDifferent(
                        nfdPrefix, mergedString, mergedCEs, mergedCEsLength, Collation.UNASSIGNED_CE32);
                if (ce32 != Collation.UNASSIGNED_CE32) {
                    // The mapping was different and therefore added.
                    addOnlyClosure(nfdPrefix, mergedNFD, mergedCEs, mergedCEsLength, ce32);
                }
            }
        }
    }
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) UnicodeSet(android.icu.text.UnicodeSet)

Example 3 with UnicodeSetIterator

Use of android.icu.text.UnicodeSetIterator in project j2objc by Google.

From the class CollationMiscTest, method TestImportWithType.

/**
 * Compares a collator built by concatenating the rule strings of the "vi" and
 * "de-u-co-phonebk" collators with one built from the rule string
 * "[import vi][import de-u-co-phonebk]".
 *
 * Warns (without failing) when the two tailored sets differ, or when any
 * element of the concatenated collator's tailored set produces different
 * collation keys under the two collators. Any exception is logged rather
 * than reported as an error.
 */
@Test
public void TestImportWithType() {
    try {
        RuleBasedCollator vicoll = (RuleBasedCollator) Collator.getInstance(new ULocale("vi"));
        RuleBasedCollator decoll = (RuleBasedCollator) Collator.getInstance(ULocale.forLanguageTag("de-u-co-phonebk"));
        // Reference collator: the two rule strings simply concatenated.
        RuleBasedCollator videcoll = new RuleBasedCollator(vicoll.getRules() + decoll.getRules());
        // Collator under test: the same tailorings pulled in via [import ...].
        RuleBasedCollator importvidecoll = new RuleBasedCollator("[import vi][import de-u-co-phonebk]");
        UnicodeSet tailoredSet = videcoll.getTailoredSet();
        UnicodeSet importTailoredSet = importvidecoll.getTailoredSet();
        if (!tailoredSet.equals(importTailoredSet)) {
            warnln("Tailored set not equal");
        }
        // Every tailored element must produce the same key under both collators.
        for (UnicodeSetIterator it = new UnicodeSetIterator(tailoredSet); it.next(); ) {
            String t = it.getString();
            CollationKey sk1 = videcoll.getCollationKey(t);
            CollationKey sk2 = importvidecoll.getCollationKey(t);
            if (!sk1.equals(sk2)) {
                warnln("Collation key's not equal for " + t);
            }
        }
    } catch (Exception e) {
        // Android patch: Add --omitCollationRules to genrb.
        // NOTE(review): presumably this is only logged because rule strings may be
        // unavailable with that genrb option -- confirm.
        // The exception is appended because the try block also covers tailored-set
        // lookup and key generation, so the fixed text alone does not say what failed.
        logln("ERROR: in creation of rule based collator: " + e);
        // Android patch end.
    }
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) RuleBasedCollator(android.icu.text.RuleBasedCollator) ULocale(android.icu.util.ULocale) CollationKey(android.icu.text.CollationKey) RawCollationKey(android.icu.text.RawCollationKey) UnicodeSet(android.icu.text.UnicodeSet) Test(org.junit.Test)

Example 4 with UnicodeSetIterator

Use of android.icu.text.UnicodeSetIterator in project j2objc by Google.

From the class RoundTripTest, method getRepresentativeBoundaryHangul.

/**
 * Builds a set of strings obtained by composing sample Hangul jamo sequences.
 *
 * Each sample is passed through {@code Normalizer.compose(sample, false)};
 * the composed result is added to the returned set only when it is exactly
 * two chars long.
 *
 * @return a new UnicodeSet holding the collected two-char compositions
 */
private static UnicodeSet getRepresentativeBoundaryHangul() {
    UnicodeSet collected = new UnicodeSet();
    UnicodeSet leadSet = new UnicodeSet("[:hst=L:]");
    UnicodeSet vowelSet = new UnicodeSet("[:hst=V:]");
    UnicodeSet trailSet = new UnicodeSet("[:hst=T:]");
    // U+1100 HANGUL CHOSEONG KIYEOK followed by U+1161 HANGUL JUNGSEONG A
    String kiyeokA = "\u1100\u1161";
    String kiyeok = "\u1100";
    String jungseongA = "\u1161";
    // U+110B HANGUL CHOSEONG IEUNG
    String ieung = "\u110B";
    UnicodeSet kiyeokOrIeung = new UnicodeSet("[\u1100\u110B]");

    // All combinations of (KIYEOK | IEUNG) + V + IEUNG + V.
    UnicodeSetIterator firstLead = new UnicodeSetIterator(kiyeokOrIeung);
    while (firstLead.next()) {
        UnicodeSetIterator firstVowel = new UnicodeSetIterator(vowelSet);
        while (firstVowel.next()) {
            UnicodeSetIterator secondVowel = new UnicodeSetIterator(vowelSet);
            while (secondVowel.next()) {
                String composed = Normalizer.compose(
                        firstLead.getString() + firstVowel.getString() + ieung + secondVowel.getString(),
                        false);
                if (composed.length() != 2) {
                    continue;
                }
                collected.add(composed);
            }
        }
    }

    UnicodeSetIterator anyLead = new UnicodeSetIterator(leadSet);
    while (anyLead.next()) {
        final String leadPlusA = anyLead.getString() + jungseongA;

        // All combinations of "g" + V + L + "a".
        UnicodeSetIterator midVowel = new UnicodeSetIterator(vowelSet);
        while (midVowel.next()) {
            String composed = Normalizer.compose(kiyeok + midVowel.getString() + leadPlusA, false);
            if (composed.length() != 2) {
                continue;
            }
            collected.add(composed);
        }

        // All combinations of "ga" + T + L + "a".
        UnicodeSetIterator midTrail = new UnicodeSetIterator(trailSet);
        while (midTrail.next()) {
            String composed = Normalizer.compose(kiyeokA + midTrail.getString() + leadPlusA, false);
            if (composed.length() != 2) {
                continue;
            }
            collected.add(composed);
        }
    }
    return collected;
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) UnicodeSet(android.icu.text.UnicodeSet)

Example 5 with UnicodeSetIterator

Use of android.icu.text.UnicodeSetIterator in project j2objc by Google.

From the class TransliteratorTest, method TestGurmukhiDevanagari.

/**
 * Test Gurmukhi-Devanagari Tippi and Bindi.
 *
 * Runs the "Devanagari-Gurmukhi" transliterator on two-character inputs whose
 * second character is U+0902. The rule under test says:
 * U+0902 preceded by a vowel becomes U+0A02, and
 * U+0902 preceded by a consonant becomes U+0A70.
 * In both cases the expected first character is the input's first code point
 * plus 0x0100.
 */
@Test
public void TestGurmukhiDevanagari() {
    UnicodeSet vowelSet = new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]");
    UnicodeSet consonantSet = new UnicodeSet("[\u0915-\u0928\u092A-\u0930]");
    Transliterator devaToGuru = Transliterator.getInstance("Devanagari-Gurmukhi");

    // Position 0 is a placeholder that is overwritten for every sample.
    StringBuilder input = new StringBuilder(" \u0902");
    StringBuilder expected = new StringBuilder(" \u0A02");

    // Vowel in front: the second expected character stays U+0A02.
    for (UnicodeSetIterator vowels = new UnicodeSetIterator(vowelSet); vowels.next(); ) {
        int cp = vowels.codepoint;
        input.setCharAt(0, (char) cp);
        expected.setCharAt(0, (char) (cp + 0x0100));
        expect(devaToGuru, input.toString(), expected.toString());
    }

    // Consonant in front: the second expected character becomes U+0A70.
    expected.setCharAt(1, '\u0A70');
    for (UnicodeSetIterator consonants = new UnicodeSetIterator(consonantSet); consonants.next(); ) {
        int cp = consonants.codepoint;
        input.setCharAt(0, (char) cp);
        expected.setCharAt(0, (char) (cp + 0x0100));
        expect(devaToGuru, input.toString(), expected.toString());
    }
}

Also used : UnicodeSetIterator(android.icu.text.UnicodeSetIterator) UnicodeSet(android.icu.text.UnicodeSet) Transliterator(android.icu.text.Transliterator) Test(org.junit.Test)

Aggregations

UnicodeSetIterator (android.icu.text.UnicodeSetIterator): 31, UnicodeSet (android.icu.text.UnicodeSet): 25, Test (org.junit.Test): 17, ULocale (android.icu.util.ULocale): 6, Transliterator (android.icu.text.Transliterator): 5, HashSet (java.util.HashSet): 4, RuleBasedCollator (android.icu.text.RuleBasedCollator): 3, CollationData (android.icu.impl.coll.CollationData): 2, CollationKey (android.icu.text.CollationKey): 2, FilteredNormalizer2 (android.icu.text.FilteredNormalizer2): 2, Normalizer2 (android.icu.text.Normalizer2): 2, RawCollationKey (android.icu.text.RawCollationKey): 2, File (java.io.File): 2, FileOutputStream (java.io.FileOutputStream): 2, OutputStreamWriter (java.io.OutputStreamWriter): 2, PrintWriter (java.io.PrintWriter): 2, ArrayList (java.util.ArrayList): 2, Iterator (java.util.Iterator): 2, TreeSet (java.util.TreeSet): 2, Normalizer2Impl (android.icu.impl.Normalizer2Impl): 1