Search in sources :

Example 11 with ULocale

use of com.ibm.icu.util.ULocale in project closure-templates by google.

the class SoyMsgBundleWithFullLocaleTest method testSoyMsgBundleWithFullLocale_deprecatedLanguageCode.

@Test
public void testSoyMsgBundleWithFullLocale_deprecatedLanguageCode() throws Exception {
    // "iw" is a deprecated language code; it is used for the bundle's ULocale,
    // for its locale string, and for the java.util.Locale handed to the factory.
    String deprecatedCode = "iw";
    ULocale deprecatedULocale = new ULocale(deprecatedCode);
    SoyMsgBundle original = new SoyMsgBundleWithFullLocale(SoyMsgBundle.EMPTY, deprecatedULocale, deprecatedCode);

    SoyMsgBundle result = SoyMsgBundleWithFullLocale.preservingLocaleIfAllowed(original, new Locale(deprecatedCode));

    // The locale string must be untouched and the returned bundle must equal the input bundle.
    assertThat(result.getLocaleString()).isEqualTo(deprecatedCode);
    assertThat(result).isEqualTo(original);
}
Also used : ULocale(com.ibm.icu.util.ULocale) Locale(java.util.Locale) Test(org.junit.Test)

Example 12 with ULocale

use of com.ibm.icu.util.ULocale in project validator by validator.

the class LanguageDetectingChecker method checkContentLanguageHeader.

/**
 * Compares the HTTP Content-Language header with the detected document
 * language and emits warnings about mismatches.
 *
 * <p>Nothing is reported when the header is absent/empty, when it lists
 * several languages (contains a comma), or when the detected/declared pair is
 * one of the combinations listed below that return early without a warning.
 *
 * @param detectedLanguage      detected language tag; passed to
 *                              {@code zhSubtagMismatch} together with the header
 * @param detectedLanguageName  display name inserted into the warning text
 * @param detectedLanguageCode  language code compared with the header's
 *                              language subtag
 * @param preferredLanguageCode code suggested to the author in the warning
 * @throws SAXException declared on the signature; presumably raised by
 *                      {@code warn}
 */
private void checkContentLanguageHeader(String detectedLanguage, String detectedLanguageName, String detectedLanguageCode, String preferredLanguageCode) throws SAXException {
    if (httpContentLangHeader == null || "".equals(httpContentLangHeader) || httpContentLangHeader.contains(",")) {
        return;
    }
    // Language tags are case-insensitive ASCII: lowercase them independently of
    // the JVM default locale (a Turkish default would map "I" to dotless "ı").
    String lowerCaseContentLang = httpContentLangHeader.toLowerCase(java.util.Locale.ROOT);
    String contentLangCode = new ULocale(lowerCaseContentLang).getLanguage();

    // Detected/declared combinations for which no mismatch warning is issued.
    if ("tl".equals(detectedLanguageCode) && ("ceb".equals(contentLangCode) || "ilo".equals(contentLangCode) || "pag".equals(contentLangCode) || "war".equals(contentLangCode))) {
        return;
    }
    if ("id".equals(detectedLanguageCode) && "min".equals(contentLangCode)) {
        return;
    }
    if ("ms".equals(detectedLanguageCode) && "min".equals(contentLangCode)) {
        return;
    }
    if ("hr".equals(detectedLanguageCode) && ("sr".equals(contentLangCode) || "bs".equals(contentLangCode) || "sh".equals(contentLangCode))) {
        return;
    }
    if ("sr".equals(detectedLanguageCode) && ("hr".equals(contentLangCode) || "bs".equals(contentLangCode) || "sh".equals(contentLangCode))) {
        return;
    }
    if ("bs".equals(detectedLanguageCode) && ("hr".equals(contentLangCode) || "sr".equals(contentLangCode) || "sh".equals(contentLangCode))) {
        return;
    }
    if ("de".equals(detectedLanguageCode) && ("bar".equals(contentLangCode) || "gsw".equals(contentLangCode) || "lb".equals(contentLangCode))) {
        return;
    }
    // NOTE(review): unlike the other exceptions this one compares the whole
    // lowercased header value, not just its language subtag - confirm intent.
    if ("zh".equals(detectedLanguageCode) && "yue".equals(lowerCaseContentLang)) {
        return;
    }
    if ("es".equals(detectedLanguageCode) && ("an".equals(contentLangCode) || "ast".equals(contentLangCode))) {
        return;
    }
    if ("it".equals(detectedLanguageCode) && ("co".equals(contentLangCode) || "pms".equals(contentLangCode) || "vec".equals(contentLangCode) || "lmo".equals(contentLangCode) || "scn".equals(contentLangCode) || "nap".equals(contentLangCode))) {
        return;
    }
    if ("rw".equals(detectedLanguageCode) && "rn".equals(contentLangCode)) {
        return;
    }
    if ("mhr".equals(detectedLanguageCode) && ("chm".equals(contentLangCode) || "mrj".equals(contentLangCode))) {
        return;
    }
    if ("mrj".equals(detectedLanguageCode) && ("chm".equals(contentLangCode) || "mhr".equals(contentLangCode))) {
        return;
    }
    if ("ru".equals(detectedLanguageCode) && "bg".equals(contentLangCode)) {
        return;
    }

    // Header language differs from the detected language.
    if (zhSubtagMismatch(detectedLanguage, lowerCaseContentLang) || !contentLangCode.equals(detectedLanguageCode)) {
        String mismatchMessage = "This document appears to be written in %s but the value" + " of the HTTP \u201CContent-Language\u201D header is" + " \u201C%s\u201D. Consider changing it to" + " \u201C%s\u201D (or variant).";
        // Exactly three placeholders, so exactly three arguments.
        warn(String.format(mismatchMessage, detectedLanguageName, lowerCaseContentLang, preferredLanguageCode));
    }

    // Header language differs from the lang value on the html start tag.
    if (htmlElementHasLang) {
        String lowerCaseLang = htmlElementLangAttrValue.toLowerCase(java.util.Locale.ROOT);
        if (zhSubtagMismatch(lowerCaseContentLang, lowerCaseLang) || !contentLangCode.equals(declaredLangCode)) {
            String ignoredMessage = "The value of the HTTP \u201CContent-Language\u201D" + " header is \u201C%s\u201D but it will be ignored because" + " the \u201Chtml\u201D start tag has %s.";
            warn(String.format(ignoredMessage, httpContentLangHeader, getAttValueExpr("lang", htmlElementLangAttrValue)), htmlStartTagLocator);
        }
    }
}
Also used : ULocale(com.ibm.icu.util.ULocale)

Example 13 with ULocale

use of com.ibm.icu.util.ULocale in project validator by validator.

the class LanguageDetectingChecker method startElement.

/**
 * Records language-related state as each element opens.
 *
 * <ul>
 * <li>On {@code html}: remembers the start-tag locator and captures the
 * {@code lang} and {@code dir} attribute values.</li>
 * <li>On {@code body}: marks that the body has been entered.</li>
 * <li>On any later element while inside the body: maintains the counter of
 * open elements whose {@code lang} differs from the one on {@code html}.</li>
 * </ul>
 * Independently of the above, elements whose local name is in
 * {@code SKIP_NAMES} bump the skip-name counter.
 *
 * @see nu.validator.checker.Checker#startElement(java.lang.String,
 *      java.lang.String, java.lang.String, org.xml.sax.Attributes)
 */
@Override
public void startElement(String uri, String localName, String name, Attributes atts) throws SAXException {
    if ("html".equals(localName)) {
        htmlStartTagLocator = new LocatorImpl(getDocumentLocator());
        for (int idx = 0; idx < atts.getLength(); idx++) {
            String attrLocalName = atts.getLocalName(idx);
            if ("lang".equals(attrLocalName)) {
                if (request != null) {
                    request.setAttribute("http://validator.nu/properties/lang-found", true);
                }
                htmlElementHasLang = true;
                htmlElementLangAttrValue = atts.getValue(idx);
                declaredLangCode = new ULocale(htmlElementLangAttrValue).getLanguage();
            } else if ("dir".equals(attrLocalName)) {
                hasDir = true;
                dirAttrValue = atts.getValue(idx);
            }
        }
    } else if ("body".equals(localName)) {
        inBody = true;
    } else if (inBody) {
        if (currentOpenElementsInDifferentLang > 0) {
            // Already inside a subtree in another language: just track nesting depth.
            currentOpenElementsInDifferentLang++;
        } else {
            // One increment per "lang"-named attribute whose value differs from
            // the non-empty value declared on the html element.
            for (int idx = 0; idx < atts.getLength(); idx++) {
                if ("lang".equals(atts.getLocalName(idx)) && !"".equals(htmlElementLangAttrValue) && !htmlElementLangAttrValue.equals(atts.getValue(idx))) {
                    currentOpenElementsInDifferentLang++;
                }
            }
        }
    }
    boolean hasSkipName = Arrays.binarySearch(SKIP_NAMES, localName) >= 0;
    if (hasSkipName) {
        currentOpenElementsWithSkipName++;
    }
}
Also used : ULocale(com.ibm.icu.util.ULocale) LocatorImpl(nu.validator.checker.LocatorImpl)

Example 14 with ULocale

use of com.ibm.icu.util.ULocale in project validator by validator.

the class LanguageDetectingChecker method detectLanguageAndCheckAgainstDeclaredLanguage.

/**
 * Runs language detection over the collected document text and checks the
 * result against the declared {@code lang} attribute, the {@code dir}
 * attribute and the HTTP Content-Language header.
 *
 * <p>Detection is skipped when there is too little text (fewer than
 * {@code MIN_CHARS} non-whitespace characters), when the declared language is
 * {@code zxx}, {@code eo} or {@code la}, or when the declared language is
 * listed in {@code LANG_TAGS_BY_TLD} for the document's TLD.
 *
 * @throws SAXException declared on the signature; presumably raised by the
 *                      warning/check helpers this method calls
 */
private void detectLanguageAndCheckAgainstDeclaredLanguage() throws SAXException {
    if (nonWhitespaceCharacterCount < MIN_CHARS) {
        warnIfMissingLang();
        return;
    }
    // Declared languages that are never checked against the detector:
    // "zxx" ("No Linguistic Content"), "eo" (Esperanto), "la" (Latin).
    if ("zxx".equals(declaredLangCode) || "eo".equals(declaredLangCode) || "la".equals(declaredLangCode)) {
        return;
    }
    if (LANG_TAGS_BY_TLD.containsKey(tld) && Arrays.binarySearch(LANG_TAGS_BY_TLD.get(tld), declaredLangCode) >= 0) {
        return;
    }
    try {
        // Collapse every whitespace run to a single space before detection.
        String textContent = documentContent.toString().replaceAll("\\s+", " ");
        String detectedLanguage = "";
        Detector detector = DetectorFactory.create();
        detector.append(textContent);
        ArrayList<String> possibleLanguages = new ArrayList<>();
        // Fetch the candidate list once; the result was previously also
        // requested a first time and thrown away.
        ArrayList<Language> possibilities = detector.getProbabilities();
        for (Language possibility : possibilities) {
            possibleLanguages.add(possibility.lang);
            if (Arrays.binarySearch(COMMON_LANGS, possibility.lang) < 0 && systemId != null) {
                // The display name is only needed for this log line.
                ULocale plocale = new ULocale(possibility.lang);
                log4j.info(String.format("%s %s %s", plocale.getDisplayName(), possibility.prob, systemId));
            }
            if (possibility.prob > MIN_PROBABILITY) {
                detectedLanguage = possibility.lang;
                setDocumentLanguage(detectedLanguage);
            } else {
                // At least two of hr / sr-latn / bs among the candidates seen
                // so far: treat the text as Serbo-Croatian.
                boolean hasHr = possibleLanguages.contains("hr");
                boolean hasSrLatn = possibleLanguages.contains("sr-latn");
                boolean hasBs = possibleLanguages.contains("bs");
                if ((hasHr && (hasSrLatn || hasBs)) || (hasSrLatn && hasBs)) {
                    if (htmlElementHasLang || systemId != null) {
                        detectedLanguage = getDetectedLanguageSerboCroatian();
                        setDocumentLanguage(detectedLanguage);
                    }
                    if ("sh".equals(detectedLanguage)) {
                        checkLangAttributeSerboCroatian();
                        return;
                    }
                }
            }
        }
        if ("".equals(detectedLanguage)) {
            warnIfMissingLang();
            return;
        }
        String detectedLanguageName = "";
        String preferredLanguageCode = "";
        ULocale locale = new ULocale(detectedLanguage);
        String detectedLanguageCode = locale.getLanguage();
        if ("no".equals(detectedLanguage)) {
            checkLangAttributeNorwegian();
            // NOTE(review): detectedLanguageName is still the empty string here.
            checkContentLanguageHeaderNorwegian(detectedLanguage, detectedLanguageName, detectedLanguageCode);
            return;
        }
        // Tags with a hand-picked display name / preferred code; everything
        // else falls back to the ULocale display name and language code.
        if ("zh-hans".equals(detectedLanguage)) {
            detectedLanguageName = "Simplified Chinese";
            preferredLanguageCode = "zh-hans";
        } else if ("zh-hant".equals(detectedLanguage)) {
            detectedLanguageName = "Traditional Chinese";
            preferredLanguageCode = "zh-hant";
        } else if ("mhr".equals(detectedLanguage)) {
            detectedLanguageName = "Meadow Mari";
            preferredLanguageCode = "mhr";
        } else if ("mrj".equals(detectedLanguage)) {
            detectedLanguageName = "Hill Mari";
            preferredLanguageCode = "mrj";
        } else if ("nah".equals(detectedLanguage)) {
            detectedLanguageName = "Nahuatl";
            preferredLanguageCode = "nah";
        } else if ("pnb".equals(detectedLanguage)) {
            detectedLanguageName = "Western Panjabi";
            preferredLanguageCode = "pnb";
        } else if ("sr-cyrl".equals(detectedLanguage)) {
            detectedLanguageName = "Serbian";
            preferredLanguageCode = "sr";
        } else if ("sr-latn".equals(detectedLanguage)) {
            detectedLanguageName = "Serbian";
            preferredLanguageCode = "sr";
        } else if ("uz-cyrl".equals(detectedLanguage)) {
            detectedLanguageName = "Uzbek";
            preferredLanguageCode = "uz";
        } else if ("uz-latn".equals(detectedLanguage)) {
            detectedLanguageName = "Uzbek";
            preferredLanguageCode = "uz";
        } else if ("zxx".equals(detectedLanguage)) {
            detectedLanguageName = "Lorem ipsum text";
            preferredLanguageCode = "zxx";
        } else {
            detectedLanguageName = locale.getDisplayName();
            preferredLanguageCode = detectedLanguageCode;
        }
        checkLangAttribute(detectedLanguage, detectedLanguageName, detectedLanguageCode, preferredLanguageCode);
        checkDirAttribute(detectedLanguage, detectedLanguageName, detectedLanguageCode, preferredLanguageCode);
        checkContentLanguageHeader(detectedLanguage, detectedLanguageName, detectedLanguageCode, preferredLanguageCode);
    } catch (LangDetectException ignored) {
        // Deliberately swallowed: a detection failure produces no language
        // warnings. NOTE(review): presumably best-effort by design - confirm
        // whether a debug-level log entry would be useful here.
    }
}
Also used : Detector(com.cybozu.labs.langdetect.Detector) ULocale(com.ibm.icu.util.ULocale) Language(com.cybozu.labs.langdetect.Language) ArrayList(java.util.ArrayList) LangDetectException(com.cybozu.labs.langdetect.LangDetectException)

Example 15 with ULocale

use of com.ibm.icu.util.ULocale in project es6draft by anba.

the class DateTimeFormatConstructor method BasicFormatMatcher.

/**
 * 12.1.3 BasicFormatMatcher (options, formats)
 * <p>
 * Scores every skeleton known to the locale's pattern generator against the
 * requested format record and returns the pattern of the best match. Date and
 * time parts are matched independently and, when both were requested, combined
 * through the generator's date-time format.
 *
 * @param formatRecord
 *            the format matcher record
 * @param dataLocale
 *            the locale
 * @return the basic format matcher
 */
public static String BasicFormatMatcher(FormatMatcherRecord formatRecord, String dataLocale) {
    ULocale locale = ULocale.forLanguageTag(dataLocale);
    DateTimePatternGenerator generator = DateTimePatternGenerator.getInstance(locale);

    // ICU4J exposes date-only and time-only skeletons; the weekday property is
    // the exception, as it may also show up in time-only skeletons or alone.
    // Hence four request shapes have to be distinguished:
    // 1) only date properties
    // 2) only time properties
    // 3) date and time properties
    // 4) only the weekday property
    final boolean optDate = formatRecord.isDate();
    final boolean optTime = formatRecord.isTime();
    final boolean optDateTime = optDate && optTime;
    // preferred hour representation (12-hour-cycle or 24-hour-cycle)
    final boolean optHour12 = optTime && formatRecord.isHour12(locale);

    // best candidates so far, tracked separately for the date and the time part
    String bestDateFormat = null;
    int bestDateScore = Integer.MIN_VALUE;
    String bestTimeFormat = null;
    int bestTimeScore = Integer.MIN_VALUE;

    Map<String, String> skeletons = addCanonicalSkeletons(generator.getSkeletons(null));
    // computed lazily, on the first hour-bearing skeleton of a time request
    Set<Skeleton> hourSkeletons = null;
    for (Map.Entry<String, String> candidate : skeletons.entrySet()) {
        Skeleton skeleton = Skeleton.fromSkeleton(candidate.getKey());
        // getSkeletons() never yields combined date+time skeletons
        assert !(skeleton.isDate() && skeleton.isTime());
        // unsupported fields disqualify the skeleton
        if (!isSupported(skeleton)) {
            continue;
        }
        // an hour skeleton needs a counterpart with the opposite hour representation
        if (optTime && skeleton.has(DateField.Hour)) {
            if (hourSkeletons == null) {
                hourSkeletons = validHourSkeletons(skeletons);
            }
            if (!hourSkeletons.contains(skeleton)) {
                continue;
            }
        }

        // Apply the request-specific filters and decide which slot
        // (date or time) this skeleton competes for.
        final boolean competesForTime;
        if (optDateTime) {
            // time skeletons that carry a weekday are skipped
            // (presumably the weekday is supplied by the date-skeleton part)
            if (skeleton.isTime() && skeleton.has(DateField.Weekday)) {
                continue;
            }
            // time skeletons must match the requested hour representation
            if (skeleton.isTime() && skeleton.isHour12() != optHour12) {
                continue;
            }
            competesForTime = !skeleton.isDate();
        } else if (optDate) {
            // date-only request: time skeletons are irrelevant
            if (skeleton.isTime()) {
                continue;
            }
            competesForTime = false;
        } else if (optTime) {
            // time-only request: date skeletons are irrelevant
            if (skeleton.isDate()) {
                continue;
            }
            // and the hour representation must match the requested value
            if (skeleton.isHour12() != optHour12) {
                continue;
            }
            competesForTime = true;
        } else {
            // weekday-only request: the result is kept in the date slot
            competesForTime = false;
        }

        int score = computeScore(formatRecord, skeleton);
        if (competesForTime) {
            if (score > bestTimeScore) {
                bestTimeScore = score;
                bestTimeFormat = candidate.getValue();
            }
        } else if (score > bestDateScore) {
            bestDateScore = score;
            bestDateFormat = candidate.getValue();
        }
    }

    // At least one pattern must have been found for each requested part.
    assert !optDate || bestDateFormat != null;
    assert !optTime || bestTimeFormat != null;
    assert !(!optDate && !optTime) || bestDateFormat != null;

    // Adjust the hour representation to the expected hour cycle.
    if (optTime && formatRecord.hasNonDefaultHourCycle(locale)) {
        bestTimeFormat = modifyHour(formatRecord, bestTimeFormat);
    }

    // Hand back the matched pattern.
    if (optDateTime) {
        return MessageFormat.format(generator.getDateTimeFormat(), bestTimeFormat, bestDateFormat);
    }
    return optTime ? bestTimeFormat : bestDateFormat;
}
Also used : ULocale(com.ibm.icu.util.ULocale) DateTimePatternGenerator(com.ibm.icu.text.DateTimePatternGenerator) Skeleton(com.github.anba.es6draft.runtime.objects.intl.DateFieldSymbolTable.Skeleton) Map(java.util.Map) AbstractMap(java.util.AbstractMap)

Aggregations

ULocale (com.ibm.icu.util.ULocale): 25; RuleBasedCollator (com.ibm.icu.text.RuleBasedCollator): 5; Collator (com.ibm.icu.text.Collator): 2; DateTimePatternGenerator (com.ibm.icu.text.DateTimePatternGenerator): 2; DecimalFormat (com.ibm.icu.text.DecimalFormat): 2; SimpleDateFormat (com.ibm.icu.text.SimpleDateFormat): 2; File (java.io.File): 2; FileOutputStream (java.io.FileOutputStream): 2; ArrayList (java.util.ArrayList): 2; Locale (java.util.Locale): 2; Map (java.util.Map): 2; Detector (com.cybozu.labs.langdetect.Detector): 1; LangDetectException (com.cybozu.labs.langdetect.LangDetectException): 1; Language (com.cybozu.labs.langdetect.Language): 1; Skeleton (com.github.anba.es6draft.runtime.objects.intl.DateFieldSymbolTable.Skeleton): 1; CacheLoader (com.google.common.cache.CacheLoader): 1; LocaleString (com.google.template.soy.shared.restricted.ApiCallScopeBindingAnnotations.LocaleString): 1; ICUResourceBundle (com.ibm.icu.impl.ICUResourceBundle): 1; BreakIterator (com.ibm.icu.text.BreakIterator): 1; ListFormatter (com.ibm.icu.text.ListFormatter): 1