use of com.ibm.icu.util.ULocale in project closure-templates by google.
the class SoyMsgBundleWithFullLocaleTest method testSoyMsgBundleWithFullLocale_deprecatedLanguageCode.
@Test
public void testSoyMsgBundleWithFullLocale_deprecatedLanguageCode() throws Exception {
    // Arrange: wrap the empty bundle using the deprecated language code "iw" both as the
    // ICU locale and as the locale string.
    ULocale deprecatedLocale = new ULocale("iw");
    SoyMsgBundle wrappedBundle = new SoyMsgBundleWithFullLocale(SoyMsgBundle.EMPTY, deprecatedLocale, "iw");

    // Act: request the same deprecated code through a java.util.Locale.
    Locale requestedLocale = new Locale("iw");
    SoyMsgBundle result = SoyMsgBundleWithFullLocale.preservingLocaleIfAllowed(wrappedBundle, requestedLocale);

    // Assert: the locale string is untouched and the result equals the bundle passed in.
    assertThat(result.getLocaleString()).isEqualTo("iw");
    assertThat(result).isEqualTo(wrappedBundle);
}
use of com.ibm.icu.util.ULocale in project validator by validator.
the class LanguageDetectingChecker method checkContentLanguageHeader.
/**
 * Compares the detected document language with the value of the HTTP
 * "Content-Language" header and emits warnings when they disagree.
 *
 * <p>Two independent warnings may be produced: one when the header's language
 * differs from the detected language (unless the pair is one of the tolerated
 * combinations listed below), and one when the html start tag carries a lang
 * attribute whose language differs from the header's.
 *
 * @param detectedLanguage
 *            the detected language tag; passed to {@code zhSubtagMismatch}
 * @param detectedLanguageName
 *            the display name used in the warning text
 * @param detectedLanguageCode
 *            the language code compared against the header's language code
 * @param preferredLanguageCode
 *            the code suggested to the author in the warning text
 * @throws SAXException
 *             declared for the {@code warn} calls
 */
private void checkContentLanguageHeader(String detectedLanguage, String detectedLanguageName, String detectedLanguageCode, String preferredLanguageCode) throws SAXException {
    // Skip when no header value was recorded, or when the value contains a comma
    // (presumably a list of several language tags, which is not compared).
    if ("".equals(httpContentLangHeader) || httpContentLangHeader.contains(",")) {
        return;
    }
    // Language tags are compared as ASCII-lowercase strings, so the lower-casing must not
    // depend on the JVM default locale (e.g. the Turkish dotless-i mapping of "I").
    String lowerCaseContentLang = httpContentLangHeader.toLowerCase(java.util.Locale.ROOT);
    String contentLangCode = new ULocale(lowerCaseContentLang).getLanguage();

    // Tolerated (detected, declared-in-header) pairs: no warning is emitted for these.
    if ("tl".equals(detectedLanguageCode) && ("ceb".equals(contentLangCode) || "ilo".equals(contentLangCode) || "pag".equals(contentLangCode) || "war".equals(contentLangCode))) {
        return;
    }
    if (("id".equals(detectedLanguageCode) || "ms".equals(detectedLanguageCode)) && "min".equals(contentLangCode)) {
        return;
    }
    if ("hr".equals(detectedLanguageCode) && ("sr".equals(contentLangCode) || "bs".equals(contentLangCode) || "sh".equals(contentLangCode))) {
        return;
    }
    if ("sr".equals(detectedLanguageCode) && ("hr".equals(contentLangCode) || "bs".equals(contentLangCode) || "sh".equals(contentLangCode))) {
        return;
    }
    if ("bs".equals(detectedLanguageCode) && ("hr".equals(contentLangCode) || "sr".equals(contentLangCode) || "sh".equals(contentLangCode))) {
        return;
    }
    if ("de".equals(detectedLanguageCode) && ("bar".equals(contentLangCode) || "gsw".equals(contentLangCode) || "lb".equals(contentLangCode))) {
        return;
    }
    // NOTE(review): unlike the other pairs, this one tests the whole lower-cased header
    // value rather than its language code; kept as is - confirm that this is intended.
    if ("zh".equals(detectedLanguageCode) && "yue".equals(lowerCaseContentLang)) {
        return;
    }
    if ("es".equals(detectedLanguageCode) && ("an".equals(contentLangCode) || "ast".equals(contentLangCode))) {
        return;
    }
    if ("it".equals(detectedLanguageCode) && ("co".equals(contentLangCode) || "pms".equals(contentLangCode) || "vec".equals(contentLangCode) || "lmo".equals(contentLangCode) || "scn".equals(contentLangCode) || "nap".equals(contentLangCode))) {
        return;
    }
    if ("rw".equals(detectedLanguageCode) && "rn".equals(contentLangCode)) {
        return;
    }
    if ("mhr".equals(detectedLanguageCode) && ("chm".equals(contentLangCode) || "mrj".equals(contentLangCode))) {
        return;
    }
    if ("mrj".equals(detectedLanguageCode) && ("chm".equals(contentLangCode) || "mhr".equals(contentLangCode))) {
        return;
    }
    if ("ru".equals(detectedLanguageCode) && "bg".equals(contentLangCode)) {
        return;
    }

    // Header language vs. detected language.
    if (zhSubtagMismatch(detectedLanguage, lowerCaseContentLang) || !contentLangCode.equals(detectedLanguageCode)) {
        String message = "This document appears to be written in %s but the value" + " of the HTTP \u201CContent-Language\u201D header is" + " \u201C%s\u201D. Consider changing it to" + " \u201C%s\u201D (or variant).";
        warn(String.format(message, detectedLanguageName, lowerCaseContentLang, preferredLanguageCode));
    }

    // Header language vs. the lang attribute found on the html start tag.
    if (htmlElementHasLang) {
        String message = "The value of the HTTP \u201CContent-Language\u201D" + " header is \u201C%s\u201D but it will be ignored because" + " the \u201Chtml\u201D start tag has %s.";
        String lowerCaseLang = htmlElementLangAttrValue.toLowerCase(java.util.Locale.ROOT);
        if (zhSubtagMismatch(lowerCaseContentLang, lowerCaseLang) || !contentLangCode.equals(declaredLangCode)) {
            warn(String.format(message, httpContentLangHeader, getAttValueExpr("lang", htmlElementLangAttrValue)), htmlStartTagLocator);
        }
    }
}
use of com.ibm.icu.util.ULocale in project validator by validator.
the class LanguageDetectingChecker method startElement.
/**
 * Records language-related state as each element opens.
 *
 * <ul>
 * <li>For the html element: remembers the start-tag location and captures the
 * lang and dir attribute values.</li>
 * <li>For elements opened after body: tracks how many currently open elements
 * belong to a subtree whose lang value differs from the html element's.</li>
 * <li>For any element whose local name is in {@code SKIP_NAMES}: bumps the
 * skip-name counter.</li>
 * </ul>
 *
 * @see nu.validator.checker.Checker#startElement(java.lang.String,
 *      java.lang.String, java.lang.String, org.xml.sax.Attributes)
 */
@Override
public void startElement(String uri, String localName, String name, Attributes atts) throws SAXException {
    if ("html".equals(localName)) {
        htmlStartTagLocator = new LocatorImpl(getDocumentLocator());
        for (int i = 0; i < atts.getLength(); i++) {
            String attrLocalName = atts.getLocalName(i);
            if ("lang".equals(attrLocalName)) {
                if (request != null) {
                    request.setAttribute("http://validator.nu/properties/lang-found", true);
                }
                htmlElementHasLang = true;
                htmlElementLangAttrValue = atts.getValue(i);
                declaredLangCode = new ULocale(htmlElementLangAttrValue).getLanguage();
            } else if ("dir".equals(attrLocalName)) {
                hasDir = true;
                dirAttrValue = atts.getValue(i);
            }
        }
    } else if ("body".equals(localName)) {
        inBody = true;
    } else if (inBody) {
        if (currentOpenElementsInDifferentLang > 0) {
            // Already inside a differently-tagged subtree: every nested element counts once.
            currentOpenElementsInDifferentLang++;
        } else {
            for (int i = 0; i < atts.getLength(); i++) {
                if ("lang".equals(atts.getLocalName(i)) && !"".equals(htmlElementLangAttrValue) && !htmlElementLangAttrValue.equals(atts.getValue(i))) {
                    // Count this element at most once, matching the one-per-element
                    // increment above. An element can expose more than one attribute
                    // whose local name is "lang" (for example lang together with
                    // xml:lang); counting each would leave the counter too high.
                    // NOTE(review): assumes endElement decrements once per element.
                    currentOpenElementsInDifferentLang++;
                    break;
                }
            }
        }
    }
    if (Arrays.binarySearch(SKIP_NAMES, localName) >= 0) {
        currentOpenElementsWithSkipName++;
    }
}
use of com.ibm.icu.util.ULocale in project validator by validator.
the class LanguageDetectingChecker method detectLanguageAndCheckAgainstDeclaredLanguage.
/**
 * Runs language detection over the collected document text and hands the
 * result to the lang-attribute, dir-attribute and Content-Language checks.
 *
 * <p>Detection is skipped (after {@code warnIfMissingLang()}) when fewer than
 * {@code MIN_CHARS} non-whitespace characters were collected, and skipped
 * silently when the declared language is "zxx", "eo" or "la", or when it is
 * listed in {@code LANG_TAGS_BY_TLD} for the current {@code tld}.
 *
 * @throws SAXException
 *             declared for the checks and warnings invoked from here
 */
private void detectLanguageAndCheckAgainstDeclaredLanguage() throws SAXException {
    if (nonWhitespaceCharacterCount < MIN_CHARS) {
        warnIfMissingLang();
        return;
    }
    // Declared languages for which no detection is attempted:
    // "zxx" (No Linguistic Content), "eo" (Esperanto) and "la" (Latin).
    if ("zxx".equals(declaredLangCode) || "eo".equals(declaredLangCode) || "la".equals(declaredLangCode)) {
        return;
    }
    if (LANG_TAGS_BY_TLD.containsKey(tld) && Arrays.binarySearch(LANG_TAGS_BY_TLD.get(tld), declaredLangCode) >= 0) {
        return;
    }
    try {
        // Collapse every whitespace run to a single space before detection.
        String textContent = documentContent.toString().replaceAll("\\s+", " ");
        String detectedLanguage = "";
        Detector detector = DetectorFactory.create();
        detector.append(textContent);
        ArrayList<String> possibleLanguages = new ArrayList<>();
        ArrayList<Language> possibilities = detector.getProbabilities();
        // NOTE(review): the loop does not stop at the first candidate above MIN_PROBABILITY;
        // a later candidate that also exceeds it replaces detectedLanguage.
        for (Language possibility : possibilities) {
            possibleLanguages.add(possibility.lang);
            if (Arrays.binarySearch(COMMON_LANGS, possibility.lang) < 0 && systemId != null) {
                // The locale object is only needed for the display name in the log line.
                ULocale plocale = new ULocale(possibility.lang);
                log4j.info(String.format("%s %s %s", plocale.getDisplayName(), possibility.prob, systemId));
            }
            if (possibility.prob > MIN_PROBABILITY) {
                detectedLanguage = possibility.lang;
                setDocumentLanguage(detectedLanguage);
            } else {
                boolean sawHr = possibleLanguages.contains("hr");
                boolean sawSrLatn = possibleLanguages.contains("sr-latn");
                boolean sawBs = possibleLanguages.contains("bs");
                // True when at least two of "hr", "sr-latn" and "bs" are among the
                // candidates seen so far.
                if ((sawHr && (sawSrLatn || sawBs)) || (sawSrLatn && sawBs)) {
                    if (htmlElementHasLang || systemId != null) {
                        detectedLanguage = getDetectedLanguageSerboCroatian();
                        setDocumentLanguage(detectedLanguage);
                    }
                    if ("sh".equals(detectedLanguage)) {
                        checkLangAttributeSerboCroatian();
                        return;
                    }
                }
            }
        }
        if ("".equals(detectedLanguage)) {
            warnIfMissingLang();
            return;
        }
        String detectedLanguageName = "";
        String preferredLanguageCode = "";
        ULocale locale = new ULocale(detectedLanguage);
        String detectedLanguageCode = locale.getLanguage();
        if ("no".equals(detectedLanguage)) {
            // Norwegian has dedicated checks; the name passed on is still the empty string.
            checkLangAttributeNorwegian();
            checkContentLanguageHeaderNorwegian(detectedLanguage, detectedLanguageName, detectedLanguageCode);
            return;
        }
        // Hand-picked display name and suggested code for specific detector tags;
        // every other tag falls back to the ICU display name and language code.
        if ("zh-hans".equals(detectedLanguage)) {
            detectedLanguageName = "Simplified Chinese";
            preferredLanguageCode = "zh-hans";
        } else if ("zh-hant".equals(detectedLanguage)) {
            detectedLanguageName = "Traditional Chinese";
            preferredLanguageCode = "zh-hant";
        } else if ("mhr".equals(detectedLanguage)) {
            detectedLanguageName = "Meadow Mari";
            preferredLanguageCode = "mhr";
        } else if ("mrj".equals(detectedLanguage)) {
            detectedLanguageName = "Hill Mari";
            preferredLanguageCode = "mrj";
        } else if ("nah".equals(detectedLanguage)) {
            detectedLanguageName = "Nahuatl";
            preferredLanguageCode = "nah";
        } else if ("pnb".equals(detectedLanguage)) {
            detectedLanguageName = "Western Panjabi";
            preferredLanguageCode = "pnb";
        } else if ("sr-cyrl".equals(detectedLanguage) || "sr-latn".equals(detectedLanguage)) {
            detectedLanguageName = "Serbian";
            preferredLanguageCode = "sr";
        } else if ("uz-cyrl".equals(detectedLanguage) || "uz-latn".equals(detectedLanguage)) {
            detectedLanguageName = "Uzbek";
            preferredLanguageCode = "uz";
        } else if ("zxx".equals(detectedLanguage)) {
            detectedLanguageName = "Lorem ipsum text";
            preferredLanguageCode = "zxx";
        } else {
            detectedLanguageName = locale.getDisplayName();
            preferredLanguageCode = detectedLanguageCode;
        }
        checkLangAttribute(detectedLanguage, detectedLanguageName, detectedLanguageCode, preferredLanguageCode);
        checkDirAttribute(detectedLanguage, detectedLanguageName, detectedLanguageCode, preferredLanguageCode);
        checkContentLanguageHeader(detectedLanguage, detectedLanguageName, detectedLanguageCode, preferredLanguageCode);
    } catch (LangDetectException ignored) {
        // Deliberately best-effort: when detection fails, no language-related
        // message is reported for the document.
    }
}
use of com.ibm.icu.util.ULocale in project es6draft by anba.
the class DateTimeFormatConstructor method BasicFormatMatcher.
/**
 * 12.1.3 BasicFormatMatcher (options, formats)
 * <p>
 * Scores every supported skeleton offered by the locale's pattern generator
 * against the format record and returns the pattern of the best match. Date
 * and time skeletons are scored separately and, when both were requested, the
 * two winners are combined through the generator's date-time format.
 *
 * @param formatRecord
 *            the format matcher record
 * @param dataLocale
 *            the locale
 * @return the pattern selected by the basic format matcher
 */
public static String BasicFormatMatcher(FormatMatcherRecord formatRecord, String dataLocale) {
    ULocale locale = ULocale.forLanguageTag(dataLocale);
    DateTimePatternGenerator generator = DateTimePatternGenerator.getInstance(locale);

    // ICU4J exposes date-only and time-only skeletons (the weekday property may additionally
    // show up in time-only skeletons or on its own), so four request shapes are handled:
    // date only, time only, date and time, and weekday only.
    boolean wantsDate = formatRecord.isDate();
    boolean wantsTime = formatRecord.isTime();
    boolean wantsBoth = wantsDate && wantsTime;
    // Preferred hour representation (12-hour-cycle or 24-hour-cycle); only relevant for time.
    boolean wantsHour12 = wantsTime && formatRecord.isHour12(locale);

    // Best candidates so far, tracked independently for the date and the time part.
    int topDateScore = Integer.MIN_VALUE;
    int topTimeScore = Integer.MIN_VALUE;
    String datePattern = null;
    String timePattern = null;

    Map<String, String> skeletons = addCanonicalSkeletons(generator.getSkeletons(null));
    // Computed lazily, the first time a skeleton with an hour field is met.
    Set<Skeleton> hourSkeletons = null;
    for (Map.Entry<String, String> candidate : skeletons.entrySet()) {
        Skeleton skeleton = Skeleton.fromSkeleton(candidate.getKey());
        // getSkeletons() does not return any date+time skeletons
        assert !(skeleton.isDate() && skeleton.isTime());
        // Skeletons with unsupported fields never take part.
        if (!isSupported(skeleton)) {
            continue;
        }
        // Hour skeletons are only usable when validHourSkeletons() lists them.
        if (wantsTime && skeleton.has(DateField.Hour)) {
            if (hourSkeletons == null) {
                hourSkeletons = validHourSkeletons(skeletons);
            }
            if (!hourSkeletons.contains(skeleton)) {
                continue;
            }
        }

        // Decide whether this skeleton competes for the date slot or the time slot,
        // or is dropped for the current request shape.
        final boolean competesForDate;
        if (wantsBoth) {
            // Time skeletons are dropped when they carry a weekday or use the wrong
            // hour representation.
            if (skeleton.isTime() && (skeleton.has(DateField.Weekday) || skeleton.isHour12() != wantsHour12)) {
                continue;
            }
            competesForDate = skeleton.isDate();
        } else if (wantsDate) {
            // Only date fields were requested: time skeletons are dropped.
            if (skeleton.isTime()) {
                continue;
            }
            competesForDate = true;
        } else if (wantsTime) {
            // Only time fields were requested: date skeletons and skeletons with the
            // wrong hour representation are dropped.
            if (skeleton.isDate() || skeleton.isHour12() != wantsHour12) {
                continue;
            }
            competesForDate = false;
        } else {
            // Weekday-only request: the winner is kept in the date slot.
            competesForDate = true;
        }

        int score = computeScore(formatRecord, skeleton);
        if (competesForDate) {
            if (score > topDateScore) {
                topDateScore = score;
                datePattern = candidate.getValue();
            }
        } else if (score > topTimeScore) {
            topTimeScore = score;
            timePattern = candidate.getValue();
        }
    }

    // At least one pattern must have been found for every requested part.
    assert !wantsDate || datePattern != null;
    assert !wantsTime || timePattern != null;
    assert wantsDate || wantsTime || datePattern != null;

    // Adjust the hour representation to the expected hour cycle.
    if (wantsTime && formatRecord.hasNonDefaultHourCycle(locale)) {
        timePattern = modifyHour(formatRecord, timePattern);
    }

    if (wantsBoth) {
        // NOTE(review): if MessageFormat here is java.text.MessageFormat, apostrophe-quoted
        // literals in the date-time format would be unquoted by it - confirm.
        return MessageFormat.format(generator.getDateTimeFormat(), timePattern, datePattern);
    }
    return wantsTime ? timePattern : datePattern;
}
Aggregations