use of org.opensextant.data.Language in project Xponents by OpenSextant.
the class GeonamesUtility method primaryLanguage.
/**
 * Looks up the primary language of a country. By our convention the answer is the major
 * language family, not the locale: the primary language of Australia is 'en', not 'en_AU'.
 * Per the original note, the underlying map records only the first language entry for a country.
 *
 * @param cc
 *            Country code
 * @return the Language known for the country's primary language ID; a placeholder Language
 *         built from the raw ID when that ID is not recognized; or null when the country is
 *         not found or has no primary language ID
 */
public Language primaryLanguage(String cc) {
    final Country country = isoCountries.get(cc);
    // Unknown country and missing language ID both end in the same null result.
    final String langId = (country != null) ? country.getPrimaryLanguage() : null;
    if (langId == null) {
        return null;
    }
    final Language known = TextUtils.getLanguage(langId);
    // What language? Unrecognized ID: hand back a Language whose three fields are all the raw ID.
    return (known != null) ? known : new Language(langId, langId, langId);
}
use of org.opensextant.data.Language in project Xponents by OpenSextant.
the class TextUtils method isRomanceLanguage.
/**
 * Tests whether a language ID denotes a Romance language
 * (SPA + POR + ITA + FRA + ROM). Extend the definition as needed.
 *
 * @param l
 *            lang ID
 * @return true if the ID resolves to a known language whose ISO 639-1 code is accepted by the
 *         Romance-language check; false if the ID does not resolve to any known language
 */
public static boolean isRomanceLanguage(String l) {
    final Language resolved = getLanguage(l);
    // Short-circuit: an unresolvable ID is never Romance; otherwise defer to the ID-based check.
    return resolved != null && _isRomanceLanguage(resolved.getISO639_1_Code());
}
use of org.opensextant.data.Language in project Xponents by OpenSextant.
the class TextUtils method initLOCLanguageData.
/**
 * Loads the Library of Congress language ID table (ISO 639-2) from the classpath resource
 * {@code /ISO-639-2_utf-8.txt} and registers each entry, then adds a handful of overrides and
 * aliases for popular languages that go by other codes. This is offered as a tool to help
 * downstream language ID and enrich metadata when tagging data from particular countries.
 *
 * Reference: http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
 *
 * @throws java.io.IOException
 *             if the resource file is not found or cannot be read
 */
public static void initLOCLanguageData() throws java.io.IOException {
    //
    // DATA FILE: http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    java.io.InputStream io = TextUtils.class.getResourceAsStream("/ISO-639-2_utf-8.txt");
    if (io == null) {
        // getResourceAsStream signals a missing resource with null; report it as the
        // documented IOException instead of failing later with a NullPointerException.
        throw new java.io.IOException("Resource not found on classpath: /ISO-639-2_utf-8.txt");
    }

    CsvListReader langReader = null;
    try {
        java.io.Reader featIO = new InputStreamReader(io, "UTF-8");
        // Pipe-delimited file, double-quote as the quote character, "\n" line endings.
        langReader = new CsvListReader(featIO, new CsvPreference.Builder('"', '|', "\n").build());
        CellProcessor[] cells = { new Optional(), new Optional(), new Optional(), new Optional(), new NotNull() };
        List<Object> lang = null;
        /*
         * ISO3,XX,ISO2,NAME,NAME_FR
         */
        while ((lang = langReader.read(cells)) != null) {
            String names = (String) lang.get(3);
            if (isBlank(names)) {
                continue;
            }
            // Skip a header row, if present.
            if ("NAME".equals(names)) {
                continue;
            }
            List<String> namelist = TextUtils.string2list(names, ";");
            String iso3 = (String) lang.get(0);
            // The first column is optional, so it may be absent; rows without a code and
            // rows starting with '#' are not language entries.
            if (iso3 == null || iso3.startsWith("#")) {
                continue;
            }
            String iso2 = (String) lang.get(2);
            // Only the first of the ';'-separated names is used as the language name.
            Language l = new Language(iso3, iso2, namelist.get(0));
            addLanguage(l);
        }
    } finally {
        // Release the resource on every path, including parse failures.
        if (langReader != null) {
            langReader.close();
        } else {
            io.close();
        }
    }

    // Popular languages that go by other codes.
    // ISO languages as listed by LOC are listed with Bibliographic vs.
    // Terminological codes.
    // FRE vs. FRA are subtle difference for French, but important if you
    // cannot find French by lang ID.
    //
    // Fully override French and Trad Chinese:
    Language fr = new Language("fra", "fr", "French");
    addLanguage(fr, true);
    // NOTE(review): "Taiwain" looks like a typo for "Taiwan"; left unchanged here because the
    // name is runtime data and may be used for lookups -- confirm before correcting.
    Language zhtw = new Language("zh-tw", "zt", "Chinese/Taiwain");
    addLanguage(zhtw, true);

    // Delicately insert more common names and codes as well as locales
    // here.
    Language zh = new Language("zho", "zh", "Chinese");
    languageMapISO639.put("zho", zh);
    Language zhcn = new Language("chi", "zh", "Chinese");
    languageMapISO639.put("zh-cn", zhcn);
    Language fas = new Language("per", "fa", "Farsi");
    languageMapISO639.put("farsi", fas);

    // Locales of English -- are still "English"
    Language en1 = new Language("eng", "en", "English");
    languageMapISO639.put("en-gb", en1);
    languageMapISO639.put("en-us", en1);
    languageMapISO639.put("en-au", en1);
}
use of org.opensextant.data.Language in project Xponents by OpenSextant.
the class TextUtils method initLanguageData.
/**
 * Initialize language codes and metadata from the JVM's available locales. This establishes a
 * map for the most common language codes/names that exist in at least ISO-639-1 and
 * have a non-zero 2-char ID. Locales for which the JVM cannot supply a three-letter
 * language code are skipped rather than aborting the whole initialization.
 *
 * <pre>
 * Based on:
 * http://stackoverflow.com/questions/674041/is-there-an-elegant-way
 * -to-convert-iso-639-2-3-letter-language-codes-to-java-lo
 *
 * Actual code mappings: en => eng eng => en
 *
 * cel => '' // Celtic; Avoid this.
 *
 * tr => tur tur => tr
 *
 * Names: tr => turkish tur => turkish turkish => tr // ISO2 only
 *
 * </pre>
 */
public static void initLanguageData() {
    Locale[] locales = Locale.getAvailableLocales();
    for (Locale locale : locales) {
        String iso3;
        try {
            iso3 = locale.getISO3Language();
        } catch (java.util.MissingResourceException noIso3) {
            // Locale.getISO3Language() is documented to throw when no three-letter
            // abbreviation exists for the locale; skip just this locale.
            continue;
        }
        // NOTE(review): getDisplayLanguage() is localized for the JVM's default locale, so the
        // registered names may not be English on a non-English JVM -- confirm this is intended.
        Language l = new Language(iso3, locale.getLanguage(), locale.getDisplayLanguage());
        addLanguage(l);
    }
}
use of org.opensextant.data.Language in project Xponents by OpenSextant.
the class TextUtils method isEuroLanguage.
/**
 * European languages = Romance + GER + ENG. Extend definition as needed.
 *
 * @param l
 *            language ID
 * @return true if language is European in nature; false if the ID does not resolve to a known
 *         language or the resolved language has no ISO 639-1 code
 */
public static boolean isEuroLanguage(String l) {
    Language lang = getLanguage(l);
    if (lang == null) {
        return false;
    }
    String id = lang.getISO639_1_Code();
    if (id == null) {
        // NOTE(review): languages registered without a two-letter code presumably report null
        // here; such a language cannot match any of the two-letter IDs compared below, and
        // this guard avoids dereferencing a null ID.
        return false;
    }
    return (_isRomanceLanguage(id) || id.equals(germanLang) || id.equals(englishLang));
}
Aggregations