Search in sources :

Example 1 with Language

use of org.opensextant.data.Language in project Xponents by OpenSextant.

the class GeonamesUtility method primaryLanguage.

/**
     * Looks up the primary language for a country. By convention the result is the major language family, not a
     * locale: the primary language of Australia is 'en', not 'en_AU'. Only the country's recorded primary language
     * ID is consulted.
     * 
     * @param cc
     *            Country code
     * @return Language object; null when the country code is not known or the country has no primary language ID.
     *         When the ID is not a recognized language, a Language built from the raw ID is returned.
     */
public Language primaryLanguage(String cc) {
    Country country = isoCountries.get(cc);
    String langId = (country != null) ? country.getPrimaryLanguage() : null;
    if (langId == null) {
        return null;
    }
    Language known = TextUtils.getLanguage(langId);
    // Unrecognized ID: wrap the raw code so the caller still gets something usable.
    return (known != null) ? known : new Language(langId, langId, langId);
}
Also used : Language(org.opensextant.data.Language) Country(org.opensextant.data.Country)

Example 2 with Language

use of org.opensextant.data.Language in project Xponents by OpenSextant.

the class TextUtils method isRomanceLanguage.

/**
     * Tests whether a language ID denotes a Romance language
     * (SPA + POR + ITA + FRA + ROM).
     *
     * Extend definition as needed.
     * 
     * @param l
     *            lang ID
     * @return true if language is a Romance language; false if the ID does not resolve to a known language
     */
public static boolean isRomanceLanguage(String l) {
    Language resolved = getLanguage(l);
    // Short-circuit keeps the lookup result from being dereferenced when it is null.
    return resolved != null && _isRomanceLanguage(resolved.getISO639_1_Code());
}
Also used : Language(org.opensextant.data.Language)

Example 3 with Language

use of org.opensextant.data.Language in project Xponents by OpenSextant.

the class TextUtils method initLOCLanguageData.

/**
     * This is Library of Congress data for language IDs. This is offered as a
     * tool to help downstream language ID and enrich metadata when tagging data
     * from particular countries.
     *
     * Each data row (ISO3|XX|ISO2|NAME|NAME_FR) is registered as a Language
     * using the first of its semicolon-separated names. After the file is
     * loaded, a few common alternate codes and locale IDs are added or
     * overridden.
     *
     * Reference: http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
     *
     * @throws java.io.IOException
     *             if resource file is not found or cannot be read
     */
public static void initLOCLanguageData() throws java.io.IOException {
    //
    // DATA FILE: http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    final String resource = "/ISO-639-2_utf-8.txt";
    java.io.InputStream io = TextUtils.class.getResourceAsStream(resource);
    if (io == null) {
        // getResourceAsStream signals a missing resource with null; report it as the
        // documented IOException instead of a NullPointerException further down.
        throw new java.io.IOException("Language resource not found on classpath: " + resource);
    }
    java.io.Reader featIO = new InputStreamReader(io, "UTF-8");
    CsvListReader langReader = new CsvListReader(featIO, new CsvPreference.Builder('"', '|', "\n").build());
    try {
        CellProcessor[] cells = { new Optional(), new Optional(), new Optional(), new Optional(), new NotNull() };
        List<Object> lang = null;
        /*
         * ISO3,XX,ISO2,NAME,NAME_FR
         */
        while ((lang = langReader.read(cells)) != null) {
            //
            String names = (String) lang.get(3);
            if (isBlank(names)) {
                continue;
            }
            // Skip the header row.
            if ("NAME".equals(names)) {
                continue;
            }
            List<String> namelist = TextUtils.string2list(names, ";");
            String iso3 = (String) lang.get(0);
            // The ISO3 column is optional, so it may be absent; rows without a
            // code, and commented rows, are not registered.
            if (iso3 == null || iso3.startsWith("#")) {
                continue;
            }
            String iso2 = (String) lang.get(2);
            Language l = new Language(iso3, iso2, namelist.get(0));
            addLanguage(l);
        }
    } finally {
        // Release the reader (and the underlying stream) even when parsing fails.
        langReader.close();
    }
    // Popular languages that go by other codes.
    // ISO languages as listed by LOC are listed with Bibliographic vs.
    // Terminological codes.
    // FRE vs. FRA are subtle difference for French, but important if you
    // cannot find French by lang ID.
    //
    // Fully override French and Trad Chinese:
    Language fr = new Language("fra", "fr", "French");
    addLanguage(fr, true);
    // NOTE(review): "Taiwain" looks like a typo for "Taiwan"; left unchanged
    // because the name may be used as a lookup key elsewhere -- confirm.
    Language zhtw = new Language("zh-tw", "zt", "Chinese/Taiwain");
    addLanguage(zhtw, true);
    // Delicately insert more common names and codes as well as locales
    // here.
    Language zh = new Language("zho", "zh", "Chinese");
    languageMapISO639.put("zho", zh);
    Language zhcn = new Language("chi", "zh", "Chinese");
    languageMapISO639.put("zh-cn", zhcn);
    Language fas = new Language("per", "fa", "Farsi");
    languageMapISO639.put("farsi", fas);
    // Locales of English -- are still "English"
    Language en1 = new Language("eng", "en", "English");
    languageMapISO639.put("en-gb", en1);
    languageMapISO639.put("en-us", en1);
    languageMapISO639.put("en-au", en1);
}
Also used : CsvListReader(org.supercsv.io.CsvListReader) InputStreamReader(java.io.InputStreamReader) Optional(org.supercsv.cellprocessor.Optional) NotNull(org.supercsv.cellprocessor.constraint.NotNull) Language(org.opensextant.data.Language) CellProcessor(org.supercsv.cellprocessor.ift.CellProcessor)

Example 4 with Language

use of org.opensextant.data.Language in project Xponents by OpenSextant.

the class TextUtils method initLanguageData.

/**
     * Initialize language codes and metadata. This establishes a map for the
     * most common language codes/names that exist in at least ISO-639-1 and
     * have a non-zero 2-char ID.
     *
     * Every locale reported by the JVM is registered through addLanguage;
     * a locale for which no three-letter language code is available is
     * skipped rather than aborting the whole initialization.
     *
     * <pre>
     * Based on:
     * http://stackoverflow.com/questions/674041/is-there-an-elegant-way
     * -to-convert-iso-639-2-3-letter-language-codes-to-java-lo
     *
     * Actual code mappings: en =&gt; eng eng =&gt; en
     *
     * cel =&gt; '' // Celtic; Avoid this.
     *
     * tr =&gt; tur tur =&gt; tr
     *
     * Names: tr =&gt; turkish tur =&gt; turkish turkish =&gt; tr // ISO2 only
     *
     * </pre>
     */
public static void initLanguageData() {
    for (Locale locale : Locale.getAvailableLocales()) {
        String iso3;
        try {
            iso3 = locale.getISO3Language();
        } catch (java.util.MissingResourceException ignored) {
            // Locale.getISO3Language() is documented to throw when no three-letter
            // abbreviation exists for the locale; one such locale must not stop
            // the remaining languages from being registered.
            continue;
        }
        addLanguage(new Language(iso3, locale.getLanguage(), locale.getDisplayLanguage()));
    }
}
Also used : Locale(java.util.Locale) Language(org.opensextant.data.Language)

Example 5 with Language

use of org.opensextant.data.Language in project Xponents by OpenSextant.

the class TextUtils method isEuroLanguage.

/**
     * European languages = Romance + GER + ENG Extend definition as needed.
     * 
     * @param l
     *            language ID
     * @return true if language is European in nature; false if the ID does not
     *         resolve to a known language or the language has no ISO 639-1 code
     */
public static boolean isEuroLanguage(String l) {
    Language lang = getLanguage(l);
    if (lang == null) {
        return false;
    }
    String id = lang.getISO639_1_Code();
    if (id == null) {
        // A language may lack a two-letter code (presumably the case for entries
        // loaded with an empty ISO2 column); it cannot match any of the
        // two-letter IDs compared below, and id.equals(...) would otherwise throw.
        return false;
    }
    return _isRomanceLanguage(id) || id.equals(germanLang) || id.equals(englishLang);
}
Also used : Language(org.opensextant.data.Language)

Aggregations

Language (org.opensextant.data.Language)10 Country (org.opensextant.data.Country)2 InputStreamReader (java.io.InputStreamReader)1 Locale (java.util.Locale)1 Optional (org.supercsv.cellprocessor.Optional)1 NotNull (org.supercsv.cellprocessor.constraint.NotNull)1 CellProcessor (org.supercsv.cellprocessor.ift.CellProcessor)1 CsvListReader (org.supercsv.io.CsvListReader)1