Search in sources :

Example 1 with Detector

use of com.cybozu.labs.langdetect.Detector in project SearchServices by Alfresco.

the class AbstractQParser method detectLanguage.

/**
 * Detects the candidate languages of the given text with langdetect.
 *
 * <p>When {@code autoDetectQueryLocales} is non-empty, only languages whose code is
 * contained in it are returned; otherwise every language reported by the detector is
 * returned.
 *
 * @param content the text to analyse; {@code null} or whitespace-only text yields an empty list
 * @return the detected languages with their probabilities, in the order reported by the
 *         detector, or an empty list when there is no input or detection fails
 */
private List<DetectedLanguage> detectLanguage(String content) {
    // Treat null like blank input instead of failing with a NullPointerException on trim().
    if (content == null || content.trim().isEmpty()) {
        // to be consistent with the tika impl?
        log.debug("No input text to detect language from, returning empty list");
        return Collections.emptyList();
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(content);
        List<Language> candidates = detector.getProbabilities();
        List<DetectedLanguage> solrLangList = new ArrayList<>(candidates.size());
        // The locale filter does not change while iterating, so evaluate it once.
        boolean acceptAll = autoDetectQueryLocales.size() == 0;
        for (Language candidate : candidates) {
            if (acceptAll || autoDetectQueryLocales.contains(candidate.lang)) {
                solrLangList.add(new DetectedLanguage(candidate.lang, candidate.prob));
            }
        }
        return solrLangList;
    } catch (LangDetectException e) {
        // Detection failure is not fatal for the caller: report "no language found".
        log.debug("Could not determine language, returning empty list: ", e);
        return Collections.emptyList();
    }
}
Also used : Detector(com.cybozu.labs.langdetect.Detector) Language(com.cybozu.labs.langdetect.Language) ArrayList(java.util.ArrayList) LangDetectException(com.cybozu.labs.langdetect.LangDetectException)

Example 2 with Detector

use of com.cybozu.labs.langdetect.Detector in project stanbol by apache.

the class LanguageIdentifier method getLanguages.

/**
 * Returns the language probabilities langdetect computes for the given text.
 *
 * @param text the text appended to a freshly created detector
 * @return the list returned by {@code Detector.getProbabilities()}
 * @throws LangDetectException if the detector cannot be created or detection fails
 */
public List<Language> getLanguages(String text) throws LangDetectException {
    final Detector languageDetector = DetectorFactory.create();
    languageDetector.append(text);
    final List<Language> probabilities = languageDetector.getProbabilities();
    return probabilities;
}
Also used : Detector(com.cybozu.labs.langdetect.Detector)

Example 3 with Detector

use of com.cybozu.labs.langdetect.Detector in project stanbol by apache.

the class LanguageIdentifier method getLanguage.

/**
 * Returns the single language langdetect detects for the given text.
 *
 * @param text the text appended to a freshly created detector
 * @return the value returned by {@code Detector.detect()}
 * @throws LangDetectException if the detector cannot be created or detection fails
 */
public String getLanguage(String text) throws LangDetectException {
    final Detector languageDetector = DetectorFactory.create();
    languageDetector.append(text);
    final String detectedLanguage = languageDetector.detect();
    return detectedLanguage;
}
Also used : Detector(com.cybozu.labs.langdetect.Detector)

Example 4 with Detector

use of com.cybozu.labs.langdetect.Detector in project Asqatasun by Asqatasun.

the class LanguageDetector method detectLanguage.

/**
 * Perform the detection.
 *
 * <p>The text is lower-cased before being handed to the detector. The lower-casing
 * uses {@link java.util.Locale#ROOT} so that the detector input does not depend on
 * the JVM default locale (for example, a Turkish default locale would otherwise map
 * {@code I} to a dotless {@code ı}).
 *
 * @param text to test
 * @return the detection result, built from the language with the highest probability,
 *         the original (not lower-cased) text and a flag telling whether more than one
 *         candidate language was reported; {@code null} if detection fails with a
 *         {@code LangDetectException}
 */
public LanguageDetectionResult detectLanguage(String text) {
    try {
        Detector detector = DetectorFactory.create(0.15);
        // issue#47 correction
        detector.append(text.toLowerCase(java.util.Locale.ROOT));
        ArrayList<Language> languages = detector.getProbabilities();
        Language detectedLanguage = extractLangWithHighestProbability(languages);
        return new LanguageDetectionResult(detectedLanguage, text, languages.size() > 1);
    } catch (LangDetectException ex) {
        // Detection failure is logged and reported to the caller as null.
        LOGGER.warn(ex);
    }
    return null;
}
Also used : Detector(com.cybozu.labs.langdetect.Detector) Language(com.cybozu.labs.langdetect.Language) LangDetectException(com.cybozu.labs.langdetect.LangDetectException)

Example 5 with Detector

use of com.cybozu.labs.langdetect.Detector in project lucene-solr by apache.

the class LangDetectLanguageIdentifierUpdateProcessor method detectLanguage.

/**
 * Detects the languages of a document from the String values of the configured
 * input fields.
 *
 * <p>Each String value is truncated to {@code maxFieldValueChars} characters and
 * followed by a single space when appended to the detector; non-String values are
 * skipped with a warning. The detector itself is capped at {@code maxTotalChars}.
 *
 * @param doc the document whose fields listed in {@code inputFields} are inspected
 * @return the detected languages with their probabilities, or an empty list when
 *         detection fails with a {@code LangDetectException}
 */
@Override
protected List<DetectedLanguage> detectLanguage(SolrInputDocument doc) {
    try {
        Detector detector = DetectorFactory.create();
        detector.setMaxTextLength(maxTotalChars);
        for (String fieldName : inputFields) {
            log.debug("Appending field " + fieldName);
            if (!doc.containsKey(fieldName)) {
                continue;
            }
            Collection<Object> values = doc.getFieldValues(fieldName);
            if (values == null) {
                continue;
            }
            for (Object value : values) {
                if (!(value instanceof String)) {
                    log.warn("Field " + fieldName + " not a String value, not including in detection");
                    continue;
                }
                String text = (String) value;
                // Only the leading part of an over-long value takes part in detection.
                boolean tooLong = text.length() > maxFieldValueChars;
                detector.append(tooLong ? text.substring(0, maxFieldValueChars) : text);
                // Separate consecutive values so they are not glued together.
                detector.append(" ");
            }
        }
        ArrayList<DetectedLanguage> detected = new ArrayList<>();
        for (Language candidate : detector.getProbabilities()) {
            detected.add(new DetectedLanguage(candidate.lang, candidate.prob));
        }
        return detected;
    } catch (LangDetectException e) {
        log.debug("Could not determine language, returning empty list: ", e);
        return Collections.emptyList();
    }
}
Also used : Detector(com.cybozu.labs.langdetect.Detector) Language(com.cybozu.labs.langdetect.Language) ArrayList(java.util.ArrayList) LangDetectException(com.cybozu.labs.langdetect.LangDetectException)

Aggregations

Detector (com.cybozu.labs.langdetect.Detector): 6, LangDetectException (com.cybozu.labs.langdetect.LangDetectException): 4, Language (com.cybozu.labs.langdetect.Language): 4, ArrayList (java.util.ArrayList): 3, ULocale (com.ibm.icu.util.ULocale): 1