Use of com.cybozu.labs.langdetect.Detector in project SearchServices by Alfresco.
From the class AbstractQParser, method detectLanguage.
/**
 * Detects the candidate languages of the given query text with langdetect.
 *
 * <p>Every language reported by {@code Detector.getProbabilities()} is converted to a
 * {@code DetectedLanguage}. When {@code autoDetectQueryLocales} is non-empty, only
 * languages whose code is contained in it are kept; when it is empty, all are kept.
 *
 * @param content the text to analyse; {@code null} or whitespace-only text yields an empty list
 * @return the detected languages that passed the filter, or an empty list when there is
 *         no input text or langdetect throws a {@code LangDetectException}
 */
private List<DetectedLanguage> detectLanguage(String content) {
    // Treat null like blank input; previously trim() threw a NullPointerException here.
    if (content == null || content.trim().isEmpty()) {
        // to be consistent with the tika impl?
        log.debug("No input text to detect language from, returning empty list");
        return Collections.emptyList();
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(content);
        List<Language> langlist = detector.getProbabilities();
        List<DetectedLanguage> solrLangList = new ArrayList<>();
        for (Language l : langlist) {
            // An empty locale filter accepts every detected language.
            if (autoDetectQueryLocales.isEmpty() || autoDetectQueryLocales.contains(l.lang)) {
                solrLangList.add(new DetectedLanguage(l.lang, l.prob));
            }
        }
        return solrLangList;
    } catch (LangDetectException e) {
        // Detection failure is not fatal for the caller: report "nothing detected".
        log.debug("Could not determine language, returning empty list: ", e);
        return Collections.emptyList();
    }
}
Use of com.cybozu.labs.langdetect.Detector in project stanbol by apache.
From the class LanguageIdentifier, method getLanguages.
/**
 * Returns the language candidates that langdetect reports for the given text.
 *
 * @param text the text to analyse
 * @return the list produced by {@code Detector.getProbabilities()}
 * @throws LangDetectException propagated from the underlying langdetect calls
 */
public List<Language> getLanguages(String text) throws LangDetectException {
    final Detector languageProbe = DetectorFactory.create();
    languageProbe.append(text);
    final List<Language> candidates = languageProbe.getProbabilities();
    return candidates;
}
Use of com.cybozu.labs.langdetect.Detector in project stanbol by apache.
From the class LanguageIdentifier, method getLanguage.
/**
 * Returns the single language that langdetect's {@code Detector.detect()} reports
 * for the given text.
 *
 * @param text the text to analyse
 * @return the value returned by {@code Detector.detect()}
 * @throws LangDetectException propagated from the underlying langdetect calls
 */
public String getLanguage(String text) throws LangDetectException {
    final Detector languageProbe = DetectorFactory.create();
    languageProbe.append(text);
    final String detected = languageProbe.detect();
    return detected;
}
Use of com.cybozu.labs.langdetect.Detector in project Asqatasun by Asqatasun.
From the class LanguageDetector, method detectLanguage.
/**
 * Performs language detection on the given text.
 *
 * <p>The text is lower-cased before being handed to the detector. The most probable
 * language is selected with {@code extractLangWithHighestProbability} and wrapped,
 * together with the original (not lower-cased) text, in a
 * {@code LanguageDetectionResult}. The third constructor argument is {@code true}
 * when the detector reported more than one candidate language.
 *
 * @param text the text to test
 * @return the detection result, or {@code null} when langdetect throws a
 *         {@code LangDetectException} (the exception is logged at warn level)
 */
public LanguageDetectionResult detectLanguage(String text) {
    try {
        // NOTE(review): 0.15 is presumably the detector's smoothing parameter — confirm
        // against the langdetect DetectorFactory.create(double) documentation.
        Detector detector = DetectorFactory.create(0.15);
        // issue#47 correction
        // Lower-case with Locale.ROOT so the detector input does not depend on the JVM's
        // default locale (e.g. 'I' would become dotless 'ı' under a Turkish locale).
        detector.append(text.toLowerCase(java.util.Locale.ROOT));
        ArrayList<Language> languages = detector.getProbabilities();
        Language detectedLanguage = extractLangWithHighestProbability(languages);
        return new LanguageDetectionResult(detectedLanguage, text, languages.size() > 1);
    } catch (LangDetectException ex) {
        LOGGER.warn(ex);
    }
    // Detection failed; callers receive null as the failure signal.
    return null;
}
Use of com.cybozu.labs.langdetect.Detector in project lucene-solr by apache.
From the class LangDetectLanguageIdentifierUpdateProcessor, method detectLanguage.
/**
 * Detects the candidate languages of a document from its configured input fields.
 *
 * <p>For each name in {@code inputFields} that the document contains, every String
 * value is appended to a langdetect {@code Detector}, cut to at most
 * {@code maxFieldValueChars} characters and followed by a single space. Non-String
 * values are skipped with a warning. The detector's maximum text length is set to
 * {@code maxTotalChars}.
 *
 * @param doc the document whose field values are analysed
 * @return one {@code DetectedLanguage} per language reported by the detector, or an
 *         empty list when langdetect throws a {@code LangDetectException}
 */
@Override
protected List<DetectedLanguage> detectLanguage(SolrInputDocument doc) {
    try {
        Detector langDetector = DetectorFactory.create();
        langDetector.setMaxTextLength(maxTotalChars);

        for (String fieldName : inputFields) {
            log.debug("Appending field " + fieldName);
            if (!doc.containsKey(fieldName)) {
                continue;
            }
            Collection<Object> values = doc.getFieldValues(fieldName);
            if (values == null) {
                continue;
            }
            for (Object value : values) {
                if (!(value instanceof String)) {
                    log.warn("Field " + fieldName + " not a String value, not including in detection");
                    continue;
                }
                String text = (String) value;
                // Cap each individual value so one huge field cannot dominate detection.
                String capped = text.length() > maxFieldValueChars
                        ? text.substring(0, maxFieldValueChars)
                        : text;
                langDetector.append(capped);
                // Separator so consecutive values do not run together.
                langDetector.append(" ");
            }
        }

        ArrayList<Language> probabilities = langDetector.getProbabilities();
        ArrayList<DetectedLanguage> detected = new ArrayList<>();
        for (Language candidate : probabilities) {
            detected.add(new DetectedLanguage(candidate.lang, candidate.prob));
        }
        return detected;
    } catch (LangDetectException e) {
        log.debug("Could not determine language, returning empty list: ", e);
        return Collections.emptyList();
    }
}
Aggregations