Use of zemberek.langid.model.CompressedCharNgramModel in project zemberek-nlp by ahmetaa.
From the class LanguageIdentifier, method fromInternalModels.
/**
 * Loads all internal models from the internal compressed resource folder
 * {@code /models/compressed/}. One {@code <language>.clm} resource is read for every identifier
 * returned by {@code Language.allLanguages()}.
 *
 * @return LanguageIdentifier built from the loaded models, keyed by language identifier.
 * @throws IOException In case of an IO error while opening or reading a model resource.
 */
public static LanguageIdentifier fromInternalModels() throws IOException {
  // Copying the identifiers into a set drops duplicates, so each model is loaded only once.
  Set<String> langs = Sets.newHashSet(Language.allLanguages());
  Map<String, CharNgramLanguageModel> map = Maps.newHashMap();
  for (String language : langs) {
    String resourceName = "/models/compressed/" + language + ".clm";
    // Close every stream as soon as its model is loaded so that resource handles do not leak.
    // No null check is needed: openStream() either returns a stream or throws.
    // NOTE(review): a missing resource is presumably rejected by Resources.getResource itself
    // (Guava throws IllegalArgumentException) - confirm against the imported Resources class.
    // NOTE(review): assumes CompressedCharNgramModel.load fully consumes the stream before
    // returning, because the model is used after the stream has been closed.
    try (InputStream is =
        Resources.getResource(LanguageIdentifier.class, resourceName).openStream()) {
      map.put(language, CompressedCharNgramModel.load(is));
    }
  }
  return new LanguageIdentifier(map);
}
Use of zemberek.langid.model.CompressedCharNgramModel in project zemberek-nlp by ahmetaa.
From the class LanguageIdentifier, method fromInternalModelGroup.
/**
 * Loads a group of internal compressed models from the resource folder
 * {@code /models/<groupId>/}. The folder must contain a {@code langs.txt} resource holding a
 * comma separated list of language identifiers, and one {@code <language>.clm} model resource
 * for each identifier in that list. For example, to load the models stored under
 * {@code /models/tr_group}, call {@code fromInternalModelGroup("tr_group")}.
 *
 * @param groupId internal folder name
 * @return LanguageIdentifier built from the loaded models, keyed by language identifier.
 * @throws IOException In case of an IO error.
 * @throws IllegalArgumentException if the language list of the group is empty.
 */
public static LanguageIdentifier fromInternalModelGroup(String groupId) throws IOException {
  Set<String> languages = Sets.newHashSet();
  String languageList = "/models/" + groupId + "/langs.txt";
  try (InputStream is =
      Resources.getResource(LanguageIdentifier.class, languageList).openStream()) {
    String langLine = SimpleTextReader.trimmingReader(is, "utf-8").asString().trim();
    // Blank entries (e.g. from a trailing comma) are skipped and surrounding spaces removed.
    for (String langStr : Splitter.on(",").omitEmptyStrings().trimResults().split(langLine)) {
      languages.add(langStr);
    }
  }
  if (languages.isEmpty()) {
    throw new IllegalArgumentException("No language is provided!");
  }
  Map<String, CharNgramLanguageModel> map = Maps.newHashMap();
  for (String language : languages) {
    String resourceName = "/models/" + groupId + "/" + language + ".clm";
    // Close every stream as soon as its model is loaded so that resource handles do not leak.
    // No null check is needed: openStream() either returns a stream or throws.
    // NOTE(review): a missing resource is presumably rejected by Resources.getResource itself
    // (Guava throws IllegalArgumentException) - confirm against the imported Resources class.
    // NOTE(review): assumes CompressedCharNgramModel.load fully consumes the stream before
    // returning, because the model is used after the stream has been closed.
    try (InputStream is =
        Resources.getResource(LanguageIdentifier.class, resourceName).openStream()) {
      map.put(language, CompressedCharNgramModel.load(is));
    }
  }
  return new LanguageIdentifier(map);
}
Aggregations