Use of com.optimaize.langdetect.profiles.LanguageProfile in project languagetool by languagetool-org.
Class LanguageDetectionTrainer, method main.
/**
 * Command-line entry point: builds a language profile from a plain text file and
 * writes it to the current working directory.
 *
 * <p>Exactly three arguments are expected. With any other argument count the usage
 * line is printed and the JVM exits with status 1.
 *
 * @param args {@code <languageCode> <plainTextFile> <minimalFrequency>}
 * @throws IOException if an I/O error occurs while reading the input file
 *         (or is raised by the profile writer)
 * @throws NumberFormatException if {@code <minimalFrequency>} is not a valid integer
 */
public static void main(String[] args) throws IOException {
  if (args.length != 3) {
    System.out.println("Usage: " + LanguageDetectionTrainer.class.getName() + " <languageCode> <plainTextFile> <minimalFrequency>");
    System.exit(1);
  }
  String langCode = args[0];
  String fileName = args[1];
  int minimalFrequency = Integer.parseInt(args[2]);

  String text;
  // IOUtils.toString(Reader) does not close the reader it is given, so the file
  // handle has to be released here.
  // NOTE(review): FileReader(String) decodes with the platform default charset on
  // JDKs before 18 - confirm whether training files should be read as UTF-8.
  try (FileReader reader = new FileReader(fileName)) {
    text = IOUtils.toString(reader);
  }

  TextObjectFactory textObjectFactory = CommonTextObjectFactories.forIndexingCleanText();
  TextObject inputText = textObjectFactory.create().append(text);
  LanguageProfile languageProfile = new LanguageProfileBuilder(langCode)
      .ngramExtractor(NgramExtractors.standard())
      .minimalFrequency(minimalFrequency)
      .addText(inputText)
      .build();

  // The profile is written into the current working directory.
  File outputDir = new File(System.getProperty("user.dir"));
  new LanguageProfileWriter().writeToDirectory(languageProfile, outputDir);
  System.out.println("Language profile written to " + new File(outputDir, langCode).getAbsolutePath());
}
Use of com.optimaize.langdetect.profiles.LanguageProfile in project languagetool by languagetool-org.
Class LanguageIdentifier, method loadProfiles.
/**
 * Reads the profiles for the given language codes and appends one profile for every
 * entry of {@code externalLangCodes} whose profile resource exists.
 *
 * <p>The resource path of such a profile is {@code /<code>/<code>.profile}; codes
 * without a matching resource are skipped.
 *
 * @param langCodes language codes handed to {@link LanguageProfileReader#read}
 * @return the profiles read for {@code langCodes} followed by the external profiles found
 * @throws IOException if reading a profile fails
 */
private List<LanguageProfile> loadProfiles(List<String> langCodes) throws IOException {
  List<LanguageProfile> result = new LanguageProfileReader().read(langCodes);
  for (String code : externalLangCodes) {
    String resourcePath = "/" + code + "/" + code + ".profile";
    if (!JLanguageTool.getDataBroker().resourceExists(resourcePath)) {
      // not all languages are always available
      continue;
    }
    try (InputStream in = JLanguageTool.getDataBroker().getFromResourceDirAsStream(resourcePath)) {
      result.add(new LanguageProfileReader().read(in));
    }
  }
  return result;
}
Use of com.optimaize.langdetect.profiles.LanguageProfile in project tika by apache.
Class OptimaizeLangDetector, method loadModels.
/**
 * Loads all built-in language profiles, records a language name for each of them in
 * {@code languages}, and creates the detector from those profiles.
 *
 * @return this detector, to allow call chaining
 * @throws IOException if the built-in profiles cannot be read
 */
@Override
public LanguageDetector loadModels() throws IOException {
  LanguageProfileReader reader = new LanguageProfileReader();
  List<LanguageProfile> builtInProfiles = reader.readAllBuiltIn();

  // FUTURE: once the "language-detector" project supports short profiles, check
  // whether isShortText() returns true and switch to those.

  // Start from a fresh set so that the names reflect only the profiles just read.
  languages = new HashSet<>();
  for (LanguageProfile builtIn : builtInProfiles) {
    languages.add(makeLanguageName(builtIn.getLocale()));
  }

  detector = createDetector(builtInProfiles);
  return this;
}
Aggregations