use of org.languagetool.Language in project languagetool by languagetool-org.
the class AfterTheDeadlineChecker method main.
/**
 * Command-line entry point. Expects at least four arguments, in this order:
 * a language code, the URL prefix of the After the Deadline server, the
 * maximum number of sentences to check, and one or more input files.
 * Prints a usage message and exits with status 1 if fewer arguments are given.
 *
 * @param args the command-line arguments as described above
 * @throws Exception if running the checker fails; a non-numeric sentence
 *         limit surfaces as a {@link NumberFormatException}
 */
public static void main(String[] args) throws Exception {
  if (args.length < 4) {
    // The synopsis must list <sentenceLimit>: it is read from args[2] below and
    // is the reason at least four arguments are required.
    System.out.println("Usage: " + AfterTheDeadlineChecker.class.getSimpleName() + " <langCode> <atdUrlPrefix> <sentenceLimit> <file...>");
    System.out.println(" <langCode> a language code like 'en' for English");
    System.out.println(" <atdUrlPrefix> URL prefix of After the Deadline server, like 'http://localhost:1059/checkDocument?data='");
    System.out.println(" <sentenceLimit> Maximum number of sentences to check, or 0 for no limit");
    System.out.println(" <file...> Wikipedia and/or Tatoeba file(s)");
    System.exit(1);
  }
  Language language = Languages.getLanguageForShortCode(args[0]);
  String urlPrefix = args[1];
  int maxSentenceCount = Integer.parseInt(args[2]);
  // Everything after the first three arguments is treated as an input file.
  List<String> files = Arrays.asList(args).subList(3, args.length);
  AfterTheDeadlineChecker atdChecker = new AfterTheDeadlineChecker(urlPrefix, maxSentenceCount);
  atdChecker.run(language, files);
}
use of org.languagetool.Language in project languagetool by languagetool-org.
the class Indexer method run.
/**
 * Reads the given text file and feeds it to an {@code Indexer} that writes to
 * an {@code FSDirectory} opened at {@code indexDir}. If the text file is
 * missing or unreadable, a message is printed and the JVM exits with status 1.
 *
 * @param textFile path of the text file to index
 * @param indexDir path of the directory the index is written to
 * @param languageCode short code used to look up the language for the indexer
 * @throws IOException if reading the file or writing the index fails
 */
private static void run(String textFile, String indexDir, String languageCode) throws IOException {
  File input = new File(textFile);
  boolean usable = input.exists() && input.canRead();
  if (!usable) {
    System.out.println("Text file '" + input.getAbsolutePath() + "' does not exist or is not readable, please check the path");
    System.exit(1);
  }
  // Resources are opened in the order reader -> directory -> indexer and are
  // closed in the reverse order by the nested try-with-resources statements.
  try (BufferedReader textReader = new BufferedReader(new FileReader(input))) {
    System.out.println("Indexing to directory '" + indexDir + "'...");
    try (FSDirectory indexDirectory = FSDirectory.open(new File(indexDir).toPath())) {
      Language lang = Languages.getLanguageForShortCode(languageCode);
      try (Indexer textIndexer = new Indexer(indexDirectory, lang)) {
        textIndexer.indexText(textReader);
      }
    }
  }
  System.out.println("Index complete!");
}
use of org.languagetool.Language in project languagetool by languagetool-org.
the class LanguageDetectionEval method getShortestCorrectDetection.
/**
 * Determines how short a prefix of {@code line} can get before language
 * detection stops returning the expected language. Prefixes are built from
 * whitespace-separated tokens, starting with the full line and dropping one
 * trailing token per iteration. Increments {@code totalInputs} on every call.
 *
 * @param line the input text
 * @param expectedLanguage the language whose short code detection should return
 * @return the value of {@code getTextLength(tokens, i + 1)} for the first
 *         prefix of {@code i} tokens that is detected as a different language,
 *         or the length of the first token if no prefix was mis-detected
 * @throws DetectionException if the full line yields no language or a
 *         language other than the expected one
 */
private int getShortestCorrectDetection(String line, Language expectedLanguage) {
  totalInputs++;
  String[] words = line.split("\\s+");
  for (int count = words.length; count > 0; count--) {
    boolean isFullLine = count == words.length;
    String prefix = String.join(" ", Arrays.copyOfRange(words, 0, count));
    Language detected = languageIdentifier.detectLanguage(prefix);
    String detectedCode = detected == null ? null : detected.getShortCode();
    if (detectedCode == null) {
      // No result is only an error for the complete line; shorter prefixes
      // without a result are simply skipped.
      if (isFullLine) {
        throw new DetectionException("Detection failed for '" + line + "', detected <null>");
      }
      continue;
    }
    if (expectedLanguage.getShortCode().equals(detectedCode)) {
      continue;
    }
    if (isFullLine) {
      throw new DetectionException("Detection failed for '" + line + "', detected " + detectedCode);
    }
    // This prefix is the first wrong one, so the previous (one token longer)
    // prefix is the one whose length is reported.
    return getTextLength(words, count + 1);
  }
  return words[0].length();
}
use of org.languagetool.Language in project languagetool by languagetool-org.
the class LanguageDetectionEval method main.
/**
 * Runs the evaluation for every language returned by {@code Languages.get()},
 * then prints the elapsed wall-clock time and the failure count.
 *
 * @param args ignored
 * @throws IOException if evaluating a language fails with an I/O error
 */
public static void main(String[] args) throws IOException {
  LanguageDetectionEval evaluation = new LanguageDetectionEval();
  long begin = System.currentTimeMillis();
  for (Language lang : Languages.get()) {
    evaluation.evaluate(lang);
  }
  long elapsedMillis = System.currentTimeMillis() - begin;
  System.out.println();
  System.out.println("Time: " + elapsedMillis + "ms");
  System.out.println("Total detection failures: " + evaluation.totalFailures + "/" + evaluation.totalInputs);
}
use of org.languagetool.Language in project languagetool by languagetool-org.
the class PatternRuleMatcher method concatMatches.
/**
 * Builds the final strings for one suggestion match. If the phrase length
 * reported by {@code phraseLen(index)} is 1, a single match state is created
 * and converted; otherwise one match state per phrase position is created and
 * the per-position results are merged via {@code combineLists}.
 *
 * @param start position of the element as referenced by the match element in
 *        the rule; used as index into {@code suggestionMatches}
 * @param index the index of the element found in the matching sentence
 * @param tokenIndex the position of the token in the {@code tokens} array
 * @param tokens array of AnalyzedTokenReadings
 * @param nextTokenPos position used to compute the number of skipped tokens
 *        ({@code nextTokenPos - tokenIndex}, reduced by the phrase offset)
 * @param suggestionMatches the matches of the suggestion; only the entry at
 *        {@code start} is used
 * @return array of concatenated strings
 * @throws IOException if creating or converting a match state fails
 */
private String[] concatMatches(int start, int index, int tokenIndex, AnalyzedTokenReadings[] tokens, int nextTokenPos, List<Match> suggestionMatches) throws IOException {
  int phraseLength = phraseLen(index);
  Language lang = rule.language;
  if (phraseLength != 1) {
    // Phrase case: collect the final strings of every phrase position, then combine them.
    List<String[]> perPosition = new ArrayList<>();
    for (int offset = 0; offset < phraseLength; offset++) {
      int skipped = nextTokenPos - (tokenIndex + offset);
      MatchState positionState = suggestionMatches.get(start).createState(lang.getSynthesizer(), tokens, tokenIndex - 1 + offset, skipped);
      perPosition.add(positionState.toFinalString(lang));
    }
    String[][] collected = perPosition.toArray(new String[perPosition.size()][]);
    return combineLists(collected, new String[perPosition.size()], 0, lang);
  }
  // Single-token case.
  Match match = suggestionMatches.get(start);
  MatchState singleState = match.createState(lang.getSynthesizer(), tokens, tokenIndex - 1, nextTokenPos - tokenIndex);
  String[] result = singleState.toFinalString(lang);
  // A spell-checked match that produced only an empty string is replaced by the MISTAKE marker.
  if (match.checksSpelling() && result.length == 1 && "".equals(result[0])) {
    return new String[] { MISTAKE };
  }
  return result;
}
Aggregations