Search in sources:

Example 56 with Language

use of org.languagetool.Language in project languagetool by languagetool-org.

The method main of the class AfterTheDeadlineChecker.

/**
 * Command-line entry point. Expects at least four arguments: a language code,
 * the URL prefix of an After the Deadline server, a sentence limit, and one or
 * more input files. With fewer arguments, prints usage help and exits with
 * status 1.
 *
 * @param args {@code <langCode> <atdUrlPrefix> <sentenceLimit> <file...>}
 * @throws Exception if argument parsing or the checker run fails
 */
public static void main(String[] args) throws Exception {
    if (args.length < 4) {
        // The synopsis must list all four positional arguments, matching the parsing below.
        System.out.println("Usage: " + AfterTheDeadlineChecker.class.getSimpleName() + " <langCode> <atdUrlPrefix> <sentenceLimit> <file...>");
        System.out.println("   <langCode>      a language code like 'en' for English");
        System.out.println("   <atdUrlPrefix>  URL prefix of After the Deadline server, like 'http://localhost:1059/checkDocument?data='");
        System.out.println("   <sentenceLimit> Maximum number of sentences to check, or 0 for no limit");
        System.out.println("   <file...>       Wikipedia and/or Tatoeba file(s)");
        System.exit(1);
    }
    Language language = Languages.getLanguageForShortCode(args[0]);
    String urlPrefix = args[1];
    int maxSentenceCount = Integer.parseInt(args[2]);
    // Everything after the third argument is treated as an input file.
    List<String> files = Arrays.asList(args).subList(3, args.length);
    AfterTheDeadlineChecker atdChecker = new AfterTheDeadlineChecker(urlPrefix, maxSentenceCount);
    atdChecker.run(language, files);
}
Also used : Language(org.languagetool.Language)

Example 57 with Language

use of org.languagetool.Language in project languagetool by languagetool-org.

The method run of the class Indexer.

/**
 * Reads the given text file and indexes it into the given index directory,
 * using the language identified by the short code. Exits with status 1 if the
 * text file is missing or unreadable.
 *
 * @param textFile path of the text file to index
 * @param indexDir path of the directory the index is opened in
 * @param languageCode short language code passed to {@code Languages.getLanguageForShortCode}
 * @throws IOException if reading the text or accessing the index fails
 */
private static void run(String textFile, String indexDir, String languageCode) throws IOException {
    File source = new File(textFile);
    boolean readable = source.exists() && source.canRead();
    if (!readable) {
        System.out.println("Text file '" + source.getAbsolutePath() + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }
    try (BufferedReader textReader = new BufferedReader(new FileReader(source))) {
        System.out.println("Indexing to directory '" + indexDir + "'...");
        // Resources are opened left to right and closed in reverse: indexer first, then directory.
        try (FSDirectory indexDirectory = FSDirectory.open(new File(indexDir).toPath());
             Indexer indexer = new Indexer(indexDirectory, Languages.getLanguageForShortCode(languageCode))) {
            indexer.indexText(textReader);
        }
    }
    System.out.println("Index complete!");
}
Also used : Language(org.languagetool.Language) FSDirectory(org.apache.lucene.store.FSDirectory)

Example 58 with Language

use of org.languagetool.Language in project languagetool by languagetool-org.

The method getShortestCorrectDetection of the class LanguageDetectionEval.

/**
 * Shrinks the whitespace-separated line one token at a time from the end and
 * runs language detection on each prefix. When a shortened prefix is detected
 * as a different language than expected, returns {@code getTextLength(tokens, i + 1)}
 * for that prefix length {@code i}. If no prefix is mis-detected, returns the
 * length of the first token.
 *
 * @param line the input text
 * @param expectedLanguage the language the text should be detected as
 * @return the value described above
 * @throws DetectionException if the complete line is not detected, or is detected as another language
 */
private int getShortestCorrectDetection(String line, Language expectedLanguage) {
    totalInputs++;
    String[] words = line.split("\\s+");
    int wordCount = words.length;
    for (int prefixLen = wordCount; prefixLen > 0; prefixLen--) {
        boolean wholeLine = prefixLen == wordCount;
        String prefix = String.join(" ", Arrays.copyOfRange(words, 0, prefixLen));
        Language detected = languageIdentifier.detectLanguage(prefix);
        String detectedCode = detected == null ? null : detected.getShortCode();
        if (detectedCode == null) {
            // A missing result only counts as a failure for the complete line.
            if (wholeLine) {
                throw new DetectionException("Detection failed for '" + line + "', detected <null>");
            }
            continue;
        }
        if (expectedLanguage.getShortCode().equals(detectedCode)) {
            continue;
        }
        if (wholeLine) {
            throw new DetectionException("Detection failed for '" + line + "', detected " + detectedCode);
        }
        // This prefix was mis-detected; report the length for one token more.
        return getTextLength(words, prefixLen + 1);
    }
    return words[0].length();
}
Also used : Language(org.languagetool.Language)

Example 59 with Language

use of org.languagetool.Language in project languagetool by languagetool-org.

The method main of the class LanguageDetectionEval.

/**
 * Evaluates every language returned by {@code Languages.get()} and prints the
 * elapsed time and the failure totals to standard output.
 *
 * @param args not used
 * @throws IOException if an evaluation fails with an I/O error
 */
public static void main(String[] args) throws IOException {
    LanguageDetectionEval evaluator = new LanguageDetectionEval();
    long begin = System.currentTimeMillis();
    for (Language lang : Languages.get()) {
        evaluator.evaluate(lang);
    }
    long elapsedMillis = System.currentTimeMillis() - begin;
    System.out.println();
    System.out.println("Time: " + elapsedMillis + "ms");
    System.out.println("Total detection failures: " + evaluator.totalFailures + "/" + evaluator.totalInputs);
}
Also used : Language(org.languagetool.Language)

Example 60 with Language

use of org.languagetool.Language in project languagetool by languagetool-org.

The method concatMatches of the class PatternRuleMatcher.

/**
 * Concatenates the matches, and takes care of phrases (including inflection
 * using synthesis).
 *
 * @param start position of the element as referenced by the match element in the rule
 * @param index the index of the element found in the matching sentence
 * @param tokenIndex the position of the token in the AnalyzedTokenReadings array
 * @param tokens array of AnalyzedTokenReadings
 * @param nextTokenPos position used to compute the number of skipped tokens
 *        (presumably the position of the next matched token; confirm against callers)
 * @param suggestionMatches the matches; the one at {@code start} is used
 * @return array of concatenated strings
 * @throws IOException propagated from the calls made in this method
 */
private String[] concatMatches(int start, int index, int tokenIndex, AnalyzedTokenReadings[] tokens, int nextTokenPos, List<Match> suggestionMatches) throws IOException {
    int phraseLength = phraseLen(index);
    Language lang = rule.language;
    if (phraseLength != 1) {
        // Build one result per token offset, then combine them.
        List<String[]> perToken = new ArrayList<>();
        for (int offset = 0; offset < phraseLength; offset++) {
            int skipped = nextTokenPos - (tokenIndex + offset);
            MatchState state = suggestionMatches.get(start).createState(lang.getSynthesizer(), tokens, tokenIndex - 1 + offset, skipped);
            perToken.add(state.toFinalString(lang));
        }
        String[][] collected = perToken.toArray(new String[perToken.size()][]);
        return combineLists(collected, new String[perToken.size()], 0, lang);
    }
    Match match = suggestionMatches.get(start);
    int skipped = nextTokenPos - tokenIndex;
    MatchState state = match.createState(lang.getSynthesizer(), tokens, tokenIndex - 1, skipped);
    String[] result = state.toFinalString(lang);
    boolean singleEmptyResult = match.checksSpelling() && result.length == 1 && "".equals(result[0]);
    if (singleEmptyResult) {
        // A spell-checked match that produced a single empty string is replaced by the MISTAKE marker.
        return new String[] { MISTAKE };
    }
    return result;
}
Also used : Language(org.languagetool.Language) ArrayList(java.util.ArrayList)

Aggregations

Language (org.languagetool.Language): 84, Test (org.junit.Test): 23, File (java.io.File): 15, ArrayList (java.util.ArrayList): 12, JLanguageTool (org.languagetool.JLanguageTool): 11, Rule (org.languagetool.rules.Rule): 11, RuleMatch (org.languagetool.rules.RuleMatch): 10, IOException (java.io.IOException): 7, Ignore (org.junit.Ignore): 6, StringTools.readerToString (org.languagetool.tools.StringTools.readerToString): 5, InputStream (java.io.InputStream): 4, English (org.languagetool.language.English): 4, BitextRule (org.languagetool.rules.bitext.BitextRule): 4, URL (java.net.URL): 3, HashSet (java.util.HashSet): 3, MultiThreadedJLanguageTool (org.languagetool.MultiThreadedJLanguageTool): 3, AmericanEnglish (org.languagetool.language.AmericanEnglish): 3, LanguageModel (org.languagetool.languagemodel.LanguageModel): 3, LuceneLanguageModel (org.languagetool.languagemodel.LuceneLanguageModel): 3, BufferedReader (java.io.BufferedReader): 2