Search in sources:

Example 46 with Language

use of org.languagetool.Language in project languagetool by languagetool-org.

the class LanguageDetectionEval method getShortestCorrectDetection.

/**
 * Probes progressively shorter token prefixes of {@code line} with the language
 * identifier, starting with the complete line, and reports where detection stops
 * agreeing with {@code expectedLanguage}.
 *
 * <p>Each call increments {@code totalInputs}. Prefixes for which no language is
 * detected are skipped, except for the complete line, where that is an error.
 *
 * @param line the input text; it is split on runs of whitespace
 * @param expectedLanguage the language whose short code the detection must match
 * @return the value of {@code getTextLength(tokens, i + 1)} for the first prefix
 *         length {@code i} that is detected as a different language, or the
 *         length of the first token if no prefix is detected as a different language
 * @throws DetectionException if the complete line is detected as no language or
 *         as a language other than the expected one
 */
private int getShortestCorrectDetection(String line, Language expectedLanguage) {
    totalInputs++;
    String[] tokens = line.split("\\s+");
    int prefixSize = tokens.length;
    while (prefixSize > 0) {
        String prefixText = String.join(" ", Arrays.asList(tokens).subList(0, prefixSize));
        Language detected = languageIdentifier.detectLanguage(prefixText);
        String detectedLang = detected == null ? null : detected.getShortCode();
        boolean wholeLine = prefixSize == tokens.length;
        if (detectedLang == null) {
            // An undetected shorter prefix is tolerated; only the full line must be detectable.
            if (wholeLine) {
                throw new DetectionException("Detection failed for '" + line + "', detected <null>");
            }
        } else if (!expectedLanguage.getShortCode().equals(detectedLang)) {
            if (wholeLine) {
                throw new DetectionException("Detection failed for '" + line + "', detected " + detectedLang);
            }
            // This prefix is too short; the previous (one token longer) prefix was still fine.
            return getTextLength(tokens, prefixSize + 1);
        }
        prefixSize--;
    }
    return tokens[0].length();
}
Also used : Language(org.languagetool.Language)

Example 47 with Language

use of org.languagetool.Language in project languagetool by languagetool-org.

the class LanguageDetectionEval method main.

/**
 * Runs the evaluation once for every language returned by {@code Languages.get()},
 * then prints the elapsed wall-clock time and the failure count to standard output.
 *
 * @param args command-line arguments; not used
 * @throws IOException declared by this method; presumably propagated from the evaluation
 */
public static void main(String[] args) throws IOException {
    LanguageDetectionEval evaluator = new LanguageDetectionEval();
    long begin = System.currentTimeMillis();
    for (Language lang : Languages.get()) {
        evaluator.evaluate(lang);
    }
    long elapsedMillis = System.currentTimeMillis() - begin;
    System.out.println();
    System.out.println("Time: " + elapsedMillis + "ms");
    System.out.println("Total detection failures: " + evaluator.totalFailures + "/" + evaluator.totalInputs);
}
Also used : Language(org.languagetool.Language)

Example 48 with Language

use of org.languagetool.Language in project languagetool by languagetool-org.

the class PatternRuleMatcher method concatMatches.

/**
 * Builds the final suggestion strings for one match element, handling both
 * single tokens and multi-token phrases (including inflection using synthesis).
 *
 * @param start Position of the element as referenced by match element in the rule;
 *        used as the index into {@code suggestionMatches}.
 * @param index The index of the element found in the matching sentence; its
 *        phrase length decides between the single-token and the phrase path.
 * @param tokenIndex The position of the token in the AnalyzedTokenReadings array.
 * @param tokens Array of AnalyzedTokenReadings
 * @param nextTokenPos Position from which the number of skipped tokens is derived
 *        ({@code nextTokenPos} minus the current token position).
 * @param suggestionMatches The matches from which the match states are created.
 * @return Array of concatenated strings
 * @throws IOException declared by this method; presumably propagated from the called helpers
 */
private String[] concatMatches(int start, int index, int tokenIndex, AnalyzedTokenReadings[] tokens, int nextTokenPos, List<Match> suggestionMatches) throws IOException {
    int phraseLength = phraseLen(index);
    Language lang = rule.language;
    if (phraseLength != 1) {
        // Phrase: resolve every token of the phrase separately, then combine the alternatives.
        List<String[]> perToken = new ArrayList<>();
        for (int offset = 0; offset < phraseLength; offset++) {
            int skipped = nextTokenPos - (tokenIndex + offset);
            MatchState state = suggestionMatches.get(start).createState(lang.getSynthesizer(), tokens, tokenIndex - 1 + offset, skipped);
            perToken.add(state.toFinalString(lang));
        }
        int count = perToken.size();
        return combineLists(perToken.toArray(new String[count][]), new String[count], 0, lang);
    }
    Match match = suggestionMatches.get(start);
    MatchState state = match.createState(lang.getSynthesizer(), tokens, tokenIndex - 1, nextTokenPos - tokenIndex);
    String[] result = state.toFinalString(lang);
    boolean onlyEmptyString = result.length == 1 && "".equals(result[0]);
    if (match.checksSpelling() && onlyEmptyString) {
        // A spell-checked match that produced only an empty string is replaced by the MISTAKE marker.
        result = new String[] { MISTAKE };
    }
    return result;
}
Also used : Language(org.languagetool.Language) ArrayList(java.util.ArrayList)

Example 49 with Language

use of org.languagetool.Language in project languagetool by languagetool-org.

the class LanguageBuilder method makeLanguage.

/**
 * Takes an XML file named {@code rules-xx-language.xml},
 * e.g. {@code rules-de-German.xml} and builds
 * a Language object for that language.
 *
 * <p>The file name must consist of exactly three {@code -}-separated parts: the
 * literal {@code rules}, a language code as long as {@code en}, {@code ast} or
 * {@code en_US}, and the language name followed by {@code .xml}. If the code is
 * already supported, a fresh instance of the existing language is wrapped in an
 * {@code ExtendedLanguage}; otherwise a minimal anonymous {@code Language} that
 * only knows about the given rule file is returned.
 *
 * @param file the rule file; must not be {@code null}
 * @param isAdditional value reported by {@code isExternal()} of a newly created language
 * @return the language built from the file name
 * @throws RuleFilenameException if the file name does not follow the expected pattern
 * @throws IllegalAccessException if the existing language class cannot be instantiated
 * @throws InstantiationException if the existing language class cannot be instantiated
 */
private static Language makeLanguage(File file, boolean isAdditional) throws IllegalAccessException, InstantiationException {
    Objects.requireNonNull(file, "file cannot be null");
    if (!file.getName().endsWith(".xml")) {
        throw new RuleFilenameException(file);
    }
    String[] parts = file.getName().split("-");
    boolean startsWithRules = parts[0].equals("rules");
    boolean secondPartHasCorrectLength = parts.length == 3 && (parts[1].length() == "en".length() || parts[1].length() == "ast".length() || parts[1].length() == "en_US".length());
    if (!startsWithRules || !secondPartHasCorrectLength) {
        throw new RuleFilenameException(file);
    }
    //TODO: when the XML file is mergeable with
    // other rules (check this in the XML Rule Loader by using rules[@integrate='add']?),
    // subclass the existing language,
    //and adjust the settings if any are set in the rule file default configuration set
    Language newLanguage;
    if (Languages.isLanguageSupported(parts[1])) {
        Language baseLanguage = Languages.getLanguageForShortCode(parts[1]).getClass().newInstance();
        newLanguage = new ExtendedLanguage(baseLanguage, parts[2].replace(".xml", ""), file);
    } else {
        newLanguage = new Language() {

            @Override
            public Locale getLocale() {
                return new Locale(getShortCode());
            }

            @Override
            public Contributor[] getMaintainers() {
                return null;
            }

            @Override
            public String getShortCode() {
                if (parts[1].length() == 2) {
                    return parts[1];
                }
                //en as in en_US
                return parts[1].split("_")[0];
            }

            @Override
            public String[] getCountries() {
                if (parts[1].length() == 2) {
                    return new String[] { "" };
                }
                String[] codeParts = parts[1].split("_");
                if (codeParts.length < 2) {
                    // Codes without a country part (e.g. the three-letter "ast") pass the
                    // file name check above; report "no country" instead of indexing past
                    // the end of the split result.
                    return new String[] { "" };
                }
                //US as in en_US
                return new String[] { codeParts[1] };
            }

            @Override
            public String getName() {
                return parts[2].replace(".xml", "");
            }

            @Override
            public List<Rule> getRelevantRules(ResourceBundle messages) {
                return Collections.emptyList();
            }

            @Override
            public List<String> getRuleFileNames() {
                List<String> ruleFiles = new ArrayList<>();
                ruleFiles.add(file.getAbsolutePath());
                return ruleFiles;
            }

            @Override
            public boolean isExternal() {
                return isAdditional;
            }
        };
    }
    return newLanguage;
}
Also used : Language(org.languagetool.Language)

Example 50 with Language

use of org.languagetool.Language in project languagetool by languagetool-org.

the class LanguageIdentifier method getLanguageCodes.

/**
 * Collects the short codes of all languages returned by {@code Languages.get()},
 * leaving out variants and any code contained in {@code ignoreLangCodes} or
 * {@code externalLangCodes}.
 *
 * <p>The code {@code zh} is not added itself; {@code zh-CN} and {@code zh-TW}
 * are added in its place.
 *
 * @return the collected language codes, in iteration order
 */
private static List<String> getLanguageCodes() {
    List<String> codes = new ArrayList<>();
    for (Language language : Languages.get()) {
        String code = language.getShortCode();
        if (language.isVariant() || ignoreLangCodes.contains(code) || externalLangCodes.contains(code)) {
            continue;
        }
        if (!"zh".equals(code)) {
            codes.add(code);
            continue;
        }
        // "zh" is represented by its two variants instead of the bare code.
        codes.add("zh-CN");
        codes.add("zh-TW");
    }
    return codes;
}
Also used : Language(org.languagetool.Language) ArrayList(java.util.ArrayList)

Aggregations

Language (org.languagetool.Language) 84, Test (org.junit.Test) 23, File (java.io.File) 15, ArrayList (java.util.ArrayList) 12, JLanguageTool (org.languagetool.JLanguageTool) 11, Rule (org.languagetool.rules.Rule) 11, RuleMatch (org.languagetool.rules.RuleMatch) 10, IOException (java.io.IOException) 7, Ignore (org.junit.Ignore) 6, StringTools.readerToString (org.languagetool.tools.StringTools.readerToString) 5, InputStream (java.io.InputStream) 4, English (org.languagetool.language.English) 4, BitextRule (org.languagetool.rules.bitext.BitextRule) 4, URL (java.net.URL) 3, HashSet (java.util.HashSet) 3, MultiThreadedJLanguageTool (org.languagetool.MultiThreadedJLanguageTool) 3, AmericanEnglish (org.languagetool.language.AmericanEnglish) 3, LanguageModel (org.languagetool.languagemodel.LanguageModel) 3, LuceneLanguageModel (org.languagetool.languagemodel.LuceneLanguageModel) 3, BufferedReader (java.io.BufferedReader) 2