Search in sources :

Example 6 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class WordListValidatorTest method testWordListValidity.

/**
 * Validates the spelling word list of every active {@link SpellingCheckRule}
 * across all languages returned by {@code Languages.get()}.
 *
 * <p>Each word list file is validated at most once, even when several rules or
 * languages reference the same file. Files that the data broker reports as
 * missing are skipped.
 *
 * @throws IOException declared for the word list loading and validation calls
 */
@Test
public void testWordListValidity() throws IOException {
    Set<String> validatedFiles = new HashSet<>();
    for (Language language : Languages.get()) {
        boolean isRussian = language.getShortCode().equals("ru");
        if (isRussian) {
            // skipping, Cyrillic chars not part of the validation yet
            continue;
        }
        JLanguageTool tool = new JLanguageTool(language);
        for (Rule candidate : tool.getAllActiveRules()) {
            if (!(candidate instanceof SpellingCheckRule)) {
                continue;
            }
            String wordListPath = ((SpellingCheckRule) candidate).getSpellingFileName();
            boolean available = JLanguageTool.getDataBroker().resourceExists(wordListPath);
            if (!available || validatedFiles.contains(wordListPath)) {
                continue;
            }
            System.out.println("Checking " + wordListPath);
            List<String> entries = new CachingWordListLoader().loadWords(wordListPath);
            validateWords(entries, wordListPath);
            // Remember the file only after it validated, so it is not checked again.
            validatedFiles.add(wordListPath);
        }
    }
}
Also used : Language(org.languagetool.Language) SpellingCheckRule(org.languagetool.rules.spelling.SpellingCheckRule) JLanguageTool(org.languagetool.JLanguageTool) SpellingCheckRule(org.languagetool.rules.spelling.SpellingCheckRule) CachingWordListLoader(org.languagetool.rules.spelling.CachingWordListLoader) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 7 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class DisambiguationRuleTest method testDisambiguationRulesFromXML.

/**
 * Runs the XML disambiguation rule tests for every language returned by
 * {@code Languages.getWithDemoLanguage()}.
 *
 * <p>A language is skipped when it is contained in {@code ignoredLanguages},
 * when it is a variant, or when its disambiguator is a {@link DemoDisambiguator}.
 * For the remaining languages the {@code disambiguation.xml} file is validated,
 * loaded, checked for suspicious regular expressions and then tested.
 *
 * @param ignoredLanguages languages to leave out; may be {@code null} to test all
 * @throws IOException declared for loading the rule file and closing its stream
 * @throws ParserConfigurationException declared for the XML rule loading calls
 * @throws SAXException declared for the XML rule loading calls
 */
private void testDisambiguationRulesFromXML(Set<Language> ignoredLanguages) throws IOException, ParserConfigurationException, SAXException {
    for (Language lang : Languages.getWithDemoLanguage()) {
        if (ignoredLanguages != null && ignoredLanguages.contains(lang)) {
            continue;
        }
        if (lang.isVariant()) {
            System.out.println("Skipping variant: " + lang);
            continue;
        }
        System.out.println("Running disambiguation tests for " + lang.getName() + "...");
        DisambiguationRuleLoader ruleLoader = new DisambiguationRuleLoader();
        JLanguageTool languageTool = new JLanguageTool(lang);
        if (!(languageTool.getLanguage().getDisambiguator() instanceof DemoDisambiguator)) {
            long startTime = System.currentTimeMillis();
            String name = JLanguageTool.getDataBroker().getResourceDir() + "/" + lang.getShortCode() + "/disambiguation.xml";
            validateRuleFile(name);
            List<DisambiguationPatternRule> rules;
            // Close the resource stream once the rules are loaded; previously it was
            // left open for every language. A null stream is tolerated by
            // try-with-resources, so a missing resource is still reported by getRules.
            try (java.io.InputStream ruleStream = ruleLoader.getClass().getResourceAsStream(name)) {
                rules = ruleLoader.getRules(ruleStream);
            }
            for (DisambiguationPatternRule rule : rules) {
                PatternTestTools.warnIfRegexpSyntaxNotKosher(rule.getPatternTokens(), rule.getId(), rule.getSubId(), lang);
            }
            testDisambiguationRulesFromXML(rules, languageTool, lang);
            long endTime = System.currentTimeMillis();
            System.out.println(rules.size() + " rules tested (" + (endTime - startTime) + "ms)");
        }
    }
}
Also used : DemoDisambiguator(org.languagetool.tagging.disambiguation.xx.DemoDisambiguator) Language(org.languagetool.Language) JLanguageTool(org.languagetool.JLanguageTool)

Example 8 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class ExampleSentenceCorrectionCreator method run.

/**
 * Reads the {@code grammar.xml} of the given language, runs
 * {@code checkCorrections} for every incorrect example of every
 * {@link PatternRule}, then prints the number of added corrections to
 * {@code System.err} and the XML lines to {@code System.out}.
 *
 * @param lang the language whose grammar file is processed
 * @throws IOException if the grammar file cannot be read
 * @throws RuntimeException if the hard-coded base path does not exist
 */
private void run(Language lang) throws IOException {
    File basePath = new File("/lt/git/languagetool/languagetool-language-modules");
    if (!basePath.exists()) {
        throw new RuntimeException("basePath does not exist: " + basePath);
    }
    String langCode = lang.getShortCode();
    File xml = new File(basePath, "/" + langCode + "/src/main/resources/org/languagetool/rules/" + langCode + "/grammar.xml");
    List<String> xmlLines;
    // Close the reader as soon as the lines are in memory; it used to be leaked.
    try (FileReader xmlReader = new FileReader(xml)) {
        xmlLines = IOUtils.readLines(xmlReader);
    }
    JLanguageTool tool = new JLanguageTool(lang);
    for (Rule rule : tool.getAllRules()) {
        if (!(rule instanceof PatternRule)) {
            continue;
        }
        List<IncorrectExample> incorrectExamples = rule.getIncorrectExamples();
        for (IncorrectExample incorrectExample : incorrectExamples) {
            checkCorrections(rule, incorrectExample, xmlLines, tool);
        }
    }
    System.err.println("Added corrections: " + addedCorrectionsCount);
    for (String xmlLine : xmlLines) {
        System.out.println(xmlLine);
    }
}
Also used : PatternRule(org.languagetool.rules.patterns.PatternRule) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule) JLanguageTool(org.languagetool.JLanguageTool) FileReader(java.io.FileReader) Rule(org.languagetool.rules.Rule) PatternRule(org.languagetool.rules.patterns.PatternRule) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule) IncorrectExample(org.languagetool.rules.IncorrectExample) File(java.io.File)

Example 9 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class ExampleSentencePrinter method run.

/**
 * Prints a minimal HTML page to {@code System.out} that lists, for every rule
 * of the given language having at least one incorrect example, the first such
 * example together with the rule id. {@code <marker>} tags in the example are
 * rendered as {@code <b>} tags and the entries are numbered starting at 1.
 *
 * @param lang the language whose rules are listed
 * @throws IOException declared by the method signature
 * @throws RuntimeException if the hard-coded base path does not exist
 */
private void run(Language lang) throws IOException {
    File modulesDir = new File("/lt/git/languagetool/languagetool-language-modules");
    if (!modulesDir.exists()) {
        throw new RuntimeException("basePath does not exist: " + modulesDir);
    }
    JLanguageTool languageTool = new JLanguageTool(lang);
    String[] pageHeader = {
        "<html>",
        "<head>",
        "  <title>LanguageTool examples sentences</title>",
        "  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />",
        "</head>",
        "<body>"
    };
    for (String headerLine : pageHeader) {
        System.out.println(headerLine);
    }
    int entryNumber = 0;
    for (Rule current : languageTool.getAllRules()) {
        List<IncorrectExample> examples = current.getIncorrectExamples();
        if (examples.isEmpty()) {
            continue;
        }
        entryNumber++;
        String rawExample = examples.get(0).getExample();
        String boldExample = rawExample.replace("<marker>", "<b>").replace("</marker>", "</b>");
        System.out.println(entryNumber + ". " + boldExample + " [" + current.getId() + "]<br>");
    }
    System.out.println("</body>");
    System.out.println("</html>");
}
Also used : JLanguageTool(org.languagetool.JLanguageTool) Rule(org.languagetool.rules.Rule) IncorrectExample(org.languagetool.rules.IncorrectExample) File(java.io.File)

Example 10 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class LongSentenceRuleTest method testMatch.

/**
 * Checks {@link LongSentenceRule} with its default configuration and with an
 * explicit limit of 6 passed to the constructor.
 *
 * <p>As the assertions below express, with the limit of 6 a six-word sentence
 * is expected not to match while a seven-word sentence is expected to match.
 */
@Test
public void testMatch() throws Exception {
    JLanguageTool lt = new JLanguageTool(TestTools.getDemoLanguage());

    // Rule created without an explicit limit.
    LongSentenceRule defaultRule = new LongSentenceRule(TestTools.getEnglishMessages());
    String veryLongSentence = "Now this is not "
            + "a a a a a a a a a a a "
            + "a a a a a a a a a a a "
            + "a a a a a a a a a a a "
            + "rather that short text.";
    assertNoMatch(" is a rather short text.", defaultRule, lt);
    assertMatch(veryLongSentence, defaultRule, lt);

    // Rule created with an explicit limit of 6.
    LongSentenceRule sixWordRule = new LongSentenceRule(TestTools.getEnglishMessages(), 6);
    assertNoMatch("This is a rather short text.", sixWordRule, lt);
    assertMatch("This is also a rather short text.", sixWordRule, lt);
    assertNoMatch("These ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ don't count.", sixWordRule, lt);
    assertNoMatch("one two three four five six.", sixWordRule, lt);
    assertNoMatch("one two three (four) five six.", sixWordRule, lt);
    assertMatch("one two three four five six seven.", sixWordRule, lt);
}
Also used : JLanguageTool(org.languagetool.JLanguageTool) Test(org.junit.Test)

Aggregations

JLanguageTool (org.languagetool.JLanguageTool): 184, Test (org.junit.Test): 109, RuleMatch (org.languagetool.rules.RuleMatch): 57, Before (org.junit.Before): 38, German (org.languagetool.language.German): 16, Rule (org.languagetool.rules.Rule): 16, Catalan (org.languagetool.language.Catalan): 14, Ukrainian (org.languagetool.language.Ukrainian): 14, English (org.languagetool.language.English): 13, Polish (org.languagetool.language.Polish): 12, Language (org.languagetool.Language): 10, GermanyGerman (org.languagetool.language.GermanyGerman): 9, PatternRule (org.languagetool.rules.patterns.PatternRule): 9, AnalyzedSentence (org.languagetool.AnalyzedSentence): 8, File (java.io.File): 7, AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 6, Dutch (org.languagetool.language.Dutch): 5, French (org.languagetool.language.French): 5, ArrayList (java.util.ArrayList): 4, FakeLanguage (org.languagetool.FakeLanguage): 4