Search in sources :

Example 1 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class FalseFriendRuleTest method testHintsForGermanSpeakers.

/**
 * Checks the false-friend hints reported by a {@code JLanguageTool} that is
 * constructed with {@code English} and {@code German}.
 *
 * <p>Sentences expected to trigger a hint must yield exactly one match, and the
 * suggested replacements of that match are compared against a fixed list.
 * A sentence without a false friend must yield no match.
 *
 * @throws IOException                  propagated from the checking helpers
 * @throws ParserConfigurationException propagated from the checking helpers
 * @throws SAXException                 propagated from the checking helpers
 */
@Test
public void testHintsForGermanSpeakers() throws IOException, ParserConfigurationException, SAXException {
    JLanguageTool langTool = new JLanguageTool(new English(), new German());
    List<RuleMatch> matches = assertErrors(1, "We will berate you.", langTool);
    // JUnit's assertEquals takes (expected, actual); keeping that order makes
    // the failure message label the two values correctly.
    assertEquals("[provide advice, give advice]", matches.get(0).getSuggestedReplacements().toString());
    assertErrors(0, "We will give you advice.", langTool);
    assertErrors(1, "I go to high school in Foocity.", langTool);
    List<RuleMatch> matches2 = assertErrors(1, "The chef", langTool);
    assertEquals("[boss, chief]", matches2.get(0).getSuggestedReplacements().toString());
}
Also used : RuleMatch(org.languagetool.rules.RuleMatch) JLanguageTool(org.languagetool.JLanguageTool) Test(org.junit.Test)

Example 2 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class RuleNumberScalabilityTest method main.

/**
 * Command-line entry point that measures how check time changes as more rules
 * are activated.
 *
 * <p>After two warm-up checks and a baseline measurement, the text is checked
 * in five steps with a growing number of active rules. For every step the
 * number of active rules, the run time and the run time minus the baseline are
 * printed; from the second step on, the growth factors relative to the
 * previous step are printed as well.
 *
 * @param args exactly two values: a language short code and the path of a text
 *             file (read as UTF-8); otherwise a usage line is printed and the
 *             JVM exits with status 1
 * @throws IOException if the text file cannot be read or a check fails
 */
public static void main(String[] args) throws IOException {
    if (args.length != 2) {
        System.out.println("Usage: " + RuleNumberScalabilityTest.class.getSimpleName() + " <languageCode> <text_file>");
        System.exit(1);
    }
    JLanguageTool langTool = new JLanguageTool(Languages.getLanguageForShortCode(args[0]));
    String text;
    // Close the file handle deterministically. If readStream already closes
    // the stream itself, the second close is a harmless no-op.
    try (FileInputStream textStream = new FileInputStream(args[1])) {
        text = StringTools.readStream(textStream, "utf-8");
    }
    System.out.println("Warmup...");
    langTool.check(text);
    langTool.check(text);
    long baselineTime = getBaselineTime(langTool, text);
    System.out.println("Baseline: " + baselineTime + "ms (time with no pattern rules active)");
    int ruleNumber = langTool.getAllActiveRules().size();
    System.out.println("Total rules: " + ruleNumber);
    int steps = 5;
    // An explicit flag instead of a -1 sentinel: cleanRunTime is a difference
    // of two measurements and may legitimately be -1.
    boolean hasPreviousRun = false;
    int prevActiveRules = 0;
    long prevCleanRunTime = 0;
    for (int i = steps; i > 0; i--) {
        // i counts down from steps to 1, so the target grows from
        // ruleNumber / steps up to ruleNumber.
        int targetActiveRules = ruleNumber / i;
        deactivateAllRules(langTool);
        for (Rule rule : langTool.getAllRules()) {
            langTool.enableRule(rule.getId());
            if (langTool.getAllActiveRules().size() > targetActiveRules) {
                break;
            }
        }
        int activeRules = langTool.getAllActiveRules().size();
        long startTime = System.currentTimeMillis();
        langTool.check(text);
        long runTime = System.currentTimeMillis() - startTime;
        long cleanRunTime = runTime - baselineTime;
        if (hasPreviousRun) {
            float ruleFactor = (float) activeRules / prevActiveRules;
            float cleanRuntimeFactor = (float) cleanRunTime / prevCleanRunTime;
            System.out.println("Active rules: " + activeRules + ", runtime: " + runTime + "ms, cleanRunTime: " + cleanRunTime + ", ruleFactor: " + ruleFactor + ", cleanRuntimeFactor: " + cleanRuntimeFactor);
        } else {
            System.out.println("Active rules: " + activeRules + ", runtime: " + runTime + "ms, cleanRunTime: " + cleanRunTime);
        }
        prevActiveRules = activeRules;
        prevCleanRunTime = cleanRunTime;
        hasPreviousRun = true;
    }
    System.out.println("ruleFactor = the number of rules compared to the previous run");
    System.out.println("cleanRuntimeFactor = the runtime (without baseline) compared to the previous run");
    System.out.println(" => cleanRuntimeFactor should not grow much more than ruleFactor, otherwise we scale");
    System.out.println(" => badly with respect to the number of rules");
}
Also used : JLanguageTool(org.languagetool.JLanguageTool) Rule(org.languagetool.rules.Rule) FileInputStream(java.io.FileInputStream)

Example 3 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class MultiWordChunkerTest method testDisambiguate.

/**
 * Runs a {@code MultiWordChunker} loaded from {@code /pl/multiwords.txt} over
 * the sentence "A test... More." and expects the readings of token 4 to carry
 * the opening {@code <ELLIPSIS>} tag and those of token 6 the closing
 * {@code </ELLIPSIS>} tag.
 */
@Test
public void testDisambiguate() throws Exception {
    Disambiguator multiWordChunker = new MultiWordChunker("/pl/multiwords.txt");
    JLanguageTool languageTool = new JLanguageTool(new English());
    AnalyzedTokenReadings[] readings = multiWordChunker
            .disambiguate(languageTool.getAnalyzedSentence("A test... More."))
            .getTokens();

    String openingReadings = readings[4].getReadings().toString();
    assertTrue(openingReadings.contains("<ELLIPSIS>"));

    String closingReadings = readings[6].getReadings().toString();
    assertTrue(closingReadings.contains("</ELLIPSIS>"));
}
Also used : English(org.languagetool.language.English) AnalyzedSentence(org.languagetool.AnalyzedSentence) JLanguageTool(org.languagetool.JLanguageTool) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Example 4 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class MultiWordChunkerTest method testDisambiguateMultiSpace.

/**
 * Runs a {@code MultiWordChunker} loaded from {@code /uk/multiwords.txt} over
 * a Ukrainian phrase whose two words are separated by two spaces, and expects
 * the readings of token 1 to carry the opening {@code <adv>} tag and those of
 * token 4 the closing {@code </adv>} tag.
 */
@Test
public void testDisambiguateMultiSpace() throws Exception {
    Disambiguator multiWordChunker = new MultiWordChunker("/uk/multiwords.txt");
    JLanguageTool languageTool = new JLanguageTool(new Ukrainian());
    AnalyzedTokenReadings[] readings = multiWordChunker
            .disambiguate(languageTool.getAnalyzedSentence("для  годиться."))
            .getTokens();

    String openingReadings = readings[1].getReadings().toString();
    assertTrue(openingReadings.contains("<adv>"));

    String closingReadings = readings[4].getReadings().toString();
    assertTrue(closingReadings.contains("</adv>"));
}
Also used : Ukrainian(org.languagetool.language.Ukrainian) AnalyzedSentence(org.languagetool.AnalyzedSentence) JLanguageTool(org.languagetool.JLanguageTool) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Example 5 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class RuleTest method testJavaRules.

/**
 * Validates every rule that is not an {@code AbstractPatternRule}, for each
 * language returned by {@code Languages.getWithDemoLanguage()}.
 *
 * <p>Each such rule is checked for id uniqueness and id validity, must report
 * that it supports the language it was loaded for, and has its examples run.
 * A warning is written to stderr when at most one language is available.
 *
 * @throws IOException propagated from the rule checks
 */
@Test
public void testJavaRules() throws IOException {
    Set<String> seenIds = new HashSet<>();
    Set<Class> seenRuleClasses = new HashSet<>();
    if (Languages.getWithDemoLanguage().size() < 2) {
        String banner = "***************************************************************************";
        System.err.println(banner);
        System.err.println("WARNING: found only these languages - the tests might not be complete:");
        System.err.println(Languages.getWithDemoLanguage());
        System.err.println(banner);
    }
    for (Language lang : Languages.getWithDemoLanguage()) {
        JLanguageTool tool = new JLanguageTool(lang);
        for (Rule candidate : tool.getAllRules()) {
            if (candidate instanceof AbstractPatternRule) {
                // Pattern rules are skipped by this test.
                continue;
            }
            assertIdUniqueness(seenIds, seenRuleClasses, lang, candidate);
            assertIdValidity(lang, candidate);
            assertTrue(candidate.supportsLanguage(lang));
            testExamples(candidate, tool);
        }
    }
}
Also used : Language(org.languagetool.Language) JLanguageTool(org.languagetool.JLanguageTool) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

JLanguageTool (org.languagetool.JLanguageTool): 184, Test (org.junit.Test): 109, RuleMatch (org.languagetool.rules.RuleMatch): 57, Before (org.junit.Before): 38, German (org.languagetool.language.German): 16, Rule (org.languagetool.rules.Rule): 16, Catalan (org.languagetool.language.Catalan): 14, Ukrainian (org.languagetool.language.Ukrainian): 14, English (org.languagetool.language.English): 13, Polish (org.languagetool.language.Polish): 12, Language (org.languagetool.Language): 10, GermanyGerman (org.languagetool.language.GermanyGerman): 9, PatternRule (org.languagetool.rules.patterns.PatternRule): 9, AnalyzedSentence (org.languagetool.AnalyzedSentence): 8, File (java.io.File): 7, AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 6, Dutch (org.languagetool.language.Dutch): 5, French (org.languagetool.language.French): 5, ArrayList (java.util.ArrayList): 4, FakeLanguage (org.languagetool.FakeLanguage): 4