Search in sources :

Example 31 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

the class MorfologikPolishSpellerRuleTest method testMorfologikSpeller.

/**
 * Runs the Polish Morfologik spelling rule against sentences that must not
 * produce any match and against misspelled words that must produce exactly
 * one match, including checks of match offsets and suggestion order.
 */
@Test
public void testMorfologikSpeller() throws IOException {
    final MorfologikPolishSpellerRule spellerRule =
            new MorfologikPolishSpellerRule(TestTools.getMessages("pl"), new Polish());
    final JLanguageTool lt = new JLanguageTool(new Polish());

    // Inputs for which the rule is expected to report nothing, checked in this order.
    final String[] cleanSentences = {
        // plain correct sentences
        "To jest test bez jakiegokolwiek błędu.",
        "Żółw na starość wydziela dziwną woń.",
        "Żółw na starość wydziela dziwną woń numer 1234.",
        "W MI-6 pracuje 15-letni agent.",
        // the word "LanguageTool" itself
        "LanguageTool jest świetny!",
        // the ignored uppercase word "Gdym"
        "Gdym to zobaczył, zdębiałem.",
        // punctuation only, digits only
        ",",
        "123454",
        // compound word with the ignored part "techniczno"
        "Bogactwo nie rośnie proporcjonalnie do jej rozwoju techniczno-terytorialnego.",
        // compound words starting with one of the compound prefixes
        "Antypostmodernistyczna batalia hiperfilozofów",
        // compound words "trzynastobitowy" and "zgniłożółty"
        "Trzynastobitowe przystawki w kolorze zgniłożółtym"
    };
    for (final String sentence : cleanSentences) {
        assertEquals(0, spellerRule.match(lt.getAnalyzedSentence(sentence)).length);
    }

    // A misspelled word: exactly one match spanning the whole word, with the
    // correctly accented form as the first suggestion.
    final RuleMatch[] zolwMatches = spellerRule.match(lt.getAnalyzedSentence("Zolw"));
    assertEquals(1, zolwMatches.length);
    final RuleMatch zolwMatch = zolwMatches[0];
    assertEquals(0, zolwMatch.getFromPos());
    assertEquals(4, zolwMatch.getToPos());
    assertEquals("Żółw", zolwMatch.getSuggestedReplacements().get(0));

    assertEquals(1, spellerRule.match(lt.getAnalyzedSentence("aõh")).length);

    // Words built with the prefixes "niby-" and "quasi-" must not be flagged.
    final String prefixedSentence = "Niby-artysta spotkał się z quasi-opiekunem i niby-Francuzem.";
    assertEquals(0, spellerRule.match(lt.getAnalyzedSentence(prefixedSentence)).length);

    // The suggestion list for "Clarkem" has exactly these entries in this order.
    final String[] expectedSuggestions = {"Clarke", "Clarkiem", "Ciarkę", "Clarkom", "Czarkę"};
    final RuleMatch[] prunedMatches = spellerRule.match(lt.getAnalyzedSentence("Clarkem"));
    assertEquals(1, prunedMatches.length);
    assertEquals(5, prunedMatches[0].getSuggestedReplacements().size());
    for (int i = 0; i < expectedSuggestions.length; i++) {
        assertEquals(expectedSuggestions[i], prunedMatches[0].getSuggestedReplacements().get(i));
    }

    // "pre" is not a valid prefix here, so the word has to be reported.
    assertEquals(1, spellerRule.match(lt.getAnalyzedSentence("premoc")).length);
    // The digit "0" typed in place of the letter "o" has to be reported.
    assertEquals(1, spellerRule.match(lt.getAnalyzedSentence("dziwneg0")).length);
}
Also used : Polish(org.languagetool.language.Polish) RuleMatch(org.languagetool.rules.RuleMatch) JLanguageTool(org.languagetool.JLanguageTool) Test(org.junit.Test)

Example 32 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

the class PolishWordRepeatRuleTest method testRule.

/**
 * Checks the number of matches the Polish word-repeat rule reports for a
 * handful of sentences: none for the acceptable ones, one or two for the
 * sentences containing repeated words.
 */
@Test
public void testRule() throws IOException {
    final PolishWordRepeatRule repeatRule = new PolishWordRepeatRule(TestTools.getEnglishMessages());
    final JLanguageTool lt = new JLanguageTool(new Polish());

    // Sentences that must not trigger the rule.
    assertEquals(0, repeatRule.match(lt.getAnalyzedSentence("To jest zdanie próbne.")).length);
    assertEquals(0, repeatRule.match(
            lt.getAnalyzedSentence("On tak się bardzo nie martwił, bo przecież musiał się umyć.")).length);
    // Repeated prepositions are not counted as repetitions.
    assertEquals(0, repeatRule.match(
            lt.getAnalyzedSentence("Na dyskotece tańczył jeszcze, choć był na bani.")).length);
    // Input taken from a SourceForge bug report.
    assertEquals(0, repeatRule.match(lt.getAnalyzedSentence("Żadnych „ale”.")).length);

    // Sentences that must trigger the rule.
    assertEquals(1, repeatRule.match(
            lt.getAnalyzedSentence("Był on bowiem pięknym strzelcem bowiem.")).length);
    assertEquals(2, repeatRule.match(
            lt.getAnalyzedSentence("Mówiła długo, żeby tylko mówić długo.")).length);
}
Also used : RuleMatch(org.languagetool.rules.RuleMatch) Polish(org.languagetool.language.Polish) JLanguageTool(org.languagetool.JLanguageTool) Test(org.junit.Test)

Example 33 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

the class MorfologikSpanishSpellerRuleTest method testMorfologikSpeller.

/**
 * Runs the Spanish Morfologik spelling rule on one text that must yield no
 * match and on one text with a single misspelling, checking the offsets of
 * that match and its first suggestion.
 */
@Test
public void testMorfologikSpeller() throws IOException {
    Spanish spanish = new Spanish();
    MorfologikSpanishSpellerRule spellerRule =
            new MorfologikSpanishSpellerRule(TestTools.getMessages("en"), spanish);
    JLanguageTool lt = new JLanguageTool(spanish);

    // Text for which the rule is expected to report nothing.
    String cleanText = "Escriba un texto aquí. LanguageTool le ayudará a afrontar algunas dificultades propias de la escritura.";
    assertEquals(0, spellerRule.match(lt.getAnalyzedSentence(cleanText)).length);

    // Text with "ortograficos" written without its accent: the only expected match.
    String misspelledText = "Se a hecho un esfuerzo para detectar errores tipográficos, ortograficos y incluso gramaticales.";
    RuleMatch[] found = spellerRule.match(lt.getAnalyzedSentence(misspelledText));
    assertEquals(1, found.length);
    RuleMatch typo = found[0];
    assertEquals(59, typo.getFromPos());
    assertEquals(71, typo.getToPos());
    assertEquals("ortográficos", typo.getSuggestedReplacements().get(0));
}
Also used : RuleMatch(org.languagetool.rules.RuleMatch) JLanguageTool(org.languagetool.JLanguageTool) Spanish(org.languagetool.language.Spanish) Test(org.junit.Test)

Example 34 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

the class FalseFriendRuleTest method testHintsForGermanSpeakers.

/**
 * Checks false-friend hints using a {@code JLanguageTool} created with
 * {@code English} as first and {@code German} as second language argument
 * (presumably text language and the user's mother tongue - confirm against
 * the {@code JLanguageTool} constructor).
 *
 * Verifies the number of reported errors per sentence via {@code assertErrors}
 * and, where an error is expected, the exact list of suggested replacements.
 */
@Test
public void testHintsForGermanSpeakers() throws IOException, ParserConfigurationException, SAXException {
    JLanguageTool langTool = new JLanguageTool(new English(), new German());
    List<RuleMatch> matches = assertErrors(1, "We will berate you.", langTool);
    // Expected value goes first so that a failure message labels both values correctly.
    assertEquals("[provide advice, give advice]", matches.get(0).getSuggestedReplacements().toString());
    assertErrors(0, "We will give you advice.", langTool);
    assertErrors(1, "I go to high school in Foocity.", langTool);
    List<RuleMatch> matches2 = assertErrors(1, "The chef", langTool);
    assertEquals("[boss, chief]", matches2.get(0).getSuggestedReplacements().toString());
}
Also used : RuleMatch(org.languagetool.rules.RuleMatch) JLanguageTool(org.languagetool.JLanguageTool) Test(org.junit.Test)

Example 35 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

the class PerformanceTest2 method run.

/**
 * Measures how long a full check takes on random excerpts of a text file.
 *
 * Reads the file as UTF-8, then performs {@code RUNS} iterations. Each
 * iteration picks a random start offset (fixed seed 42, so the sequence of
 * offsets is the same for the same text), cuts an excerpt of at most
 * {@code MAX_TEXT_LENGTH} characters, creates a new
 * {@code MultiThreadedJLanguageTool}, checks the excerpt and prints the
 * elapsed time. Iterations with an index below {@code SKIP} are reported as
 * warm-up and excluded from the average printed at the end.
 *
 * @param languageCode short code passed to {@code Languages.getLanguageForShortCode}
 * @param textFile file whose content is sampled
 * @throws IOException if the file cannot be read or the check fails with an I/O error
 * @throws IllegalArgumentException if the file contains no text
 */
private void run(String languageCode, File textFile) throws IOException {
    String text;
    // Close the stream here instead of relying on the helper to do it.
    try (FileInputStream stream = new FileInputStream(textFile)) {
        text = StringTools.readStream(stream, "utf-8");
    }
    System.out.println("Text length: " + text.length());
    if (text.isEmpty()) {
        // Random.nextInt(0) would fail with a far less helpful message.
        throw new IllegalArgumentException("Text file is empty, nothing to check: " + textFile);
    }
    Random rnd = new Random(42);
    Language language = Languages.getLanguageForShortCode(languageCode);
    long totalTime = 0;
    int measuredRuns = 0;
    for (int i = 0; i < RUNS; i++) {
        int beginIndex = rnd.nextInt(text.length());
        // NOTE(review): the "- 1" means the last character of the text is never
        // part of an excerpt; kept as-is so the sampled excerpts stay unchanged.
        int endIndex = Math.min(beginIndex + MAX_TEXT_LENGTH, text.length() - 1);
        String subText = text.substring(beginIndex, endIndex);
        // The timed section includes construction of the language tool.
        long startTime = System.currentTimeMillis();
        MultiThreadedJLanguageTool langTool = new MultiThreadedJLanguageTool(language);
        List<RuleMatch> matches;
        long runTime;
        try {
            matches = langTool.check(subText);
            runTime = System.currentTimeMillis() - startTime;
        } finally {
            // Always shut the tool down, even when the check throws.
            langTool.shutdown();
        }
        if (i >= SKIP) {
            totalTime += runTime;
            measuredRuns++;
            System.out.println("Time: " + runTime + "ms (" + matches.size() + " matches)");
        } else {
            System.out.println("Time: " + runTime + "ms (" + matches.size() + " matches) - skipped because of warm-up");
        }
    }
    // Average over the runs that were actually added to totalTime, not over all runs.
    if (measuredRuns > 0) {
        System.out.println("Avg. Time: " + (float) totalTime / measuredRuns);
    } else {
        System.out.println("Avg. Time: n/a (no runs measured after warm-up)");
    }
}
Also used : RuleMatch(org.languagetool.rules.RuleMatch) Random(java.util.Random) Language(org.languagetool.Language) MultiThreadedJLanguageTool(org.languagetool.MultiThreadedJLanguageTool) FileInputStream(java.io.FileInputStream)

Aggregations

RuleMatch (org.languagetool.rules.RuleMatch)144 Test (org.junit.Test)64 JLanguageTool (org.languagetool.JLanguageTool)54 ArrayList (java.util.ArrayList)30 AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings)14 Rule (org.languagetool.rules.Rule)14 Language (org.languagetool.Language)10 PatternRule (org.languagetool.rules.patterns.PatternRule)10 AnalyzedSentence (org.languagetool.AnalyzedSentence)8 Ukrainian (org.languagetool.language.Ukrainian)8 AbstractPatternRule (org.languagetool.rules.patterns.AbstractPatternRule)8 Matcher (java.util.regex.Matcher)7 English (org.languagetool.language.English)7 IOException (java.io.IOException)6 Catalan (org.languagetool.language.Catalan)6 Polish (org.languagetool.language.Polish)6 GermanyGerman (org.languagetool.language.GermanyGerman)5 AnnotatedText (org.languagetool.markup.AnnotatedText)5 PatternToken (org.languagetool.rules.patterns.PatternToken)5 AnalyzedToken (org.languagetool.AnalyzedToken)4