Search in sources :

Example 11 with IncorrectExample

use of org.languagetool.rules.IncorrectExample in project languagetool by languagetool-org.

the class PatternRuleLoaderTest method testGetRules.

/**
 * Loads the demo grammar file {@code /xx/grammar.xml} through {@link PatternRuleLoader}
 * and verifies the rules it produces: URLs, correct/incorrect examples, localization
 * quality issue types (including inheritance from category and rule group), category
 * names, chunk tags, short messages, and that a rule group's URL is not carried over
 * to rules that follow it.
 *
 * @throws Exception if loading the rules fails
 */
@Test
public void testGetRules() throws Exception {
    PatternRuleLoader prg = new PatternRuleLoader();
    String name = "/xx/grammar.xml";
    List<AbstractPatternRule> rules = prg.getRules(JLanguageTool.getDataBroker().getFromRulesDirAsStream(name), name);
    assertTrue(rules.size() >= 30);

    // URL and examples of a plain rule
    Rule demoRule1 = getRuleById("DEMO_RULE", rules);
    assertEquals("http://fake-server.org/foo-bar-error-explained", demoRule1.getUrl().toString());
    assertEquals("[This is <marker>fuu bah</marker>.]", demoRule1.getCorrectExamples().toString());
    List<IncorrectExample> incorrectExamples = demoRule1.getIncorrectExamples();
    assertEquals(1, incorrectExamples.size());
    assertEquals("This is <marker>foo bar</marker>.", incorrectExamples.get(0).getExample());
    Rule demoRule2 = getRuleById("API_OUTPUT_TEST_RULE", rules);
    assertNull(demoRule2.getUrl());

    // localization quality issue types, including inheritance and overwriting
    assertEquals(ITSIssueType.Uncategorized, demoRule1.getLocQualityIssueType());
    assertEquals("tag inheritance failed", ITSIssueType.Addition, getRuleById("TEST_GO", rules).getLocQualityIssueType());
    assertEquals("tag inheritance overwrite failed", ITSIssueType.Uncategorized, getRuleById("TEST_PHRASES1", rules).getLocQualityIssueType());
    assertEquals("tag inheritance overwrite failed", ITSIssueType.Characters, getRuleById("test_include", rules).getLocQualityIssueType());
    List<Rule> groupRules1 = getRulesById("test_spacebefore", rules);
    assertEquals("tag inheritance form category failed", ITSIssueType.Addition, groupRules1.get(0).getLocQualityIssueType());
    assertEquals("tag inheritance overwrite failed", ITSIssueType.Duplication, groupRules1.get(1).getLocQualityIssueType());
    List<Rule> groupRules2 = getRulesById("test_unification_with_negation", rules);
    assertEquals("tag inheritance from rulegroup failed", ITSIssueType.Grammar, groupRules2.get(0).getLocQualityIssueType());

    // categories
    Set<String> categories = getCategoryNames(rules);
    assertEquals(4, categories.size());
    assertTrue(categories.contains("misc"));
    assertTrue(categories.contains("otherCategory"));
    assertTrue(categories.contains("Test tokens with min and max attributes"));
    assertTrue(categories.contains("A category that's off by default"));

    // chunk tag on a pattern token
    PatternRule demoRuleWithChunk = (PatternRule) getRuleById("DEMO_CHUNK_RULE", rules);
    List<PatternToken> patternTokens = demoRuleWithChunk.getPatternTokens();
    assertEquals(2, patternTokens.size());
    assertNull(patternTokens.get(1).getPOStag());
    assertEquals(new ChunkTag("B-NP-singular"), patternTokens.get(1).getChunkTag());

    // URL and short message set on a rule group, optionally overwritten by a member rule
    List<Rule> orRules = getRulesById("GROUP_WITH_URL", rules);
    assertEquals(3, orRules.size());
    assertEquals("http://fake-server.org/rule-group-url", orRules.get(0).getUrl().toString());
    assertEquals("http://fake-server.org/rule-group-url-overwrite", orRules.get(1).getUrl().toString());
    assertEquals("http://fake-server.org/rule-group-url", orRules.get(2).getUrl().toString());
    assertEquals("short message on rule group", ((PatternRule) orRules.get(0)).getShortMessage());
    assertEquals("overwriting short message", ((PatternRule) orRules.get(1)).getShortMessage());
    assertEquals("short message on rule group", ((PatternRule) orRules.get(2)).getShortMessage());

    // make sure URLs don't leak to the next rule:
    // (the first argument of assertNull is the failure message, not an expected value)
    List<Rule> orRules2 = getRulesById("OR_GROUPS", rules);
    for (Rule rule : orRules2) {
        assertNull("rule group URL leaked into an OR_GROUPS rule", rule.getUrl());
    }
    Rule nextRule = getRuleById("DEMO_CHUNK_RULE", rules);
    assertNull("rule group URL leaked into DEMO_CHUNK_RULE", nextRule.getUrl());
}
Also used : ChunkTag(org.languagetool.chunking.ChunkTag) Rule(org.languagetool.rules.Rule) IncorrectExample(org.languagetool.rules.IncorrectExample) Test(org.junit.Test)

Example 12 with IncorrectExample

use of org.languagetool.rules.IncorrectExample in project languagetool by languagetool-org.

the class PatternRuleTest method testBadSentences.

/**
 * Verifies every incorrect example of {@code rule}: the example must carry
 * {@code <marker>...</marker>} markup, the rule must match the cleaned sentence at
 * the marked position, the suggestions must be the expected ones, and applying a
 * suggestion must not trigger the rule again.
 *
 * <p>For rules that are neither a {@link RegexPatternRule} nor a {@link PatternRule}
 * without complex phrase, a sentence without a match is recorded in
 * {@code complexRules} under the key {@code rule.getId() + sentence}; once a match
 * is found for that key the entry is set to {@code null}.
 *
 * @param languageTool tool used to run the single rule and to analyze sentences
 * @param allRulesLanguageTool tool used to look up all pattern rules sharing the rule's id and sub-id
 * @param lang language used as prefix in failure messages
 * @param complexRules bookkeeping map for rules created with complex phrases; modified by this method
 * @param rule the rule whose incorrect examples are checked
 * @throws IOException declared by the helpers this method calls
 */
private void testBadSentences(JLanguageTool languageTool, JLanguageTool allRulesLanguageTool, Language lang, Map<String, AbstractPatternRule> complexRules, AbstractPatternRule rule) throws IOException {
    List<IncorrectExample> badSentences = rule.getIncorrectExamples();
    if (badSentences.isEmpty()) {
        fail("No incorrect examples found for rule " + rule.getFullId());
    }
    // necessary for XML Pattern rules containing <or>
    List<AbstractPatternRule> rules = allRulesLanguageTool.getPatternRulesByIdAndSubId(rule.getId(), rule.getSubId());
    for (IncorrectExample origBadExample : badSentences) {
        // enable indentation use
        String origBadSentence = origBadExample.getExample().replaceAll("[\\n\\t]+", "");
        List<String> expectedCorrections = origBadExample.getCorrections();
        int expectedMatchStart = origBadSentence.indexOf("<marker>");
        // Check the raw index of the closing marker: subtracting the opening marker's
        // length first would turn "not found" (-1) into -9 and bypass the check.
        int closingMarkerPos = origBadSentence.indexOf("</marker>");
        if (expectedMatchStart == -1 || closingMarkerPos == -1) {
            fail(lang + ": No error position markup ('<marker>...</marker>') in bad example in rule " + rule.getFullId());
        }
        // positions refer to the sentence without markup, so discount the opening marker
        int expectedMatchEnd = closingMarkerPos - "<marker>".length();
        String badSentence = cleanXML(origBadSentence);
        assertTrue(badSentence.trim().length() > 0);
        // necessary for XML Pattern rules containing <or>
        List<RuleMatch> matches = new ArrayList<>();
        for (Rule auxRule : rules) {
            matches.addAll(getMatches(auxRule, badSentence, languageTool));
        }
        if (rule instanceof RegexPatternRule || (rule instanceof PatternRule && !((PatternRule) rule).isWithComplexPhrase())) {
            if (matches.size() != 1) {
                AnalyzedSentence analyzedSentence = languageTool.getAnalyzedSentence(badSentence);
                StringBuilder sb = new StringBuilder("Analyzed token readings:");
                for (AnalyzedTokenReadings atr : analyzedSentence.getTokens()) {
                    sb.append(" ").append(atr);
                }
                String info = "";
                if (rule instanceof RegexPatternRule) {
                    info = "\nRegexp: " + ((RegexPatternRule) rule).getPattern().toString();
                }
                fail(lang + " rule " + rule.getFullId() + ":\n\"" + badSentence + "\"\n" + "Errors expected: 1\n" + "Errors found   : " + matches.size() + "\n" + "Message: " + rule.getMessage() + "\n" + sb + "\nMatches: " + matches + info);
            }
            RuleMatch match = matches.get(0);
            assertEquals(lang + ": Incorrect match position markup (start) for rule " + rule.getFullId() + ", sentence: " + badSentence, expectedMatchStart, match.getFromPos());
            assertEquals(lang + ": Incorrect match position markup (end) for rule " + rule.getFullId() + ", sentence: " + badSentence, expectedMatchEnd, match.getToPos());
            // make sure suggestion is what we expect it to be
            assertSuggestions(badSentence, lang, expectedCorrections, rule, matches);
            // make sure the suggested correction doesn't produce an error:
            int fromPos = match.getFromPos();
            int toPos = match.getToPos();
            for (String replacement : match.getSuggestedReplacements()) {
                String fixedSentence = badSentence.substring(0, fromPos) + replacement + badSentence.substring(toPos);
                // separate local so the list being inspected is not overwritten mid-loop
                List<RuleMatch> matchesAfterFix = getMatches(rule, fixedSentence, languageTool);
                if (!matchesAfterFix.isEmpty()) {
                    fail("Incorrect input:\n" + "  " + badSentence + "\nCorrected sentence:\n" + "  " + fixedSentence + "\nBy Rule:\n" + "  " + rule.getFullId() + "\nThe correction triggered an error itself:\n" + "  " + matchesAfterFix.get(0) + "\n");
                }
            }
        } else {
            // for multiple rules created with complex phrases
            matches = getMatches(rule, badSentence, languageTool);
            String complexRuleKey = rule.getId() + badSentence;
            if (matches.isEmpty()) {
                // remember the unmatched sentence unless the key is already known
                if (!complexRules.containsKey(complexRuleKey)) {
                    complexRules.put(complexRuleKey, rule);
                }
            } else {
                complexRules.put(complexRuleKey, null);
                assertTrue(lang + ": Did expect one error in: \"" + badSentence + "\" (Rule: " + rule.getFullId() + "), got " + matches.size(), matches.size() == 1);
                assertEquals(lang + ": Incorrect match position markup (start) for rule " + rule.getFullId(), expectedMatchStart, matches.get(0).getFromPos());
                assertEquals(lang + ": Incorrect match position markup (end) for rule " + rule.getFullId(), expectedMatchEnd, matches.get(0).getToPos());
                assertSuggestions(badSentence, lang, expectedCorrections, rule, matches);
                assertSuggestionsDoNotCreateErrors(badSentence, languageTool, rule, matches);
            }
        }
        // check for overlapping rules (currently disabled)
        /*matches = getMatches(rule, badSentence, languageTool);
          List<RuleMatch> matchesAllRules = allRulesLanguageTool.check(badSentence);
          for (RuleMatch match : matchesAllRules) {
            if (!match.getRule().getId().equals(rule.getId()) && !matches.isEmpty()
                && rangeIsOverlapping(matches.get(0).getFromPos(), matches.get(0).getToPos(), match.getFromPos(), match.getToPos()))
              System.err.println("WARN: " + lang.getShortCode() + ": '" + badSentence + "' in "
                      + rule.getId() + " also matched " + match.getRule().getId());
          }*/
    }
}
Also used : DisambiguationPatternRule(org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule) ArrayList(java.util.ArrayList) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) RuleMatch(org.languagetool.rules.RuleMatch) AnalyzedSentence(org.languagetool.AnalyzedSentence) IncorrectExample(org.languagetool.rules.IncorrectExample) SpellingCheckRule(org.languagetool.rules.spelling.SpellingCheckRule) DisambiguationPatternRule(org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule) Rule(org.languagetool.rules.Rule)

Aggregations

IncorrectExample (org.languagetool.rules.IncorrectExample): 12; Rule (org.languagetool.rules.Rule): 8; JLanguageTool (org.languagetool.JLanguageTool): 4; File (java.io.File): 3; AbstractPatternRule (org.languagetool.rules.patterns.AbstractPatternRule): 3; PatternRule (org.languagetool.rules.patterns.PatternRule): 3; FileReader (java.io.FileReader): 2; Test (org.junit.Test): 2; CorrectExample (org.languagetool.rules.CorrectExample): 2; DisambiguationPatternRule (org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule): 2; ArrayList (java.util.ArrayList): 1; Document (org.apache.lucene.document.Document): 1; Field (org.apache.lucene.document.Field): 1; FieldType (org.apache.lucene.document.FieldType): 1; AnalyzedSentence (org.languagetool.AnalyzedSentence): 1; AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 1; MultiThreadedJLanguageTool (org.languagetool.MultiThreadedJLanguageTool): 1; ChunkTag (org.languagetool.chunking.ChunkTag): 1; RuleMatch (org.languagetool.rules.RuleMatch): 1; SpellingCheckRule (org.languagetool.rules.spelling.SpellingCheckRule): 1