Search in sources:

Example 31 with AnalyzedSentence

use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.

the class WordRepeatBeginningRule method match.

/**
 * Looks for consecutive sentences that begin with the same token.
 *
 * <p>Only sentences with more than three non-whitespace tokens are considered. The token at
 * index 1 is treated as the sentence's first word (presumably index 0 is a sentence-start
 * marker - TODO confirm). A match is reported when that word equals the first word of the
 * previously considered sentence and either the word is an adverb, or it also equals the
 * first word of the sentence considered before that.
 *
 * @param sentences the sentences to check, in order
 * @return the matches found, with positions offset by the text length of preceding sentences
 * @throws IOException declared by the overridden method
 */
@Override
public RuleMatch[] match(List<AnalyzedSentence> sentences) throws IOException {
    List<RuleMatch> found = new ArrayList<>();
    String previousStart = "";
    String secondPreviousStart = "";
    int sentenceOffset = 0;
    for (AnalyzedSentence sentence : sentences) {
        AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
        if (tokens.length > 3) {
            AnalyzedTokenReadings firstReading = tokens[1];
            String firstWord = firstReading.getToken();
            // a single non-letter character (e.g. the start of "...") is not treated as a word
            boolean looksLikeWord = firstWord.length() != 1 || Character.isLetter(firstWord.charAt(0));
            boolean repeatsPrevious = looksLikeWord
                    && previousStart.equals(firstWord)
                    && !isException(firstWord)
                    && !isException(tokens[2].getToken())
                    && !isException(tokens[3].getToken());
            if (repeatsPrevious) {
                String shortMsg = "";
                if (isAdverb(firstReading)) {
                    shortMsg = messages.getString("desc_repetition_beginning_adv");
                } else if (secondPreviousStart.equals(firstWord)) {
                    shortMsg = messages.getString("desc_repetition_beginning_word");
                }
                if (!shortMsg.isEmpty()) {
                    String msg = shortMsg + " " + messages.getString("desc_repetition_beginning_thesaurus");
                    int from = sentenceOffset + firstReading.getStartPos();
                    int to = from + firstWord.length();
                    found.add(new RuleMatch(this, from, to, msg, shortMsg));
                }
            }
            // shift the history window; short sentences leave it untouched
            secondPreviousStart = previousStart;
            previousStart = firstWord;
        }
        sentenceOffset += sentence.getText().length();
    }
    return toRuleMatchArray(found);
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence) ArrayList(java.util.ArrayList) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 32 with AnalyzedSentence

use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.

the class FalseFriendsAsBitextLoaderTest method match.

/**
 * Analyzes the source and target strings with their respective language tools and runs the
 * given bitext rule on the resulting sentence pair.
 *
 * @param rule the bitext rule to apply
 * @param src source-language text
 * @param trg target-language text
 * @param srcLanguageTool tool used to analyze {@code src}
 * @param trgLanguageTool tool used to analyze {@code trg}
 * @return whatever {@code rule.match} returns for the analyzed pair
 * @throws IOException if analysis or matching fails with an I/O error
 */
private RuleMatch[] match(BitextPatternRule rule, String src, String trg, JLanguageTool srcLanguageTool, JLanguageTool trgLanguageTool) throws IOException {
    AnalyzedSentence analyzedSource = srcLanguageTool.getAnalyzedSentence(src);
    AnalyzedSentence analyzedTarget = trgLanguageTool.getAnalyzedSentence(trg);
    RuleMatch[] result = rule.match(analyzedSource, analyzedTarget);
    return result;
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence)

Example 33 with AnalyzedSentence

use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.

the class PatternRuleTest method testBadSentences.

/**
 * Verifies the incorrect examples of a pattern rule.
 *
 * <p>For every incorrect example the method checks that the {@code <marker>...</marker>}
 * markup is present and well ordered, that the rule produces exactly one match at the marked
 * position, that the suggestions are the expected ones, and that applying a suggestion does
 * not make the rule match again. Rules built from complex phrases are handled more leniently:
 * a missing match is only recorded in {@code complexRules}.
 *
 * @param languageTool tool used to run the rule on the example sentences
 * @param allRulesLanguageTool tool used to look up all rules sharing the rule's id and sub-id
 * @param lang language under test, used in failure messages
 * @param complexRules bookkeeping map keyed by rule id + sentence; a non-null value marks a
 *                     complex-phrase rule that has not matched its example, {@code null} marks
 *                     one that has
 * @param rule the rule whose incorrect examples are checked
 * @throws IOException if analyzing or matching a sentence fails with an I/O error
 */
private void testBadSentences(JLanguageTool languageTool, JLanguageTool allRulesLanguageTool, Language lang, Map<String, AbstractPatternRule> complexRules, AbstractPatternRule rule) throws IOException {
    List<IncorrectExample> badSentences = rule.getIncorrectExamples();
    if (badSentences.isEmpty()) {
        fail("No incorrect examples found for rule " + rule.getFullId());
    }
    // necessary for XML Pattern rules containing <or>
    List<AbstractPatternRule> rules = allRulesLanguageTool.getPatternRulesByIdAndSubId(rule.getId(), rule.getSubId());
    for (IncorrectExample origBadExample : badSentences) {
        // enable indentation use
        String origBadSentence = origBadExample.getExample().replaceAll("[\\n\\t]+", "");
        List<String> expectedCorrections = origBadExample.getCorrections();
        int expectedMatchStart = origBadSentence.indexOf("<marker>");
        int closingMarkerIndex = origBadSentence.indexOf("</marker>");
        // Validate the raw indexOf results: checking the already shifted end position against -1
        // would let a missing closing tag slip through.
        if (expectedMatchStart == -1 || closingMarkerIndex == -1 || closingMarkerIndex < expectedMatchStart) {
            fail(lang + ": No error position markup ('<marker>...</marker>') in bad example in rule " + rule.getFullId());
        }
        // End position is compared against the cleaned sentence, so shift left by the length of
        // the opening tag (presumably cleanXML strips the markup - TODO confirm).
        int expectedMatchEnd = closingMarkerIndex - "<marker>".length();
        String badSentence = cleanXML(origBadSentence);
        assertTrue(badSentence.trim().length() > 0);
        // necessary for XML Pattern rules containing <or>
        List<RuleMatch> matches = new ArrayList<>();
        for (Rule auxRule : rules) {
            matches.addAll(getMatches(auxRule, badSentence, languageTool));
        }
        if (rule instanceof RegexPatternRule || (rule instanceof PatternRule && !((PatternRule) rule).isWithComplexPhrase())) {
            if (matches.size() != 1) {
                // collect the token readings to make the failure message easier to debug
                AnalyzedSentence analyzedSentence = languageTool.getAnalyzedSentence(badSentence);
                StringBuilder sb = new StringBuilder("Analyzed token readings:");
                for (AnalyzedTokenReadings atr : analyzedSentence.getTokens()) {
                    sb.append(" ").append(atr);
                }
                String info = "";
                if (rule instanceof RegexPatternRule) {
                    info = "\nRegexp: " + ((RegexPatternRule) rule).getPattern().toString();
                }
                fail(lang + " rule " + rule.getFullId() + ":\n\"" + badSentence + "\"\n" + "Errors expected: 1\n" + "Errors found   : " + matches.size() + "\n" + "Message: " + rule.getMessage() + "\n" + sb + "\nMatches: " + matches + info);
            }
            RuleMatch firstMatch = matches.get(0);
            assertEquals(lang + ": Incorrect match position markup (start) for rule " + rule.getFullId() + ", sentence: " + badSentence, expectedMatchStart, firstMatch.getFromPos());
            assertEquals(lang + ": Incorrect match position markup (end) for rule " + rule.getFullId() + ", sentence: " + badSentence, expectedMatchEnd, firstMatch.getToPos());
            // make sure suggestion is what we expect it to be
            assertSuggestions(badSentence, lang, expectedCorrections, rule, matches);
            // make sure the suggested correction doesn't produce an error:
            if (firstMatch.getSuggestedReplacements().size() > 0) {
                int fromPos = firstMatch.getFromPos();
                int toPos = firstMatch.getToPos();
                for (String replacement : firstMatch.getSuggestedReplacements()) {
                    String fixedSentence = badSentence.substring(0, fromPos) + replacement + badSentence.substring(toPos);
                    // kept in its own variable so the list being iterated is not shadowed
                    List<RuleMatch> matchesAfterFix = getMatches(rule, fixedSentence, languageTool);
                    if (matchesAfterFix.size() > 0) {
                        fail("Incorrect input:\n" + "  " + badSentence + "\nCorrected sentence:\n" + "  " + fixedSentence + "\nBy Rule:\n" + "  " + rule.getFullId() + "\nThe correction triggered an error itself:\n" + "  " + matchesAfterFix.get(0) + "\n");
                    }
                }
            }
        } else {
            // for multiple rules created with complex phrases
            matches = getMatches(rule, badSentence, languageTool);
            if (matches.size() == 0 && !complexRules.containsKey(rule.getId() + badSentence)) {
                complexRules.put(rule.getId() + badSentence, rule);
            }
            if (matches.size() != 0) {
                // a null value records that this rule/sentence pair did produce a match
                complexRules.put(rule.getId() + badSentence, null);
                assertTrue(lang + ": Did expect one error in: \"" + badSentence + "\" (Rule: " + rule.getFullId() + "), got " + matches.size(), matches.size() == 1);
                assertEquals(lang + ": Incorrect match position markup (start) for rule " + rule.getFullId(), expectedMatchStart, matches.get(0).getFromPos());
                assertEquals(lang + ": Incorrect match position markup (end) for rule " + rule.getFullId(), expectedMatchEnd, matches.get(0).getToPos());
                assertSuggestions(badSentence, lang, expectedCorrections, rule, matches);
                assertSuggestionsDoNotCreateErrors(badSentence, languageTool, rule, matches);
            }
        }
    // check for overlapping rules
    /*matches = getMatches(rule, badSentence, languageTool);
      List<RuleMatch> matchesAllRules = allRulesLanguageTool.check(badSentence);
      for (RuleMatch match : matchesAllRules) {
        if (!match.getRule().getId().equals(rule.getId()) && !matches.isEmpty()
            && rangeIsOverlapping(matches.get(0).getFromPos(), matches.get(0).getToPos(), match.getFromPos(), match.getToPos()))
          System.err.println("WARN: " + lang.getShortCode() + ": '" + badSentence + "' in "
                  + rule.getId() + " also matched " + match.getRule().getId());
      }*/
    }
}
Also used : ArrayList(java.util.ArrayList) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) RuleMatch(org.languagetool.rules.RuleMatch) AnalyzedSentence(org.languagetool.AnalyzedSentence) IncorrectExample(org.languagetool.rules.IncorrectExample) SpellingCheckRule(org.languagetool.rules.spelling.SpellingCheckRule) DisambiguationPatternRule(org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule) Rule(org.languagetool.rules.Rule)

Example 34 with AnalyzedSentence

use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.

the class PatternRuleTest method match.

/**
 * Tells whether the rule produces at least one match for the given sentence.
 *
 * @param rule the rule to run
 * @param sentence plain text to analyze
 * @param languageTool tool used to analyze {@code sentence}
 * @return {@code true} if {@code rule.match} returns a non-empty array
 * @throws IOException if analysis or matching fails with an I/O error
 */
private boolean match(Rule rule, String sentence, JLanguageTool languageTool) throws IOException {
    return rule.match(languageTool.getAnalyzedSentence(sentence)).length > 0;
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence) RuleMatch(org.languagetool.rules.RuleMatch)

Example 35 with AnalyzedSentence

use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.

the class TestFrenchDisambiguator method disambiguate.

/**
 * Applies every disambiguation rule loaded from the classpath resource
 * {@code /disambiguator.xml} to the input sentence, in the order the loader returns them.
 *
 * @param input the sentence to disambiguate
 * @return the sentence after all rules have been applied
 * @throws IOException declared by the overridden method; any failure raised while loading or
 *         applying the rules is rethrown here as a {@link RuntimeException} wrapping the cause
 */
@Override
public AnalyzedSentence disambiguate(AnalyzedSentence input) throws IOException {
    String rulesResource = "/disambiguator.xml";
    AnalyzedSentence current = input;
    try (InputStream rulesStream = getClass().getResourceAsStream(rulesResource)) {
        List<DisambiguationPatternRule> loadedRules = new DisambiguationRuleLoader().getRules(rulesStream);
        // each rule works on the output of the previous one
        for (DisambiguationPatternRule loadedRule : loadedRules) {
            current = loadedRule.replace(current);
        }
    } catch (Exception e) {
        throw new RuntimeException("Problems with loading disambiguation file: " + rulesResource, e);
    }
    return current;
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence) InputStream(java.io.InputStream) DisambiguationPatternRule(org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule) DisambiguationRuleLoader(org.languagetool.tagging.disambiguation.rules.DisambiguationRuleLoader) IOException(java.io.IOException)

Aggregations

AnalyzedSentence (org.languagetool.AnalyzedSentence)40 AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings)21 ArrayList (java.util.ArrayList)8 Test (org.junit.Test)8 JLanguageTool (org.languagetool.JLanguageTool)8 RuleMatch (org.languagetool.rules.RuleMatch)8 Rule (org.languagetool.rules.Rule)5 IOException (java.io.IOException)4 DisambiguationPatternRule (org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule)4 English (org.languagetool.language.English)3 SpellingCheckRule (org.languagetool.rules.spelling.SpellingCheckRule)3 AnalyzedToken (org.languagetool.AnalyzedToken)2 Ukrainian (org.languagetool.language.Ukrainian)2 InputStream (java.io.InputStream)1 Document (org.apache.lucene.document.Document)1 ConfusionSet (org.languagetool.rules.ConfusionSet)1 CorrectExample (org.languagetool.rules.CorrectExample)1 IncorrectExample (org.languagetool.rules.IncorrectExample)1 BitextRule (org.languagetool.rules.bitext.BitextRule)1 ConfusionProbabilityRule (org.languagetool.rules.ngrams.ConfusionProbabilityRule)1