Search in sources:

Example 91 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

the class HiddenCharacterRuleTest method testRule.

/**
 * Checks that a correctly spelled word produces no match, and that a word
 * containing an invisible soft hyphen is flagged with the clean spelling
 * offered as the only suggestion.
 *
 * @throws IOException declared by the sentence analysis and rule matching calls
 */
@Test
public void testRule() throws IOException {
    // NOTE(review): the enclosing test class is named for hidden characters, yet the
    // rule exercised here is MixedAlphabetsRule - confirm that this is intended.
    final MixedAlphabetsRule rule = new MixedAlphabetsRule(TestTools.getMessages("uk"));
    final JLanguageTool langTool = new JLanguageTool(new Ukrainian());
    // correct sentence: nothing to report
    assertEquals(0, rule.match(langTool.getAnalyzedSentence("сміття")).length);
    // incorrect sentence: a soft hyphen (U+00AD) follows the third letter. It is written
    // as a Unicode escape so the invisible character cannot be lost by an editor or a
    // copy-paste; the compiled string is unchanged. The third letter also appears to be
    // a Latin look-alike of the Cyrillic one - TODO confirm.
    RuleMatch[] matches = rule.match(langTool.getAnalyzedSentence("смi\u00ADття"));
    // exactly one match, suggesting the clean spelling
    assertEquals(1, matches.length);
    assertEquals(Arrays.asList("сміття"), matches[0].getSuggestedReplacements());
}
Also used : Ukrainian(org.languagetool.language.Ukrainian) RuleMatch(org.languagetool.rules.RuleMatch) JLanguageTool(org.languagetool.JLanguageTool) Test(org.junit.Test)

Example 92 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

the class MorfologikUkrainianSpellerRuleTest method testProhibitedSuggestions.

/**
 * Feeds several misspelled compound words to the Ukrainian speller rule and
 * checks, for each of them, that exactly one match is reported and which
 * suggestions (if any) accompany it.
 *
 * @throws IOException declared by the sentence analysis and rule matching calls
 */
@Test
public void testProhibitedSuggestions() throws IOException {
    MorfologikUkrainianSpellerRule spellerRule = new MorfologikUkrainianSpellerRule(TestTools.getMessages("uk"), new Ukrainian());
    JLanguageTool lt = new JLanguageTool(new Ukrainian());

    // flagged; the expected suggestion "онлайн-навчання" is deliberately not asserted here
    RuleMatch[] found = spellerRule.match(lt.getAnalyzedSentence("онлайннавчання"));
    assertEquals(1, found.length);

    // flagged, and no suggestion may be offered
    found = spellerRule.match(lt.getAnalyzedSentence("авіабегемот"));
    assertEquals(1, found.length);
    assertTrue("Should be empty: " + found[0].getSuggestedReplacements().toString(), found[0].getSuggestedReplacements().isEmpty());

    // flagged, with the unhyphenated spelling as the only suggestion
    found = spellerRule.match(lt.getAnalyzedSentence("вело-маршрут"));
    assertEquals(1, found.length);
    assertEquals(Arrays.asList("веломаршрут"), found[0].getSuggestedReplacements());

    // flagged, with an empty suggestion list
    found = spellerRule.match(lt.getAnalyzedSentence("відео-маршрут"));
    assertEquals(1, found.length);
    assertEquals(new ArrayList<String>(), found[0].getSuggestedReplacements());

    // flagged, and again no suggestion may be offered
    found = spellerRule.match(lt.getAnalyzedSentence("вело-бегемот"));
    assertEquals(1, found.length);
    assertTrue("Unexpected suggestions: " + found[0].getSuggestedReplacements().toString(), found[0].getSuggestedReplacements().isEmpty());
}
Also used : Ukrainian(org.languagetool.language.Ukrainian) RuleMatch(org.languagetool.rules.RuleMatch) JLanguageTool(org.languagetool.JLanguageTool) Test(org.junit.Test)

Example 93 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

the class TokenInflectionAgreementRule method match.

/**
 * Scans the sentence for an adjective token directly followed by a noun token and
 * reports a match when the inflections derived from the adjective readings and
 * those derived from the noun readings have no element in common.
 *
 * <p>For every reported match the method tries to synthesize replacement phrases in
 * two directions: the noun re-inflected to each adjective inflection, and the
 * adjective re-inflected to each noun inflection.
 *
 * @param text the analyzed sentence to check
 * @return the matches found, converted with {@code toRuleMatchArray}
 * @throws RuntimeException wrapping an {@link IOException} raised while synthesizing suggestions
 */
@Override
public final RuleMatch[] match(AnalyzedSentence text) {
    List<RuleMatch> ruleMatches = new ArrayList<>();
    AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
    // adjective readings collected from the previous token; empty means "no pending adjective"
    List<AnalyzedToken> adjTokenReadings = new ArrayList<>();
    AnalyzedTokenReadings adjAnalyzedTokenReadings = null;
    // index 0 is skipped (presumably the sentence-start token - TODO confirm)
    for (int i = 1; i < tokens.length; i++) {
        AnalyzedTokenReadings tokenReadings = tokens[i];
        String posTag0 = tokenReadings.getAnalyzedToken(0).getPOSTag();
        if (posTag0 == null) {
            // untagged token: drop any pending adjective and move on
            adjTokenReadings.clear();
            continue;
        }
        if (adjTokenReadings.isEmpty()) {
            // --- phase 1: decide whether the current token starts an adjective + noun pair ---
            // no need to start checking on last token or if no noun
            if (i == tokens.length - 1)
                continue;
            //TODO: nv still can be wrong if :np/:ns is present to it's not much gain for lots of work
            //TODO: turn the &pron check back on when we can handle pron
            if (PosTagHelper.hasPosTagPart(tokens[i], ":nv") || PosTagHelper.hasPosTagPart(tokens[i], "&pron") || PosTagHelper.hasPosTagPart(tokens[i], "<"))
                continue;
            // the next token must have a usable noun reading
            if (!PosTagHelper.hasPosTagPart(tokens[i + 1], "noun:") || PosTagHelper.hasPosTagPart(tokens[i + 1], ":nv") || PosTagHelper.hasPosTagPart(tokens[i + 1], "&pron") || PosTagHelper.hasPosTagPart(tokens[i + 1], "<"))
                continue;
            // hard-coded lemma exceptions
            if (LemmaHelper.hasLemma(tokens[i], Arrays.asList("червоний", "правий", "місцевий", "найсильніший", "найкращі"), ":p:") || LemmaHelper.hasLemma(tokens[i], Arrays.asList("новенький", "головний", "вибраний", "більший", "побачений", "подібний"), ":n:") || LemmaHelper.hasLemma(tokens[i], Arrays.asList("державний"), ":f:")) {
                adjTokenReadings.clear();
                // NOTE(review): this break leaves the token loop, so the rest of the sentence
                // is not checked at all - confirm that "continue" was not intended.
                break;
            }
            // accept the token only if every tagged reading is an adjective
            for (AnalyzedToken token : tokenReadings) {
                String adjPosTag = token.getPOSTag();
                if (adjPosTag == null) {
                    // can happen for words with a combining acute accent or a soft hyphen
                    continue;
                }
                if (adjPosTag.startsWith("adj")) {
                    adjTokenReadings.add(token);
                    adjAnalyzedTokenReadings = tokenReadings;
                } else {
                    adjTokenReadings.clear();
                    break;
                }
            }
            continue;
        }
        // --- phase 2: the previous token was an adjective; collect noun readings of this one ---
        List<AnalyzedToken> slaveTokenReadings = new ArrayList<>();
        for (AnalyzedToken token : tokenReadings) {
            String nounPosTag = token.getPOSTag();
            if (nounPosTag == null) {
                // can happen for words with a combining acute accent or a soft hyphen
                continue;
            }
            if (nounPosTag.startsWith("noun") && !nounPosTag.contains(NO_VIDMINOK_SUBSTR)) {
                slaveTokenReadings.add(token);
            } else if (nounPosTag.equals(JLanguageTool.SENTENCE_END_TAGNAME) || nounPosTag.equals(JLanguageTool.PARAGRAPH_END_TAGNAME)) {
                // sentence/paragraph end markers do not disqualify the token
                continue;
            } else {
                // any other reading disqualifies the token as a noun candidate
                slaveTokenReadings.clear();
                break;
            }
        }
        if (slaveTokenReadings.isEmpty()) {
            adjTokenReadings.clear();
            continue;
        }
        if (DEBUG) {
            System.err.println(MessageFormat.format("=== Checking:\n\t{0}\n\t{1}", adjTokenReadings, slaveTokenReadings));
        }
        // perform the check
        List<InflectionHelper.Inflection> masterInflections = InflectionHelper.getAdjInflections(adjTokenReadings);
        List<InflectionHelper.Inflection> slaveInflections = InflectionHelper.getNounInflections(slaveTokenReadings);
        if (Collections.disjoint(masterInflections, slaveInflections)) {
            if (TokenInflectionExceptionHelper.isException(tokens, i, masterInflections, slaveInflections, adjTokenReadings, slaveTokenReadings)) {
                adjTokenReadings.clear();
                continue;
            }
            if (DEBUG) {
                // the pattern needs both placeholders; with only {0} the noun-side argument was silently dropped
                System.err.println(MessageFormat.format("=== Found:\n\t{0}\n\t{1}", adjAnalyzedTokenReadings.getToken() + ": " + masterInflections + " // " + adjAnalyzedTokenReadings, slaveTokenReadings.get(0).getToken() + ": " + slaveInflections + " // " + slaveTokenReadings));
            }
            String msg = String.format("Потенційна помилка: прикметник не узгоджений з іменником: \"%s\": [%s] і \"%s\": [%s]", adjTokenReadings.get(0).getToken(), formatInflections(masterInflections, true), slaveTokenReadings.get(0).getToken(), formatInflections(slaveInflections, false));
            // extra hint when the adjective has a ":m:v_rod" reading and the noun ends in the
            // letters matched below while having a ":m:v_dav" reading
            if (PosTagHelper.hasPosTagPart(adjTokenReadings, ":m:v_rod") && tokens[i].getToken().matches(".*[ую]") && PosTagHelper.hasPosTag(slaveTokenReadings, "noun.*:m:v_dav.*")) {
                msg += ". Можливо вжито невнормований родовий відмінок ч.р. з закінченням -у/-ю замість -а/-я (така тенденція є в сучасній мові)?";
            }
            // the match spans from the start of the adjective to the end of the noun
            RuleMatch potentialRuleMatch = new RuleMatch(this, adjAnalyzedTokenReadings.getStartPos(), tokenReadings.getEndPos(), msg, getShort());
            Synthesizer ukrainianSynthesizer = ukrainian.getSynthesizer();
            // insertion-ordered, de-duplicated by the contains() checks below
            List<String> suggestions = new ArrayList<>();
            try {
                // direction 1: keep the adjective, re-inflect the noun to each adjective inflection
                for (Inflection adjInflection : masterInflections) {
                    String genderTag = ":" + adjInflection.gender + ":";
                    String vidmTag = adjInflection._case;
                    if (!adjInflection._case.equals("v_kly") && (adjInflection.gender.equals("p") || PosTagHelper.hasPosTagPart(slaveTokenReadings, genderTag))) {
                        for (AnalyzedToken nounToken : slaveTokenReadings) {
                            if (adjInflection.animMatters()) {
                                if (!nounToken.getPOSTag().contains(":" + adjInflection.animTag))
                                    continue;
                            }
                            String newNounPosTag = nounToken.getPOSTag().replaceFirst(":.:v_...", genderTag + vidmTag);
                            String[] synthesized = ukrainianSynthesizer.synthesize(nounToken, newNounPosTag, false);
                            for (String s : synthesized) {
                                String suggestion = adjAnalyzedTokenReadings.getToken() + " " + s;
                                if (!suggestions.contains(suggestion)) {
                                    suggestions.add(suggestion);
                                }
                            }
                        }
                    }
                }
                // direction 2: keep the noun, re-inflect the adjective to each noun inflection
                for (Inflection nounInflection : slaveInflections) {
                    String genderTag = ":" + nounInflection.gender + ":";
                    String vidmTag = nounInflection._case;
                    if (nounInflection.animMatters()) {
                        vidmTag += ":r" + nounInflection.animTag;
                    }
                    for (AnalyzedToken adjToken : adjTokenReadings) {
                        String newAdjTag = adjToken.getPOSTag().replaceFirst(":.:v_...(:r(in)?anim)?", genderTag + vidmTag);
                        String[] synthesized = ukrainianSynthesizer.synthesize(adjToken, newAdjTag, false);
                        for (String s : synthesized) {
                            String suggestion = s + " " + tokenReadings.getToken();
                            if (!suggestions.contains(suggestion)) {
                                suggestions.add(suggestion);
                            }
                        }
                    }
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            if (!suggestions.isEmpty()) {
                potentialRuleMatch.setSuggestedReplacements(suggestions);
            }
            ruleMatches.add(potentialRuleMatch);
        }
        // the pair has been handled either way; start looking for the next adjective
        adjTokenReadings.clear();
    }
    return toRuleMatchArray(ruleMatches);
}
Also used : ArrayList(java.util.ArrayList) Inflection(org.languagetool.rules.uk.InflectionHelper.Inflection) IOException(java.io.IOException) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) RuleMatch(org.languagetool.rules.RuleMatch) AnalyzedToken(org.languagetool.AnalyzedToken) Synthesizer(org.languagetool.synthesis.Synthesizer)

Example 94 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

the class UkrainianWordRepeatRule method createRuleMatch.

/**
 * Builds the match for a repeated word, with special handling for the pair
 * compared below (an upper-case letter followed by its lower-case form): the
 * message gets an extra hint and one more replacement is appended to the
 * suggestions produced by the superclass.
 *
 * @param prevToken the first token of the repeated pair
 * @param token the second token of the repeated pair
 * @param prevPos passed through to the superclass unchanged
 * @param pos passed through to the superclass unchanged
 * @param msg the base message for the match
 * @return the match created by the superclass, extended for the special pair
 */
@Override
protected RuleMatch createRuleMatch(String prevToken, String token, int prevPos, int pos, String msg) {
    // the common case: nothing special, delegate untouched
    if (!(prevToken.equals("І") && token.equals("і"))) {
        return super.createRuleMatch(prevToken, token, prevPos, pos, msg);
    }
    String hintedMsg = msg + " або, можливо, перша І має бути латинською.";
    RuleMatch result = super.createRuleMatch(prevToken, token, prevPos, pos, hintedMsg);
    // copy first: the list returned by the match is not modified in place
    List<String> extended = new ArrayList<>(result.getSuggestedReplacements());
    extended.add("I і");
    result.setSuggestedReplacements(extended);
    return result;
}
Also used : RuleMatch(org.languagetool.rules.RuleMatch)

Example 95 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

the class PunctuationCheckRuleTest method testRule.

/**
 * Runs PunctuationCheckRule over Ukrainian sample sentences: first those that
 * must produce no match, then those that must produce exactly one.
 *
 * @throws IOException declared by the sentence analysis and rule matching calls
 */
@Test
public void testRule() throws IOException {
    PunctuationCheckRule punctuationRule = new PunctuationCheckRule(TestTools.getEnglishMessages());
    JLanguageTool lt = new JLanguageTool(new Ukrainian());

    // ---- sentences that must not be flagged ----
    assertEquals(0, punctuationRule.match(lt.getAnalyzedSentence("Дві, коми. Ось: дві!!!")).length);
    assertEquals(0, punctuationRule.match(lt.getAnalyzedSentence("- Це ваша пряма мова?!!")).length);
    assertEquals(0, punctuationRule.match(lt.getAnalyzedSentence("Дві,- коми!..")).length);
    assertEquals(0, punctuationRule.match(lt.getAnalyzedSentence("Таке питання?..")).length);
    // a double space is ignored for now - not this rule's business :)
    assertEquals(0, punctuationRule.match(lt.getAnalyzedSentence("Два  пробіли.")).length);

    // ---- sentences that must be flagged exactly once ----
    // two dots: a single replacement, the single dot
    RuleMatch[] flagged = punctuationRule.match(lt.getAnalyzedSentence("Дві крапки.."));
    assertEquals(1, flagged.length);
    assertEquals(1, flagged[0].getSuggestedReplacements().size());
    assertEquals(".", flagged[0].getSuggestedReplacements().get(0));

    // doubled comma
    flagged = punctuationRule.match(lt.getAnalyzedSentence("Дві,, коми."));
    assertEquals(1, flagged.length);

    // comma attached to the wrong word
    flagged = punctuationRule.match(lt.getAnalyzedSentence("Не там ,кома."));
    assertEquals(1, flagged.length);

    // colon directly followed by a dash
    flagged = punctuationRule.match(lt.getAnalyzedSentence("Двокрапка:- з тире."));
    assertEquals(1, flagged.length);
}
Also used : Ukrainian(org.languagetool.language.Ukrainian) RuleMatch(org.languagetool.rules.RuleMatch) JLanguageTool(org.languagetool.JLanguageTool) Test(org.junit.Test)

Aggregations

RuleMatch (org.languagetool.rules.RuleMatch): 144, Test (org.junit.Test): 64, JLanguageTool (org.languagetool.JLanguageTool): 54, ArrayList (java.util.ArrayList): 30, AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 14, Rule (org.languagetool.rules.Rule): 14, Language (org.languagetool.Language): 10, PatternRule (org.languagetool.rules.patterns.PatternRule): 10, AnalyzedSentence (org.languagetool.AnalyzedSentence): 8, Ukrainian (org.languagetool.language.Ukrainian): 8, AbstractPatternRule (org.languagetool.rules.patterns.AbstractPatternRule): 8, Matcher (java.util.regex.Matcher): 7, English (org.languagetool.language.English): 7, IOException (java.io.IOException): 6, Catalan (org.languagetool.language.Catalan): 6, Polish (org.languagetool.language.Polish): 6, GermanyGerman (org.languagetool.language.GermanyGerman): 5, AnnotatedText (org.languagetool.markup.AnnotatedText): 5, PatternToken (org.languagetool.rules.patterns.PatternToken): 5, AnalyzedToken (org.languagetool.AnalyzedToken): 4