use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
the class HiddenCharacterRuleTest method testRule.
/**
 * Checks the mixed-alphabets rule against one clean word and one word that the rule
 * is expected to flag, and verifies the single suggested replacement.
 */
@Test
public void testRule() throws IOException {
  final MixedAlphabetsRule mixedRule = new MixedAlphabetsRule(TestTools.getMessages("uk"));
  final JLanguageTool lt = new JLanguageTool(new Ukrainian());

  // a correctly written word must not produce any match
  RuleMatch[] cleanResult = mixedRule.match(lt.getAnalyzedSentence("сміття"));
  assertEquals(0, cleanResult.length);

  // the look-alike spelling (presumably containing a Latin letter) must be reported once
  RuleMatch[] flagged = mixedRule.match(lt.getAnalyzedSentence("смiття"));
  assertEquals(1, flagged.length);

  // ... and the rule must propose the clean spelling as the only replacement
  RuleMatch onlyMatch = flagged[0];
  assertEquals(Arrays.asList("сміття"), onlyMatch.getSuggestedReplacements());
}
use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
the class MorfologikUkrainianSpellerRuleTest method testProhibitedSuggestions.
/**
 * Verifies which replacement suggestions the Ukrainian speller rule offers (or withholds)
 * for several misspelled compound words. Every input must yield exactly one match.
 */
@Test
public void testProhibitedSuggestions() throws IOException {
  MorfologikUkrainianSpellerRule speller =
      new MorfologikUkrainianSpellerRule(TestTools.getMessages("uk"), new Ukrainian());
  JLanguageTool lt = new JLanguageTool(new Ukrainian());

  RuleMatch[] found = speller.match(lt.getAnalyzedSentence("онлайннавчання"));
  assertEquals(1, found.length);
  // the suggestion check for this word is disabled:
  // assertEquals(Arrays.asList("онлайн-навчання"), match[0].getSuggestedReplacements());

  found = speller.match(lt.getAnalyzedSentence("авіабегемот"));
  assertEquals(1, found.length);
  RuleMatch first = found[0];
  assertTrue("Should be empty: " + first.getSuggestedReplacements().toString(),
      first.getSuggestedReplacements().isEmpty());

  found = speller.match(lt.getAnalyzedSentence("вело-маршрут"));
  assertEquals(1, found.length);
  first = found[0];
  assertEquals(Arrays.asList("веломаршрут"), first.getSuggestedReplacements());

  found = speller.match(lt.getAnalyzedSentence("відео-маршрут"));
  assertEquals(1, found.length);
  first = found[0];
  assertEquals(new ArrayList<String>(), first.getSuggestedReplacements());

  found = speller.match(lt.getAnalyzedSentence("вело-бегемот"));
  assertEquals(1, found.length);
  first = found[0];
  assertTrue("Unexpected suggestions: " + first.getSuggestedReplacements().toString(),
      first.getSuggestedReplacements().isEmpty());
}
use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
the class TokenInflectionAgreementRule method match.
/**
 * Looks for an adjective immediately followed by a noun whose inflections do not overlap
 * with the adjective's, and reports each such pair as a potential agreement error.
 *
 * <p>The scan is a two-state walk over the non-whitespace tokens: while
 * {@code adjTokenReadings} is empty the current token is examined as a candidate adjective;
 * once it is filled, the next token is examined as the noun and the pair is checked.
 * For every reported pair, replacement suggestions are synthesized in both directions
 * (noun adapted to the adjective, and adjective adapted to the noun).
 *
 * @param text the analyzed sentence to check
 * @return the matches found, possibly empty
 * @throws RuntimeException wrapping an {@link IOException} raised by the synthesizer
 */
@Override
public final RuleMatch[] match(AnalyzedSentence text) {
  List<RuleMatch> ruleMatches = new ArrayList<>();
  AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
  // adjective readings collected from the previous token; empty means "no pending adjective"
  List<AnalyzedToken> adjTokenReadings = new ArrayList<>();
  AnalyzedTokenReadings adjAnalyzedTokenReadings = null;

  // starts at index 1, skipping the first entry of the token array
  for (int i = 1; i < tokens.length; i++) {
    AnalyzedTokenReadings tokenReadings = tokens[i];
    String posTag0 = tokenReadings.getAnalyzedToken(0).getPOSTag();
    if (posTag0 == null) {
      // || posTag0.equals(JLanguageTool.SENTENCE_START_TAGNAME) ){
      adjTokenReadings.clear();
      continue;
    }

    if (adjTokenReadings.isEmpty()) {
      // ---- state 1: try to accept the current token as the adjective ----

      // no need to start checking on last token or if no noun
      if (i == tokens.length - 1) {
        continue;
      }
      //TODO: nv still can be wrong if :np/:ns is present to it's not much gain for lots of work
      if (PosTagHelper.hasPosTagPart(tokens[i], ":nv")
          //TODO: turn back on when we can handle pron
          || PosTagHelper.hasPosTagPart(tokens[i], "&pron")
          || PosTagHelper.hasPosTagPart(tokens[i], "<")) {
        continue;
      }
      // the following token must have a usable noun reading, otherwise there is nothing to compare
      if (!PosTagHelper.hasPosTagPart(tokens[i + 1], "noun:")
          || PosTagHelper.hasPosTagPart(tokens[i + 1], ":nv")
          || PosTagHelper.hasPosTagPart(tokens[i + 1], "&pron")
          || PosTagHelper.hasPosTagPart(tokens[i + 1], "<")) {
        continue;
      }
      // hard-coded lemma/tag combinations that are excluded from the check
      if (LemmaHelper.hasLemma(tokens[i], Arrays.asList("червоний", "правий", "місцевий", "найсильніший", "найкращі"), ":p:")
          || LemmaHelper.hasLemma(tokens[i], Arrays.asList("новенький", "головний", "вибраний", "більший", "побачений", "подібний"), ":n:")
          || LemmaHelper.hasLemma(tokens[i], Arrays.asList("державний"), ":f:")) {
        adjTokenReadings.clear();
        // NOTE(review): this 'break' leaves the outer token loop, so the rest of the sentence
        // is not checked at all; 'continue' may have been intended - confirm before changing.
        break;
      }
      // accept the token only if every tagged reading is an adjective
      for (AnalyzedToken token : tokenReadings) {
        String adjPosTag = token.getPOSTag();
        if (adjPosTag == null) {
          // readings without a POS tag are skipped rather than treated as a mismatch
          continue;
        }
        if (adjPosTag.startsWith("adj")) {
          adjTokenReadings.add(token);
          adjAnalyzedTokenReadings = tokenReadings;
        } else {
          adjTokenReadings.clear();
          break;
        }
      }
      continue;
    }

    // ---- state 2: an adjective is pending, the current token should be its noun ----
    List<AnalyzedToken> slaveTokenReadings = new ArrayList<>();
    for (AnalyzedToken token : tokenReadings) {
      String nounPosTag = token.getPOSTag();
      if (nounPosTag == null) {
        // readings without a POS tag are skipped rather than treated as a mismatch
        continue;
      }
      if (nounPosTag.startsWith("noun") && !nounPosTag.contains(NO_VIDMINOK_SUBSTR)) {
        slaveTokenReadings.add(token);
      } else if (nounPosTag.equals(JLanguageTool.SENTENCE_END_TAGNAME)
          || nounPosTag.equals(JLanguageTool.PARAGRAPH_END_TAGNAME)) {
        // sentence/paragraph end markers do not disqualify the token
        continue;
      } else {
        slaveTokenReadings.clear();
        break;
      }
    }
    if (slaveTokenReadings.isEmpty()) {
      adjTokenReadings.clear();
      continue;
    }

    if (DEBUG) {
      System.err.println(MessageFormat.format("=== Checking:\n\t{0}\n\t{1}", adjTokenReadings, slaveTokenReadings));
    }

    // perform the check
    List<InflectionHelper.Inflection> masterInflections = InflectionHelper.getAdjInflections(adjTokenReadings);
    List<InflectionHelper.Inflection> slaveInflections = InflectionHelper.getNounInflections(slaveTokenReadings);

    // no inflection shared by adjective and noun => potential agreement error
    if (Collections.disjoint(masterInflections, slaveInflections)) {
      if (TokenInflectionExceptionHelper.isException(tokens, i, masterInflections, slaveInflections, adjTokenReadings, slaveTokenReadings)) {
        adjTokenReadings.clear();
        continue;
      }

      if (DEBUG) {
        // both arguments need a placeholder; with only {0} the noun part was never printed
        System.err.println(MessageFormat.format("=== Found:\n\t{0}\n\t{1}",
            adjAnalyzedTokenReadings.getToken() + ": " + masterInflections + " // " + adjAnalyzedTokenReadings,
            slaveTokenReadings.get(0).getToken() + ": " + slaveInflections + " // " + slaveTokenReadings));
      }

      String msg = String.format("Потенційна помилка: прикметник не узгоджений з іменником: \"%s\": [%s] і \"%s\": [%s]",
          adjTokenReadings.get(0).getToken(), formatInflections(masterInflections, true),
          slaveTokenReadings.get(0).getToken(), formatInflections(slaveInflections, false));

      // extra hint: masculine genitive adjective + noun ending in -у/-ю that is tagged as masculine dative
      if (PosTagHelper.hasPosTagPart(adjTokenReadings, ":m:v_rod")
          && tokens[i].getToken().matches(".*[ую]")
          && PosTagHelper.hasPosTag(slaveTokenReadings, "noun.*:m:v_dav.*")) {
        msg += ". Можливо вжито невнормований родовий відмінок ч.р. з закінченням -у/-ю замість -а/-я (така тенденція є в сучасній мові)?";
      }

      // the match spans from the start of the adjective to the end of the noun
      RuleMatch potentialRuleMatch = new RuleMatch(this, adjAnalyzedTokenReadings.getStartPos(), tokenReadings.getEndPos(), msg, getShort());

      Synthesizer ukrainianSynthesizer = ukrainian.getSynthesizer();
      List<String> suggestions = new ArrayList<>();

      try {
        // direction 1: keep the adjective, re-inflect the noun to each adjective inflection
        for (Inflection adjInflection : masterInflections) {
          String genderTag = ":" + adjInflection.gender + ":";
          String vidmTag = adjInflection._case;

          if (!adjInflection._case.equals("v_kly")
              && (adjInflection.gender.equals("p") || PosTagHelper.hasPosTagPart(slaveTokenReadings, genderTag))) {
            for (AnalyzedToken nounToken : slaveTokenReadings) {
              if (adjInflection.animMatters()) {
                if (!nounToken.getPOSTag().contains(":" + adjInflection.animTag)) {
                  continue;
                }
              }
              String newNounPosTag = nounToken.getPOSTag().replaceFirst(":.:v_...", genderTag + vidmTag);
              String[] synthesized = ukrainianSynthesizer.synthesize(nounToken, newNounPosTag, false);
              for (String s : synthesized) {
                String suggestion = adjAnalyzedTokenReadings.getToken() + " " + s;
                if (!suggestions.contains(suggestion)) {
                  suggestions.add(suggestion);
                }
              }
            }
          }
        }

        // direction 2: keep the noun, re-inflect the adjective to each noun inflection
        for (Inflection nounInflection : slaveInflections) {
          String genderTag = ":" + nounInflection.gender + ":";
          String vidmTag = nounInflection._case;
          if (nounInflection.animMatters()) {
            vidmTag += ":r" + nounInflection.animTag;
          }
          for (AnalyzedToken adjToken : adjTokenReadings) {
            String newAdjTag = adjToken.getPOSTag().replaceFirst(":.:v_...(:r(in)?anim)?", genderTag + vidmTag);
            String[] synthesized = ukrainianSynthesizer.synthesize(adjToken, newAdjTag, false);
            for (String s : synthesized) {
              String suggestion = s + " " + tokenReadings.getToken();
              if (!suggestions.contains(suggestion)) {
                suggestions.add(suggestion);
              }
            }
          }
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }

      if (!suggestions.isEmpty()) {
        potentialRuleMatch.setSuggestedReplacements(suggestions);
      }
      ruleMatches.add(potentialRuleMatch);
    }

    // pair handled (match or not): go back to looking for the next adjective
    adjTokenReadings.clear();
  }

  return toRuleMatchArray(ruleMatches);
}
use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
the class UkrainianWordRepeatRule method createRuleMatch.
@Override
protected RuleMatch createRuleMatch(String prevToken, String token, int prevPos, int pos, String msg) {
boolean doubleI = prevToken.equals("І") && token.equals("і");
if (doubleI) {
msg += " або, можливо, перша І має бути латинською.";
}
RuleMatch ruleMatch = super.createRuleMatch(prevToken, token, prevPos, pos, msg);
if (doubleI) {
List<String> replacements = new ArrayList<>(ruleMatch.getSuggestedReplacements());
replacements.add("I і");
ruleMatch.setSuggestedReplacements(replacements);
}
return ruleMatch;
}
use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
the class PunctuationCheckRuleTest method testRule.
@Test
public void testRule() throws IOException {
PunctuationCheckRule rule = new PunctuationCheckRule(TestTools.getEnglishMessages());
RuleMatch[] matches;
JLanguageTool langTool = new JLanguageTool(new Ukrainian());
// correct sentences:
matches = rule.match(langTool.getAnalyzedSentence("Дві, коми. Ось: дві!!!"));
assertEquals(0, matches.length);
// correct sentences:
matches = rule.match(langTool.getAnalyzedSentence("- Це ваша пряма мова?!!"));
assertEquals(0, matches.length);
// correct sentences:
matches = rule.match(langTool.getAnalyzedSentence("Дві,- коми!.."));
assertEquals(0, matches.length);
// correct sentences:
matches = rule.match(langTool.getAnalyzedSentence("Таке питання?.."));
assertEquals(0, matches.length);
// correct sentences:
// поки що ігноруємо - не царська це справа :)
matches = rule.match(langTool.getAnalyzedSentence("Два пробіли."));
assertEquals(0, matches.length);
// incorrect sentences:
matches = rule.match(langTool.getAnalyzedSentence("Дві крапки.."));
assertEquals(1, matches.length);
assertEquals(1, matches[0].getSuggestedReplacements().size());
assertEquals(".", matches[0].getSuggestedReplacements().get(0));
// incorrect sentences:
matches = rule.match(langTool.getAnalyzedSentence("Дві,, коми."));
assertEquals(1, matches.length);
// incorrect sentences:
matches = rule.match(langTool.getAnalyzedSentence("Не там ,кома."));
assertEquals(1, matches.length);
// incorrect sentences:
matches = rule.match(langTool.getAnalyzedSentence("Двокрапка:- з тире."));
assertEquals(1, matches.length);
}
Aggregations