use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
the class JLanguageToolTest method testPolish.
/**
 * Exercises JLanguageTool with Polish text in several configurations:
 * a Polish instance whose getPatternRules() returns an empty list, the default
 * Polish rule set, an optionally enabled rule (PL_WORD_REPEAT), unknown-word
 * listing, match column positions with and without
 * setSingleLineBreaksMarksParagraph(true), and finally the disambiguator log
 * exposed through AnalyzedSentence.getAnnotations().
 */
@Test
public void testPolish() throws IOException {
// Polish variant that reports no pattern rules; other rules still fire
// (see the word-repetition check on "To jest jest problem." below).
final Polish noXmlRulesPolish = new Polish() {
@Override
public List<AbstractPatternRule> getPatternRules() {
return Collections.emptyList();
}
};
final Polish polish = new Polish();
// Note: 'tool' is built on its own fresh Polish instance; the 'polish' instance above
// is only handed to a JLanguageTool later, after its sentence tokenizer is reconfigured.
JLanguageTool tool = new JLanguageTool(new Polish());
JLanguageTool noRulesTool = new JLanguageTool(noXmlRulesPolish);
assertEquals("[PL]", Arrays.toString(polish.getCountries()));
// a correct sentence yields no matches
List<RuleMatch> matches = noRulesTool.check("To jest całkowicie prawidłowe zdanie.");
assertEquals(0, matches.size());
// a doubled word ("jest jest") yields exactly one match even without pattern rules
matches = noRulesTool.check("To jest jest problem.");
assertEquals(1, matches.size());
// no error thanks to disambiguation
assertEquals(0, noRulesTool.check("Mają one niemałe znaczenie.").size());
assertEquals(0, noRulesTool.check("Często wystarczy obrócić na wspak wyroki świata, aby trafnie osądzić jakąś osobę.").size());
// with immunization
assertEquals(0, noRulesTool.check("A teraz każcie mi dać jaki bądź posiłek.").size());
assertEquals(0, noRulesTool.check("Kiedym wóz zobaczył, byłbym przysiągł, że wielka przygoda mnie czeka.").size());
// with antipatterns: "wymaluj" in "wypisz wymaluj" is immunized locally for punctuation mistakes,
// so it should get no match
assertEquals(0, noRulesTool.check("Jurek wygląda wypisz wymaluj babcia.").size());
// but it should get a match with word repetitions:
assertEquals(1, noRulesTool.check("Jurek wygląda wypisz wypisz wymaluj babcia.").size());
assertEquals(1, noRulesTool.check("Jurek wygląda wypisz wymaluj wymaluj babcia.").size());
// check for a weird unification bug:
assertEquals(0, noRulesTool.check("Zawarł w niej, oprócz swojej twórczości, wybrane epigramaty czterdziestu ośmiu innych greckich poetów i poetek.").size());
// from here on the checks use 'tool', i.e. the default Polish instance including its pattern rules
// now this should be immunized:
assertEquals(0, tool.check("Nudne brednie tak zamąciły głowę chłopu, że klął na czym ziemia stoi, zmuszonym będąc słuchać tego wszystkiego.").size());
// but this "chcąc, nie chcąc" is immunized only by an antipattern; one match is expected
assertEquals(1, tool.check("Chcąc, nie chcąc zjadłem pstrąga.").size());
// PL_WORD_REPEAT is off by default: no match before enabling it, one match afterwards
matches = tool.check("Był on bowiem pięknym strzelcem bowiem.");
assertEquals(0, matches.size());
tool.enableRule("PL_WORD_REPEAT");
matches = tool.check("Był on bowiem pięknym strzelcem bowiem.");
assertEquals(1, matches.size());
matches = tool.check("Premier drapie się w ucho co i rusz.");
assertEquals(1, matches.size());
// Polish rule has no effect with English error but will get spelling activated:
matches = tool.check("I can give you more a detailed description");
assertEquals(6, matches.size());
// with unknown-word listing switched on, the tool exposes the unknown words of the checked text
tool.setListUnknownWords(true);
matches = tool.check("This is not a Polish text.");
assertEquals(3, matches.size());
assertEquals("[., Polish, This, is, text]", tool.getUnknownWords().toString());
// check match column positions relative to sentence ends
matches = tool.check("To jest tekst.\nTest 1. To jest linia w której nie ma przecinka.");
assertEquals(17, matches.get(0).getColumn());
// with a space before the line break...
matches = tool.check("To jest tekst. \nTest 1. To jest linia w której nie ma przecinka.");
assertEquals(16, matches.get(0).getColumn());
// everything on a single line
matches = tool.check("To jest tekst. Test 1. To jest linia w której nie ma przecinka.");
assertEquals(32, matches.get(0).getColumn());
// recheck with the -b mode: single line breaks mark paragraphs; the tool is rebuilt on the
// reconfigured 'polish' instance, so PL_WORD_REPEAT and unknown-word listing set above do not carry over
polish.getSentenceTokenizer().setSingleLineBreaksMarksParagraph(true);
tool = new JLanguageTool(polish);
matches = tool.check("To jest tekst.\nTest 1. To jest linia w której nie ma przecinka.");
assertEquals(17, matches.get(0).getColumn());
// with a space before the line break...
matches = tool.check("To jest tekst. \nTest 1. To jest linia w której nie ma przecinka.");
assertEquals(17, matches.get(0).getColumn());
matches = tool.check("To jest tekst. To jest linia w której nie ma przecinka.");
assertEquals(24, matches.get(0).getColumn());
// finally, compare the full disambiguator log of one analyzed sentence against the expected text
AnalyzedSentence sent = tool.getAnalyzedSentence("Z powodu pogody dobre buty są wskazane.");
assertEquals("Disambiguator log: \n" + "\n" + "prep_verb[2]: Z[z/prep:acc:nwok*,z/prep:gen:nwok*,z/prep:inst:nwok*] -> Z[z/prep:gen:nwok*]\n" + "PREP_SUBST[1]: Z[z/prep:gen:nwok*] -> Z[z/prep:gen:nwok*]\n" + "PREP_SUBST_2[1]: Z[z/prep:gen:nwok*] -> Z[z/prep:gen:nwok*]\n" + "MULTIWORD_CHUNKER: Z[z/prep:gen:nwok*] -> Z[z/prep:gen:nwok*,Z powodu/<PREP:GEN>*]\n" + "\n" + "prep_verb[2]: powodu[powód/subst:sg:gen:m3] -> powodu[powód/subst:sg:gen:m3]\n" + "PREP_SUBST[1]: powodu[powód/subst:sg:gen:m3] -> powodu[powód/subst:sg:gen:m3]\n" + "PREP_SUBST_2[1]: powodu[powód/subst:sg:gen:m3] -> powodu[powód/subst:sg:gen:m3]\n" + "MULTIWORD_CHUNKER: powodu[powód/subst:sg:gen:m3] -> powodu[powód/subst:sg:gen:m3,Z powodu/</PREP:GEN>]\n" + "\n" + "PREP_SUBST[17]: pogody[pogoda/subst:pl:acc:f,pogoda/subst:pl:nom:f,pogoda/subst:pl:voc:f,pogoda/subst:sg:gen:f] -> pogody[pogoda/subst:sg:gen:f]\n" + "\n" + "dobry_adj[1]: dobre[dobre/subst:pl:acc:n2,dobre/subst:pl:nom:n2,dobre/subst:pl:voc:n2,dobre/subst:sg:acc:n2,dobre/subst:sg:nom:n2,dobre/subst:sg:voc:n2,dobry/adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos,dobry/adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos,dobry/adj:sg:acc:n1.n2:pos,dobry/adj:sg:nom.voc:n1.n2:pos,dobry/depr:pl:nom:m2,dobry/depr:pl:voc:m2,dobry/subst:pl:acc:m3,dobry/subst:pl:nom:m3,dobry/subst:pl:voc:m3] -> dobre[dobry/adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos,dobry/adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos]\n" + "unify_adj_subst[2]: dobre[dobry/adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos,dobry/adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos] -> dobre[dobry/adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos]\n" + "\n" + "dobry_adj[1]: buty[but/subst:pl:acc:m2,but/subst:pl:acc:m3,but/subst:pl:nom:m2,but/subst:pl:nom:m3,but/subst:pl:voc:m2,but/subst:pl:voc:m3,buta/subst:pl:acc:f,buta/subst:pl:nom:f,buta/subst:pl:voc:f,buta/subst:sg:gen:f] -> buty[but/subst:pl:acc:m2,but/subst:pl:acc:m3,but/subst:pl:nom:m2,but/subst:pl:nom:m3,but/subst:pl:voc:m2,but/subst:pl:voc:m3,buta/subst:pl:acc:f,buta/subst:pl:nom:f,buta/subst:pl:voc:f]\n" + 
"buty[1]: buty[but/subst:pl:acc:m2,but/subst:pl:acc:m3,but/subst:pl:nom:m2,but/subst:pl:nom:m3,but/subst:pl:voc:m2,but/subst:pl:voc:m3,buta/subst:pl:acc:f,buta/subst:pl:nom:f,buta/subst:pl:voc:f] -> buty[but/subst:pl:acc:m2,but/subst:pl:acc:m3,but/subst:pl:nom:m2,but/subst:pl:nom:m3,but/subst:pl:voc:m2,but/subst:pl:voc:m3]\n" + "nom_jest_nom[1]: buty[but/subst:pl:acc:m2,but/subst:pl:acc:m3,but/subst:pl:nom:m2,but/subst:pl:nom:m3,but/subst:pl:voc:m2,but/subst:pl:voc:m3] -> buty[but/subst:pl:nom:m2,but/subst:pl:nom:m3]\n" + "unify_adj_subst[2]: buty[but/subst:pl:nom:m2,but/subst:pl:nom:m3] -> buty[but/subst:pl:nom:m2,but/subst:pl:nom:m3]\n" + "SUBST_NOM_VOC_VERB[6]: buty[but/subst:pl:nom:m2,but/subst:pl:nom:m3] -> buty[but/subst:pl:nom:m2,but/subst:pl:nom:m3]\n" + "\n" + "ppas_jest[1]: są[być/verb:fin:pl:ter:imperf:nonrefl] -> są[być/verb:fin:pl:ter:imperf:nonrefl]\n" + "nom_jest_nom[1]: są[być/verb:fin:pl:ter:imperf:nonrefl] -> są[być/verb:fin:pl:ter:imperf:nonrefl]\n" + "SUBST_NOM_VOC_VERB[6]: są[być/verb:fin:pl:ter:imperf:nonrefl] -> są[być/verb:fin:pl:ter:imperf:nonrefl]\n" + "BYC_ADJ_ACC_NOM[1]: są[być/verb:fin:pl:ter:imperf:nonrefl] -> są[być/verb:fin:pl:ter:imperf:nonrefl]\n" + "\n" + "ppas_jest[1]: wskazane[wskazany/adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos,wskazany/adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos,wskazany/adj:sg:acc:n1.n2:pos,wskazany/adj:sg:nom.voc:n1.n2:pos,wskazać/ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:aff,wskazać/ppas:sg:nom.acc.voc:n1.n2:perf:aff] -> wskazane[wskazać/ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:aff]\n" + "nom_jest_nom[1]: wskazane[wskazać/ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:aff] -> wskazane[wskazać/ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:aff]\n" + "BYC_ADJ_ACC_NOM[1]: wskazane[wskazać/ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:aff] -> wskazane[wskazać/ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:aff]\n", sent.getAnnotations());
}
use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
the class DashRuleTest method check.
/**
 * Checks the text against the configured {@code rule} and verifies the number of
 * matches and, optionally, the suggestions of the single match.
 *
 * @param expectedErrors the number of expected errors
 * @param text the text to check
 * @param expSuggestions the expected suggestions, in order, or {@code null} to skip
 *        the suggestion check; may only be non-null when {@code expectedErrors} is 1
 * @throws IOException propagated from the analysis / rule calls
 * @throws RuntimeException if suggestions are given but {@code expectedErrors} is not 1
 */
public void check(int expectedErrors, String text, String[] expSuggestions) throws IOException {
  assertNotNull("Please initialize langTool!", langTool);
  assertNotNull("Please initialize 'rule'!", rule);
  RuleMatch[] ruleMatches = rule.match(langTool.getAnalyzedSentence(text));
  // Fixed: the message used to read "Expected 1errors, ..." because of a missing space.
  assertEquals("Expected " + expectedErrors + " errors, but got: " + Arrays.toString(ruleMatches), expectedErrors, ruleMatches.length);
  if (expSuggestions == null) {
    // Nothing more to verify when the caller did not supply expected suggestions.
    return;
  }
  if (expectedErrors != 1) {
    throw new RuntimeException("Sorry, test case can only check suggestion if there's one rule match");
  }
  RuleMatch ruleMatch = ruleMatches[0];
  String errorMessage = String.format("Got these suggestions: %s, expected %s ", ruleMatch.getSuggestedReplacements(), Arrays.toString(expSuggestions));
  assertEquals(errorMessage, expSuggestions.length, ruleMatch.getSuggestedReplacements().size());
  // Compare suggestions position by position; the message shows both full lists on failure.
  int i = 0;
  for (String suggestion : ruleMatch.getSuggestedReplacements()) {
    assertEquals(errorMessage, expSuggestions[i], suggestion);
    i++;
  }
}
use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
the class ToolsTest method testCheck.
/**
 * Runs the Polish JLanguageTool on a correct sentence (no matches expected) and on a
 * sentence with a doubled word (exactly one match, reported by WORD_REPEAT_RULE).
 */
@Test
public void testCheck() throws IOException, ParserConfigurationException, SAXException {
  JLanguageTool polishTool = new JLanguageTool(new Polish());

  // A correct sentence must not produce any match.
  assertEquals(0, polishTool.check("To jest całkowicie prawidłowe zdanie.").size());

  // "problem problem" must be flagged once, by the word-repetition rule.
  List<RuleMatch> repetitionMatches = polishTool.check("To jest problem problem.");
  assertEquals(1, repetitionMatches.size());
  RuleMatch onlyMatch = repetitionMatches.get(0);
  assertEquals("WORD_REPEAT_RULE", onlyMatch.getRule().getId());
}
use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
the class MorfologikMalayalamSpellerRuleTest method testMorfologikSpeller.
/**
 * Verifies MorfologikMalayalamSpellerRule on correct input (no matches) and on
 * misspelled input (one match each, with checked positions and suggestions).
 */
@Test
public void testMorfologikSpeller() throws IOException {
  final Malayalam language = new Malayalam();
  final MorfologikMalayalamSpellerRule rule = new MorfologikMalayalamSpellerRule(TestTools.getMessages("ml"), language);
  final JLanguageTool langTool = new JLanguageTool(language);
  // correct sentences:
  assertEquals(0, rule.match(langTool.getAnalyzedSentence("എന്തുകൊണ്ട് അംഗത്വം")).length);
  assertEquals(0, rule.match(langTool.getAnalyzedSentence("എങ്ങനെ അംഗമാകാം?")).length);
  // test for "LanguageTool":
  assertEquals(0, rule.match(langTool.getAnalyzedSentence("LanguageTool")).length);
  // punctuation and digits alone are not reported
  assertEquals(0, rule.match(langTool.getAnalyzedSentence(",")).length);
  assertEquals(0, rule.match(langTool.getAnalyzedSentence("123454")).length);
  // incorrect sentences:
  RuleMatch[] matches = rule.match(langTool.getAnalyzedSentence("Zolw"));
  // check match positions:
  assertEquals(1, matches.length);
  assertEquals(0, matches[0].getFromPos());
  assertEquals(4, matches[0].getToPos());
  // no suggestion is expected for this word (expected value first, as assertEquals requires)
  assertEquals(0, matches[0].getSuggestedReplacements().size());
  // a Latin letter inside a Malayalam word: one match over the whole word, first suggestion is the clean word
  matches = rule.match(langTool.getAnalyzedSentence("എaങ്ങനെ"));
  assertEquals(1, matches.length);
  assertEquals(0, matches[0].getFromPos());
  assertEquals(7, matches[0].getToPos());
  assertEquals("എങ്ങനെ", matches[0].getSuggestedReplacements().get(0));
  assertEquals(1, rule.match(langTool.getAnalyzedSentence("aõh")).length);
  assertEquals(1, rule.match(langTool.getAnalyzedSentence("a")).length);
}
use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
the class KhmerWordRepeatRule method match.
/**
 * Reports words that directly repeat the preceding non-whitespace token.
 * <p>
 * A token is reported when {@code isWord} accepts it, it equals the previous token
 * ignoring case, and {@code ignore} does not exclude it. Each match starts at the
 * previous token's start position and ends at the current token's start position plus
 * the previous token's length. It carries three suggestions: both words separated by
 * a space, the single word, and the single word followed by the repetition character.
 *
 * @param sentence the analyzed sentence to inspect
 * @return the matches found, converted via {@code toRuleMatchArray}
 */
@Override
public RuleMatch[] match(AnalyzedSentence sentence) {
  List<RuleMatch> found = new ArrayList<>();
  AnalyzedTokenReadings[] words = sentence.getTokensWithoutWhitespace();
  AnalyzedTokenReadings[] allTokens = sentence.getTokens();
  String previous = "";
  // index 0 holds SENT_START, so the scan begins at index 1
  for (int idx = 1; idx < words.length; idx++) {
    String current = words[idx].getToken();
    boolean repeated = isWord(current)
        && previous.equalsIgnoreCase(current)
        && !ignore(sentence, allTokens, idx);
    if (repeated) {
      int from = words[idx - 1].getStartPos();
      int currentStart = words[idx].getStartPos();
      int to = currentStart + previous.length();
      RuleMatch repetition = new RuleMatch(this, from, to,
          messages.getString("repetition"), messages.getString("desc_repetition_short"));
      List<String> suggestions = new ArrayList<>();
      // suggestion 1: keep both words, separated by a real space instead of a zero-width one
      suggestions.add(previous + " " + current);
      // suggestion 2: drop the repeated word
      suggestions.add(previous);
      // suggestion 3: drop the repeated word and append the repetition character
      suggestions.add(previous + "ៗ");
      repetition.setSuggestedReplacements(suggestions);
      found.add(repetition);
    }
    previous = current;
  }
  return toRuleMatchArray(found);
}
Aggregations