use of org.languagetool.language.Polish in project languagetool by languagetool-org.
the class MultipleWhitespaceRuleTest method testRule.
/**
 * Verifies that {@code MultipleWhitespaceRule}, configured for Polish, reports no match
 * for a normally spaced sentence and exactly one match for a sentence containing a run
 * of consecutive spaces.
 *
 * @throws IOException declared because the calls used below may throw it
 */
@Test
public void testRule() throws IOException {
  final MultipleWhitespaceRule rule = new MultipleWhitespaceRule(TestTools.getEnglishMessages(), new Polish());
  final JLanguageTool langTool = new JLanguageTool(new Polish());
  // Single spaces only: nothing to report.
  assertEquals(0, rule.match(langTool.getAnalyzedSentence("To jest test.")).length);
  // Two consecutive spaces between "jest" and "test": exactly one match expected.
  // NOTE(review): this literal was previously identical to the one above (so the test
  // could never pass); the repeated whitespace was presumably collapsed - confirm the
  // exact amount of whitespace against the upstream test.
  assertEquals(1, rule.match(langTool.getAnalyzedSentence("To jest  test.")).length);
}
use of org.languagetool.language.Polish in project languagetool by languagetool-org.
the class UppercaseSentenceStartRuleTest method testPolishSpecialCases.
/**
 * Runs the full Polish check over two texts and expects no matches for either:
 * a plain one-word sentence, and a bulleted list whose items start in lowercase.
 *
 * @throws IOException declared because {@code check} may throw it
 */
@Test
public void testPolishSpecialCases() throws IOException {
  final JLanguageTool polishTool = new JLanguageTool(new Polish());

  final String plainSentence = "Zdanie.";
  assertEquals(0, polishTool.check(plainSentence).size());

  // List items separated by blank lines, each beginning with a lowercase letter.
  final String bulletedList = "To jest lista punktowana:\n\npunkt pierwszy,\n\npunkt drugi,\n\npunkt trzeci.";
  assertEquals(0, polishTool.check(bulletedList).size());
}
use of org.languagetool.language.Polish in project languagetool by languagetool-org.
the class WordRepeatRuleTest method testRulePolish.
/**
 * Exercises {@code WordRepeatRule} with Polish input: sentences that must not be
 * flagged (including ones the inline comments call "immunized") and one sentence
 * with a repeated word that must yield exactly one match.
 *
 * @throws IOException declared because sentence analysis / rule matching may throw it
 */
@Test
public void testRulePolish() throws IOException {
  final Polish polish = new Polish();
  final WordRepeatRule rule = new WordRepeatRule(TestTools.getEnglishMessages(), polish);
  final JLanguageTool langTool = new JLanguageTool(polish);

  // correct sentence:
  final RuleMatch[] cleanMatches = rule.match(langTool.getAnalyzedSentence("To jest zdanie."));
  assertEquals(0, cleanMatches.length);

  // with immunized words:
  final String[] immunizedSentences = {
    "W w. XVI język jest jak kipiący kocioł.",
    "Co jeszcze było smutniejsze, to to, że im się jeść chciało potężnie.",
    "Tra ta ta!"
  };
  for (final String sentence : immunizedSentences) {
    assertEquals(0, rule.match(langTool.getAnalyzedSentence(sentence)).length);
  }

  // incorrect sentence ("jest" repeated):
  final RuleMatch[] repeatMatches = rule.match(langTool.getAnalyzedSentence("To jest jest zdanie."));
  assertEquals(1, repeatMatches.length);
}
use of org.languagetool.language.Polish in project languagetool by languagetool-org.
the class PolishDisambiguationRuleTest method setUp.
/**
 * Creates fresh fixture objects before each test: the Polish sentence tokenizer,
 * a word tokenizer, the Polish tagger, and a multi-word chunker loaded from
 * {@code /pl/multiwords.txt} that serves as the disambiguator.
 */
@Before
public void setUp() {
  // Tokenization: sentences first, then words.
  sentenceTokenizer = new SRXSentenceTokenizer(new Polish());
  tokenizer = new WordTokenizer();

  // Tagging and disambiguation.
  tagger = new PolishTagger();
  disambiguator = new MultiWordChunker("/pl/multiwords.txt");
}
use of org.languagetool.language.Polish in project languagetool by languagetool-org.
the class PolishWordTokenizerTest method testTokenize.
/**
 * Checks {@code PolishWordTokenizer} on whitespace, hyphens, m-dashes, hyphenated
 * compounds (without and with a tagger set) and number ranges.
 *
 * <p>Each case asserts the token count and the {@code toString()} form of the token
 * list. {@code List.toString()} joins elements with {@code ", "}, so a single-space
 * token shows up as {@code ",  ,"} (two spaces between the commas), while an empty
 * token shows up as {@code ", ,"}.
 */
@Test
public void testTokenize() {
  final PolishWordTokenizer wordTokenizer = new PolishWordTokenizer();

  // NOTE(review): six tokens require two whitespace characters between "jest" and
  // "test"; the input is reconstructed here with two plain spaces. The upstream test
  // may use a different whitespace character (e.g. a no-break space) - confirm.
  final List<String> tokens = wordTokenizer.tokenize("To jest  test");
  assertEquals(6, tokens.size());
  assertEquals("[To,  , jest,  ,  , test]", tokens.toString());

  final List<String> tokens2 = wordTokenizer.tokenize("To\rłamie");
  assertEquals(3, tokens2.size());
  assertEquals("[To, \r, łamie]", tokens2.toString());

  //hyphen with no whitespace
  final List<String> tokens3 = wordTokenizer.tokenize("A to jest-naprawdę-test!");
  assertEquals(6, tokens3.size());
  assertEquals("[A,  , to,  , jest-naprawdę-test, !]", tokens3.toString());

  //hyphen at the end of the word
  final List<String> tokens4 = wordTokenizer.tokenize("Niemiecko- i angielsko-polski");
  assertEquals(6, tokens4.size());
  assertEquals("[Niemiecko, -,  , i,  , angielsko-polski]", tokens4.toString());

  //hyphen probably instead of mdash
  final List<String> tokens5 = wordTokenizer.tokenize("Widzę krowę -i to dobrze!");
  assertEquals(11, tokens5.size());
  assertEquals("[Widzę,  , krowę,  , -, i,  , to,  , dobrze, !]", tokens5.toString());

  //mdash
  final List<String> tokens6 = wordTokenizer.tokenize("A to jest zdanie—rzeczywiście—z wtrąceniem.");
  assertEquals(14, tokens6.size());
  assertEquals("[A,  , to,  , jest,  , zdanie, —, rzeczywiście, —, z,  , wtrąceniem, .]", tokens6.toString());

  //compound words with hyphens
  final String compoundSentence = "To jest kobieta-wojownik w polsko-czeskim ubraniu, która wysłała dwa SMS-y.";
  List<String> compoundTokens = wordTokenizer.tokenize(compoundSentence);
  assertEquals(21, compoundTokens.size());
  assertEquals("[To,  , jest,  , kobieta-wojownik,  , w,  , polsko-czeskim,  , ubraniu, ,,  , "
      + "która,  , wysłała,  , dwa,  , SMS-y, .]", compoundTokens.toString());

  //now setup the tagger...
  Language pl = new Polish();
  wordTokenizer.setTagger(pl.getTagger());
  compoundTokens = wordTokenizer.tokenize(compoundSentence);
  //we should get 4 more tokens: two hyphen tokens and two for the split words
  assertEquals(25, compoundTokens.size());
  assertEquals("[To,  , jest,  , kobieta, -, wojownik,  , "
      + "w,  , polsko, -, czeskim,  , ubraniu, ,,  , "
      + "która,  , wysłała,  , dwa,  , SMS-y, .]", compoundTokens.toString());

  compoundTokens = wordTokenizer.tokenize("Miała osiemnaście-dwadzieścia lat.");
  assertEquals(8, compoundTokens.size());
  assertEquals("[Miała,  , osiemnaście, -, dwadzieścia,  , lat, .]", compoundTokens.toString());

  // now three-part adja-adja-adj...:
  compoundTokens = wordTokenizer.tokenize("Słownik polsko-niemiecko-indonezyjski");
  assertEquals(7, compoundTokens.size());
  assertEquals("[Słownik,  , polsko, -, niemiecko, -, indonezyjski]", compoundTokens.toString());

  // number ranges:
  compoundTokens = wordTokenizer.tokenize("Impreza odbędzie się w dniach 1-23 maja.");
  assertEquals(16, compoundTokens.size());
  assertEquals("[Impreza,  , odbędzie,  , się,  , w,  , dniach,  , 1, -, 23,  , maja, .]", compoundTokens.toString());

  // number range written with a double hyphen: the 18 expected tokens include one
  // between the two hyphens; the input has no character there, so it is kept as an
  // empty token, which renders as ", ," (single space).
  compoundTokens = wordTokenizer.tokenize("Impreza odbędzie się w dniach 1--23 maja.");
  assertEquals(18, compoundTokens.size());
  assertEquals("[Impreza,  , odbędzie,  , się,  , w,  , dniach,  , 1, -, , -, 23,  , maja, .]", compoundTokens.toString());
}
Aggregations