Search in sources :

Example 11 with Polish

use of org.languagetool.language.Polish in project languagetool by languagetool-org.

the class MultipleWhitespaceRuleTest method testRule.

/**
 * Checks {@link MultipleWhitespaceRule}, constructed for Polish, against two sentences:
 * one with ordinary spacing (no match expected) and one containing a run of several
 * spaces (exactly one match expected).
 */
@Test
public void testRule() throws IOException {
    MultipleWhitespaceRule whitespaceRule =
            new MultipleWhitespaceRule(TestTools.getEnglishMessages(), new Polish());
    JLanguageTool polishTool = new JLanguageTool(new Polish());

    // Ordinary single spacing: the rule must stay silent.
    int matchesForCleanText =
            whitespaceRule.match(polishTool.getAnalyzedSentence("To jest test.")).length;
    assertEquals(0, matchesForCleanText);

    // A run of spaces between two words: the rule must fire once.
    int matchesForSpacedText =
            whitespaceRule.match(polishTool.getAnalyzedSentence("To jest   test.")).length;
    assertEquals(1, matchesForSpacedText);
}
Also used : Polish(org.languagetool.language.Polish) JLanguageTool(org.languagetool.JLanguageTool) MultipleWhitespaceRule(org.languagetool.rules.MultipleWhitespaceRule) Test(org.junit.Test)

Example 12 with Polish

use of org.languagetool.language.Polish in project languagetool by languagetool-org.

the class UppercaseSentenceStartRuleTest method testPolishSpecialCases.

/**
 * Runs a full {@link JLanguageTool} check for Polish on a one-word sentence and on a
 * list whose items start in lower case, expecting no matches for either text.
 */
@Test
public void testPolishSpecialCases() throws IOException {
    JLanguageTool polishTool = new JLanguageTool(new Polish());

    String plainSentence = "Zdanie.";
    assertEquals(0, polishTool.check(plainSentence).size());

    // List items are separated by blank lines and begin with lower-case letters.
    String bulletedList =
            "To jest lista punktowana:\n\npunkt pierwszy,\n\npunkt drugi,\n\npunkt trzeci.";
    assertEquals(0, polishTool.check(bulletedList).size());
}
Also used : Polish(org.languagetool.language.Polish) JLanguageTool(org.languagetool.JLanguageTool) Test(org.junit.Test)

Example 13 with Polish

use of org.languagetool.language.Polish in project languagetool by languagetool-org.

the class WordRepeatRuleTest method testRulePolish.

/**
 * Exercises {@link WordRepeatRule} for Polish: a correct sentence and sentences with
 * immunized words must yield no match, while a sentence with a doubled word must yield
 * exactly one.
 */
@Test
public void testRulePolish() throws IOException {
    Polish language = new Polish();
    WordRepeatRule repeatRule = new WordRepeatRule(TestTools.getEnglishMessages(), language);
    JLanguageTool tool = new JLanguageTool(language);

    // correct sentence:
    RuleMatch[] matchesForCorrect = repeatRule.match(tool.getAnalyzedSentence("To jest zdanie."));
    assertEquals(0, matchesForCorrect.length);

    // with immunized words:
    String[] immunizedSentences = {
        "W w. XVI język jest jak kipiący kocioł.",
        "Co jeszcze było smutniejsze, to to, że im się jeść chciało potężnie.",
        "Tra ta ta!"
    };
    for (String sentence : immunizedSentences) {
        assertEquals(0, repeatRule.match(tool.getAnalyzedSentence(sentence)).length);
    }

    // incorrect sentence:
    RuleMatch[] matchesForRepeated = repeatRule.match(tool.getAnalyzedSentence("To jest jest zdanie."));
    assertEquals(1, matchesForRepeated.length);
}
Also used : Polish(org.languagetool.language.Polish) RuleMatch(org.languagetool.rules.RuleMatch) JLanguageTool(org.languagetool.JLanguageTool) WordRepeatRule(org.languagetool.rules.WordRepeatRule) Test(org.junit.Test)

Example 14 with Polish

use of org.languagetool.language.Polish in project languagetool by languagetool-org.

the class PolishDisambiguationRuleTest method setUp.

/**
 * Rebuilds the test fixture before each test: a {@code PolishTagger}, a
 * {@code WordTokenizer}, an {@code SRXSentenceTokenizer} for Polish, and a
 * {@code MultiWordChunker} constructed with the path "/pl/multiwords.txt".
 */
@Before
public void setUp() {
    // Tagging and tokenization components.
    this.tagger = new PolishTagger();
    this.tokenizer = new WordTokenizer();
    this.sentenceTokenizer = new SRXSentenceTokenizer(new Polish());

    // Disambiguation component.
    this.disambiguator = new MultiWordChunker("/pl/multiwords.txt");
}
Also used : PolishTagger(org.languagetool.tagging.pl.PolishTagger) Polish(org.languagetool.language.Polish) WordTokenizer(org.languagetool.tokenizers.WordTokenizer) SRXSentenceTokenizer(org.languagetool.tokenizers.SRXSentenceTokenizer) Before(org.junit.Before)

Example 15 with Polish

use of org.languagetool.language.Polish in project languagetool by languagetool-org.

the class PolishWordTokenizerTest method testTokenize.

/**
 * Verifies how {@code PolishWordTokenizer} splits text into tokens.
 *
 * <p>The first half runs the tokenizer without a tagger and covers whitespace, carriage
 * returns, hyphens in various positions, and m-dashes. The second half attaches the tagger
 * obtained from {@code Polish} and checks that hyphenated compounds and number ranges are
 * split further.
 *
 * <p>All size assertions use JUnit's {@code assertEquals(expected, actual)} argument order
 * so that a failure message reports the expected and actual values correctly.
 */
@Test
public void testTokenize() {
    final PolishWordTokenizer wordTokenizer = new PolishWordTokenizer();
    // consecutive spaces each become their own token
    final List<String> tokens = wordTokenizer.tokenize("To jest  test");
    assertEquals(6, tokens.size());
    assertEquals("[To,  , jest,  ,  , test]", tokens.toString());
    // carriage return is a separate token
    final List<String> tokens2 = wordTokenizer.tokenize("To\rłamie");
    assertEquals(3, tokens2.size());
    assertEquals("[To, \r, łamie]", tokens2.toString());
    //hyphen with no whitespace
    final List<String> tokens3 = wordTokenizer.tokenize("A to jest-naprawdę-test!");
    assertEquals(6, tokens3.size());
    assertEquals("[A,  , to,  , jest-naprawdę-test, !]", tokens3.toString());
    //hyphen at the end of the word
    final List<String> tokens4 = wordTokenizer.tokenize("Niemiecko- i angielsko-polski");
    assertEquals(6, tokens4.size());
    assertEquals("[Niemiecko, -,  , i,  , angielsko-polski]", tokens4.toString());
    //hyphen probably instead of mdash
    final List<String> tokens5 = wordTokenizer.tokenize("Widzę krowę -i to dobrze!");
    assertEquals(11, tokens5.size());
    assertEquals("[Widzę,  , krowę,  , -, i,  , to,  , dobrze, !]", tokens5.toString());
    //mdash
    final List<String> tokens6 = wordTokenizer.tokenize("A to jest zdanie—rzeczywiście—z wtrąceniem.");
    assertEquals(14, tokens6.size());
    assertEquals("[A,  , to,  , jest,  , zdanie, —, rzeczywiście, —, z,  , wtrąceniem, .]", tokens6.toString());
    //compound words with hyphens (no tagger set yet, so compounds stay in one piece)
    final String compoundSentence = "To jest kobieta-wojownik w polsko-czeskim ubraniu, która wysłała dwa SMS-y.";
    List<String> compoundTokens = wordTokenizer.tokenize(compoundSentence);
    assertEquals(21, compoundTokens.size());
    assertEquals("[To,  , jest,  , kobieta-wojownik,  , w,  , polsko-czeskim,  , ubraniu, ,,  , która,  , wysłała,  , dwa,  , SMS-y, .]", compoundTokens.toString());
    //now setup the tagger...
    Language pl = new Polish();
    wordTokenizer.setTagger(pl.getTagger());
    compoundTokens = wordTokenizer.tokenize(compoundSentence);
    //we should get 4 more tokens: two hyphen tokens and two for the split words
    assertEquals(25, compoundTokens.size());
    assertEquals("[To,  , jest,  , kobieta, -, wojownik,  , " + "w,  , polsko, -, czeskim,  , ubraniu, ,,  " + ", która,  , wysłała,  , dwa,  , SMS-y, .]", compoundTokens.toString());
    // hyphenated numerals:
    compoundTokens = wordTokenizer.tokenize("Miała osiemnaście-dwadzieścia lat.");
    assertEquals(8, compoundTokens.size());
    assertEquals("[Miała,  , osiemnaście, -, dwadzieścia,  , lat, .]", compoundTokens.toString());
    // now three-part adja-adja-adj...:
    compoundTokens = wordTokenizer.tokenize("Słownik polsko-niemiecko-indonezyjski");
    assertEquals(7, compoundTokens.size());
    assertEquals("[Słownik,  , polsko, -, niemiecko, -, indonezyjski]", compoundTokens.toString());
    // number ranges:
    compoundTokens = wordTokenizer.tokenize("Impreza odbędzie się w dniach 1-23 maja.");
    assertEquals(16, compoundTokens.size());
    assertEquals("[Impreza,  , odbędzie,  , się,  , w,  , dniach,  , 1, -, 23,  , maja, .]", compoundTokens.toString());
    // number ranges written with a double hyphen:
    compoundTokens = wordTokenizer.tokenize("Impreza odbędzie się w dniach 1--23 maja.");
    assertEquals(18, compoundTokens.size());
    assertEquals("[Impreza,  , odbędzie,  , się,  , w,  , dniach,  , 1, -, , -, 23,  , maja, .]", compoundTokens.toString());
}
Also used : Polish(org.languagetool.language.Polish) Language(org.languagetool.Language) Test(org.junit.Test)

Aggregations

Polish (org.languagetool.language.Polish): 17, Test (org.junit.Test): 13, JLanguageTool (org.languagetool.JLanguageTool): 12, RuleMatch (org.languagetool.rules.RuleMatch): 6, Before (org.junit.Before): 4, InputStream (java.io.InputStream): 1, Scanner (java.util.Scanner): 1, Language (org.languagetool.Language): 1, English (org.languagetool.language.English): 1, AbstractCompoundRuleTest (org.languagetool.rules.AbstractCompoundRuleTest): 1, MultipleWhitespaceRule (org.languagetool.rules.MultipleWhitespaceRule): 1, WordRepeatRule (org.languagetool.rules.WordRepeatRule): 1, AbstractPatternRule (org.languagetool.rules.patterns.AbstractPatternRule): 1, Match (org.languagetool.rules.patterns.Match): 1, MatchState (org.languagetool.rules.patterns.MatchState): 1, BitextPatternRule (org.languagetool.rules.patterns.bitext.BitextPatternRule): 1, FalseFriendsAsBitextLoader (org.languagetool.rules.patterns.bitext.FalseFriendsAsBitextLoader): 1, PolishTagger (org.languagetool.tagging.pl.PolishTagger): 1, SRXSentenceTokenizer (org.languagetool.tokenizers.SRXSentenceTokenizer): 1, WordTokenizer (org.languagetool.tokenizers.WordTokenizer): 1