Search in sources:

Example 6 with Ukrainian

use of org.languagetool.language.Ukrainian in project languagetool by languagetool-org.

the class UkrainianWordRepeatRuleTest method setUp.

/**
 * Prepares the fixtures shared by the tests: a {@code JLanguageTool} built for
 * Ukrainian and the {@code UkrainianWordRepeatRule} under test, created from the
 * "uk" message bundle and the language reported by that tool instance.
 *
 * @throws IOException declared by the fixture setup
 */
@Before
public void setUp() throws IOException {
    // The tool must exist first: the rule is constructed with the tool's language.
    this.langTool = new JLanguageTool(new Ukrainian());
    this.rule = new UkrainianWordRepeatRule(
            TestTools.getMessages("uk"),
            this.langTool.getLanguage());
}
Also used : Ukrainian(org.languagetool.language.Ukrainian) JLanguageTool(org.languagetool.JLanguageTool) Before(org.junit.Before)

Example 7 with Ukrainian

use of org.languagetool.language.Ukrainian in project languagetool by languagetool-org.

the class UkrainianDisambiguationRuleTest method testChunker.

/**
 * Runs the multi-word chunker over a sample sentence and checks that the readings
 * of token 1 contain an opening {@code <adv>} marker and the readings of token 4
 * contain the closing {@code </adv>} marker.
 */
@Test
public void testChunker() throws Exception {
    final JLanguageTool languageTool = new JLanguageTool(new Ukrainian());
    final AnalyzedSentence chunked =
            chunker.disambiguate(languageTool.getAnalyzedSentence("Для  годиться."));
    final AnalyzedTokenReadings[] readings = chunked.getTokens();

    // Start of the chunk.
    final String openingReadings = readings[1].getReadings().toString();
    assertTrue(openingReadings.contains("<adv>"));

    // End of the chunk.
    final String closingReadings = readings[4].getReadings().toString();
    assertTrue(closingReadings.contains("</adv>"));
}
Also used : Ukrainian(org.languagetool.language.Ukrainian) AnalyzedSentence(org.languagetool.AnalyzedSentence) JLanguageTool(org.languagetool.JLanguageTool) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test) DisambiguationRuleTest(org.languagetool.tagging.disambiguation.rules.DisambiguationRuleTest)

Example 8 with Ukrainian

use of org.languagetool.language.Ukrainian in project languagetool by languagetool-org.

the class UkrainianDisambiguationRuleTest method setUp.

/**
 * Initializes the tagging and tokenizing fixtures used by the disambiguation tests:
 * the Ukrainian tagger and word tokenizer, an SRX sentence tokenizer for Ukrainian,
 * the hybrid and demo disambiguators, and a multi-word chunker that reads
 * {@code /uk/multiwords.txt}.
 */
@Before
public void setUp() {
    // Tagging and tokenizing.
    this.tagger = new UkrainianTagger();
    this.tokenizer = new UkrainianWordTokenizer();
    this.sentenceTokenizer = new SRXSentenceTokenizer(new Ukrainian());

    // Disambiguation.
    this.disambiguator = new UkrainianHybridDisambiguator();
    this.demoDisambiguator = new DemoDisambiguator();

    // Multi-word chunking.
    this.chunker = new MultiWordChunker("/uk/multiwords.txt", true);
}
Also used : DemoDisambiguator(org.languagetool.tagging.disambiguation.xx.DemoDisambiguator) Ukrainian(org.languagetool.language.Ukrainian) MultiWordChunker(org.languagetool.tagging.disambiguation.MultiWordChunker) UkrainianTagger(org.languagetool.tagging.uk.UkrainianTagger) UkrainianHybridDisambiguator(org.languagetool.tagging.disambiguation.uk.UkrainianHybridDisambiguator) UkrainianWordTokenizer(org.languagetool.tokenizers.uk.UkrainianWordTokenizer) SRXSentenceTokenizer(org.languagetool.tokenizers.SRXSentenceTokenizer) Before(org.junit.Before)

Example 9 with Ukrainian

use of org.languagetool.language.Ukrainian in project languagetool by languagetool-org.

the class MixedAlphabetsRuleTest method testRule.

/**
 * Checks {@code MixedAlphabetsRule} for Ukrainian: input written in a single script
 * (or digits only) yields no matches, while each sample that mixes Latin and
 * Cyrillic look-alike letters yields exactly one match carrying the expected
 * replacement.
 *
 * @throws IOException declared by the sentence analysis
 */
@Test
public void testRule() throws IOException {
    MixedAlphabetsRule alphabetRule = new MixedAlphabetsRule(TestTools.getMessages("uk"));
    JLanguageTool lt = new JLanguageTool(new Ukrainian());

    // Inputs that must not be flagged.
    assertEquals(0, alphabetRule.match(lt.getAnalyzedSentence("сміття")).length);
    assertEquals(0, alphabetRule.match(lt.getAnalyzedSentence("not mixed")).length);
    assertEquals(0, alphabetRule.match(lt.getAnalyzedSentence("123454")).length);

    // Inputs that must be flagged.

    // Latin i inside a Cyrillic word: one match, one suggestion.
    RuleMatch[] found = alphabetRule.match(lt.getAnalyzedSentence("смiття"));
    assertEquals(1, found.length);
    assertEquals(Arrays.asList("сміття"), found[0].getSuggestedReplacements());

    // Cyrillic i and x inside a Latin word.
    found = alphabetRule.match(lt.getAnalyzedSentence("mіхed"));
    assertEquals(1, found.length);
    assertEquals(Arrays.asList("mixed"), found[0].getSuggestedReplacements());

    // Accented Latin í used in place of the stressed Cyrillic і.
    found = alphabetRule.match(lt.getAnalyzedSentence("горíти"));
    assertEquals(1, found.length);
    assertEquals(Arrays.asList("горі́ти"), found[0].getSuggestedReplacements());

    // Stand-alone Latin i between Cyrillic words.
    found = alphabetRule.match(lt.getAnalyzedSentence("Чорного i Азовського"));
    assertEquals(1, found.length);
    assertEquals(Arrays.asList("і"), found[0].getSuggestedReplacements());

    // Latin X followed by Cyrillic І.
    found = alphabetRule.match(lt.getAnalyzedSentence("XІ"));
    assertEquals(1, found.length);
    assertEquals(Arrays.asList("XI"), found[0].getSuggestedReplacements());

    // Cyrillic X followed by Latin I.
    found = alphabetRule.match(lt.getAnalyzedSentence("ХI"));
    assertEquals(1, found.length);
    assertEquals(Arrays.asList("XI"), found[0].getSuggestedReplacements());

    // Both X and I in Cyrillic, standing for a Latin (Roman) number.
    found = alphabetRule.match(lt.getAnalyzedSentence("ХІ"));
    assertEquals(1, found.length);
    assertEquals(Arrays.asList("XI"), found[0].getSuggestedReplacements());

    // Cyrillic B: only the first suggestion is checked.
    found = alphabetRule.match(lt.getAnalyzedSentence("Щеплення від гепатиту В."));
    assertEquals(1, found.length);
    assertEquals("B", found[0].getSuggestedReplacements().get(0));

    // Cyrillic А: only the first suggestion is checked.
    found = alphabetRule.match(lt.getAnalyzedSentence("група А"));
    assertEquals(1, found.length);
    assertEquals("A", found[0].getSuggestedReplacements().get(0));

    // Cyrillic С after a degree sign: only the first suggestion is checked.
    found = alphabetRule.match(lt.getAnalyzedSentence("На 0,6°С."));
    assertEquals(1, found.length);
    assertEquals("0,6°C", found[0].getSuggestedReplacements().get(0));
}
Also used : Ukrainian(org.languagetool.language.Ukrainian) RuleMatch(org.languagetool.rules.RuleMatch) JLanguageTool(org.languagetool.JLanguageTool) Test(org.junit.Test)

Example 10 with Ukrainian

use of org.languagetool.language.Ukrainian in project languagetool by languagetool-org.

the class MorfologikUkrainianSpellerRuleTest method testMorfologikSpeller.

/**
 * Exercises {@code MorfologikUkrainianSpellerRule}: a set of inputs that must be
 * accepted (no matches) and a set that must be flagged (exactly one match), with
 * the first suggested replacement checked for selected cases.
 *
 * @throws IOException declared by the rule construction and sentence analysis
 */
@Test
public void testMorfologikSpeller() throws IOException {
    MorfologikUkrainianSpellerRule speller = new MorfologikUkrainianSpellerRule(TestTools.getMessages("uk"), new Ukrainian());
    JLanguageTool lt = new JLanguageTool(new Ukrainian());
    // Comparing lists rather than lengths makes a failure print the unexpected matches.
    ArrayList<RuleMatch> noMatches = new ArrayList<RuleMatch>();

    // Inputs that must be accepted.
    assertEquals(0, speller.match(lt.getAnalyzedSentence("До вас прийде заввідділу!")).length);
    assertEquals(0, speller.match(lt.getAnalyzedSentence(",")).length);
    assertEquals(0, speller.match(lt.getAnalyzedSentence("123454")).length);
    assertEquals(0, speller.match(lt.getAnalyzedSentence("До нас приїде The Beatles!")).length);

    // Words containing a soft hyphen.
    assertEquals(0, speller.match(lt.getAnalyzedSentence("піс­ні")).length);
    assertEquals(0, speller.match(lt.getAnalyzedSentence("піс­ні піс­ні")).length);

    // Word joined with a non-breaking hyphen.
    assertEquals(0, speller.match(lt.getAnalyzedSentence("ось‑ось")).length);

    // Inputs that must be flagged: exactly one match each.
    RuleMatch[] found = speller.match(lt.getAnalyzedSentence("атакуючий"));
    assertEquals(1, found.length);

    found = speller.match(lt.getAnalyzedSentence("шкляний"));
    assertEquals(1, found.length);
    assertEquals("скляний", found[0].getSuggestedReplacements().get(0));

    assertEquals(0, speller.match(lt.getAnalyzedSentence("а")).length);

    // Mixed alphabets: Latin 'i' inside a Cyrillic word.
    found = speller.match(lt.getAnalyzedSentence("прийдешнiй"));
    assertEquals(1, found.length);
    assertEquals("прийдешній", found[0].getSuggestedReplacements().get(0));

    // Vocative case of an inanimate noun.
    found = speller.match(lt.getAnalyzedSentence("душе"));
    assertEquals(1, found.length);

    // Colloquial infinitive.
    found = speller.match(lt.getAnalyzedSentence("писать"));
    assertEquals(1, found.length);

    // Compounding.
    assertEquals(0, speller.match(lt.getAnalyzedSentence("Жакет був синьо-жовтого кольору")).length);
    assertEquals(0, speller.match(lt.getAnalyzedSentence("Він багато сидів на інтернет-форумах")).length);
    assertEquals(1, speller.match(lt.getAnalyzedSentence("Він багато сидів на інтермет-форумах")).length);

    // Dynamic tagging.
    assertEquals(0, speller.match(lt.getAnalyzedSentence("екс-креветка")).length);
    assertEquals(1, speller.match(lt.getAnalyzedSentence("банд-формування.")).length);
    assertEquals(1, speller.match(lt.getAnalyzedSentence("учбово-виховного")).length);

    // Abbreviations.
    found = speller.match(lt.getAnalyzedSentence("Читання віршів Т.Г.Шевченко і Г.Тютюнника"));
    assertEquals(noMatches, Arrays.asList(found));

    found = speller.match(lt.getAnalyzedSentence("Читання віршів Т. Г. Шевченко і Г. Тютюнника"));
    assertEquals(noMatches, Arrays.asList(found));

    found = speller.match(lt.getAnalyzedSentence("Англі́йська мова (англ. English language, English) належить до германської групи"));
    assertEquals(noMatches, Arrays.asList(found));

    found = speller.match(lt.getAnalyzedSentence("Англі́йська мова (англ English language, English) належить до германської групи"));
    assertEquals(1, found.length);

    found = speller.match(lt.getAnalyzedSentence("100 тис. гривень"));
    assertEquals(noMatches, Arrays.asList(found));

    found = speller.match(lt.getAnalyzedSentence("100 кв. м"));
    assertEquals(noMatches, Arrays.asList(found));

    found = speller.match(lt.getAnalyzedSentence("100 км²"));
    assertEquals(noMatches, Arrays.asList(found));

    found = speller.match(lt.getAnalyzedSentence("100 кв м"));
    assertEquals(1, found.length);

    found = speller.match(lt.getAnalyzedSentence("2 раза"));
    assertEquals(1, found.length);

    found = speller.match(lt.getAnalyzedSentence("півтора раза"));
    assertEquals(0, found.length);
}
Also used : Ukrainian(org.languagetool.language.Ukrainian) RuleMatch(org.languagetool.rules.RuleMatch) JLanguageTool(org.languagetool.JLanguageTool) Test(org.junit.Test)

Aggregations

Ukrainian (org.languagetool.language.Ukrainian): 15; JLanguageTool (org.languagetool.JLanguageTool): 14; Test (org.junit.Test): 10; RuleMatch (org.languagetool.rules.RuleMatch): 8; Before (org.junit.Before): 5; AnalyzedSentence (org.languagetool.AnalyzedSentence): 2; AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 2; UppercaseSentenceStartRule (org.languagetool.rules.UppercaseSentenceStartRule): 1; MultiWordChunker (org.languagetool.tagging.disambiguation.MultiWordChunker): 1; DisambiguationRuleTest (org.languagetool.tagging.disambiguation.rules.DisambiguationRuleTest): 1; UkrainianHybridDisambiguator (org.languagetool.tagging.disambiguation.uk.UkrainianHybridDisambiguator): 1; DemoDisambiguator (org.languagetool.tagging.disambiguation.xx.DemoDisambiguator): 1; UkrainianTagger (org.languagetool.tagging.uk.UkrainianTagger): 1; SRXSentenceTokenizer (org.languagetool.tokenizers.SRXSentenceTokenizer): 1; UkrainianWordTokenizer (org.languagetool.tokenizers.uk.UkrainianWordTokenizer): 1