Use of org.languagetool.language.Ukrainian in the project languagetool by languagetool-org.
From the class UkrainianWordRepeatRuleTest, method setUp.
/**
 * Prepares the fixtures used by the tests: a {@link JLanguageTool} built for
 * Ukrainian and the {@link UkrainianWordRepeatRule} under test, created with the
 * "uk" messages and the language reported by that tool.
 *
 * @throws IOException declared by the signature; presumably raised while loading
 *     language resources (not visible from here, confirm against the callees)
 */
@Before
public void setUp() throws IOException {
  Ukrainian ukrainian = new Ukrainian();
  langTool = new JLanguageTool(ukrainian);

  // The rule receives the language as reported by the tool, not the local instance.
  rule = new UkrainianWordRepeatRule(
      TestTools.getMessages("uk"),
      langTool.getLanguage());
}
Use of org.languagetool.language.Ukrainian in the project languagetool by languagetool-org.
From the class UkrainianDisambiguationRuleTest, method testChunker.
/**
 * Runs the multiword chunker over an analyzed Ukrainian sentence and checks the
 * chunk markers it leaves on the tokens: the readings of token 1 must contain
 * {@code <adv>} and the readings of token 4 must contain {@code </adv>}.
 *
 * <p>NOTE(review): the markers presumably delimit the start and end of a
 * multiword adverb chunk. Confirm against the chunker's data file.
 *
 * @throws Exception if sentence analysis or disambiguation fails
 */
@Test
public void testChunker() throws Exception {
  JLanguageTool ukrainianTool = new JLanguageTool(new Ukrainian());
  AnalyzedSentence rawSentence = ukrainianTool.getAnalyzedSentence("Для годиться.");
  AnalyzedTokenReadings[] chunkedTokens = chunker.disambiguate(rawSentence).getTokens();

  // Opening marker is expected on token index 1.
  String openingReadings = chunkedTokens[1].getReadings().toString();
  assertTrue(openingReadings.contains("<adv>"));

  // Closing marker is expected on token index 4.
  String closingReadings = chunkedTokens[4].getReadings().toString();
  assertTrue(closingReadings.contains("</adv>"));
}
Use of org.languagetool.language.Ukrainian in the project languagetool by languagetool-org.
From the class UkrainianDisambiguationRuleTest, method setUp.
/**
 * Creates the Ukrainian text-analysis components shared by the tests in this
 * class: tagger, word tokenizer, SRX sentence tokenizer, the hybrid and demo
 * disambiguators, and a multiword chunker reading {@code /uk/multiwords.txt}.
 */
@Before
public void setUp() {
  // Tagging and word-level tokenization.
  tagger = new UkrainianTagger();
  tokenizer = new UkrainianWordTokenizer();

  // Sentence splitting is configured with a Ukrainian language instance.
  Ukrainian ukrainian = new Ukrainian();
  sentenceTokenizer = new SRXSentenceTokenizer(ukrainian);

  // Disambiguators.
  disambiguator = new UkrainianHybridDisambiguator();
  demoDisambiguator = new DemoDisambiguator();

  // NOTE(review): the meaning of the boolean flag is not visible here; confirm
  // against the MultiWordChunker constructor.
  chunker = new MultiWordChunker("/uk/multiwords.txt", true);
}
Use of org.languagetool.language.Ukrainian in the project languagetool by languagetool-org.
From the class MixedAlphabetsRuleTest, method testRule.
/**
 * Exercises {@link MixedAlphabetsRule} with Ukrainian messages.
 *
 * <p>First, inputs written in a single alphabet (or digits only) must produce
 * no matches. Then a series of inputs that mix look-alike Latin and Cyrillic
 * letters must each produce exactly one match whose suggestion is the expected
 * single-alphabet spelling.
 *
 * <p>WARNING: many string literals below contain homoglyphs (for example Latin
 * {@code i} versus Cyrillic {@code і}) that look identical in most fonts. Do not
 * retype them; copy them byte-for-byte.
 *
 * @throws IOException declared by the signature; presumably raised by sentence
 *     analysis or rule matching (confirm against the callees)
 */
@Test
public void testRule() throws IOException {
  final MixedAlphabetsRule mixedRule = new MixedAlphabetsRule(TestTools.getMessages("uk"));
  final JLanguageTool lt = new JLanguageTool(new Ukrainian());

  // Inputs that must not be flagged.
  final String[] cleanInputs = {"сміття", "not mixed", "123454"};
  for (String clean : cleanInputs) {
    assertEquals(0, mixedRule.match(lt.getAnalyzedSentence(clean)).length);
  }

  // Inputs that must be flagged, each with exactly one match.

  // Latin "i" inside a Cyrillic word.
  RuleMatch[] latinIInWord = mixedRule.match(lt.getAnalyzedSentence("смiття"));
  assertEquals(1, latinIInWord.length);
  assertEquals(Arrays.asList("сміття"), latinIInWord[0].getSuggestedReplacements());

  // Cyrillic "і" and "х" inside a Latin word.
  RuleMatch[] cyrillicInLatin = mixedRule.match(lt.getAnalyzedSentence("mіхed"));
  assertEquals(1, cyrillicInLatin.length);
  assertEquals(Arrays.asList("mixed"), cyrillicInLatin[0].getSuggestedReplacements());

  // A non-Cyrillic accented letter used where an accented Cyrillic "і" is meant
  // (the original comment called it an "umlaut").
  RuleMatch[] accentedLetter = mixedRule.match(lt.getAnalyzedSentence("горíти"));
  assertEquals(1, accentedLetter.length);
  assertEquals(Arrays.asList("горі́ти"), accentedLetter[0].getSuggestedReplacements());

  // Stand-alone Latin "i" between Cyrillic words.
  RuleMatch[] standaloneLatinI =
      mixedRule.match(lt.getAnalyzedSentence("Чорного i Азовського"));
  assertEquals(1, standaloneLatinI.length);
  assertEquals(Arrays.asList("і"), standaloneLatinI[0].getSuggestedReplacements());

  // Roman numeral: Latin "X" followed by Cyrillic "І".
  RuleMatch[] latinXCyrillicI = mixedRule.match(lt.getAnalyzedSentence("XІ"));
  assertEquals(1, latinXCyrillicI.length);
  assertEquals(Arrays.asList("XI"), latinXCyrillicI[0].getSuggestedReplacements());

  // Roman numeral: Cyrillic "Х" followed by Latin "I".
  RuleMatch[] cyrillicXLatinI = mixedRule.match(lt.getAnalyzedSentence("ХI"));
  assertEquals(1, cyrillicXLatinI.length);
  assertEquals(Arrays.asList("XI"), cyrillicXLatinI[0].getSuggestedReplacements());

  // Roman numeral written entirely with Cyrillic "Х" and "І".
  RuleMatch[] allCyrillicNumeral = mixedRule.match(lt.getAnalyzedSentence("ХІ"));
  assertEquals(1, allCyrillicNumeral.length);
  assertEquals(Arrays.asList("XI"), allCyrillicNumeral[0].getSuggestedReplacements());

  // Cyrillic "В" where Latin "B" is expected; only the first suggestion is checked.
  RuleMatch[] cyrillicB =
      mixedRule.match(lt.getAnalyzedSentence("Щеплення від гепатиту В."));
  assertEquals(1, cyrillicB.length);
  assertEquals("B", cyrillicB[0].getSuggestedReplacements().get(0));

  // Cyrillic "А" where Latin "A" is expected; only the first suggestion is checked.
  RuleMatch[] cyrillicA = mixedRule.match(lt.getAnalyzedSentence("група А"));
  assertEquals(1, cyrillicA.length);
  assertEquals("A", cyrillicA[0].getSuggestedReplacements().get(0));

  // Cyrillic "С" after a degree sign where Latin "C" is expected.
  RuleMatch[] cyrillicC = mixedRule.match(lt.getAnalyzedSentence("На 0,6°С."));
  assertEquals(1, cyrillicC.length);
  assertEquals("0,6°C", cyrillicC[0].getSuggestedReplacements().get(0));
}
Use of org.languagetool.language.Ukrainian in the project languagetool by languagetool-org.
From the class MorfologikUkrainianSpellerRuleTest, method testMorfologikSpeller.
/**
 * Exercises {@link MorfologikUkrainianSpellerRule} on correct and incorrect
 * Ukrainian input: plain words, punctuation and digits, embedded foreign text,
 * invisible and special hyphens, mixed alphabets, compounds, dynamically tagged
 * words and abbreviations.
 *
 * <p>Invisible or easily confused characters (soft hyphen, non-breaking hyphen,
 * combining acute accent) are written as Unicode escapes so that they cannot be
 * silently stripped or normalized by editors and tooling. Java translates
 * {@code \}{@code uXXXX} escapes before lexing, so each escaped literal denotes the
 * same string as the raw character would.
 *
 * @throws IOException declared by the signature; presumably raised by sentence
 *     analysis or rule matching (confirm against the callees)
 */
@Test
public void testMorfologikSpeller() throws IOException {
  MorfologikUkrainianSpellerRule rule = new MorfologikUkrainianSpellerRule(TestTools.getMessages("uk"), new Ukrainian());
  JLanguageTool langTool = new JLanguageTool(new Ukrainian());

  // correct sentences:
  assertEquals(0, rule.match(langTool.getAnalyzedSentence("До вас прийде заввідділу!")).length);
  assertEquals(0, rule.match(langTool.getAnalyzedSentence(",")).length);
  assertEquals(0, rule.match(langTool.getAnalyzedSentence("123454")).length);
  assertEquals(0, rule.match(langTool.getAnalyzedSentence("До нас приїде The Beatles!")).length);

  // soft hyphen (U+00AD) inside a word must be ignored by the speller
  assertEquals(0, rule.match(langTool.getAnalyzedSentence("піс\u00ADні")).length);
  assertEquals(0, rule.match(langTool.getAnalyzedSentence("піс\u00ADні піс\u00ADні")).length);

  // non-breaking hyphen (U+2011)
  assertEquals(0, rule.match(langTool.getAnalyzedSentence("ось\u2011ось")).length);

  // incorrect sentences: each must yield exactly one match
  RuleMatch[] matches = rule.match(langTool.getAnalyzedSentence("атакуючий"));
  assertEquals(1, matches.length);

  matches = rule.match(langTool.getAnalyzedSentence("шкляний"));
  assertEquals(1, matches.length);
  assertEquals("скляний", matches[0].getSuggestedReplacements().get(0));

  // a single-letter word is accepted
  assertEquals(0, rule.match(langTool.getAnalyzedSentence("а")).length);

  // mixed alphabets: Latin 'i' inside a Cyrillic word
  matches = rule.match(langTool.getAnalyzedSentence("прийдешнiй"));
  assertEquals(1, matches.length);
  assertEquals("прийдешній", matches[0].getSuggestedReplacements().get(0));

  // vocative case of an inanimate noun
  matches = rule.match(langTool.getAnalyzedSentence("душе"));
  assertEquals(1, matches.length);

  // colloquial infinitive
  matches = rule.match(langTool.getAnalyzedSentence("писать"));
  assertEquals(1, matches.length);

  // compounding
  assertEquals(0, rule.match(langTool.getAnalyzedSentence("Жакет був синьо-жовтого кольору")).length);
  assertEquals(0, rule.match(langTool.getAnalyzedSentence("Він багато сидів на інтернет-форумах")).length);
  assertEquals(1, rule.match(langTool.getAnalyzedSentence("Він багато сидів на інтермет-форумах")).length);

  // dynamic tagging
  assertEquals(0, rule.match(langTool.getAnalyzedSentence("екс-креветка")).length);
  assertEquals(1, rule.match(langTool.getAnalyzedSentence("банд-формування.")).length);
  assertEquals(1, rule.match(langTool.getAnalyzedSentence("учбово-виховного")).length);

  // abbreviations
  // Comparing against an empty list (rather than length 0) makes a failure
  // message print the unexpected matches.
  RuleMatch[] match = rule.match(langTool.getAnalyzedSentence("Читання віршів Т.Г.Шевченко і Г.Тютюнника"));
  assertEquals(new ArrayList<RuleMatch>(), Arrays.asList(match));

  match = rule.match(langTool.getAnalyzedSentence("Читання віршів Т. Г. Шевченко і Г. Тютюнника"));
  assertEquals(new ArrayList<RuleMatch>(), Arrays.asList(match));

  // "і\u0301" is Cyrillic і followed by a combining acute accent (stress mark)
  match = rule.match(langTool.getAnalyzedSentence("Англі\u0301йська мова (англ. English language, English) належить до германської групи"));
  assertEquals(new ArrayList<RuleMatch>(), Arrays.asList(match));

  // same sentence, but "англ" without the trailing dot must be flagged
  match = rule.match(langTool.getAnalyzedSentence("Англі\u0301йська мова (англ English language, English) належить до германської групи"));
  assertEquals(1, match.length);

  match = rule.match(langTool.getAnalyzedSentence("100 тис. гривень"));
  assertEquals(new ArrayList<RuleMatch>(), Arrays.asList(match));

  match = rule.match(langTool.getAnalyzedSentence("100 кв. м"));
  assertEquals(new ArrayList<RuleMatch>(), Arrays.asList(match));

  match = rule.match(langTool.getAnalyzedSentence("100 км²"));
  assertEquals(new ArrayList<RuleMatch>(), Arrays.asList(match));

  // "кв" without the dot must be flagged
  match = rule.match(langTool.getAnalyzedSentence("100 кв м"));
  assertEquals(1, match.length);

  match = rule.match(langTool.getAnalyzedSentence("2 раза"));
  assertEquals(1, match.length);

  match = rule.match(langTool.getAnalyzedSentence("півтора раза"));
  assertEquals(0, match.length);
}
Aggregations