Search in sources:

Example 71 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

The setUp method of the class TokenVerbAgreementRuleTest.

/**
 * Builds the fixture used by the tests: the rule under test, created with the
 * "uk" message bundle, and a {@code JLanguageTool} configured for Ukrainian.
 *
 * @throws IOException declared by the fixture; propagated to the test runner
 */
@Before
public void setUp() throws IOException {
    // The rule is created first, then the language tool.
    this.rule = new TokenVerbAgreementRule(TestTools.getMessages("uk"));

    Ukrainian ukrainian = new Ukrainian();
    this.langTool = new JLanguageTool(ukrainian);
}
Also used : Ukrainian(org.languagetool.language.Ukrainian) JLanguageTool(org.languagetool.JLanguageTool) Before(org.junit.Before)

Example 72 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

The setUp method of the class UkrainianWordRepeatRuleTest.

/**
 * Builds the fixture used by the tests: a {@code JLanguageTool} configured for
 * Ukrainian and the word-repeat rule under test, which receives the "uk"
 * message bundle and the language reported by that tool.
 *
 * @throws IOException declared by the fixture; propagated to the test runner
 */
@Before
public void setUp() throws IOException {
    Ukrainian ukrainian = new Ukrainian();
    this.langTool = new JLanguageTool(ukrainian);

    // The rule takes its language from the tool rather than from a second instance.
    this.rule = new UkrainianWordRepeatRule(
        TestTools.getMessages("uk"),
        this.langTool.getLanguage());
}
Also used : Ukrainian(org.languagetool.language.Ukrainian) JLanguageTool(org.languagetool.JLanguageTool) Before(org.junit.Before)

Example 73 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

The testChunker method of the class UkrainianDisambiguationRuleTest.

/**
 * Analyzes a short Ukrainian sentence, runs it through {@code chunker.disambiguate}
 * and checks that the readings of token 1 contain an opening {@code <adv>} marker
 * and the readings of token 4 contain the closing {@code </adv>} marker.
 *
 * @throws Exception declared by the test; any failure is reported by the runner
 */
@Test
public void testChunker() throws Exception {
    AnalyzedSentence rawSentence =
        new JLanguageTool(new Ukrainian()).getAnalyzedSentence("Для  годиться.");
    AnalyzedTokenReadings[] readings = chunker.disambiguate(rawSentence).getTokens();

    // Opening marker is expected on token 1.
    String openingReadings = readings[1].getReadings().toString();
    assertTrue(openingReadings.contains("<adv>"));

    // Closing marker is expected on token 4.
    String closingReadings = readings[4].getReadings().toString();
    assertTrue(closingReadings.contains("</adv>"));
}
Also used : Ukrainian(org.languagetool.language.Ukrainian) AnalyzedSentence(org.languagetool.AnalyzedSentence) JLanguageTool(org.languagetool.JLanguageTool) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test) DisambiguationRuleTest(org.languagetool.tagging.disambiguation.rules.DisambiguationRuleTest)

Example 74 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

The testRule method of the class MixedAlphabetsRuleTest.

/**
 * Checks {@code MixedAlphabetsRule} on inputs that mix look-alike Latin and
 * Cyrillic letters.
 *
 * <p>Three groups are exercised, in this order: inputs that must produce no
 * match; inputs that must produce exactly one match whose complete suggestion
 * list equals a single expected string; and inputs that must produce exactly
 * one match whose first suggestion equals the expected string.
 *
 * @throws IOException declared by the test; propagated to the test runner
 */
@Test
public void testRule() throws IOException {
    MixedAlphabetsRule mixedRule = new MixedAlphabetsRule(TestTools.getMessages("uk"));
    JLanguageTool lt = new JLanguageTool(new Ukrainian());

    // Correct inputs: no match expected.
    String[] cleanInputs = {"сміття", "not mixed", "123454"};
    for (String clean : cleanInputs) {
        assertEquals(0, mixedRule.match(lt.getAnalyzedSentence(clean)).length);
    }

    // Incorrect inputs, {input, sole expected suggestion}: the whole suggestion list is compared.
    String[][] wholeListCases = {
        // Latin i inside a Cyrillic word
        {"смiття", "сміття"},
        // Cyrillic і and х inside a Latin word
        {"mіхed", "mixed"},
        // Latin accented i in place of Cyrillic і with a stress mark
        {"горíти", "горі́ти"},
        // stand-alone Latin i between Cyrillic words
        {"Чорного i Азовського", "і"},
        // Latin X followed by Cyrillic І
        {"XІ", "XI"},
        // Cyrillic Х followed by Latin I
        {"ХI", "XI"},
        // both Х and І are Cyrillic, used for a Latin (Roman) numeral
        {"ХІ", "XI"},
    };
    for (String[] testCase : wholeListCases) {
        RuleMatch[] found = mixedRule.match(lt.getAnalyzedSentence(testCase[0]));
        assertEquals(1, found.length);
        assertEquals(Arrays.asList(testCase[1]), found[0].getSuggestedReplacements());
    }

    // Incorrect inputs, {input, expected first suggestion}: only the first suggestion is compared.
    String[][] firstSuggestionCases = {
        // Cyrillic В
        {"Щеплення від гепатиту В.", "B"},
        // Cyrillic А
        {"група А", "A"},
        // Cyrillic С
        {"На 0,6°С.", "0,6°C"},
    };
    for (String[] testCase : firstSuggestionCases) {
        RuleMatch[] found = mixedRule.match(lt.getAnalyzedSentence(testCase[0]));
        assertEquals(1, found.length);
        assertEquals(testCase[1], found[0].getSuggestedReplacements().get(0));
    }
}
Also used : Ukrainian(org.languagetool.language.Ukrainian) RuleMatch(org.languagetool.rules.RuleMatch) JLanguageTool(org.languagetool.JLanguageTool) Test(org.junit.Test)

Example 75 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

The testMorfologikSpeller method of the class MorfologikUkrainianSpellerRuleTest.

/**
 * Checks {@code MorfologikUkrainianSpellerRule} on correct and incorrect
 * Ukrainian input: plain sentences, words containing a soft hyphen or a
 * non-breaking hyphen, mixed-alphabet words, hyphenated compounds,
 * dynamically tagged words and abbreviations.
 *
 * <p>The soft hyphen (U+00AD) and non-breaking hyphen (U+2011) are written as
 * Unicode escapes so that they stay visible in the source and cannot be
 * silently dropped or normalized by an editor.
 *
 * @throws IOException declared by the test; propagated to the test runner
 */
@Test
public void testMorfologikSpeller() throws IOException {
    MorfologikUkrainianSpellerRule rule = new MorfologikUkrainianSpellerRule(TestTools.getMessages("uk"), new Ukrainian());
    JLanguageTool langTool = new JLanguageTool(new Ukrainian());

    // correct sentences:
    assertEquals(0, rule.match(langTool.getAnalyzedSentence("До вас прийде заввідділу!")).length);
    assertEquals(0, rule.match(langTool.getAnalyzedSentence(",")).length);
    assertEquals(0, rule.match(langTool.getAnalyzedSentence("123454")).length);
    assertEquals(0, rule.match(langTool.getAnalyzedSentence("До нас приїде The Beatles!")).length);
    // soft hyphen (U+00AD) inside a word
    assertEquals(0, rule.match(langTool.getAnalyzedSentence("піс\u00ADні")).length);
    assertEquals(0, rule.match(langTool.getAnalyzedSentence("піс\u00ADні піс\u00ADні")).length);
    // non-breaking hyphen (U+2011)
    assertEquals(0, rule.match(langTool.getAnalyzedSentence("ось\u2011ось")).length);

    // incorrect sentences:
    RuleMatch[] matches = rule.match(langTool.getAnalyzedSentence("атакуючий"));
    // exactly one match is expected (only the count is checked here)
    assertEquals(1, matches.length);
    matches = rule.match(langTool.getAnalyzedSentence("шкляний"));
    assertEquals(1, matches.length);
    assertEquals("скляний", matches[0].getSuggestedReplacements().get(0));
    assertEquals(0, rule.match(langTool.getAnalyzedSentence("а")).length);

    // mixed alphabets: Latin 'i' inside a Cyrillic word
    matches = rule.match(langTool.getAnalyzedSentence("прийдешнiй"));
    assertEquals(1, matches.length);
    assertEquals("прийдешній", matches[0].getSuggestedReplacements().get(0));

    // vocative case of an inanimate noun
    matches = rule.match(langTool.getAnalyzedSentence("душе"));
    assertEquals(1, matches.length);

    // colloquial infinitive
    matches = rule.match(langTool.getAnalyzedSentence("писать"));
    assertEquals(1, matches.length);

    // compounding
    assertEquals(0, rule.match(langTool.getAnalyzedSentence("Жакет був синьо-жовтого кольору")).length);
    assertEquals(0, rule.match(langTool.getAnalyzedSentence("Він багато сидів на інтернет-форумах")).length);
    assertEquals(1, rule.match(langTool.getAnalyzedSentence("Він багато сидів на інтермет-форумах")).length);

    // dynamic tagging
    assertEquals(0, rule.match(langTool.getAnalyzedSentence("екс-креветка")).length);
    assertEquals(1, rule.match(langTool.getAnalyzedSentence("банд-формування.")).length);
    assertEquals(1, rule.match(langTool.getAnalyzedSentence("учбово-виховного")).length);

    // abbreviations; comparing against an empty list makes a failure print the unexpected matches
    RuleMatch[] match = rule.match(langTool.getAnalyzedSentence("Читання віршів Т.Г.Шевченко і Г.Тютюнника"));
    assertEquals(new ArrayList<RuleMatch>(), Arrays.asList(match));
    match = rule.match(langTool.getAnalyzedSentence("Читання віршів Т. Г. Шевченко і Г. Тютюнника"));
    assertEquals(new ArrayList<RuleMatch>(), Arrays.asList(match));
    match = rule.match(langTool.getAnalyzedSentence("Англі́йська мова (англ. English language, English) належить до германської групи"));
    assertEquals(new ArrayList<RuleMatch>(), Arrays.asList(match));
    match = rule.match(langTool.getAnalyzedSentence("Англі́йська мова (англ English language, English) належить до германської групи"));
    assertEquals(1, match.length);
    match = rule.match(langTool.getAnalyzedSentence("100 тис. гривень"));
    assertEquals(new ArrayList<RuleMatch>(), Arrays.asList(match));
    match = rule.match(langTool.getAnalyzedSentence("100 кв. м"));
    assertEquals(new ArrayList<RuleMatch>(), Arrays.asList(match));
    match = rule.match(langTool.getAnalyzedSentence("100 км²"));
    assertEquals(new ArrayList<RuleMatch>(), Arrays.asList(match));
    match = rule.match(langTool.getAnalyzedSentence("100 кв м"));
    assertEquals(1, match.length);
    match = rule.match(langTool.getAnalyzedSentence("2 раза"));
    assertEquals(1, match.length);
    match = rule.match(langTool.getAnalyzedSentence("півтора раза"));
    assertEquals(0, match.length);
}
Also used : Ukrainian(org.languagetool.language.Ukrainian) RuleMatch(org.languagetool.rules.RuleMatch) JLanguageTool(org.languagetool.JLanguageTool) Test(org.junit.Test)

Aggregations

JLanguageTool (org.languagetool.JLanguageTool)184 Test (org.junit.Test)109 RuleMatch (org.languagetool.rules.RuleMatch)57 Before (org.junit.Before)38 German (org.languagetool.language.German)16 Rule (org.languagetool.rules.Rule)16 Catalan (org.languagetool.language.Catalan)14 Ukrainian (org.languagetool.language.Ukrainian)14 English (org.languagetool.language.English)13 Polish (org.languagetool.language.Polish)12 Language (org.languagetool.Language)10 GermanyGerman (org.languagetool.language.GermanyGerman)9 PatternRule (org.languagetool.rules.patterns.PatternRule)9 AnalyzedSentence (org.languagetool.AnalyzedSentence)8 File (java.io.File)7 AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings)6 Dutch (org.languagetool.language.Dutch)5 French (org.languagetool.language.French)5 ArrayList (java.util.ArrayList)4 FakeLanguage (org.languagetool.FakeLanguage)4