Search in sources :

Example 21 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class SpellingCheckRuleTest method testIgnorePhrases.

/**
 * Exercises {@code SpellingCheckRule.acceptPhrases}: after registering the
 * phrases "myfoo mybar" and "Myy othertest" on every active spelling rule (and
 * disabling every other rule), the assertions below pin how many matches
 * {@code check} reports for several spellings and casings of those phrases.
 */
@Test
public void testIgnorePhrases() throws IOException {
    final JLanguageTool lt = new JLanguageTool(new AmericanEnglish());
    // Baseline: nothing has been accepted yet, so two matches are expected.
    assertThat(lt.check("A test with myfoo mybar").size(), is(2));
    for (Rule activeRule : lt.getAllActiveRules()) {
        if (!(activeRule instanceof SpellingCheckRule)) {
            // Only spelling rules should contribute matches from here on.
            lt.disableRule(activeRule.getId());
            continue;
        }
        final SpellingCheckRule spellingRule = (SpellingCheckRule) activeRule;
        spellingRule.acceptPhrases(Arrays.asList("myfoo mybar", "Myy othertest"));
    }
    // The accepted phrase itself no longer yields matches.
    assertThat(lt.check("A test with myfoo mybar").size(), is(0));
    // the words on their own are not ignored
    assertThat(lt.check("A test with myfoo and mybar").size(), is(2));
    // Casing variants of the phrase that was registered in lowercase.
    assertThat(lt.check("myfoo mybar here").size(), is(0));
    assertThat(lt.check("Myfoo mybar here").size(), is(0));
    assertThat(lt.check("MYfoo mybar here").size(), is(2));
    // Casing variants of the phrase that was registered with a leading capital.
    assertThat(lt.check("Myy othertest is okay").size(), is(0));
    assertThat(lt.check("And Myy othertest is okay").size(), is(0));
    assertThat(lt.check("But Myy Othertest is not okay").size(), is(2));
    assertThat(lt.check("But myy othertest is not okay").size(), is(2));
}
Also used : SpellingCheckRule(org.languagetool.rules.spelling.SpellingCheckRule) JLanguageTool(org.languagetool.JLanguageTool) AmericanEnglish(org.languagetool.language.AmericanEnglish) Rule(org.languagetool.rules.Rule) Test(org.junit.Test)

Example 22 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class UppercaseSentenceStartRuleTest method testRule.

/**
 * Runs a {@code JLanguageTool} created for {@code English} (no rule is
 * disabled here) over short texts and checks the number of reported matches:
 * zero for the texts in the first group, exactly one for the texts in the
 * second group, all of which begin with a lowercase letter.
 */
@Test
public void testRule() throws IOException {
    final JLanguageTool languageTool = new JLanguageTool(new English());

    // Texts for which no match is expected.
    final String[] withoutMatch = {
        "In Nov. next year.",
        "www.languagetool.org is a website.",
        "Languagetool.org is a website."
    };
    for (String text : withoutMatch) {
        assertEquals(0, languageTool.check(text).size());
    }

    // Texts for which exactly one match is expected.
    final String[] withOneMatch = {
        "languagetool.org is a website.",
        "a sentence.",
        "a sentence!"
    };
    for (String text : withOneMatch) {
        assertEquals(1, languageTool.check(text).size());
    }
}
Also used : English(org.languagetool.language.English) JLanguageTool(org.languagetool.JLanguageTool) Test(org.junit.Test)

Example 23 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class EnglishChunkerTest method testContractions.

/**
 * Analyzes the sentence "I'll be there" and checks the chunk tags attached to
 * selected tokens: the tokens at index 1 and 5 carry a chunk tag, whereas the
 * two tokens that make up the contraction (index 2 and 3) carry none.
 */
@Test
public void testContractions() throws Exception {
    final JLanguageTool lt = new JLanguageTool(new English());
    final AnalyzedTokenReadings[] readings = lt.getAnalyzedSentence("I'll be there").getTokens();

    // Index 1 is presumably "I" (index 0 looks like a sentence-start token) - TODO confirm.
    final AnalyzedTokenReadings firstWord = readings[1];
    assertThat(firstWord.getChunkTags().get(0), is(new ChunkTag("B-NP-singular")));

    // "'" cannot be mapped as we tokenize differently
    final AnalyzedTokenReadings apostrophe = readings[2];
    assertThat(apostrophe.getChunkTags().size(), is(0));

    // "ll" cannot be mapped as we tokenize differently
    final AnalyzedTokenReadings contractedPart = readings[3];
    assertThat(contractedPart.getChunkTags().size(), is(0));

    // Index 5 is presumably "be" - TODO confirm.
    final AnalyzedTokenReadings verb = readings[5];
    assertThat(verb.getChunkTags().get(0), is(new ChunkTag("I-VP")));
}
Also used : English(org.languagetool.language.English) AnalyzedSentence(org.languagetool.AnalyzedSentence) JLanguageTool(org.languagetool.JLanguageTool) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Example 24 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class EnglishChunkerTest method testAddChunkTagsSingular.

/**
 * Feeds the tokens of the first analyzed sentence of
 * "The abacus shows how numbers can be stored" to
 * {@code EnglishChunker.addChunkTags} and checks the string form of the chunk
 * tags found afterwards at token positions 1, 3 and 9.
 */
@Test
public void testAddChunkTagsSingular() throws Exception {
    final EnglishChunker englishChunker = new EnglishChunker();
    final JLanguageTool languageTool = new JLanguageTool(new English());
    final AnalyzedSentence firstSentence =
        languageTool.analyzeText("The abacus shows how numbers can be stored").get(0);
    final List<AnalyzedTokenReadings> tokens = Arrays.asList(firstSentence.getTokens());

    englishChunker.addChunkTags(tokens);

    // "The abacus":
    final String tagsAtOne = tokens.get(1).getChunkTags().toString();
    assertThat(tagsAtOne, is("[B-NP-singular]"));
    final String tagsAtThree = tokens.get(3).getChunkTags().toString();
    assertThat(tagsAtThree, is("[E-NP-singular]"));

    // "numbers":
    final String tagsAtNine = tokens.get(9).getChunkTags().toString();
    assertThat(tagsAtNine, is("[B-NP-plural, E-NP-plural]"));
}
Also used : English(org.languagetool.language.English) AnalyzedSentence(org.languagetool.AnalyzedSentence) JLanguageTool(org.languagetool.JLanguageTool) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Example 25 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class AbstractEnglishSpellerRuleTest method testNonVariantSpecificSuggestions.

/**
 * Shared check for English speller rules that does not depend on a particular
 * language variant.
 *
 * <p>Stores a new {@code JLanguageTool} for {@code language} and the given
 * {@code rule} in the {@code lt} and {@code rule} fields, then runs a series of
 * {@code assertFirstMatch} calls on common misspellings and asserts that a
 * sentence made of correctly spelled contractions yields no match.
 *
 * <p>NOTE(review): {@code assertFirstMatch} is defined outside this excerpt;
 * presumably its first argument is the misspelled input and the remaining
 * arguments are the expected leading suggestions, in order - confirm against
 * the helper.
 *
 * @param rule the speller rule under test
 * @param language the language used to build the {@code JLanguageTool}
 * @throws IOException declared by this method; which call raises it is not visible here
 */
public void testNonVariantSpecificSuggestions(Rule rule, Language language) throws IOException {
    // Publish the tool and rule through fields (presumably read by assertFirstMatch - confirm).
    this.lt = new JLanguageTool(language);
    this.rule = rule;
    assertFirstMatch("teh", "the");
    // from http://waxy.org/2003/04/typo_popularity/:
    assertFirstMatch("transexual", "transsexual");
    //assertFirstMatch("didnt", "didn't"); - covered by ContractionSpellingRule
    //assertFirstMatch("doesnt", "doesn't"); - covered by ContractionSpellingRule
    assertFirstMatch("seperate", "separate");
    assertFirstMatch("definately", "definitely");
    assertFirstMatch("recieve", "receive");
    assertFirstMatch("offical", "official");
    assertFirstMatch("managment", "management");
    // NOTE(review): the input below ends with a trailing space - confirm whether that is intentional.
    assertFirstMatch("goverment ", "government");
    assertFirstMatch("commerical", "commercial");
    assertFirstMatch("Febuary", "February");
    assertFirstMatch("enviroment", "environment");
    assertFirstMatch("occurence", "occurrence");
    assertFirstMatch("commision", "commission");
    assertFirstMatch("assocation", "association");
    assertFirstMatch("Cincinatti", "Cincinnati");
    assertFirstMatch("milennium", "millennium");
    assertFirstMatch("accomodation", "accommodation");
    assertFirstMatch("foriegn", "foreign");
    assertFirstMatch("chemcial", "chemical");
    assertFirstMatch("developement", "development");
    assertFirstMatch("maintainance", "maintenance");
    assertFirstMatch("restaraunt", "restaurant");
    assertFirstMatch("garentee", "guarantee");
    assertFirstMatch("greatful", "grateful");
    assertFirstMatch("hipocrit", "hypocrite");
    assertFirstMatch("mischevious", "mischievous");
    assertFirstMatch("hygeine", "hygiene");
    // Two expected values here: the intended word "vehicle" is listed second, after "medical".
    assertFirstMatch("vehical", "medical", "vehicle");
    //assertFirstMatch("calender", "calendar");  // handled by grammar.xml
    // Correctly spelled contractions must not produce any match from the rule under test.
    assertEquals(0, rule.match(lt.getAnalyzedSentence("You couldn't; he didn't; it doesn't; they aren't; I hadn't; etc.")).length);
    // currently solved as a special case, also see https://github.com/morfologik/morfologik-stemming/issues/32:
    assertFirstMatch("alot", "a lot");
    // currently solved as a special case (AbstractEnglishSpellerRule.getAdditionalTopSuggestions()):
    assertFirstMatch("speach", "speech");
    // TODO: these are not very good, maybe caused by https://github.com/morfologik/morfologik-stemming/issues/30?
    assertFirstMatch("rythem", "them", "rather", "rhythm");
    assertFirstMatch("vacume", "value", "volume", "acute", "vacuum");
// TODO: add more test words from these lists of commonly misspelled words:
// http://grammar.yourdictionary.com/spelling-and-word-lists/misspelled.html
// https://en.wikipedia.org/wiki/Commonly_misspelled_English_words#cite_note-YD-4
}
Also used : JLanguageTool(org.languagetool.JLanguageTool)

Aggregations

JLanguageTool (org.languagetool.JLanguageTool): 184; Test (org.junit.Test): 109; RuleMatch (org.languagetool.rules.RuleMatch): 57; Before (org.junit.Before): 38; German (org.languagetool.language.German): 16; Rule (org.languagetool.rules.Rule): 16; Catalan (org.languagetool.language.Catalan): 14; Ukrainian (org.languagetool.language.Ukrainian): 14; English (org.languagetool.language.English): 13; Polish (org.languagetool.language.Polish): 12; Language (org.languagetool.Language): 10; GermanyGerman (org.languagetool.language.GermanyGerman): 9; PatternRule (org.languagetool.rules.patterns.PatternRule): 9; AnalyzedSentence (org.languagetool.AnalyzedSentence): 8; File (java.io.File): 7; AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 6; Dutch (org.languagetool.language.Dutch): 5; French (org.languagetool.language.French): 5; ArrayList (java.util.ArrayList): 4; FakeLanguage (org.languagetool.FakeLanguage): 4