Use of org.languagetool.JLanguageTool in the project languagetool by languagetool-org.
Class SpellingCheckRuleTest, method testIgnorePhrases.
/**
 * Exercises phrase-level acceptance on the spelling rule: before any phrase is
 * accepted the sample text yields two matches; afterwards only the exact
 * accepted phrases (with the casing variants asserted below) are match-free.
 */
@Test
public void testIgnorePhrases() throws IOException {
  JLanguageTool tool = new JLanguageTool(new AmericanEnglish());
  // Baseline: both unknown words are reported while nothing has been accepted yet.
  assertThat(tool.check("A test with myfoo mybar").size(), is(2));

  // Keep only spelling rules active and register the accepted phrases on each of them.
  for (Rule activeRule : tool.getAllActiveRules()) {
    if (!(activeRule instanceof SpellingCheckRule)) {
      tool.disableRule(activeRule.getId());
      continue;
    }
    SpellingCheckRule spellingRule = (SpellingCheckRule) activeRule;
    spellingRule.acceptPhrases(Arrays.asList("myfoo mybar", "Myy othertest"));
  }

  // The accepted phrase as a whole no longer produces matches ...
  assertThat(tool.check("A test with myfoo mybar").size(), is(0));
  // ... but the words on their own are not ignored
  assertThat(tool.check("A test with myfoo and mybar").size(), is(2));

  // Casing variants of the lowercase phrase "myfoo mybar".
  assertThat(tool.check("myfoo mybar here").size(), is(0));
  assertThat(tool.check("Myfoo mybar here").size(), is(0));
  assertThat(tool.check("MYfoo mybar here").size(), is(2));

  // Casing variants of the capitalized phrase "Myy othertest".
  assertThat(tool.check("Myy othertest is okay").size(), is(0));
  assertThat(tool.check("And Myy othertest is okay").size(), is(0));
  assertThat(tool.check("But Myy Othertest is not okay").size(), is(2));
  assertThat(tool.check("But myy othertest is not okay").size(), is(2));
}
Use of org.languagetool.JLanguageTool in the project languagetool by languagetool-org.
Class UppercaseSentenceStartRuleTest, method testRule.
/**
 * Runs a full check on a handful of short texts and asserts how many matches
 * each one produces: none for the first group, exactly one for the second.
 */
@Test
public void testRule() throws IOException {
  JLanguageTool lt = new JLanguageTool(new English());

  // Texts expected to produce no match at all.
  String[] cleanTexts = {
    "In Nov. next year.",
    "www.languagetool.org is a website.",
    "Languagetool.org is a website."
  };
  for (String text : cleanTexts) {
    assertEquals(0, lt.check(text).size());
  }

  // Texts starting with a lowercase letter, each expected to produce exactly one match.
  String[] flaggedTexts = {
    "languagetool.org is a website.",
    "a sentence.",
    "a sentence!"
  };
  for (String text : flaggedTexts) {
    assertEquals(1, lt.check(text).size());
  }
}
Use of org.languagetool.JLanguageTool in the project languagetool by languagetool-org.
Class EnglishChunkerTest, method testContractions.
/**
 * Analyzes "I'll be there" and checks the chunk tags attached to selected
 * tokens, including the tokens of the contraction that receive no chunk tag.
 */
@Test
public void testContractions() throws Exception {
  JLanguageTool lt = new JLanguageTool(new English());
  AnalyzedTokenReadings[] readings = lt.getAnalyzedSentence("I'll be there").getTokens();

  ChunkTag nounPhraseStart = new ChunkTag("B-NP-singular");
  assertThat(readings[1].getChunkTags().get(0), is(nounPhraseStart));

  // "'" cannot be mapped as we tokenize differently
  assertThat(readings[2].getChunkTags().size(), is(0));
  // "ll" cannot be mapped as we tokenize differently
  assertThat(readings[3].getChunkTags().size(), is(0));

  ChunkTag insideVerbPhrase = new ChunkTag("I-VP");
  assertThat(readings[5].getChunkTags().get(0), is(insideVerbPhrase));
}
Use of org.languagetool.JLanguageTool in the project languagetool by languagetool-org.
Class EnglishChunkerTest, method testAddChunkTagsSingular.
/**
 * Analyzes a single sentence, passes its tokens to
 * {@code EnglishChunker.addChunkTags} and verifies the resulting chunk tags
 * of the singular noun phrase "The abacus" and of the plural noun "numbers".
 */
@Test
public void testAddChunkTagsSingular() throws Exception {
  JLanguageTool lt = new JLanguageTool(new English());
  List<AnalyzedSentence> analyzed = lt.analyzeText("The abacus shows how numbers can be stored");
  AnalyzedTokenReadings[] firstSentenceTokens = analyzed.get(0).getTokens();
  List<AnalyzedTokenReadings> tokens = Arrays.asList(firstSentenceTokens);

  EnglishChunker chunker = new EnglishChunker();
  chunker.addChunkTags(tokens);

  // "The abacus":
  String nounPhraseBegin = tokens.get(1).getChunkTags().toString();
  String nounPhraseEnd = tokens.get(3).getChunkTags().toString();
  assertThat(nounPhraseBegin, is("[B-NP-singular]"));
  assertThat(nounPhraseEnd, is("[E-NP-singular]"));

  // "numbers":
  String pluralNoun = tokens.get(9).getChunkTags().toString();
  assertThat(pluralNoun, is("[B-NP-plural, E-NP-plural]"));
}
Use of org.languagetool.JLanguageTool in the project languagetool by languagetool-org.
Class AbstractEnglishSpellerRuleTest, method testNonVariantSpecificSuggestions.
/**
 * Stores the given rule and a fresh {@code JLanguageTool} for the given
 * language in this test's fields, then runs a fixed list of
 * {@code assertFirstMatch} checks plus one check that a sentence full of
 * contractions produces no match from the rule.
 *
 * @param rule the rule under test; also used directly for the contraction check
 * @param language the language the {@code JLanguageTool} instance is created for
 * @throws IOException declared by this method; presumably propagated from the checks it runs
 */
public void testNonVariantSpecificSuggestions(Rule rule, Language language) throws IOException {
  this.rule = rule;
  this.lt = new JLanguageTool(language);

  // Each row is {input, expected suggestion}; rows are checked in the listed order.
  // Not listed here on purpose:
  //   "didnt" -> "didn't", "doesnt" -> "doesn't": covered by ContractionSpellingRule
  //   "calender" -> "calendar": handled by grammar.xml
  String[][] singleSuggestionCases = {
    {"teh", "the"},
    // from http://waxy.org/2003/04/typo_popularity/:
    {"transexual", "transsexual"},
    {"seperate", "separate"},
    {"definately", "definitely"},
    {"recieve", "receive"},
    {"offical", "official"},
    {"managment", "management"},
    // NOTE(review): the trailing space in this input is kept as found - confirm whether it is intentional
    {"goverment ", "government"},
    {"commerical", "commercial"},
    {"Febuary", "February"},
    {"enviroment", "environment"},
    {"occurence", "occurrence"},
    {"commision", "commission"},
    {"assocation", "association"},
    {"Cincinatti", "Cincinnati"},
    {"milennium", "millennium"},
    {"accomodation", "accommodation"},
    {"foriegn", "foreign"},
    {"chemcial", "chemical"},
    {"developement", "development"},
    {"maintainance", "maintenance"},
    {"restaraunt", "restaurant"},
    {"garentee", "guarantee"},
    {"greatful", "grateful"},
    {"hipocrit", "hypocrite"},
    {"mischevious", "mischievous"},
    {"hygeine", "hygiene"}
  };
  for (String[] testCase : singleSuggestionCases) {
    assertFirstMatch(testCase[0], testCase[1]);
  }
  assertFirstMatch("vehical", "medical", "vehicle");

  // Correctly spelled contractions must not be reported by the rule.
  String contractions = "You couldn't; he didn't; it doesn't; they aren't; I hadn't; etc.";
  assertEquals(0, rule.match(lt.getAnalyzedSentence(contractions)).length);

  // currently solved as a special case, also see https://github.com/morfologik/morfologik-stemming/issues/32:
  assertFirstMatch("alot", "a lot");
  // currently solved as a special case (AbstractEnglishSpellerRule.getAdditionalTopSuggestions()):
  assertFirstMatch("speach", "speech");

  // TODO: these are not very good, maybe caused by https://github.com/morfologik/morfologik-stemming/issues/30?
  assertFirstMatch("rythem", "them", "rather", "rhythm");
  assertFirstMatch("vacume", "value", "volume", "acute", "vacuum");

  // TODO:
  // http://grammar.yourdictionary.com/spelling-and-word-lists/misspelled.html
  // https://en.wikipedia.org/wiki/Commonly_misspelled_English_words#cite_note-YD-4
}
Aggregations