use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
the class MorfologikPolishSpellerRuleTest method testMorfologikSpeller.
/**
 * Exercises the Polish Morfologik speller rule: inputs that must yield no match,
 * misspellings that must be reported, and the suggestion list offered for "Clarkem".
 */
@Test
public void testMorfologikSpeller() throws IOException {
  MorfologikPolishSpellerRule speller =
      new MorfologikPolishSpellerRule(TestTools.getMessages("pl"), new Polish());
  JLanguageTool lt = new JLanguageTool(new Polish());

  // Inputs for which the rule must stay silent.
  String[] cleanInputs = {
    "To jest test bez jakiegokolwiek błędu.",
    "Żółw na starość wydziela dziwną woń.",
    "Żółw na starość wydziela dziwną woń numer 1234.",
    "W MI-6 pracuje 15-letni agent.",
    // the word "LanguageTool" itself
    "LanguageTool jest świetny!",
    // ignored uppercase word "Gdym"
    "Gdym to zobaczył, zdębiałem.",
    ",",
    "123454",
    // compound word whose first part "techniczno" is ignored
    "Bogactwo nie rośnie proporcjonalnie do jej rozwoju techniczno-terytorialnego.",
    // compound words built with one of the compound prefixes
    "Antypostmodernistyczna batalia hiperfilozofów",
    // compound words "trzynastobitowy" and "zgniłożółty"
    "Trzynastobitowe przystawki w kolorze zgniłożółtym"
  };
  for (String input : cleanInputs) {
    assertEquals(0, speller.match(lt.getAnalyzedSentence(input)).length);
  }

  // A misspelling: verify the reported span and the top suggestion.
  RuleMatch[] zolwMatches = speller.match(lt.getAnalyzedSentence("Zolw"));
  assertEquals(1, zolwMatches.length);
  RuleMatch zolw = zolwMatches[0];
  assertEquals(0, zolw.getFromPos());
  assertEquals(4, zolw.getToPos());
  assertEquals("Żółw", zolw.getSuggestedReplacements().get(0));

  assertEquals(1, speller.match(lt.getAnalyzedSentence("aõh")).length);

  // Words are tokenized on the prefixes niby- and quasi-.
  assertEquals(0, speller.match(lt.getAnalyzedSentence("Niby-artysta spotkał się z quasi-opiekunem i niby-Francuzem.")).length);

  // The suggestions for "Clarkem" must be exactly these, in this order.
  RuleMatch[] prunedMatches = speller.match(lt.getAnalyzedSentence("Clarkem"));
  assertEquals(1, prunedMatches.length);
  String[] expectedSuggestions = {"Clarke", "Clarkiem", "Ciarkę", "Clarkom", "Czarkę"};
  assertEquals(expectedSuggestions.length, prunedMatches[0].getSuggestedReplacements().size());
  for (int i = 0; i < expectedSuggestions.length; i++) {
    assertEquals(expectedSuggestions[i], prunedMatches[0].getSuggestedReplacements().get(i));
  }

  // "pre" is not an accepted prefix here, so the word must be reported.
  assertEquals(1, speller.match(lt.getAnalyzedSentence("premoc")).length);
  // Digit "0" typed in place of the letter "o".
  assertEquals(1, speller.match(lt.getAnalyzedSentence("dziwneg0")).length);
}
use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
the class PolishWordRepeatRuleTest method testRule.
/**
 * Checks how many matches PolishWordRepeatRule reports for sentences without
 * and with reportable word repetitions.
 */
@Test
public void testRule() throws IOException {
  PolishWordRepeatRule repeatRule = new PolishWordRepeatRule(TestTools.getEnglishMessages());
  JLanguageTool lt = new JLanguageTool(new Polish());

  // Sentences that must not be reported.
  assertEquals(0, repeatRule.match(lt.getAnalyzedSentence("To jest zdanie próbne.")).length);
  assertEquals(0, repeatRule.match(lt.getAnalyzedSentence("On tak się bardzo nie martwił, bo przecież musiał się umyć.")).length);
  // Repeated prepositions are not counted.
  assertEquals(0, repeatRule.match(lt.getAnalyzedSentence("Na dyskotece tańczył jeszcze, choć był na bani.")).length);
  // Case taken from an sf bug report.
  assertEquals(0, repeatRule.match(lt.getAnalyzedSentence("Żadnych „ale”.")).length);

  // Sentences with repetitions that must be reported.
  assertEquals(1, repeatRule.match(lt.getAnalyzedSentence("Był on bowiem pięknym strzelcem bowiem.")).length);
  assertEquals(2, repeatRule.match(lt.getAnalyzedSentence("Mówiła długo, żeby tylko mówić długo.")).length);
}
use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
the class MorfologikSpanishSpellerRuleTest method testMorfologikSpeller.
/**
 * Verifies that the Spanish Morfologik speller rule accepts a correct text and
 * reports exactly one match, with the expected span and first suggestion, for a
 * sentence containing "ortograficos".
 */
@Test
public void testMorfologikSpeller() throws IOException {
  Spanish spanish = new Spanish();
  MorfologikSpanishSpellerRule speller =
      new MorfologikSpanishSpellerRule(TestTools.getMessages("en"), spanish);
  JLanguageTool lt = new JLanguageTool(spanish);

  // Correct text: nothing may be reported.
  assertEquals(0, speller.match(lt.getAnalyzedSentence("Escriba un texto aquí. LanguageTool le ayudará a afrontar algunas dificultades propias de la escritura.")).length);

  // Text with one match expected at characters 59..71.
  RuleMatch[] found = speller.match(lt.getAnalyzedSentence("Se a hecho un esfuerzo para detectar errores tipográficos, ortograficos y incluso gramaticales."));
  assertEquals(1, found.length);
  RuleMatch typo = found[0];
  assertEquals(59, typo.getFromPos());
  assertEquals(71, typo.getToPos());
  assertEquals("ortográficos", typo.getSuggestedReplacements().get(0));
}
use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
the class FalseFriendRuleTest method testHintsForGermanSpeakers.
/**
 * Checks the false-friend matches produced for English text when the tool is
 * configured with English and German (see the JLanguageTool constructor call below).
 *
 * <p>Each {@code assertErrors} call passes the expected match count for the sentence;
 * for two of the sentences the suggested replacements are verified as well.
 */
@Test
public void testHintsForGermanSpeakers() throws IOException, ParserConfigurationException, SAXException {
  JLanguageTool langTool = new JLanguageTool(new English(), new German());
  List<RuleMatch> matches = assertErrors(1, "We will berate you.", langTool);
  // Expected value goes first so that a failure message labels the values correctly.
  assertEquals("[provide advice, give advice]", matches.get(0).getSuggestedReplacements().toString());
  assertErrors(0, "We will give you advice.", langTool);
  assertErrors(1, "I go to high school in Foocity.", langTool);
  List<RuleMatch> matches2 = assertErrors(1, "The chef", langTool);
  assertEquals("[boss, chief]", matches2.get(0).getSuggestedReplacements().toString());
}
use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
the class PerformanceTest2 method run.
/**
 * Repeatedly checks pseudo-randomly chosen slices of a text file and prints the
 * time taken by each run plus the average over the measured runs.
 *
 * <p>The first {@code SKIP} runs are reported but excluded from the total, so the
 * average is taken over the remaining runs only. The random generator uses a fixed
 * seed, so the same slices are selected on every invocation.
 *
 * @param languageCode short language code used to look up the language to check with
 * @param textFile file whose content (read as UTF-8) is sliced and checked
 * @throws IOException if the file cannot be read or a check fails with an I/O error
 */
private void run(String languageCode, File textFile) throws IOException {
  String text;
  // try-with-resources guarantees the file handle is released even if reading fails.
  try (FileInputStream in = new FileInputStream(textFile)) {
    text = StringTools.readStream(in, "utf-8");
  }
  System.out.println("Text length: " + text.length());
  Random rnd = new Random(42);
  Language language = Languages.getLanguageForShortCode(languageCode);
  long totalTime = 0;
  for (int i = 0; i < RUNS; i++) {
    // NOTE(review): Random.nextInt throws IllegalArgumentException for an empty text.
    int beginIndex = rnd.nextInt(text.length());
    // NOTE(review): clamping to length() - 1 never includes the last character; kept as-is
    // so the checked slices stay the same as before.
    int endIndex = Math.min(beginIndex + MAX_TEXT_LENGTH, text.length() - 1);
    String subText = text.substring(beginIndex, endIndex);
    // Tool construction is inside the timed region, as in the original measurement.
    long startTime = System.currentTimeMillis();
    MultiThreadedJLanguageTool langTool = new MultiThreadedJLanguageTool(language);
    List<RuleMatch> matches;
    long runTime;
    try {
      matches = langTool.check(subText);
      runTime = System.currentTimeMillis() - startTime;
    } finally {
      // Always shut the tool down, even when the check throws.
      langTool.shutdown();
    }
    if (i >= SKIP) {
      totalTime += runTime;
      System.out.println("Time: " + runTime + "ms (" + matches.size() + " matches)");
    } else {
      System.out.println("Time: " + runTime + "ms (" + matches.size() + " matches) - skipped because of warm-up");
    }
  }
  // Only runs with i >= SKIP contribute to totalTime, so divide by that count
  // (at least 1, to keep the output finite when nothing was measured).
  System.out.println("Avg. Time: " + (float) totalTime / Math.max(1, RUNS - SKIP));
}
Aggregations