use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.
the class WordListValidatorTest method testWordListValidity.
/**
 * Runs {@code validateWords} over the spelling word list of every
 * {@link SpellingCheckRule} that is active for any registered language.
 * A given word list file is validated at most once, even when several
 * rules or languages refer to it.
 */
@Test
public void testWordListValidity() throws IOException {
  Set<String> validatedFiles = new HashSet<>();
  for (Language language : Languages.get()) {
    if (language.getShortCode().equals("ru")) {
      // skipping, Cyrillic chars not part of the validation yet
      continue;
    }
    JLanguageTool languageTool = new JLanguageTool(language);
    for (Rule activeRule : languageTool.getAllActiveRules()) {
      if (!(activeRule instanceof SpellingCheckRule)) {
        continue;
      }
      String wordListFile = ((SpellingCheckRule) activeRule).getSpellingFileName();
      // Ignore files that are missing from the resources or were already handled.
      if (!JLanguageTool.getDataBroker().resourceExists(wordListFile) || validatedFiles.contains(wordListFile)) {
        continue;
      }
      System.out.println("Checking " + wordListFile);
      List<String> wordList = new CachingWordListLoader().loadWords(wordListFile);
      validateWords(wordList, wordListFile);
      validatedFiles.add(wordListFile);
    }
  }
}
use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.
the class DisambiguationRuleTest method testDisambiguationRulesFromXML.
/**
 * Loads, validates and tests the {@code disambiguation.xml} rules of every
 * language returned by {@code Languages.getWithDemoLanguage()}.
 *
 * <p>Languages contained in {@code ignoredLanguages}, language variants, and
 * languages whose disambiguator is a {@link DemoDisambiguator} are skipped.
 *
 * @param ignoredLanguages languages to leave out; may be {@code null}, in which
 *                         case no language is ignored
 * @throws IOException if reading or closing the rule file fails
 * @throws ParserConfigurationException if thrown while loading the rules
 * @throws SAXException if thrown while loading the rules
 */
private void testDisambiguationRulesFromXML(Set<Language> ignoredLanguages) throws IOException, ParserConfigurationException, SAXException {
  for (Language lang : Languages.getWithDemoLanguage()) {
    if (ignoredLanguages != null && ignoredLanguages.contains(lang)) {
      continue;
    }
    if (lang.isVariant()) {
      System.out.println("Skipping variant: " + lang);
      continue;
    }
    System.out.println("Running disambiguation tests for " + lang.getName() + "...");
    DisambiguationRuleLoader ruleLoader = new DisambiguationRuleLoader();
    JLanguageTool languageTool = new JLanguageTool(lang);
    if (!(languageTool.getLanguage().getDisambiguator() instanceof DemoDisambiguator)) {
      long startTime = System.currentTimeMillis();
      String name = JLanguageTool.getDataBroker().getResourceDir() + "/" + lang.getShortCode() + "/disambiguation.xml";
      validateRuleFile(name);
      List<DisambiguationPatternRule> rules;
      // Close the resource stream once the rules are loaded; previously it was
      // opened inline and leaked for every language. A null stream (resource not
      // found) is still passed through to getRules() unchanged.
      try (java.io.InputStream ruleStream = ruleLoader.getClass().getResourceAsStream(name)) {
        rules = ruleLoader.getRules(ruleStream);
      }
      for (DisambiguationPatternRule rule : rules) {
        PatternTestTools.warnIfRegexpSyntaxNotKosher(rule.getPatternTokens(), rule.getId(), rule.getSubId(), lang);
      }
      testDisambiguationRulesFromXML(rules, languageTool, lang);
      long endTime = System.currentTimeMillis();
      System.out.println(rules.size() + " rules tested (" + (endTime - startTime) + "ms)");
    }
  }
}
use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.
the class ExampleSentenceCorrectionCreator method run.
/**
 * Reads the {@code grammar.xml} of the given language from a local checkout,
 * passes every incorrect example of every {@link PatternRule} to
 * {@code checkCorrections}, and finally prints the number of added corrections
 * to stderr and the XML lines to stdout.
 *
 * @param lang the language whose grammar file is processed
 * @throws IOException if the grammar file cannot be read
 * @throws RuntimeException if the hard-coded base path does not exist
 */
private void run(Language lang) throws IOException {
  File basePath = new File("/lt/git/languagetool/languagetool-language-modules");
  if (!basePath.exists()) {
    throw new RuntimeException("basePath does not exist: " + basePath);
  }
  String langCode = lang.getShortCode();
  File xml = new File(basePath, "/" + langCode + "/src/main/resources/org/languagetool/rules/" + langCode + "/grammar.xml");
  List<String> xmlLines;
  // Close the reader after loading; it used to be created inline and never closed.
  // NOTE(review): FileReader decodes with the platform default charset - confirm
  // that this is intended for grammar.xml.
  try (FileReader xmlReader = new FileReader(xml)) {
    xmlLines = IOUtils.readLines(xmlReader);
  }
  JLanguageTool tool = new JLanguageTool(lang);
  for (Rule rule : tool.getAllRules()) {
    if (!(rule instanceof PatternRule)) {
      continue;
    }
    List<IncorrectExample> incorrectExamples = rule.getIncorrectExamples();
    for (IncorrectExample incorrectExample : incorrectExamples) {
      checkCorrections(rule, incorrectExample, xmlLines, tool);
    }
  }
  System.err.println("Added corrections: " + addedCorrectionsCount);
  for (String xmlLine : xmlLines) {
    System.out.println(xmlLine);
  }
}
use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.
the class ExampleSentencePrinter method run.
/**
 * Prints a simple HTML page to stdout that lists, for every rule of the given
 * language that has at least one incorrect example, the first such example
 * together with the rule id. {@code <marker>} tags in the example are turned
 * into {@code <b>} tags.
 *
 * @param lang the language whose rules are listed
 * @throws IOException declared by the signature
 * @throws RuntimeException if the hard-coded base path does not exist
 */
private void run(Language lang) throws IOException {
  File basePath = new File("/lt/git/languagetool/languagetool-language-modules");
  if (!basePath.exists()) {
    throw new RuntimeException("basePath does not exist: " + basePath);
  }
  JLanguageTool tool = new JLanguageTool(lang);
  String[] pagePreamble = {
    "<html>",
    "<head>",
    " <title>LanguageTool examples sentences</title>",
    " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />",
    "</head>",
    "<body>"
  };
  for (String preambleLine : pagePreamble) {
    System.out.println(preambleLine);
  }
  int exampleNumber = 0;
  for (Rule rule : tool.getAllRules()) {
    List<IncorrectExample> incorrectExamples = rule.getIncorrectExamples();
    if (incorrectExamples.isEmpty()) {
      // Rules without incorrect examples are not listed and not numbered.
      continue;
    }
    exampleNumber++;
    String rawExample = incorrectExamples.get(0).getExample();
    String htmlExample = rawExample.replace("<marker>", "<b>").replace("</marker>", "</b>");
    System.out.println(exampleNumber + ". " + htmlExample + " [" + rule.getId() + "]<br>");
  }
  System.out.println("</body>");
  System.out.println("</html>");
}
use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.
the class LongSentenceRuleTest method testMatch.
/**
 * Checks {@link LongSentenceRule} against short and long sentences, first with
 * the rule built from messages only and then with a rule built with the
 * additional constructor argument {@code 6}.
 */
@Test
public void testMatch() throws Exception {
  JLanguageTool lt = new JLanguageTool(TestTools.getDemoLanguage());

  // Rule constructed without an explicit second argument.
  LongSentenceRule defaultRule = new LongSentenceRule(TestTools.getEnglishMessages());
  assertNoMatch(" is a rather short text.", defaultRule, lt);
  String veryLongSentence = "Now this is not "
      + "a a a a a a a a a a a "
      + "a a a a a a a a a a a "
      + "a a a a a a a a a a a "
      + "rather that short text.";
  assertMatch(veryLongSentence, defaultRule, lt);

  // Rule constructed with 6 as second argument.
  LongSentenceRule sixRule = new LongSentenceRule(TestTools.getEnglishMessages(), 6);
  assertNoMatch("This is a rather short text.", sixRule, lt);
  assertMatch("This is also a rather short text.", sixRule, lt);
  assertNoMatch("These ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ don't count.", sixRule, lt);
  assertNoMatch("one two three four five six.", sixRule, lt);
  assertNoMatch("one two three (four) five six.", sixRule, lt);
  assertMatch("one two three four five six seven.", sixRule, lt);
}
Aggregations