use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.
the class SentenceChecker method run.
/**
 * Runs {@link JLanguageTool#check(String)} on every line of {@code file} and prints the
 * elapsed wall-clock time after each batch of {@code BATCH_SIZE} lines. The check results
 * themselves are discarded.
 *
 * @param language language used to create the {@link JLanguageTool} instance
 * @param file text file that is read line by line, decoded as UTF-8
 * @throws IOException if the file cannot be opened, if reading it fails part-way, or if
 *         propagated from the check itself
 */
private void run(Language language, File file) throws IOException {
  JLanguageTool lt = new JLanguageTool(language);
  // Decode explicitly as UTF-8 instead of relying on the platform default charset.
  try (Scanner scanner = new Scanner(file, "UTF-8")) {
    int count = 0;
    long startTime = System.currentTimeMillis();
    while (scanner.hasNextLine()) {
      String line = scanner.nextLine();
      lt.check(line);
      if (++count % BATCH_SIZE == 0) {
        long time = System.currentTimeMillis() - startTime;
        System.out.println(count + ". " + time + "ms per " + BATCH_SIZE + " sentences");
        // Restart the clock so each report covers only the most recent batch.
        startTime = System.currentTimeMillis();
      }
    }
    // Scanner swallows read errors and simply reports "no more lines"; surface them so a
    // truncated run is not mistaken for a complete one.
    IOException readError = scanner.ioException();
    if (readError != null) {
      throw readError;
    }
  }
}
use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.
the class SimpleRuleCounter method run.
/**
 * Walks the given languages in order of their name and, for each one that is not a variant,
 * hands its active rules to {@code countForLanguage}.
 *
 * @param languages languages to process; the list is copied before sorting, so the
 *        caller's list is not reordered
 */
private void run(List<Language> languages) {
  List<Language> byName = new ArrayList<>(languages);
  byName.sort((first, second) -> first.getName().compareTo(second.getName()));
  for (Language candidate : byName) {
    // Variants are skipped; only non-variant languages are counted.
    if (!candidate.isVariant()) {
      JLanguageTool tool = new JLanguageTool(candidate);
      List<Rule> activeRules = tool.getAllActiveRules();
      countForLanguage(activeRules, candidate);
    }
  }
}
use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.
the class UselessExampleFinder method run.
/**
 * Loads the {@code grammar.xml} of the given language from a fixed local checkout path,
 * calls {@code checkCorrections} for every incorrect example of each active
 * {@link PatternRule}, then prints the two counters to stderr and the XML lines to stdout.
 *
 * @param lang language whose short code selects the {@code grammar.xml} to load and whose
 *        active rules are inspected
 * @throws IOException if {@code grammar.xml} cannot be read
 * @throws RuntimeException if the hard-coded base path does not exist
 */
private void run(Language lang) throws IOException {
  File basePath = new File("/lt/git/languagetool/languagetool-language-modules");
  if (!basePath.exists()) {
    throw new RuntimeException("basePath does not exist: " + basePath);
  }
  String langCode = lang.getShortCode();
  File xml = new File(basePath, "/" + langCode + "/src/main/resources/org/languagetool/rules/" + langCode + "/grammar.xml");
  List<String> xmlLines;
  // Close the reader as soon as the lines are loaded; it was previously never closed.
  // NOTE(review): FileReader decodes with the platform default charset - confirm that this
  // matches the encoding of grammar.xml.
  try (FileReader xmlReader = new FileReader(xml)) {
    xmlLines = IOUtils.readLines(xmlReader);
  }
  JLanguageTool tool = new JLanguageTool(lang);
  for (Rule rule : tool.getAllActiveRules()) {
    // Only pattern rules are considered.
    if (!(rule instanceof PatternRule)) {
      continue;
    }
    List<CorrectExample> correctExamples = rule.getCorrectExamples();
    List<IncorrectExample> incorrectExamples = rule.getIncorrectExamples();
    for (IncorrectExample incorrectExample : incorrectExamples) {
      // presumably checkCorrections edits xmlLines in place and updates the counters
      // printed below - verify against its implementation
      checkCorrections(rule, correctExamples, incorrectExample, xmlLines);
    }
  }
  System.err.println("Useless examples: " + uselessExampleCount);
  System.err.println("Removed lines: " + removedLinesCount);
  for (String xmlLine : xmlLines) {
    System.out.println(xmlLine);
  }
}
use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.
the class GermanUppercasePhraseFinder method main.
/**
 * Iterates over all terms of the {@code ngram} field of the index in the given directory.
 * For each term it builds a variant in which every space-separated part has been passed
 * through {@code StringTools.uppercaseFirstChar}, then compares the occurrence counts of
 * the two. A line is printed to stdout when all of the following hold: at least one count
 * exceeds {@code LIMIT}, the variant occurs more often than the original term, and
 * {@code isRelevant} accepts the term.
 *
 * <p>Output line format: {@code <factor> <variantCount> <variant> <termCount> <term>}.
 * Progress is reported on stderr every 10,000 terms that reach the count lookup.
 *
 * @param args exactly one element: the n-gram index directory
 * @throws IOException if the index cannot be opened or read
 */
public static void main(String[] args) throws IOException {
  if (args.length != 1) {
    System.out.println("Usage: " + GermanUppercasePhraseFinder.class.getSimpleName() + " <ngramIndexDir>");
    System.exit(1);
  }
  JLanguageTool lt = new JLanguageTool(Languages.getLanguageForShortCode("de"));
  // Directory and reader are closed on every exit path (reader first, then directory).
  try (FSDirectory fsDir = FSDirectory.open(new File(args[0]).toPath());
       IndexReader reader = DirectoryReader.open(fsDir)) {
    IndexSearcher searcher = new IndexSearcher(reader);
    Fields fields = MultiFields.getFields(reader);
    Terms terms = fields.terms("ngram");
    TermsEnum termsEnum = terms.iterator();
    int count = 0;
    BytesRef next;
    while ((next = termsEnum.next()) != null) {
      String term = next.utf8ToString();
      count++;
      //term = "persischer Golf"; // for testing
      String[] parts = term.split(" ");
      boolean useful = true;
      // Number of parts that differ from their uppercaseFirstChar form.
      int lcCount = 0;
      List<String> ucParts = new ArrayList<>();
      for (String part : parts) {
        if (part.length() < MIN_TERM_LEN) {
          // One too-short part disqualifies the whole term.
          useful = false;
          break;
        }
        String uc = StringTools.uppercaseFirstChar(part);
        if (!part.equals(uc)) {
          lcCount++;
        }
        ucParts.add(uc);
      }
      // Skip terms with a too-short part, and terms where none or exactly two of the
      // parts changed.
      if (!useful || lcCount == 0 || lcCount == 2) {
        continue;
      }
      String uppercase = Strings.join(ucParts, " ");
      if (term.equals(uppercase)) {
        continue;
      }
      long thisCount = getOccurrenceCount(reader, searcher, term);
      long thisUpperCount = getOccurrenceCount(reader, searcher, uppercase);
      if (count % 10_000 == 0) {
        System.err.println(count + " @ " + term);
      }
      if (thisCount > LIMIT || thisUpperCount > LIMIT) {
        if (thisUpperCount > thisCount) {
          if (isRelevant(lt, term)) {
            float factor = (float) thisUpperCount / thisCount;
            // Pass the values as arguments rather than splicing them into the format
            // string: a '%' inside a term would otherwise be parsed as a format
            // specifier. %s keeps the plain Long.toString rendering of the counts.
            System.out.printf("%.2f %s %s %s %s\n", factor, thisUpperCount, uppercase, thisCount, term);
          }
        }
      }
    }
  }
}
use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.
the class BlogChecker method check.
/**
 * Checks every entry of {@code dir} with a {@link JLanguageTool} for the given language.
 * Each entry's content is read as UTF-8, passed through {@code cleanup}, and handed to
 * {@code CommandLineTools.checkText}; a header line with the file name is printed first.
 * The rules {@code WHITESPACE_RULE} and {@code UNPAIRED_BRACKETS} are disabled.
 *
 * @param dir directory whose entries are checked
 * @param lang language used to create the checker
 * @throws IOException if {@code dir} cannot be listed or an entry cannot be read
 */
private void check(File dir, Language lang) throws IOException {
  JLanguageTool lt = new JLanguageTool(lang);
  lt.disableRule("WHITESPACE_RULE");
  lt.disableRule("UNPAIRED_BRACKETS");
  File[] files = dir.listFiles();
  // listFiles() returns null when dir is not a directory or an I/O error occurs; fail with
  // a descriptive error instead of a NullPointerException in the loop below.
  if (files == null) {
    throw new IOException("Cannot list files in directory: " + dir);
  }
  for (File file : files) {
    System.out.println("\n=== " + file.getName() + " ================================");
    String content = cleanup(FileUtils.readFileToString(file, "utf-8"));
    CommandLineTools.checkText(content, lt);
  }
}
Aggregations