Search in sources:

Example 61 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class SentenceChecker method run.

/**
 * Runs every line of {@code file} through a {@link JLanguageTool} for {@code language}
 * and prints, after each {@code BATCH_SIZE} lines, the wall-clock time that batch took.
 * The check results themselves are discarded.
 *
 * @param language the language used to create the checker
 * @param file     the input file, read line by line
 * @throws IOException if the file cannot be opened or a check fails with an I/O error
 */
private void run(Language language, File file) throws IOException {
    JLanguageTool lt = new JLanguageTool(language);
    try (Scanner scanner = new Scanner(file)) {
        long batchStart = System.currentTimeMillis();
        // 'checked' is the 1-based number of the line being processed.
        for (int checked = 1; scanner.hasNextLine(); checked++) {
            lt.check(scanner.nextLine());
            if (checked % BATCH_SIZE != 0) {
                continue;
            }
            long elapsed = System.currentTimeMillis() - batchStart;
            System.out.println(checked + ". " + elapsed + "ms per " + BATCH_SIZE + " sentences");
            // Restart the timer so each report covers only its own batch.
            batchStart = System.currentTimeMillis();
        }
    }
}
Also used : Scanner(java.util.Scanner) JLanguageTool(org.languagetool.JLanguageTool)

Example 62 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class SimpleRuleCounter method run.

/**
 * Visits the given languages in order of their name and, for each one that is not a
 * variant, hands its active rules to {@code countForLanguage}.
 *
 * @param languages the languages to process; the list itself is not modified
 */
private void run(List<Language> languages) {
    // Sort a copy so the caller's list keeps its order.
    List<Language> byName = new ArrayList<>(languages);
    byName.sort((a, b) -> a.getName().compareTo(b.getName()));
    for (Language candidate : byName) {
        if (!candidate.isVariant()) {
            List<Rule> activeRules = new JLanguageTool(candidate).getAllActiveRules();
            countForLanguage(activeRules, candidate);
        }
    }
}
Also used : Language(org.languagetool.Language) JLanguageTool(org.languagetool.JLanguageTool) ArrayList(java.util.ArrayList) Rule(org.languagetool.rules.Rule) PatternRule(org.languagetool.rules.patterns.PatternRule)

Example 63 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class UselessExampleFinder method run.

/**
 * Loads the grammar.xml of the given language, passes every incorrect example of every
 * active {@link PatternRule} to {@code checkCorrections}, then prints the counters to
 * stderr and the XML lines to stdout.
 *
 * <p>NOTE(review): {@code checkCorrections} receives the line list and a "removed lines"
 * counter is reported, so it presumably edits {@code xmlLines} in place; confirm there.
 *
 * @param lang the language whose grammar.xml and rules are examined
 * @throws IOException if grammar.xml cannot be read
 * @throws RuntimeException if the hard-coded base path does not exist
 */
private void run(Language lang) throws IOException {
    File basePath = new File("/lt/git/languagetool/languagetool-language-modules");
    if (!basePath.exists()) {
        throw new RuntimeException("basePath does not exist: " + basePath);
    }
    String langCode = lang.getShortCode();
    File xml = new File(basePath, "/" + langCode + "/src/main/resources/org/languagetool/rules/" + langCode + "/grammar.xml");
    List<String> xmlLines;
    // IOUtils.readLines(Reader) leaves the reader open, so close it here to avoid
    // leaking the file handle.
    try (FileReader reader = new FileReader(xml)) {
        xmlLines = IOUtils.readLines(reader);
    }
    JLanguageTool tool = new JLanguageTool(lang);
    for (Rule rule : tool.getAllActiveRules()) {
        // Only pattern rules are examined.
        if (!(rule instanceof PatternRule)) {
            continue;
        }
        List<CorrectExample> correctExamples = rule.getCorrectExamples();
        List<IncorrectExample> incorrectExamples = rule.getIncorrectExamples();
        for (IncorrectExample incorrectExample : incorrectExamples) {
            checkCorrections(rule, correctExamples, incorrectExample, xmlLines);
        }
    }
    // Statistics go to stderr so that stdout carries only the XML lines.
    System.err.println("Useless examples: " + uselessExampleCount);
    System.err.println("Removed lines: " + removedLinesCount);
    for (String xmlLine : xmlLines) {
        System.out.println(xmlLine);
    }
}
Also used : PatternRule(org.languagetool.rules.patterns.PatternRule) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule) CorrectExample(org.languagetool.rules.CorrectExample) JLanguageTool(org.languagetool.JLanguageTool) FileReader(java.io.FileReader) Rule(org.languagetool.rules.Rule) PatternRule(org.languagetool.rules.patterns.PatternRule) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule) IncorrectExample(org.languagetool.rules.IncorrectExample) File(java.io.File)

Example 64 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class GermanUppercasePhraseFinder method main.

/**
 * Iterates over all terms of the "ngram" field of a Lucene index and prints those
 * phrases whose all-words-capitalized spelling occurs more often than the spelling
 * found in the index.
 *
 * <p>Each output line has the form
 * {@code <factor> <uppercaseCount> <uppercasePhrase> <count> <phrase>}, where factor is
 * {@code uppercaseCount / count}.
 *
 * @param args exactly one argument: the directory of the ngram index
 * @throws IOException if the index cannot be opened or read
 */
public static void main(String[] args) throws IOException {
    if (args.length != 1) {
        System.out.println("Usage: " + GermanUppercasePhraseFinder.class.getSimpleName() + " <ngramIndexDir>");
        System.exit(1);
    }
    JLanguageTool lt = new JLanguageTool(Languages.getLanguageForShortCode("de"));
    // Close the directory and the reader when done (they were previously leaked).
    try (FSDirectory fsDir = FSDirectory.open(new File(args[0]).toPath());
         IndexReader reader = DirectoryReader.open(fsDir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Fields fields = MultiFields.getFields(reader);
        Terms terms = fields.terms("ngram");
        TermsEnum termsEnum = terms.iterator();
        int count = 0;
        BytesRef next;
        while ((next = termsEnum.next()) != null) {
            String term = next.utf8ToString();
            count++;
            //term = "persischer Golf";  // for testing
            String[] parts = term.split(" ");
            boolean useful = true;
            int lcCount = 0;  // number of parts that change when their first char is uppercased
            List<String> ucParts = new ArrayList<>();
            for (String part : parts) {
                if (part.length() < MIN_TERM_LEN) {
                    useful = false;
                    break;
                }
                String uc = StringTools.uppercaseFirstChar(part);
                if (!part.equals(uc)) {
                    lcCount++;
                }
                ucParts.add(uc);
            }
            // Skip terms with a too-short part, and terms where no part or exactly
            // two parts would change when capitalized.
            if (!useful || lcCount == 0 || lcCount == 2) {
                continue;
            }
            String uppercase = Strings.join(ucParts, " ");
            if (term.equals(uppercase)) {
                continue;
            }
            long thisCount = getOccurrenceCount(reader, searcher, term);
            long thisUpperCount = getOccurrenceCount(reader, searcher, uppercase);
            // Progress is only reported for terms that pass the filters above.
            if (count % 10_000 == 0) {
                System.err.println(count + " @ " + term);
            }
            boolean frequentEnough = thisCount > LIMIT || thisUpperCount > LIMIT;
            if (frequentEnough && thisUpperCount > thisCount && isRelevant(lt, term)) {
                float factor = (float) thisUpperCount / thisCount;
                // Pass the terms as arguments rather than concatenating them into the
                // format string: a '%' inside an index term would otherwise be parsed
                // as a format specifier and throw or garble the output.
                System.out.printf("%.2f %s %s %s %s\n", factor, thisUpperCount, uppercase, thisCount, term);
            }
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) JLanguageTool(org.languagetool.JLanguageTool) ArrayList(java.util.ArrayList) FSDirectory(org.apache.lucene.store.FSDirectory) File(java.io.File) BytesRef(org.apache.lucene.util.BytesRef)

Example 65 with JLanguageTool

use of org.languagetool.JLanguageTool in project languagetool by languagetool-org.

the class BlogChecker method check.

/**
 * Checks every file directly inside {@code dir} with a {@link JLanguageTool} for
 * {@code lang}, with the rules WHITESPACE_RULE and UNPAIRED_BRACKETS disabled.
 * Each file is read as UTF-8, passed through {@code cleanup}, and handed to
 * {@code CommandLineTools.checkText}; a header with the file name is printed first.
 *
 * @param dir  the directory whose entries are checked
 * @param lang the language used to create the checker
 * @throws IOException if {@code dir} cannot be listed or a file cannot be read
 */
private void check(File dir, Language lang) throws IOException {
    JLanguageTool lt = new JLanguageTool(lang);
    lt.disableRule("WHITESPACE_RULE");
    lt.disableRule("UNPAIRED_BRACKETS");
    File[] files = dir.listFiles();
    // listFiles() returns null if dir is not a directory or an I/O error occurs;
    // fail with a descriptive message instead of a NullPointerException in the loop.
    if (files == null) {
        throw new IOException("Cannot list files in directory: " + dir);
    }
    for (File file : files) {
        System.out.println("\n=== " + file.getName() + " ================================");
        String content = cleanup(FileUtils.readFileToString(file, "utf-8"));
        CommandLineTools.checkText(content, lt);
    }
}
Also used : JLanguageTool(org.languagetool.JLanguageTool) File(java.io.File)

Aggregations

JLanguageTool (org.languagetool.JLanguageTool): 184; Test (org.junit.Test): 109; RuleMatch (org.languagetool.rules.RuleMatch): 57; Before (org.junit.Before): 38; German (org.languagetool.language.German): 16; Rule (org.languagetool.rules.Rule): 16; Catalan (org.languagetool.language.Catalan): 14; Ukrainian (org.languagetool.language.Ukrainian): 14; English (org.languagetool.language.English): 13; Polish (org.languagetool.language.Polish): 12; Language (org.languagetool.Language): 10; GermanyGerman (org.languagetool.language.GermanyGerman): 9; PatternRule (org.languagetool.rules.patterns.PatternRule): 9; AnalyzedSentence (org.languagetool.AnalyzedSentence): 8; File (java.io.File): 7; AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 6; Dutch (org.languagetool.language.Dutch): 5; French (org.languagetool.language.French): 5; ArrayList (java.util.ArrayList): 4; FakeLanguage (org.languagetool.FakeLanguage): 4