Search in sources :

Example 1 with MultiThreadedJLanguageTool

use of org.languagetool.MultiThreadedJLanguageTool in project languagetool by languagetool-org.

the class PerformanceTest2 method run.

/**
 * Measures how long it takes to check pseudo-random excerpts of a text file.
 *
 * <p>Runs {@code RUNS} iterations. Each iteration picks a start offset from a
 * {@code Random} seeded with 42 (so the same offsets are used on every call),
 * takes an excerpt of at most {@code MAX_TEXT_LENGTH} characters, creates a new
 * {@code MultiThreadedJLanguageTool} and checks the excerpt. Tool construction is
 * part of the measured time. Iterations with an index below {@code SKIP} are
 * printed but not included in the average.
 *
 * @param languageCode short language code passed to {@code Languages.getLanguageForShortCode}
 * @param textFile file whose content is read as UTF-8 and sampled
 * @throws IOException if the file cannot be read or the check fails
 * @throws IllegalArgumentException if the file is empty, since no excerpt can be drawn
 */
private void run(String languageCode, File textFile) throws IOException {
    String text;
    // Close the stream explicitly instead of relying on readStream() to do it.
    try (FileInputStream in = new FileInputStream(textFile)) {
        text = StringTools.readStream(in, "utf-8");
    }
    System.out.println("Text length: " + text.length());
    if (text.isEmpty()) {
        // Random.nextInt(0) would fail below with a much less helpful message.
        throw new IllegalArgumentException("Cannot run performance test on empty file: " + textFile);
    }
    Random rnd = new Random(42);
    Language language = Languages.getLanguageForShortCode(languageCode);
    long totalTime = 0;
    int measuredRuns = 0;
    for (int i = 0; i < RUNS; i++) {
        int beginIndex = rnd.nextInt(text.length());
        // NOTE(review): the exclusive end index is capped at length() - 1, so the
        // last character of the text is never part of an excerpt. Kept as-is so
        // the sampled excerpts stay comparable with earlier measurements.
        int endIndex = Math.min(beginIndex + MAX_TEXT_LENGTH, text.length() - 1);
        String subText = text.substring(beginIndex, endIndex);
        long startTime = System.currentTimeMillis();
        MultiThreadedJLanguageTool langTool = new MultiThreadedJLanguageTool(language);
        List<RuleMatch> matches;
        long runTime;
        try {
            matches = langTool.check(subText);
            runTime = System.currentTimeMillis() - startTime;
        } finally {
            // Shut the tool down even when check() throws.
            langTool.shutdown();
        }
        if (i >= SKIP) {
            totalTime += runTime;
            measuredRuns++;
            System.out.println("Time: " + runTime + "ms (" + matches.size() + " matches)");
        } else {
            System.out.println("Time: " + runTime + "ms (" + matches.size() + " matches) - skipped because of warm-up");
        }
    }
    // Average over the runs that were actually added to totalTime; dividing by
    // RUNS would count the skipped warm-up runs as zero-cost runs.
    float averageTime = measuredRuns == 0 ? 0f : (float) totalTime / measuredRuns;
    System.out.println("Avg. Time: " + averageTime);
}
Also used : RuleMatch(org.languagetool.rules.RuleMatch) Random(java.util.Random) Language(org.languagetool.Language) MultiThreadedJLanguageTool(org.languagetool.MultiThreadedJLanguageTool) FileInputStream(java.io.FileInputStream)

Example 2 with MultiThreadedJLanguageTool

use of org.languagetool.MultiThreadedJLanguageTool in project languagetool by languagetool-org.

the class StartupTimePerformanceTest method run.

/**
 * Measures the time needed to create a {@code MultiThreadedJLanguageTool} for the
 * given language and check an empty string with it.
 *
 * <p>Runs {@code RUNS} iterations; iterations with an index below {@code SKIP}
 * are treated as warm-up and excluded from the printed average.
 *
 * @param language language to create the tool for
 * @throws IOException if the check fails
 * @throws RuntimeException if checking the empty string produces any rule match
 */
private void run(Language language) throws IOException {
    long totalTime = 0;
    int measuredRuns = 0;
    for (int i = 0; i < RUNS; i++) {
        long startTime = System.currentTimeMillis();
        MultiThreadedJLanguageTool langTool = new MultiThreadedJLanguageTool(language);
        long runTime;
        try {
            List<RuleMatch> matches = langTool.check("");
            if (!matches.isEmpty()) {
                throw new RuntimeException("Got matches on empty input for " + language + ": " + matches);
            }
            runTime = System.currentTimeMillis() - startTime;
        } finally {
            // Shut the tool down even when the check throws or reports matches.
            langTool.shutdown();
        }
        if (i >= SKIP) {
            totalTime += runTime;
            measuredRuns++;
        }
        //System.out.println(runTime + "ms");
    }
    // Average over the runs that were actually added to totalTime; dividing by
    // RUNS would count the skipped warm-up runs as zero-cost runs.
    float averageTime = measuredRuns == 0 ? 0f : (float) totalTime / measuredRuns;
    System.out.println(language.getShortCodeWithCountryAndVariant() + ": avg. Time: " + averageTime + "ms");
}
Also used : RuleMatch(org.languagetool.rules.RuleMatch) MultiThreadedJLanguageTool(org.languagetool.MultiThreadedJLanguageTool)

Example 3 with MultiThreadedJLanguageTool

use of org.languagetool.MultiThreadedJLanguageTool in project languagetool by languagetool-org.

the class PatternRuleTest method runTestForLanguage.

/**
 * Runs the pattern rule tests for one language.
 *
 * <p>Steps, in order: validates the pattern file, creates two
 * {@code MultiThreadedJLanguageTool} instances (spelling rules are disabled on the
 * first one when {@code CHECK_WITH_SENTENCE_SPLITTING} is set), validates the rule
 * ids, checks the regular expression syntax of every pattern rule and its
 * antipatterns, verifies that every rule has a correct example or an incorrect
 * example with a correction, and finally runs {@code testGrammarRulesFromXML}.
 *
 * <p>Both tools are shut down when the method exits, including when a step throws.
 *
 * @param lang language whose pattern rules are tested
 * @throws IOException if one of the validation or test steps fails with an I/O error
 */
public void runTestForLanguage(Language lang) throws IOException {
    validatePatternFile(lang);
    System.out.print("Running pattern rule tests for " + lang.getName() + "... ");
    MultiThreadedJLanguageTool languageTool = new MultiThreadedJLanguageTool(lang);
    try {
        if (CHECK_WITH_SENTENCE_SPLITTING) {
            disableSpellingRules(languageTool);
        }
        MultiThreadedJLanguageTool allRulesLanguageTool = new MultiThreadedJLanguageTool(lang);
        try {
            validateRuleIds(lang, allRulesLanguageTool);
            List<AbstractPatternRule> rules = getAllPatternRules(lang, languageTool);
            for (AbstractPatternRule rule : rules) {
                // Test the rule pattern.
                /* check for useless 'marker' elements commented out - too slow to always run:
              PatternRuleXmlCreator creator = new PatternRuleXmlCreator();
              String xml = creator.toXML(rule.getPatternRuleId(), lang);
              if (PATTERN_MARKER_START.matcher(xml).matches() && PATTERN_MARKER_END.matcher(xml).matches()) {
                System.err.println("WARNING " + lang + ": useless <marker>: " + rule.getFullId());
              }*/
                // too aggressive for now:
                //PatternTestTools.failIfWhitespaceInToken(rule.getPatternTokens(), rule, lang);
                PatternTestTools.warnIfRegexpSyntaxNotKosher(rule.getPatternTokens(), rule.getId(), rule.getSubId(), lang);
                // Test the rule antipatterns.
                List<DisambiguationPatternRule> antiPatterns = rule.getAntiPatterns();
                for (DisambiguationPatternRule antiPattern : antiPatterns) {
                    PatternTestTools.warnIfRegexpSyntaxNotKosher(antiPattern.getPatternTokens(), antiPattern.getId(), antiPattern.getSubId(), lang);
                }
                // A rule without any correct example must have at least one
                // incorrect example that carries a correction.
                if (rule.getCorrectExamples().isEmpty()) {
                    boolean correctionExists = false;
                    for (IncorrectExample incorrectExample : rule.getIncorrectExamples()) {
                        if (!incorrectExample.getCorrections().isEmpty()) {
                            correctionExists = true;
                            break;
                        }
                    }
                    if (!correctionExists) {
                        fail("Rule " + rule.getFullId() + " in language " + lang + " needs at least one <example> with a 'correction' attribute" + " or one <example> of type='correct'.");
                    }
                }
            }
            testGrammarRulesFromXML(rules, languageTool, allRulesLanguageTool, lang);
            System.out.println(rules.size() + " rules tested.");
        } finally {
            // Shut down even when a validation step or fail(...) throws.
            allRulesLanguageTool.shutdown();
        }
    } finally {
        languageTool.shutdown();
    }
}
Also used : MultiThreadedJLanguageTool(org.languagetool.MultiThreadedJLanguageTool) DisambiguationPatternRule(org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule) IncorrectExample(org.languagetool.rules.IncorrectExample)

Example 4 with MultiThreadedJLanguageTool

use of org.languagetool.MultiThreadedJLanguageTool in project languagetool by languagetool-org.

the class WikipediaQuickCheck method getLanguageTool.

/**
 * Creates a {@code MultiThreadedJLanguageTool} for the given language and configures it:
 * applies {@code enableWikipediaRules}, disables every rule id in {@code disabledRuleIds},
 * activates the language model rules when {@code ngramDir} is set, and disables the
 * spelling rules.
 *
 * <p>This method does not shut down the returned tool; callers should call
 * {@code shutdown()} on it when done. If configuration fails, the tool is shut down
 * here before the exception propagates.
 *
 * @param lang language to create the tool for
 * @return the configured tool
 * @throws IOException if a configuration step fails with an I/O error
 */
private MultiThreadedJLanguageTool getLanguageTool(Language lang) throws IOException {
    MultiThreadedJLanguageTool langTool = new MultiThreadedJLanguageTool(lang);
    boolean configured = false;
    try {
        enableWikipediaRules(langTool);
        for (String disabledRuleId : disabledRuleIds) {
            langTool.disableRule(disabledRuleId);
        }
        if (ngramDir != null) {
            langTool.activateLanguageModelRules(ngramDir);
        }
        disableSpellingRules(langTool);
        configured = true;
        return langTool;
    } finally {
        // The caller never sees the tool when configuration throws, so it has to
        // be shut down here in that case.
        if (!configured) {
            langTool.shutdown();
        }
    }
}
Also used : MultiThreadedJLanguageTool(org.languagetool.MultiThreadedJLanguageTool)

Example 5 with MultiThreadedJLanguageTool

use of org.languagetool.MultiThreadedJLanguageTool in project languagetool by languagetool-org.

the class WikipediaQuickCheck method checkWikipediaMarkup.

/**
 * Checks the plain text extracted from Wikipedia markup and applies the suggestions
 * of every rule match to the original markup.
 *
 * <p>A match whose suggestions cannot be applied is reported on {@code System.err}
 * and counted as an internal error; it does not abort the check.
 *
 * @param url URL of the page, used only in the error message
 * @param wikiContent markup content to check
 * @param language language used to obtain the checker
 * @param errorMarker passed to the {@code SuggestionReplacer} when not {@code null}
 * @return the content, the successfully applied matches and the internal error count
 * @throws IOException if obtaining the checker or checking the text fails
 */
MarkupAwareWikipediaResult checkWikipediaMarkup(URL url, MediaWikiContent wikiContent, Language language, ErrorMarker errorMarker) throws IOException {
    PlainTextMapping mapping = new SwebleWikipediaTextFilter().filter(wikiContent.getContent());
    MultiThreadedJLanguageTool langTool = getLanguageTool(language);
    final List<RuleMatch> ruleMatches;
    try {
        ruleMatches = langTool.check(mapping.getPlainText());
    } finally {
        // The tool is only needed for this single check.
        langTool.shutdown();
    }
    List<AppliedRuleMatch> applied = new ArrayList<>();
    int failures = 0;
    for (RuleMatch ruleMatch : ruleMatches) {
        SuggestionReplacer replacer;
        if (errorMarker == null) {
            replacer = new SuggestionReplacer(mapping, wikiContent.getContent());
        } else {
            replacer = new SuggestionReplacer(mapping, wikiContent.getContent(), errorMarker);
        }
        try {
            applied.add(new AppliedRuleMatch(ruleMatch, replacer.applySuggestionsToOriginalText(ruleMatch)));
        } catch (Exception e) {
            // Best effort: record the failure and continue with the next match.
            System.err.println("Failed to apply suggestion for rule match '" + ruleMatch + "' for URL " + url + ": " + e);
            failures++;
        }
    }
    return new MarkupAwareWikipediaResult(wikiContent, applied, failures);
}
Also used : ArrayList(java.util.ArrayList) MultiThreadedJLanguageTool(org.languagetool.MultiThreadedJLanguageTool) SocketTimeoutException(java.net.SocketTimeoutException) IOException(java.io.IOException) SAXException(org.xml.sax.SAXException) RuleMatch(org.languagetool.rules.RuleMatch)

Aggregations

MultiThreadedJLanguageTool (org.languagetool.MultiThreadedJLanguageTool): 9, IOException (java.io.IOException): 4, RuleMatch (org.languagetool.rules.RuleMatch): 4, Language (org.languagetool.Language): 3, ArrayList (java.util.ArrayList): 2, SAXException (org.xml.sax.SAXException): 2, File (java.io.File): 1, FileInputStream (java.io.FileInputStream): 1, InvocationTargetException (java.lang.reflect.InvocationTargetException): 1, SocketTimeoutException (java.net.SocketTimeoutException): 1, Random (java.util.Random): 1, ParserConfigurationException (javax.xml.parsers.ParserConfigurationException): 1, IncorrectExample (org.languagetool.rules.IncorrectExample): 1, BitextRule (org.languagetool.rules.bitext.BitextRule): 1, DisambiguationPatternRule (org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule): 1, StringTools.readerToString (org.languagetool.tools.StringTools.readerToString): 1