Search in sources:

Example 6 with AmericanEnglish

use of org.languagetool.language.AmericanEnglish in project languagetool by languagetool-org.

Method runOnFileLineByLine of class Main.

/**
 * Processes the input in "line by line" mode: lines are read one at a time,
 * collected into a buffer, and the buffer is passed to {@code handleLine}
 * whenever {@code isBreakPoint} reports true for the line just read.
 *
 * <p>When {@code profileRules} is set, the input is read once per active rule
 * and {@code currentRule} is set to the rule belonging to the current pass;
 * otherwise a single pass is made (with {@code currentRule} set to the first
 * active rule). If there are no active rules, no pass is made at all.
 *
 * <p>When language auto-detection is enabled, the language is detected from the
 * first line of each pass, falling back to American English if detection
 * returns {@code null}.
 *
 * <p>NOTE(review): the text buffer and both line offsets are declared outside
 * the per-rule loop and are not reset between passes, and the results of the
 * {@code sentenceTokenize(...).size()} calls are discarded - confirm that both
 * are intended.
 *
 * @param filename the input to read, as understood by {@code getInputStreamReader}
 *                 and {@code isStdIn}
 * @param encoding the encoding passed on to {@code getInputStreamReader}
 * @throws IOException if reading the input fails
 * @throws IllegalArgumentException if profiling is requested for STDIN input
 */
private void runOnFileLineByLine(String filename, String encoding) throws IOException {
    System.err.println("Warning: running in line by line mode. Cross-paragraph checks will not work.\n");
    if (options.isVerbose()) {
        lt.setOutput(System.err);
    }
    // The progress message is skipped for XML output and when suggestions are applied.
    final boolean announceInput = !(options.isXmlFormat() || options.isApplySuggestions());
    if (announceInput) {
        System.err.println(isStdIn(filename) ? "Working on STDIN..." : ("Working on " + filename + "..."));
    }
    if (profileRules && isStdIn(filename)) {
        throw new IllegalArgumentException("Profiling mode cannot be used with input from STDIN");
    }
    final List<Rule> activeRules = lt.getAllActiveRules();
    int passes = 1;
    if (profileRules) {
        // Profiling makes one pass over the input per active rule.
        System.out.printf("Testing %d rules\n", activeRules.size());
        System.out.println("Rule ID\tTime\tSentences\tMatches\tSentences per sec.");
        passes = activeRules.size();
    }
    int chunkStartLine = 0;
    int linesRead = 0;
    handleLine(ApiPrintMode.START_API, 0, new StringBuilder());
    StringBuilder chunk = new StringBuilder();
    for (int pass = 0; pass < passes && !activeRules.isEmpty(); pass++) {
        currentRule = activeRules.get(pass);
        try (InputStreamReader streamReader = getInputStreamReader(filename, encoding);
            BufferedReader lineReader = new BufferedReader(streamReader)) {
            boolean firstLine = true;
            for (String line = lineReader.readLine(); line != null; line = lineReader.readLine()) {
                chunk.append(line);
                // to detect language from the first input line
                if (firstLine && options.isAutoDetect()) {
                    Language detected = detectLanguageOfString(line);
                    if (detected == null) {
                        System.err.println("Could not detect language well enough, using American English");
                        detected = new AmericanEnglish();
                    }
                    System.err.println("Language used is: " + detected.getName());
                    detected.getSentenceTokenizer().setSingleLineBreaksMarksParagraph(options.isSingleLineBreakMarksParagraph());
                    changeLanguage(detected, options.getMotherTongue(), options.getDisabledRules(), options.getEnabledRules());
                }
                firstLine = false;
                chunk.append('\n');
                linesRead++;
                if (!isBreakPoint(line)) {
                    // keep accumulating until a break point is reached
                    continue;
                }
                handleLine(ApiPrintMode.CONTINUE_API, chunkStartLine, chunk);
                if (profileRules) {
                    lt.sentenceTokenize(chunk.toString()).size();
                }
                // start a fresh chunk that begins after the lines consumed so far
                chunk = new StringBuilder();
                chunkStartLine = linesRead;
            }
        } finally {
            if (profileRules && chunk.length() > 0) {
                lt.sentenceTokenize(chunk.toString()).size();
            }
            // Whatever is left in the buffer is handed over with the END_API marker.
            handleLine(ApiPrintMode.END_API, linesRead - 1, chunk);
        }
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) Language(org.languagetool.Language) AmericanEnglish(org.languagetool.language.AmericanEnglish) BufferedReader(java.io.BufferedReader) BitextRule(org.languagetool.rules.bitext.BitextRule) Rule(org.languagetool.rules.Rule) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule) StringTools.readerToString(org.languagetool.tools.StringTools.readerToString)

Example 7 with AmericanEnglish

use of org.languagetool.language.AmericanEnglish in project languagetool by languagetool-org.

Method runOnFile of class Main.

/**
 * Processes a whole input in one go.
 *
 * <p>In bitext mode the input is read through a {@code TabBitextReader} and is
 * either corrected or checked, depending on whether suggestions should be
 * applied. Otherwise the (optionally filtered) text is loaded via
 * {@code getFilteredText} and then corrected, profiled, checked, or tagged
 * according to the options. When language auto-detection is enabled, the
 * language is detected from the loaded text, falling back to American English
 * if detection returns {@code null}.
 *
 * @param filename     the input to read
 * @param encoding     the encoding used to read the input
 * @param xmlFiltering passed on to {@code getFilteredText}
 * @throws IOException if reading or processing the input fails
 */
private void runOnFile(String filename, String encoding, boolean xmlFiltering) throws IOException {
    if (bitextMode) {
        TabBitextReader bitextReader = new TabBitextReader(filename, encoding);
        if (options.isApplySuggestions()) {
            CommandLineTools.correctBitext(bitextReader, srcLt, lt, bRules);
        } else {
            CommandLineTools.checkBitext(bitextReader, srcLt, lt, bRules, options.isXmlFormat());
        }
        return;
    }

    final String content = getFilteredText(filename, encoding, xmlFiltering);
    System.err.println(isStdIn(filename) ? "Working on STDIN..." : ("Working on " + filename + "..."));

    if (options.isAutoDetect()) {
        Language detected = detectLanguageOfString(content);
        if (detected == null) {
            System.err.println("Could not detect language well enough, using American English");
            detected = new AmericanEnglish();
        }
        changeLanguage(detected, options.getMotherTongue(), options.getDisabledRules(), options.getEnabledRules());
        System.err.println("Using " + detected.getName() + " for file " + filename);
    }

    // Exactly one processing mode runs; the first matching option wins.
    if (options.isApplySuggestions()) {
        System.out.print(Tools.correctText(content, lt));
    } else if (profileRules) {
        CommandLineTools.profileRulesOnText(content, lt);
    } else if (options.isTaggerOnly()) {
        CommandLineTools.tagText(content, lt);
    } else {
        CommandLineTools.checkText(content, lt, options.isXmlFormat(), options.isJsonFormat(), 0, options.isListUnknown());
    }

    // The unknown-word list is only printed for plain (non-XML, non-JSON) output.
    final boolean printUnknownWords = options.isListUnknown() && !(options.isXmlFormat() || options.isJsonFormat());
    if (printUnknownWords) {
        System.out.println("Unknown words: " + lt.getUnknownWords());
    }
}
Also used : Language(org.languagetool.Language) AmericanEnglish(org.languagetool.language.AmericanEnglish) TabBitextReader(org.languagetool.bitext.TabBitextReader) StringTools.readerToString(org.languagetool.tools.StringTools.readerToString)

Example 8 with AmericanEnglish

use of org.languagetool.language.AmericanEnglish in project languagetool by languagetool-org.

Method testIgnore of class SpellIgnoreTest.

/**
 * Verifies that words registered through {@code addIgnoreTokens} on every
 * active {@link SpellingCheckRule} are no longer reported: the sample text
 * yields two matches before the words are ignored and none afterwards.
 */
// code also used in http://wiki.languagetool.org/java-api
@Test
public void testIgnore() throws IOException {
    final String sample = "This is a text with specialword and myotherword";
    final JLanguageTool lt = new JLanguageTool(new AmericanEnglish());

    int matchesBefore = lt.check(sample).size();
    assertThat(matchesBefore, is(2));

    for (Rule activeRule : lt.getAllActiveRules()) {
        if (!(activeRule instanceof SpellingCheckRule)) {
            // only spelling rules accept ignore tokens
            continue;
        }
        List<String> wordsToIgnore = Arrays.asList("specialword", "myotherword");
        SpellingCheckRule spellingRule = (SpellingCheckRule) activeRule;
        spellingRule.addIgnoreTokens(wordsToIgnore);
    }

    int matchesAfter = lt.check(sample).size();
    assertThat(matchesAfter, is(0));
}
Also used : SpellingCheckRule(org.languagetool.rules.spelling.SpellingCheckRule) AmericanEnglish(org.languagetool.language.AmericanEnglish) Rule(org.languagetool.rules.Rule) Test(org.junit.Test)

Example 9 with AmericanEnglish

use of org.languagetool.language.AmericanEnglish in project languagetool by languagetool-org.

Method setUp of class CompoundRuleTest.

/**
 * Test fixture: creates an American English {@code JLanguageTool} and a
 * {@code CompoundRule} built from the English messages provided by
 * {@code TestTools}, and stores both in the test's fields.
 */
@Before
public void setUp() throws Exception {
    final AmericanEnglish language = new AmericanEnglish();
    lt = new JLanguageTool(language);
    rule = new CompoundRule(TestTools.getEnglishMessages());
}
Also used : JLanguageTool(org.languagetool.JLanguageTool) AmericanEnglish(org.languagetool.language.AmericanEnglish) Before(org.junit.Before)

Example 10 with AmericanEnglish

use of org.languagetool.language.AmericanEnglish in project languagetool by languagetool-org.

Method testIgnoreSuggestionsWithMorfologik of class SpellingCheckRuleTest.

/**
 * Checks three sentences with an American English {@code JLanguageTool}:
 * an artificial test word that must produce no match, a real typo that must
 * produce exactly one match from rule {@code MORFOLOGIK_RULE_EN_US}, and a
 * misspelled artificial test word whose single match must suggest the
 * correctly spelled word.
 */
@Test
public void testIgnoreSuggestionsWithMorfologik() throws IOException {
    final JLanguageTool lt = new JLanguageTool(new AmericanEnglish());

    // no error, as this word is in ignore.txt
    List<RuleMatch> ignoredWordMatches = lt.check("This is anArtificialTestWordForLanguageTool.");
    assertThat(ignoredWordMatches.size(), is(0));

    List<RuleMatch> typoMatches = lt.check("This is a real typoh.");
    assertThat(typoMatches.size(), is(1));
    String reportingRuleId = typoMatches.get(0).getRule().getId();
    assertThat(reportingRuleId, is("MORFOLOGIK_RULE_EN_US"));

    // note the typo
    List<RuleMatch> misspelledWordMatches = lt.check("This is anotherArtificialTestWordForLanguageTol.");
    assertThat(misspelledWordMatches.size(), is(1));
    String suggestions = misspelledWordMatches.get(0).getSuggestedReplacements().toString();
    assertThat(suggestions, is("[anotherArtificialTestWordForLanguageTool]"));
}
Also used : RuleMatch(org.languagetool.rules.RuleMatch) JLanguageTool(org.languagetool.JLanguageTool) AmericanEnglish(org.languagetool.language.AmericanEnglish) Test(org.junit.Test)

Aggregations

AmericanEnglish (org.languagetool.language.AmericanEnglish)10 Test (org.junit.Test)7 Rule (org.languagetool.rules.Rule)4 JLanguageTool (org.languagetool.JLanguageTool)3 Language (org.languagetool.Language)3 SpellingCheckRule (org.languagetool.rules.spelling.SpellingCheckRule)2 StringTools.readerToString (org.languagetool.tools.StringTools.readerToString)2 BufferedReader (java.io.BufferedReader)1 File (java.io.File)1 InputStreamReader (java.io.InputStreamReader)1 Before (org.junit.Before)1 TabBitextReader (org.languagetool.bitext.TabBitextReader)1 Belarusian (org.languagetool.language.Belarusian)1 RuleMatch (org.languagetool.rules.RuleMatch)1 BitextRule (org.languagetool.rules.bitext.BitextRule)1 AbstractPatternRule (org.languagetool.rules.patterns.AbstractPatternRule)1