use of org.languagetool.language.AmericanEnglish in project languagetool by languagetool-org.
the class Main method runOnFileLineByLine.
/**
 * Processes the input incrementally: lines are accumulated in a buffer and the buffer is
 * handed to {@code handleLine} whenever {@code isBreakPoint} reports a break for the line
 * just read, instead of loading the whole input first.
 *
 * <p>When {@code profileRules} is set, the input is re-opened and read once per active rule,
 * with {@code currentRule} pointing at the rule of the current pass; otherwise a single pass
 * is made. If there are no active rules, the input is not read at all.
 *
 * <p>With auto-detection enabled, the language is guessed from the first line of each pass,
 * falling back to American English when detection returns {@code null}.
 *
 * @param filename the input to read; may denote STDIN (as decided by {@code isStdIn})
 * @param encoding the character encoding passed on to {@code getInputStreamReader}
 * @throws IOException if reading the input fails
 * @throws IllegalArgumentException if profiling is requested together with STDIN input
 */
private void runOnFileLineByLine(String filename, String encoding) throws IOException {
  System.err.println("Warning: running in line by line mode. Cross-paragraph checks will not work.\n");
  if (options.isVerbose()) {
    lt.setOutput(System.err);
  }
  if (!(options.isXmlFormat() || options.isApplySuggestions())) {
    System.err.println(isStdIn(filename) ? "Working on STDIN..." : "Working on " + filename + "...");
  }
  if (profileRules && isStdIn(filename)) {
    throw new IllegalArgumentException("Profiling mode cannot be used with input from STDIN");
  }

  List<Rule> activeRules = lt.getAllActiveRules();
  // A normal run is a single pass; profiling makes one pass per active rule.
  int passes = 1;
  if (profileRules) {
    System.out.printf("Testing %d rules\n", activeRules.size());
    System.out.println("Rule ID\tTime\tSentences\tMatches\tSentences per sec.");
    passes = activeRules.size();
  }

  // Line number at which the currently buffered chunk starts.
  int chunkStartLine = 0;
  // Total number of lines consumed so far (not reset between passes).
  int linesRead = 0;
  handleLine(ApiPrintMode.START_API, 0, new StringBuilder());
  StringBuilder chunk = new StringBuilder();

  int pass = 0;
  while (!activeRules.isEmpty() && pass < passes) {
    currentRule = activeRules.get(pass);
    try (InputStreamReader streamReader = getInputStreamReader(filename, encoding);
         BufferedReader lineReader = new BufferedReader(streamReader)) {
      int lineNo = 0;
      for (String line = lineReader.readLine(); line != null; line = lineReader.readLine()) {
        chunk.append(line);
        lineNo++;
        // The first line of the input is what the language gets detected from.
        if (lineNo == 1 && options.isAutoDetect()) {
          Language detected = detectLanguageOfString(line);
          if (detected == null) {
            System.err.println("Could not detect language well enough, using American English");
            detected = new AmericanEnglish();
          }
          System.err.println("Language used is: " + detected.getName());
          detected.getSentenceTokenizer().setSingleLineBreaksMarksParagraph(options.isSingleLineBreakMarksParagraph());
          changeLanguage(detected, options.getMotherTongue(), options.getDisabledRules(), options.getEnabledRules());
        }
        chunk.append('\n');
        linesRead++;
        if (!isBreakPoint(line)) {
          // Keep buffering until a break point is reached.
          continue;
        }
        handleLine(ApiPrintMode.CONTINUE_API, chunkStartLine, chunk);
        if (profileRules) {
          // NOTE(review): the sentence count is computed but not used here.
          lt.sentenceTokenize(chunk.toString()).size();
        }
        chunk = new StringBuilder();
        chunkStartLine = linesRead;
      }
    } finally {
      // Runs after the readers are closed, also when the pass failed with an exception.
      if (profileRules && chunk.length() > 0) {
        // NOTE(review): the sentence count is computed but not used here.
        lt.sentenceTokenize(chunk.toString()).size();
      }
      handleLine(ApiPrintMode.END_API, linesRead - 1, chunk);
    }
    pass++;
  }
}
use of org.languagetool.language.AmericanEnglish in project languagetool by languagetool-org.
the class Main method runOnFile.
/**
 * Loads the complete input and processes it in one go.
 *
 * <p>In bitext mode the input is read through a {@code TabBitextReader} and either corrected
 * or checked. Otherwise the (optionally filtered) text is, depending on the options,
 * corrected, profiled, checked, or only tagged. With auto-detection enabled the language is
 * guessed from the whole text first, falling back to American English when detection
 * returns {@code null}.
 *
 * @param filename the input to read; may denote STDIN (as decided by {@code isStdIn})
 * @param encoding the character encoding of the input
 * @param xmlFiltering passed through to {@code getFilteredText}
 * @throws IOException if reading or processing the input fails
 */
private void runOnFile(String filename, String encoding, boolean xmlFiltering) throws IOException {
  if (bitextMode) {
    TabBitextReader bitextReader = new TabBitextReader(filename, encoding);
    if (options.isApplySuggestions()) {
      CommandLineTools.correctBitext(bitextReader, srcLt, lt, bRules);
    } else {
      CommandLineTools.checkBitext(bitextReader, srcLt, lt, bRules, options.isXmlFormat());
    }
    return;
  }

  String content = getFilteredText(filename, encoding, xmlFiltering);
  System.err.println(isStdIn(filename) ? "Working on STDIN..." : "Working on " + filename + "...");

  if (options.isAutoDetect()) {
    Language detected = detectLanguageOfString(content);
    if (detected == null) {
      System.err.println("Could not detect language well enough, using American English");
      detected = new AmericanEnglish();
    }
    changeLanguage(detected, options.getMotherTongue(), options.getDisabledRules(), options.getEnabledRules());
    System.err.println("Using " + detected.getName() + " for file " + filename);
  }

  // Exactly one processing mode is run; applying suggestions takes precedence, then profiling.
  if (options.isApplySuggestions()) {
    System.out.print(Tools.correctText(content, lt));
  } else if (profileRules) {
    CommandLineTools.profileRulesOnText(content, lt);
  } else if (options.isTaggerOnly()) {
    CommandLineTools.tagText(content, lt);
  } else {
    CommandLineTools.checkText(content, lt, options.isXmlFormat(), options.isJsonFormat(), 0, options.isListUnknown());
  }

  // The unknown-word summary line is only printed when neither XML nor JSON output is selected.
  if (options.isListUnknown() && !(options.isXmlFormat() || options.isJsonFormat())) {
    System.out.println("Unknown words: " + lt.getUnknownWords());
  }
}
use of org.languagetool.language.AmericanEnglish in project languagetool by languagetool-org.
the class SpellIgnoreTest method testIgnore.
// code also used in http://wiki.languagetool.org/java-api
/**
 * Verifies that words registered via {@code SpellingCheckRule.addIgnoreTokens} are no longer
 * reported: the sample text yields two matches before and none after.
 */
@Test
public void testIgnore() throws IOException {
  String sample = "This is a text with specialword and myotherword";
  JLanguageTool langTool = new JLanguageTool(new AmericanEnglish());

  // Both made-up words are flagged before anything is ignored.
  int matchesBefore = langTool.check(sample).size();
  assertThat(matchesBefore, is(2));

  for (Rule activeRule : langTool.getAllActiveRules()) {
    if (!(activeRule instanceof SpellingCheckRule)) {
      continue;
    }
    SpellingCheckRule spellingRule = (SpellingCheckRule) activeRule;
    List<String> ignoredWords = Arrays.asList("specialword", "myotherword");
    spellingRule.addIgnoreTokens(ignoredWords);
  }

  // With both words ignored, nothing is reported any more.
  int matchesAfter = langTool.check(sample).size();
  assertThat(matchesAfter, is(0));
}
use of org.languagetool.language.AmericanEnglish in project languagetool by languagetool-org.
the class CompoundRuleTest method setUp.
/**
 * Test fixture: creates an American English {@code JLanguageTool} and the
 * {@code CompoundRule} under test, built from the English message bundle.
 */
@Before
public void setUp() throws Exception {
  AmericanEnglish english = new AmericanEnglish();
  lt = new JLanguageTool(english);
  rule = new CompoundRule(TestTools.getEnglishMessages());
}
use of org.languagetool.language.AmericanEnglish in project languagetool by languagetool-org.
the class SpellingCheckRuleTest method testIgnoreSuggestionsWithMorfologik.
/**
 * Checks three cases against an American English {@code JLanguageTool}: an artificial word
 * that produces no match, a real typo reported by {@code MORFOLOGIK_RULE_EN_US}, and a
 * misspelled artificial word for which the correctly spelled form is the only suggestion.
 */
@Test
public void testIgnoreSuggestionsWithMorfologik() throws IOException {
  JLanguageTool langTool = new JLanguageTool(new AmericanEnglish());

  // no error, as this word is in ignore.txt
  List<RuleMatch> ignoredWordMatches = langTool.check("This is anArtificialTestWordForLanguageTool.");
  assertThat(ignoredWordMatches.size(), is(0));

  // A genuine misspelling is still reported, and by the expected rule.
  List<RuleMatch> typoMatches = langTool.check("This is a real typoh.");
  assertThat(typoMatches.size(), is(1));
  RuleMatch typoMatch = typoMatches.get(0);
  assertThat(typoMatch.getRule().getId(), is("MORFOLOGIK_RULE_EN_US"));

  // The input ends in "Tol" instead of "Tool"; the corrected word must be the sole suggestion.
  List<RuleMatch> misspeltMatches = langTool.check("This is anotherArtificialTestWordForLanguageTol.");
  assertThat(misspeltMatches.size(), is(1));
  String suggestions = misspeltMatches.get(0).getSuggestedReplacements().toString();
  assertThat(suggestions, is("[anotherArtificialTestWordForLanguageTool]"));
}
Aggregations