Search in sources :

Example 16 with English

use of org.languagetool.language.English in project languagetool by languagetool-org.

the class IndexerSearcherTest method testWithOneElementWithException.

/**
 * Checks that searching the index with a rule made of a single token with an
 * empty string and an attached exception is rejected with an
 * {@code UnsupportedPatternRuleException}.
 */
public void testWithOneElementWithException() throws Exception {
    createIndex("How to move back and fourth from linux to xmb?");

    // The only token of the pattern: empty token string plus an exception.
    PatternToken loneToken = new PatternToken("", false, true, false);
    loneToken.setStringPosException("exception", false, false, false, false, false, "POS", false, false, null);
    List<PatternToken> tokens = Arrays.asList(loneToken);

    PatternRule unsupportedRule = new PatternRule("RULE1", new English(), tokens, "desc", "msg", "shortMsg");
    Searcher searcher = new Searcher(directory);

    boolean rejected = false;
    try {
        searcher.findRuleMatchesOnIndex(unsupportedRule, new English());
    } catch (UnsupportedPatternRuleException expected) {
        // This is the outcome the test asks for.
        rejected = true;
    }
    if (!rejected) {
        // The search completed without complaint, which the test treats as a failure.
        fail();
    }
}
Also used : English(org.languagetool.language.English) PatternToken(org.languagetool.rules.patterns.PatternToken) PatternRule(org.languagetool.rules.patterns.PatternRule)

Example 17 with English

use of org.languagetool.language.English in project languagetool by languagetool-org.

the class IndexerSearcherTest method testWithException.

/**
 * Searches a one-sentence index with a two-token rule whose second token
 * carries an exception, and expects exactly one checked sentence, one
 * matching sentence and one rule match reported for {@code RULE1}.
 */
public void testWithException() throws Exception {
    createIndex("How to move back and fourth from linux to xmb?");

    // Second token of the pattern, with an exception attached to it.
    PatternToken secondToken = new PatternToken("forth|back", false, true, false);
    secondToken.setStringPosException("exception", false, false, false, false, false, "POS", false, false, null);
    PatternToken firstToken = new PatternToken("move", false, false, false);
    List<PatternToken> tokens = Arrays.asList(firstToken, secondToken);

    PatternRule ruleUnderTest = new PatternRule("RULE1", new English(), tokens, "desc", "msg", "shortMsg");
    Searcher searcher = new Searcher(directory);
    SearcherResult result = searcher.findRuleMatchesOnIndex(ruleUnderTest, new English());

    assertEquals(1, result.getCheckedSentences());
    assertEquals(1, result.getMatchingSentences().size());

    List<RuleMatch> matches = result.getMatchingSentences().get(0).getRuleMatches();
    assertEquals(1, matches.size());
    // The single match must be attributed to the rule that was searched for.
    assertEquals("RULE1", matches.get(0).getRule().getId());
}
Also used : English(org.languagetool.language.English) PatternToken(org.languagetool.rules.patterns.PatternToken) RuleMatch(org.languagetool.rules.RuleMatch) PatternRule(org.languagetool.rules.patterns.PatternRule) Rule(org.languagetool.rules.Rule)

Example 18 with English

use of org.languagetool.language.English in project languagetool by languagetool-org.

the class Main method main.

/**
 * Command line tool to check plain text files.
 *
 * <p>Parses the command line options, handles the informational switches
 * (usage, version, language list), falls back to English when no language was
 * given, configures a {@code Main} instance and then runs the check on the
 * requested file (recursively, line by line, or as a whole).
 *
 * <p>Invalid command lines and the informational switches end the JVM via
 * {@code System.exit}. Once the {@code Main} instance exists, its
 * {@code cleanUp()} is always called, even when configuration or checking
 * throws.
 *
 * @param args the command line arguments, passed to {@code CommandLineParser.parseOptions}
 * @throws IOException declared for the file-processing calls made here
 * @throws ParserConfigurationException declared for the XML-based calls made here
 * @throws SAXException declared for the XML-based calls made here
 */
public static void main(String[] args) throws IOException, ParserConfigurationException, SAXException {
    JnaTools.setBugWorkaroundProperty();
    CommandLineParser commandLineParser = new CommandLineParser();
    CommandLineOptions options = null;
    try {
        options = commandLineParser.parseOptions(args);
    } catch (WrongParameterNumberException e) {
        commandLineParser.printUsage();
        System.exit(1);
    } catch (IllegalArgumentException e) {
        System.err.println(e.toString());
        System.exit(1);
    } catch (UnknownParameterException e) {
        if (e.getMessage() != null) {
            System.err.println(e.getMessage());
        } else {
            System.err.println(e.toString());
        }
        commandLineParser.printUsage(System.err);
        System.exit(1);
    }
    if (options.isPrintUsage()) {
        commandLineParser.printUsage();
        System.exit(1);
    }
    if (options.isPrintVersion()) {
        System.out.println("LanguageTool version " + JLanguageTool.VERSION + " (" + JLanguageTool.BUILD_DATE + ")");
        System.exit(0);
    }
    if (options.isPrintLanguages()) {
        printLanguages();
        System.exit(0);
    }
    if (options.getFilename() == null) {
        // No file given: "-" is used as the file name instead.
        options.setFilename("-");
    }
    String languageHint = null;
    if (options.getLanguage() == null) {
        if (!options.isXmlFormat() && !options.isAutoDetect()) {
            System.err.println("No language specified, using English (no spell checking active, " + "specify a language variant like 'en-GB' if available)");
        }
        options.setLanguage(new English());
    } else if (!options.isXmlFormat() && !options.isApplySuggestions()) {
        // Printed later, once it is known whether spell checking is active.
        languageHint = "Expected text language: " + options.getLanguage().getName();
    }
    options.getLanguage().getSentenceTokenizer().setSingleLineBreaksMarksParagraph(options.isSingleLineBreakMarksParagraph());
    Main prg = new Main(options);
    try {
        if (options.getFalseFriendFile() != null) {
            List<AbstractPatternRule> ffRules = prg.lt.loadFalseFriendRules(options.getFalseFriendFile());
            for (AbstractPatternRule ffRule : ffRules) {
                prg.lt.addRule(ffRule);
            }
        }
        if (prg.lt.getAllActiveRules().isEmpty()) {
            List<String> catIds = options.getEnabledCategories().stream().map(Object::toString).collect(Collectors.toList());
            throw new RuntimeException("No rules are active. Please make sure your rule ids " + "(" + options.getEnabledRules() + ") and " + "category ids (" + catIds + ") are correct");
        }
        if (languageHint != null) {
            String spellHint = prg.isSpellCheckingActive() ? "" : " (no spell checking active, specify a language variant like 'en-GB' if available)";
            System.err.println(languageHint + spellHint);
        }
        prg.setListUnknownWords(options.isListUnknown());
        if (options.isProfile()) {
            prg.setProfilingMode();
        }
        if (options.isBitext()) {
            if (options.getMotherTongue() == null) {
                throw new IllegalArgumentException("You have to set the source language (as mother tongue) in bitext mode");
            }
            File bitextRuleFile = options.getBitextRuleFile() != null ? new File(options.getBitextRuleFile()) : null;
            prg.setBitextMode(options.getMotherTongue(), options.getDisabledRules(), options.getEnabledRules(), bitextRuleFile);
        }
        if (options.isRecursive()) {
            prg.runRecursive(options.getFilename(), options.getEncoding(), options.isXmlFiltering());
        } else if (options.isLineByLine()) {
            prg.runOnFileLineByLine(options.getFilename(), options.getEncoding());
        } else {
            prg.runOnFile(options.getFilename(), options.getEncoding(), options.isXmlFiltering());
        }
    } finally {
        // Run the cleanup on every exit path, not only after a successful check.
        prg.cleanUp();
    }
}
Also used : BufferedInputStream(java.io.BufferedInputStream) JLanguageTool(org.languagetool.JLanguageTool) StringTools.readerToString(org.languagetool.tools.StringTools.readerToString) ArrayList(java.util.ArrayList) MultiThreadedJLanguageTool(org.languagetool.MultiThreadedJLanguageTool) HashSet(java.util.HashSet) Charset(java.nio.charset.Charset) BOMInputStream(org.apache.commons.io.input.BOMInputStream) LanguageIdentifier(org.languagetool.language.LanguageIdentifier) TabBitextReader(org.languagetool.bitext.TabBitextReader) ApiPrintMode(org.languagetool.tools.StringTools.ApiPrintMode) AmericanEnglish(org.languagetool.language.AmericanEnglish) BitextRule(org.languagetool.rules.bitext.BitextRule) IOException(java.io.IOException) ByteOrderMark(org.apache.commons.io.ByteOrderMark) FileInputStream(java.io.FileInputStream) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) File(java.io.File) Rule(org.languagetool.rules.Rule) English(org.languagetool.language.English) List(java.util.List) Tools(org.languagetool.tools.Tools) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException) StringTools.filterXML(org.languagetool.tools.StringTools.filterXML) SAXException(org.xml.sax.SAXException) Language(org.languagetool.Language) PatternRuleLoader(org.languagetool.rules.patterns.PatternRuleLoader) Languages(org.languagetool.Languages) JnaTools(org.languagetool.tools.JnaTools) BufferedReader(java.io.BufferedReader) Collections(java.util.Collections) InputStream(java.io.InputStream)

Example 19 with English

use of org.languagetool.language.English in project languagetool by languagetool-org.

the class TatoebaSentenceSourceTest method testTatoebaSourceInvalidInput.

/**
 * Feeds the sentence source the plain text {@code "just a text"} and expects
 * a {@code RuntimeException} while the source is built or queried.
 */
@Test(expected = RuntimeException.class)
public void testTatoebaSourceInvalidInput() throws UnsupportedEncodingException {
    byte[] rawInput = "just a text".getBytes("utf-8");
    ByteArrayInputStream invalidInput = new ByteArrayInputStream(rawInput);
    TatoebaSentenceSource invalidSource = new TatoebaSentenceSource(invalidInput, new English());
    // The return value is irrelevant: the test passes only if an exception is thrown.
    invalidSource.hasNext();
}
Also used : English(org.languagetool.language.English) ByteArrayInputStream(java.io.ByteArrayInputStream) Test(org.junit.Test)

Example 20 with English

use of org.languagetool.language.English in project languagetool by languagetool-org.

the class TatoebaSentenceSourceTest method testTatoebaSource.

/**
 * Reads the bundled {@code tatoeba-en.txt} resource and checks that the
 * source yields exactly three sentences, in the expected order, and nothing
 * after them.
 */
@Test
public void testTatoebaSource() {
    InputStream resource = WikipediaSentenceSourceTest.class.getResourceAsStream("/org/languagetool/dev/wikipedia/tatoeba-en.txt");
    TatoebaSentenceSource sentences = new TatoebaSentenceSource(resource, new English());

    String[] expectedTexts = {
        "\"What is your wish?\" asked the little white rabbit.",
        "The mother wakes up her daughter.",
        "Ken beat me at chess."
    };

    assertTrue(sentences.hasNext());
    for (String expectedText : expectedTexts) {
        assertThat(sentences.next().getText(), is(expectedText));
    }
    // Nothing may remain after the last expected sentence.
    assertFalse(sentences.hasNext());
}
Also used : English(org.languagetool.language.English) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) Test(org.junit.Test)

Aggregations

English (org.languagetool.language.English)35 Test (org.junit.Test)19 JLanguageTool (org.languagetool.JLanguageTool)14 PatternRule (org.languagetool.rules.patterns.PatternRule)8 Rule (org.languagetool.rules.Rule)7 RuleMatch (org.languagetool.rules.RuleMatch)7 PatternToken (org.languagetool.rules.patterns.PatternToken)7 AmericanEnglish (org.languagetool.language.AmericanEnglish)6 BritishEnglish (org.languagetool.language.BritishEnglish)5 Before (org.junit.Before)4 Language (org.languagetool.Language)4 InputStream (java.io.InputStream)3 AnalyzedSentence (org.languagetool.AnalyzedSentence)3 AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings)3 ByteArrayInputStream (java.io.ByteArrayInputStream)2 FileInputStream (java.io.FileInputStream)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 RAMDirectory (org.apache.lucene.store.RAMDirectory)2 Ignore (org.junit.Ignore)2