Search in sources :

Example 6 with English

use of org.languagetool.language.English in project languagetool by languagetool-org.

the class JLanguageToolTest method testOverlapFilter.

/**
 * Checks that of two overlapping pattern-rule matches only one survives.
 *
 * <p>Two rules share the id {@code "id1"} (sub ids {@code "1"} and {@code "2"}) and the
 * same category; the first is built from the single token "one", the second from the
 * tokens "one" and "two". The same sentence is checked twice - once through
 * {@code check(String)} and once through {@code checkAnalyzedSentence(...)} - and in both
 * cases exactly one match, carrying the first rule's message, is expected.
 */
@Test
public void testOverlapFilter() throws IOException {
    String sentence = "And one two three.";
    Category sharedCategory = new Category(new CategoryId("TEST_ID"), "test category");

    // Rule covering only the token "one".
    List<PatternToken> singleToken = Arrays.asList(new PatternToken("one", true, false, false));
    PatternRule shortRule = new PatternRule("id1", new English(), singleToken, "desc1", "msg1", "shortMsg1");
    shortRule.setSubId("1");
    shortRule.setCategory(sharedCategory);

    // Rule covering "one two", so its match overlaps the one above.
    List<PatternToken> twoTokens = Arrays.asList(
            new PatternToken("one", true, false, false),
            new PatternToken("two", true, false, false));
    PatternRule longRule = new PatternRule("id1", new English(), twoTokens, "desc2", "msg2", "shortMsg2");
    longRule.setSubId("2");
    longRule.setCategory(sharedCategory);

    JLanguageTool lt = new JLanguageTool(new English());
    lt.addRule(shortRule);
    lt.addRule(longRule);

    // Path 1: plain text check.
    List<RuleMatch> matchesFromText = lt.check(sentence);
    assertEquals("one overlapping rule must be filtered out", 1, matchesFromText.size());
    assertEquals("msg1", matchesFromText.get(0).getMessage());

    // Path 2: check of an already analyzed sentence with an explicit rule list.
    AnalyzedSentence analyzed = lt.getAnalyzedSentence(sentence);
    List<Rule> explicitRules = new ArrayList<>();
    explicitRules.add(shortRule);
    explicitRules.add(longRule);
    List<RuleMatch> matchesFromAnalysis = lt.checkAnalyzedSentence(ParagraphHandling.NORMAL, explicitRules, analyzed);
    assertEquals("one overlapping rule must be filtered out", 1, matchesFromAnalysis.size());
    assertEquals("msg1", matchesFromAnalysis.get(0).getMessage());
}
Also used : PatternRule(org.languagetool.rules.patterns.PatternRule) ArrayList(java.util.ArrayList) AmericanEnglish(org.languagetool.language.AmericanEnglish) English(org.languagetool.language.English) BritishEnglish(org.languagetool.language.BritishEnglish) PatternToken(org.languagetool.rules.patterns.PatternToken) SpellingCheckRule(org.languagetool.rules.spelling.SpellingCheckRule) PatternRule(org.languagetool.rules.patterns.PatternRule) Test(org.junit.Test)

Example 7 with English

use of org.languagetool.language.English in project languagetool by languagetool-org.

the class LanguageToolFilterTest method testFilter.

/**
 * Runs the input "How to?" through a {@code LanguageToolFilter} and compares the
 * emitted token stream with the expected terms, offsets, types and position increments.
 *
 * <p>NOTE(review): the array roles below (start offsets, end offsets, types, position
 * increments, final offset) follow the argument order of
 * {@code assertTokenStreamContents} - confirm against the Lucene test framework in use.
 */
public void testFilter() throws Exception {
    Tokenizer tokenizer = new AnyCharTokenizer();
    tokenizer.setReader(new StringReader("How to?"));
    JLanguageTool lt = new JLanguageTool(new English());
    LanguageToolFilter filter = new LanguageToolFilter(tokenizer, lt, false);
    // Debugging aid: displayTokensWithFullDetails(filter);

    String[] expectedTerms = {
        "_POS_SENT_START",
        "How", "_LEMMA_how", "_POS_WRB",
        "to", "_LEMMA_to", "_POS_TO", "_LEMMA_to", "_POS_IN",
        "?", "_POS_SENT_END"
    };
    int[] expectedStartOffsets = { 0, 0, 0, 0, 4, 4, 4, 4, 4, 6, 6 };
    int[] expectedEndOffsets = { 0, 3, 3, 3, 6, 6, 6, 6, 6, 7, 7 };
    String[] expectedTypes = {
        "pos",
        "word", "pos", "pos",
        "word", "pos", "pos", "pos", "pos",
        "word", "pos"
    };
    int[] expectedPositionIncrements = { 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0 };

    assertTokenStreamContents(
            filter,
            expectedTerms,
            expectedStartOffsets,
            expectedEndOffsets,
            expectedTypes,
            expectedPositionIncrements,
            7);
}
Also used : English(org.languagetool.language.English) JLanguageTool(org.languagetool.JLanguageTool) StringReader(java.io.StringReader) Tokenizer(org.apache.lucene.analysis.Tokenizer)

Example 8 with English

use of org.languagetool.language.English in project languagetool by languagetool-org.

the class PatternRuleQueryBuilderTest method setUp.

/**
 * Prepares the fixture: an English language instance, an in-memory Lucene directory
 * holding two indexed sample sentences, and a reader/searcher opened on that directory.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    language = new English();
    directory = new RAMDirectory();
    // Alternative for inspecting the index on disk instead of in memory:
    /*File indexPath = new File("/tmp/lucene");
    if (indexPath.exists()) {
      FileUtils.deleteDirectory(indexPath);
    }
    directory = FSDirectory.open(indexPath);*/
    IndexWriterConfig writerConfig = Indexer.getIndexWriterConfig(Indexer.getAnalyzer(language));
    // The sample sentences are indexed verbatim.
    String[] sampleSentences = {
        "How do you thin about this wonderful idea?",
        "The are several grammar checkers for English, E.G. LanguageTool 123."
    };
    // The writer is closed before the reader is opened on the same directory.
    try (IndexWriter indexWriter = new IndexWriter(directory, writerConfig)) {
        for (String sampleSentence : sampleSentences) {
            addDocument(indexWriter, sampleSentence);
        }
    }
    reader = DirectoryReader.open(directory);
    searcher = newSearcher(reader);
}
Also used : English(org.languagetool.language.English) IndexWriter(org.apache.lucene.index.IndexWriter) Analyzer(org.apache.lucene.analysis.Analyzer) RAMDirectory(org.apache.lucene.store.RAMDirectory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 9 with English

use of org.languagetool.language.English in project languagetool by languagetool-org.

the class WikipediaSentenceSourceTest method testWikipediaSource.

/**
 * Reads the bundled {@code wikipedia-en.xml} test resource through a
 * {@code WikipediaSentenceSource} and checks that it yields the five expected
 * sentences, in order, and then reports no further sentence.
 *
 * @throws XMLStreamException declared for the sentence source's XML handling
 * @throws IOException if reading or closing the resource stream fails
 */
@Test
public void testWikipediaSource() throws XMLStreamException, IOException {
    // try-with-resources so the classpath stream is closed even when an assertion fails;
    // previously the stream was never closed.
    try (InputStream stream = WikipediaSentenceSourceTest.class.getResourceAsStream("/org/languagetool/dev/wikipedia/wikipedia-en.xml")) {
        WikipediaSentenceSource source = new WikipediaSentenceSource(stream, new English());
        assertTrue(source.hasNext());
        assertThat(source.next().getText(), is("This is the first document."));
        assertThat(source.next().getText(), is("It has three sentences."));
        assertThat(source.next().getText(), is("Here's the last sentence."));
        assertThat(source.next().getText(), is("This is the second document."));
        assertThat(source.next().getText(), is("It has two sentences."));
        assertFalse(source.hasNext());
    }
}
Also used : English(org.languagetool.language.English) InputStream(java.io.InputStream) Test(org.junit.Test)

Example 10 with English

use of org.languagetool.language.English in project languagetool by languagetool-org.

the class IndexerSearcherTest method testAllRules.

/**
 * Builds an index for one language and, for every active pattern rule that is not off by
 * default, searches that index with the rule and checks that at least one returned
 * sentence contains a match carrying the rule's full id.
 *
 * <p>Nothing is asserted; results are only printed to standard output: one diagnostic
 * block per rule without an expected match, one line per unsupported rule or exception,
 * and a final summary with problem count, exception count and elapsed time.
 */
@Ignore("ignored as long as it doesn't work 100%")
public void testAllRules() throws Exception {
    final long startTime = System.currentTimeMillis();
    // comment in to test with external index:
    //directory = new SimpleFSDirectory(new File("/media/external-disk/corpus/languagetool/fast-rule-evaluation-de/"));
    //errorSearcher = new Searcher(directory);
    // TODO: make this work for all languages
    Language language = new English();
    // Other languages tried here: French, Spanish, Polish, German
    //Language language = new French();
    //Language language = new Spanish();
    //Language language = new Polish();
    //Language language = new German();
    JLanguageTool lt = new JLanguageTool(language);

    System.out.println("Creating index for " + language + "...");
    int indexedRuleCount = createIndex(lt);
    System.out.println("Index created with " + indexedRuleCount + " rules");

    int testedRules = 0;
    int problemCount = 0;
    int failureCount = 0;
    for (Rule rule : lt.getAllActiveRules()) {
        // Only pattern rules that are not off by default are evaluated.
        if (!(rule instanceof PatternRule) || rule.isDefaultOff()) {
            continue;
        }
        PatternRule patternRule = (PatternRule) rule;
        try {
            testedRules++;
            SearcherResult result = errorSearcher.findRuleMatchesOnIndex(patternRule, language);
            List<MatchingSentence> hits = result.getMatchingSentences();

            // Stop at the first sentence whose matches include this rule's full id.
            // TODO: there can be more than one expected match, can't it?
            boolean expectedMatchSeen = false;
            for (MatchingSentence hit : hits) {
                List<String> idsInHit = getRuleMatchIds(hit.getRuleMatches());
                if (idsInHit.contains(patternRule.getFullId())) {
                    expectedMatchSeen = true;
                    break;
                }
            }

            if (!expectedMatchSeen) {
                System.out.println("Error: No match found for " + patternRule);
                System.out.println("Query      : " + result.getRelaxedQuery().toString(FIELD_NAME_LOWERCASE));
                System.out.println("Default field: " + FIELD_NAME_LOWERCASE);
                System.out.println("Lucene Hits: " + result.getLuceneMatchCount());
                System.out.println("Matches    : " + hits);
                System.out.println("Examples   : " + rule.getIncorrectExamples());
                System.out.println();
                problemCount++;
            }
            // On success nothing is reported; optional timing output kept for reference:
            //long time = System.currentTimeMillis() - startTime;
            //System.out.println("Tested " + hits.size() + " sentences in " + time + "ms for rule " + patternRule);
        } catch (UnsupportedPatternRuleException e) {
            // Unsupported rules count as problems, not as exceptions.
            System.out.println("UnsupportedPatternRuleException searching for rule " + patternRule.getFullId() + ": " + e.getMessage());
            problemCount++;
        } catch (Exception e) {
            System.out.println("Exception searching for rule " + patternRule.getFullId() + ": " + e.getMessage());
            e.printStackTrace(System.out);
            failureCount++;
        }
    }

    System.out.println(language + ": problems: " + problemCount + ", total rules: " + testedRules);
    System.out.println(language + ": exceptions: " + failureCount + " (including timeouts)");
    System.out.println("Total time: " + (System.currentTimeMillis() - startTime) + "ms");
}
Also used : PatternRule(org.languagetool.rules.patterns.PatternRule) JLanguageTool(org.languagetool.JLanguageTool) IOException(java.io.IOException) English(org.languagetool.language.English) RuleMatch(org.languagetool.rules.RuleMatch) Language(org.languagetool.Language) PatternRule(org.languagetool.rules.patterns.PatternRule) Rule(org.languagetool.rules.Rule) Ignore(org.junit.Ignore)

Aggregations

English (org.languagetool.language.English)35 Test (org.junit.Test)19 JLanguageTool (org.languagetool.JLanguageTool)14 PatternRule (org.languagetool.rules.patterns.PatternRule)8 Rule (org.languagetool.rules.Rule)7 RuleMatch (org.languagetool.rules.RuleMatch)7 PatternToken (org.languagetool.rules.patterns.PatternToken)7 AmericanEnglish (org.languagetool.language.AmericanEnglish)6 BritishEnglish (org.languagetool.language.BritishEnglish)5 Before (org.junit.Before)4 Language (org.languagetool.Language)4 InputStream (java.io.InputStream)3 AnalyzedSentence (org.languagetool.AnalyzedSentence)3 AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings)3 ByteArrayInputStream (java.io.ByteArrayInputStream)2 FileInputStream (java.io.FileInputStream)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 RAMDirectory (org.apache.lucene.store.RAMDirectory)2 Ignore (org.junit.Ignore)2