Search in sources :

Example 11 with English

use of org.languagetool.language.English in project languagetool by languagetool-org.

the class WikipediaSentenceSourceTest method testWikipediaSource.

/**
 * Reads the English Wikipedia sample dump from the classpath and checks that
 * {@link WikipediaSentenceSource} hands out its sentences one by one: the three
 * sentences of the first document followed by the two of the second, after
 * which the source reports that nothing is left.
 *
 * @throws XMLStreamException declared by the sentence source calls
 * @throws IOException declared by the sentence source calls, or raised when closing the stream
 */
@Test
public void testWikipediaSource() throws XMLStreamException, IOException {
    // try-with-resources: the classpath stream is released even when an assertion below fails
    try (InputStream stream = WikipediaSentenceSourceTest.class.getResourceAsStream("/org/languagetool/dev/wikipedia/wikipedia-en.xml")) {
        WikipediaSentenceSource source = new WikipediaSentenceSource(stream, new English());
        assertTrue(source.hasNext());
        // first document
        assertThat(source.next().getText(), is("This is the first document."));
        assertThat(source.next().getText(), is("It has three sentences."));
        assertThat(source.next().getText(), is("Here's the last sentence."));
        // second document
        assertThat(source.next().getText(), is("This is the second document."));
        assertThat(source.next().getText(), is("It has two sentences."));
        assertFalse(source.hasNext());
    }
}
Also used : English(org.languagetool.language.English) InputStream(java.io.InputStream) Test(org.junit.Test)

Example 12 with English

use of org.languagetool.language.English in project languagetool by languagetool-org.

the class IndexerSearcherTest method testAllRules.

/**
 * Creates an index for one language and then, for every active pattern rule
 * that is not off by default, searches that index with the rule. A rule counts
 * as a problem when none of the returned sentences carries a match with the
 * rule's own full id, or when the rule is reported as unsupported. Everything
 * is only printed to standard output; nothing is asserted.
 */
@Ignore("ignored as long as it doesn't work 100%")
public void testAllRules() throws Exception {
    long startTime = System.currentTimeMillis();
    // To test with an external index, enable these two lines:
    //directory = new SimpleFSDirectory(new File("/media/external-disk/corpus/languagetool/fast-rule-evaluation-de/"));
    //errorSearcher = new Searcher(directory);
    // TODO: make this work for all languages
    // (alternatives tried so far: new French(), new Spanish(), new Polish(), new German())
    Language language = new English();
    JLanguageTool lt = new JLanguageTool(language);
    System.out.println("Creating index for " + language + "...");
    int indexedRules = createIndex(lt);
    System.out.println("Index created with " + indexedRules + " rules");
    int testedRules = 0;
    int problemRules = 0;
    int failures = 0;
    for (Rule candidate : lt.getAllActiveRules()) {
        // only pattern rules that are enabled by default are evaluated
        if (!(candidate instanceof PatternRule) || candidate.isDefaultOff()) {
            continue;
        }
        PatternRule patternRule = (PatternRule) candidate;
        try {
            testedRules++;
            SearcherResult result = errorSearcher.findRuleMatchesOnIndex(patternRule, language);
            List<MatchingSentence> sentences = result.getMatchingSentences();
            // TODO: there can be more than one expected match, can't it?
            boolean ownMatchFound = false;
            for (int i = 0; i < sentences.size() && !ownMatchFound; i++) {
                List<String> matchIds = getRuleMatchIds(sentences.get(i).getRuleMatches());
                ownMatchFound = matchIds.contains(patternRule.getFullId());
            }
            if (ownMatchFound) {
                continue;
            }
            System.out.println("Error: No match found for " + patternRule);
            System.out.println("Query      : " + result.getRelaxedQuery().toString(FIELD_NAME_LOWERCASE));
            System.out.println("Default field: " + FIELD_NAME_LOWERCASE);
            System.out.println("Lucene Hits: " + result.getLuceneMatchCount());
            System.out.println("Matches    : " + sentences);
            System.out.println("Examples   : " + candidate.getIncorrectExamples());
            System.out.println();
            problemRules++;
        } catch (UnsupportedPatternRuleException e) {
            // rules the searcher cannot handle are counted as problems, not as exceptions
            System.out.println("UnsupportedPatternRuleException searching for rule " + patternRule.getFullId() + ": " + e.getMessage());
            problemRules++;
        } catch (Exception e) {
            System.out.println("Exception searching for rule " + patternRule.getFullId() + ": " + e.getMessage());
            e.printStackTrace(System.out);
            failures++;
        }
    }
    long elapsedMillis = System.currentTimeMillis() - startTime;
    System.out.println(language + ": problems: " + problemRules + ", total rules: " + testedRules);
    System.out.println(language + ": exceptions: " + failures + " (including timeouts)");
    System.out.println("Total time: " + elapsedMillis + "ms");
}
Also used : PatternRule(org.languagetool.rules.patterns.PatternRule) JLanguageTool(org.languagetool.JLanguageTool) IOException(java.io.IOException) English(org.languagetool.language.English) RuleMatch(org.languagetool.rules.RuleMatch) Language(org.languagetool.Language) Rule(org.languagetool.rules.Rule) Ignore(org.junit.Ignore)

Example 13 with English

use of org.languagetool.language.English in project languagetool by languagetool-org.

the class IndexerSearcherTest method createIndex.

/**
 * Points {@code directory} at a new {@link RAMDirectory}, runs the indexer over
 * the given text with English as the language, and then sets
 * {@code errorSearcher} to a searcher on that directory.
 *
 * @param content the text passed to the indexer
 * @throws IOException if setting up the index fails
 */
private void createIndex(String content) throws IOException {
    English english = new English();
    // For debugging, an on-disk directory can be used instead:
    //directory = FSDirectory.open(new File("/tmp/lucenetest"));
    directory = new RAMDirectory();
    Indexer.run(content, directory, english);
    errorSearcher = new Searcher(directory);
}
Also used : English(org.languagetool.language.English) RAMDirectory(org.apache.lucene.store.RAMDirectory)

Example 14 with English

use of org.languagetool.language.English in project languagetool by languagetool-org.

the class IndexerSearcherTest method testIndexerSearcherWithEnglish.

/**
 * Indexes two English sentences and, for three rule ids, checks how many
 * sentences were checked, that the result was not time-limited, and how many
 * sentences matched. Finally verifies that an unknown rule id is rejected with
 * a {@link PatternRuleNotFoundException}.
 */
public void testIndexerSearcherWithEnglish() throws Exception {
    // Note that the second sentence ends with "lid" instead of "lids" (the inflected form)
    createIndex("How to move back and fourth from linux to xmb? Calcium deposits on eye lid.");
    English language = new English();

    // expected to match one of the two sentences
    SearcherResult backAndFourth = errorSearcher.findRuleMatchesOnIndex(getFirstRule("BACK_AND_FOURTH", language), language);
    assertEquals(2, backAndFourth.getCheckedSentences());
    assertEquals(false, backAndFourth.isResultIsTimeLimited());
    assertEquals(1, backAndFourth.getMatchingSentences().size());

    // expected to match one of the two sentences
    SearcherResult eyeBrow = errorSearcher.findRuleMatchesOnIndex(getFirstRule("EYE_BROW", language), language);
    assertEquals(2, eyeBrow.getCheckedSentences());
    assertEquals(false, eyeBrow.isResultIsTimeLimited());
    assertEquals(1, eyeBrow.getMatchingSentences().size());

    // expected to match neither sentence
    SearcherResult allOverTheWord = errorSearcher.findRuleMatchesOnIndex(getFirstRule("ALL_OVER_THE_WORD", language), language);
    assertEquals(2, allOverTheWord.getCheckedSentences());
    assertEquals(false, allOverTheWord.isResultIsTimeLimited());
    assertEquals(0, allOverTheWord.getMatchingSentences().size());

    boolean rejected = false;
    try {
        errorSearcher.findRuleMatchesOnIndex(getFirstRule("Invalid Rule Id", language), language);
    } catch (PatternRuleNotFoundException expected) {
        // this is the outcome the test wants for an unknown rule id
        rejected = true;
    }
    if (!rejected) {
        fail("Exception should be thrown for invalid rule id.");
    }
}
Also used : English(org.languagetool.language.English)

Example 15 with English

use of org.languagetool.language.English in project languagetool by languagetool-org.

the class IndexerSearcherTest method testWithNewRule.

/**
 * Indexes a single sentence, builds a two-token pattern rule ("move" followed
 * by "back") in code, and checks that searching the index with that rule
 * checks one sentence and yields exactly one match whose rule id is "RULE1".
 */
public void testWithNewRule() throws Exception {
    createIndex("How to move back and fourth from linux to xmb?");
    PatternToken moveToken = new PatternToken("move", false, false, false);
    PatternToken backToken = new PatternToken("back", false, false, false);
    List<PatternToken> tokens = Arrays.asList(moveToken, backToken);
    PatternRule newRule = new PatternRule("RULE1", new English(), tokens, "desc", "msg", "shortMsg");
    // A separate searcher on the same directory; named so that it does not
    // hide the errorSearcher that createIndex assigns.
    Searcher freshSearcher = new Searcher(directory);
    SearcherResult result = freshSearcher.findRuleMatchesOnIndex(newRule, new English());
    assertEquals(1, result.getCheckedSentences());
    assertEquals(1, result.getMatchingSentences().size());
    List<RuleMatch> matchesInSentence = result.getMatchingSentences().get(0).getRuleMatches();
    assertEquals(1, matchesInSentence.size());
    Rule matchedRule = matchesInSentence.get(0).getRule();
    assertEquals("RULE1", matchedRule.getId());
}
Also used : English(org.languagetool.language.English) PatternToken(org.languagetool.rules.patterns.PatternToken) RuleMatch(org.languagetool.rules.RuleMatch) PatternRule(org.languagetool.rules.patterns.PatternRule) Rule(org.languagetool.rules.Rule)

Aggregations

- English (org.languagetool.language.English): 35
- Test (org.junit.Test): 19
- JLanguageTool (org.languagetool.JLanguageTool): 14
- PatternRule (org.languagetool.rules.patterns.PatternRule): 8
- Rule (org.languagetool.rules.Rule): 7
- RuleMatch (org.languagetool.rules.RuleMatch): 7
- PatternToken (org.languagetool.rules.patterns.PatternToken): 7
- AmericanEnglish (org.languagetool.language.AmericanEnglish): 6
- BritishEnglish (org.languagetool.language.BritishEnglish): 5
- Before (org.junit.Before): 4
- Language (org.languagetool.Language): 4
- InputStream (java.io.InputStream): 3
- AnalyzedSentence (org.languagetool.AnalyzedSentence): 3
- AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 3
- ByteArrayInputStream (java.io.ByteArrayInputStream): 2
- FileInputStream (java.io.FileInputStream): 2
- IOException (java.io.IOException): 2
- ArrayList (java.util.ArrayList): 2
- RAMDirectory (org.apache.lucene.store.RAMDirectory): 2
- Ignore (org.junit.Ignore): 2