use of org.languagetool.language.English in project languagetool by languagetool-org.
the class WikipediaSentenceSourceTest method testWikipediaSource.
/**
 * Checks that {@link WikipediaSentenceSource} hands out the sentences of the bundled
 * {@code wikipedia-en.xml} test resource one by one, in document order, and reports
 * that nothing is left after the last sentence.
 */
@Test
public void testWikipediaSource() throws XMLStreamException, IOException {
  // try-with-resources makes sure the classpath stream is closed even if an assertion fails
  try (InputStream stream = WikipediaSentenceSourceTest.class.getResourceAsStream("/org/languagetool/dev/wikipedia/wikipedia-en.xml")) {
    WikipediaSentenceSource source = new WikipediaSentenceSource(stream, new English());
    assertTrue(source.hasNext());
    // sentences of the first document
    assertThat(source.next().getText(), is("This is the first document."));
    assertThat(source.next().getText(), is("It has three sentences."));
    assertThat(source.next().getText(), is("Here's the last sentence."));
    // sentences of the second document
    assertThat(source.next().getText(), is("This is the second document."));
    assertThat(source.next().getText(), is("It has two sentences."));
    // all five sentences consumed
    assertFalse(source.hasNext());
  }
}
use of org.languagetool.language.English in project languagetool by languagetool-org.
the class IndexerSearcherTest method testAllRules.
/**
 * Builds an index via {@code createIndex(JLanguageTool)} and then, for every active
 * {@link PatternRule} that is not off by default, searches the index for that rule and
 * checks whether at least one returned sentence carries a match with the rule's full id.
 * Nothing is asserted: problems, exceptions and timing are only printed to stdout.
 */
@Ignore("ignored as long as it doesn't work 100%")
public void testAllRules() throws Exception {
  final long begin = System.currentTimeMillis();
  // To test with an external index instead, assign `directory` something like
  //   new SimpleFSDirectory(new File("/media/external-disk/corpus/languagetool/fast-rule-evaluation-de/"))
  // and re-create `errorSearcher` as new Searcher(directory).
  // TODO: make this work for all languages
  // (switch manually to e.g. new French(), new Spanish(), new Polish() or new German())
  final Language language = new English();
  final JLanguageTool lt = new JLanguageTool(language);
  System.out.println("Creating index for " + language + "...");
  final int indexedRules = createIndex(lt);
  System.out.println("Index created with " + indexedRules + " rules");

  int checkedRules = 0;
  int problemRules = 0;
  int failedRules = 0;
  for (Rule rule : lt.getAllActiveRules()) {
    // only pattern rules that are on by default are examined
    if (!(rule instanceof PatternRule) || rule.isDefaultOff()) {
      continue;
    }
    final PatternRule patternRule = (PatternRule) rule;
    try {
      checkedRules++;
      final SearcherResult result = errorSearcher.findRuleMatchesOnIndex(patternRule, language);
      final List<MatchingSentence> sentences = result.getMatchingSentences();
      // TODO: there can be more than one expected match, can't it?
      boolean ruleMatched = false;
      for (MatchingSentence sentence : sentences) {
        ruleMatched = getRuleMatchIds(sentence.getRuleMatches()).contains(patternRule.getFullId());
        if (ruleMatched) {
          break;
        }
      }
      if (ruleMatched) {
        // rules that were found produce no output
        continue;
      }
      System.out.println("Error: No match found for " + patternRule);
      System.out.println("Query : " + result.getRelaxedQuery().toString(FIELD_NAME_LOWERCASE));
      System.out.println("Default field: " + FIELD_NAME_LOWERCASE);
      System.out.println("Lucene Hits: " + result.getLuceneMatchCount());
      System.out.println("Matches : " + sentences);
      System.out.println("Examples : " + rule.getIncorrectExamples());
      System.out.println();
      problemRules++;
    } catch (UnsupportedPatternRuleException e) {
      // unsupported rules are counted as problems, not as exceptions
      System.out.println("UnsupportedPatternRuleException searching for rule " + patternRule.getFullId() + ": " + e.getMessage());
      problemRules++;
    } catch (Exception e) {
      System.out.println("Exception searching for rule " + patternRule.getFullId() + ": " + e.getMessage());
      e.printStackTrace(System.out);
      failedRules++;
    }
  }

  System.out.println(language + ": problems: " + problemRules + ", total rules: " + checkedRules);
  System.out.println(language + ": exceptions: " + failedRules + " (including timeouts)");
  System.out.println("Total time: " + (System.currentTimeMillis() - begin) + "ms");
}
use of org.languagetool.language.English in project languagetool by languagetool-org.
the class IndexerSearcherTest method createIndex.
/**
 * Replaces {@code directory} with a new {@link RAMDirectory}, runs the {@link Indexer}
 * over {@code content} for English and re-creates {@code errorSearcher} on that directory.
 *
 * @param content the text to be indexed
 */
private void createIndex(String content) throws IOException {
  // for debugging, an on-disk directory can be used instead,
  // e.g. FSDirectory.open(new File("/tmp/lucenetest"))
  directory = new RAMDirectory();
  final English indexLanguage = new English();
  Indexer.run(content, directory, indexLanguage);
  errorSearcher = new Searcher(directory);
}
use of org.languagetool.language.English in project languagetool by languagetool-org.
the class IndexerSearcherTest method testIndexerSearcherWithEnglish.
/**
 * Indexes two English sentences, searches the index for three rules and checks for each
 * how many sentences were examined and how many matched. Finally checks that looking up
 * an invalid rule id ends in a {@link PatternRuleNotFoundException}.
 */
public void testIndexerSearcherWithEnglish() throws Exception {
  // Note that the second sentence ends with "lid" instead of "lids" (the inflected one)
  createIndex("How to move back and fourth from linux to xmb? Calcium deposits on eye lid.");
  English english = new English();
  assertRuleSearch("BACK_AND_FOURTH", english, 1);
  assertRuleSearch("EYE_BROW", english, 1);
  assertRuleSearch("ALL_OVER_THE_WORD", english, 0);
  try {
    errorSearcher.findRuleMatchesOnIndex(getFirstRule("Invalid Rule Id", english), english);
    fail("Exception should be thrown for invalid rule id.");
  } catch (PatternRuleNotFoundException ignored) {
    // expected outcome for the invalid rule id
  }
}

/**
 * Searches the current index for the rule with the given id and asserts that both indexed
 * sentences were checked, that the result was not time-limited, and that exactly
 * {@code expectedMatches} sentences matched.
 */
private void assertRuleSearch(String ruleId, English language, int expectedMatches) throws Exception {
  SearcherResult result = errorSearcher.findRuleMatchesOnIndex(getFirstRule(ruleId, language), language);
  assertEquals(2, result.getCheckedSentences());
  assertEquals(false, result.isResultIsTimeLimited());
  assertEquals(expectedMatches, result.getMatchingSentences().size());
}
use of org.languagetool.language.English in project languagetool by languagetool-org.
the class IndexerSearcherTest method testWithNewRule.
/**
 * Builds a pattern rule in code (tokens "move" followed by "back"), searches a
 * one-sentence index for it and checks that the single sentence matches exactly once
 * and that the match belongs to the new rule.
 */
public void testWithNewRule() throws Exception {
  createIndex("How to move back and fourth from linux to xmb?");

  PatternToken moveToken = new PatternToken("move", false, false, false);
  PatternToken backToken = new PatternToken("back", false, false, false);
  List<PatternToken> tokens = Arrays.asList(moveToken, backToken);
  PatternRule newRule = new PatternRule("RULE1", new English(), tokens, "desc", "msg", "shortMsg");

  // a separate searcher is opened on the directory that createIndex just filled
  Searcher searcher = new Searcher(directory);
  SearcherResult result = searcher.findRuleMatchesOnIndex(newRule, new English());

  assertEquals(1, result.getCheckedSentences());
  List<MatchingSentence> sentences = result.getMatchingSentences();
  assertEquals(1, sentences.size());
  List<RuleMatch> matches = sentences.get(0).getRuleMatches();
  assertEquals(1, matches.size());
  assertEquals("RULE1", matches.get(0).getRule().getId());
}
Aggregations