use of org.languagetool.language.English in project languagetool by languagetool-org.
the class JLanguageToolTest method testOverlapFilter.
/**
 * Registers two pattern rules that share the id "id1" (sub-ids "1" and "2") and
 * checks a sentence on which both are expected to fire. Asserts that exactly one
 * match is reported, carrying the message "msg1", both when checking through
 * {@code check(String)} and when checking a pre-analyzed sentence against an
 * explicit rule list.
 */
@Test
public void testOverlapFilter() throws IOException {
  final String text = "And one two three.";
  Category testCategory = new Category(new CategoryId("TEST_ID"), "test category");

  // First rule: built from the single pattern token "one".
  List<PatternToken> oneToken = Arrays.asList(new PatternToken("one", true, false, false));
  PatternRule shortRule = new PatternRule("id1", new English(), oneToken, "desc1", "msg1", "shortMsg1");
  shortRule.setSubId("1");
  shortRule.setCategory(testCategory);

  // Second rule: built from the two pattern tokens "one" and "two".
  List<PatternToken> twoTokens = Arrays.asList(
      new PatternToken("one", true, false, false),
      new PatternToken("two", true, false, false));
  PatternRule longRule = new PatternRule("id1", new English(), twoTokens, "desc2", "msg2", "shortMsg2");
  longRule.setSubId("2");
  longRule.setCategory(testCategory);

  JLanguageTool lt = new JLanguageTool(new English());
  lt.addRule(shortRule);
  lt.addRule(longRule);

  // Path 1: plain text check with both rules registered on the tool.
  List<RuleMatch> textMatches = lt.check(text);
  assertEquals("one overlapping rule must be filtered out", 1, textMatches.size());
  assertEquals("msg1", textMatches.get(0).getMessage());

  // Path 2: check an already analyzed sentence against an explicit list of the two rules.
  AnalyzedSentence analyzed = lt.getAnalyzedSentence(text);
  List<Rule> rulesUnderTest = new ArrayList<>(Arrays.asList(shortRule, longRule));
  List<RuleMatch> sentenceMatches = lt.checkAnalyzedSentence(ParagraphHandling.NORMAL, rulesUnderTest, analyzed);
  assertEquals("one overlapping rule must be filtered out", 1, sentenceMatches.size());
  assertEquals("msg1", sentenceMatches.get(0).getMessage());
}
use of org.languagetool.language.English in project languagetool by languagetool-org.
the class LanguageToolFilterTest method testFilter.
/**
 * Runs the input "How to?" through a {@code LanguageToolFilter} wrapped around an
 * {@code AnyCharTokenizer} and compares the resulting token stream against the
 * expected arrays below.
 */
public void testFilter() throws Exception {
  Tokenizer tokenizer = new AnyCharTokenizer();
  tokenizer.setReader(new StringReader("How to?"));
  LanguageToolFilter filter = new LanguageToolFilter(tokenizer, new JLanguageTool(new English()), false);
  // For debugging, the stream can be dumped with displayTokensWithFullDetails(filter).

  // Array roles below follow the argument order of assertTokenStreamContents
  // (terms, start offsets, end offsets, types, position increments, final offset)
  // -- NOTE(review): confirm against the Lucene test framework version in use.
  String[] expectedTerms = {
      "_POS_SENT_START",
      "How", "_LEMMA_how", "_POS_WRB",
      "to", "_LEMMA_to", "_POS_TO", "_LEMMA_to", "_POS_IN",
      "?", "_POS_SENT_END"
  };
  int[] expectedStarts = { 0, 0, 0, 0, 4, 4, 4, 4, 4, 6, 6 };
  int[] expectedEnds = { 0, 3, 3, 3, 6, 6, 6, 6, 6, 7, 7 };
  String[] expectedTypes = {
      "pos",
      "word", "pos", "pos",
      "word", "pos", "pos", "pos", "pos",
      "word", "pos"
  };
  int[] expectedIncrements = { 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0 };

  assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds, expectedTypes, expectedIncrements, 7);
}
use of org.languagetool.language.English in project languagetool by languagetool-org.
the class PatternRuleQueryBuilderTest method setUp.
/**
 * Prepares the fixture: after the superclass set-up, creates an English language
 * instance, writes two sample sentences into a fresh {@code RAMDirectory} index,
 * and opens {@code reader} and {@code searcher} on that index.
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  language = new English();
  // A RAMDirectory is used here; for inspecting the index on disk, an
  // FSDirectory opened on a freshly deleted path (e.g. /tmp/lucene) can be
  // substituted.
  directory = new RAMDirectory();

  IndexWriterConfig writerConfig = Indexer.getIndexWriterConfig(Indexer.getAnalyzer(language));
  // The writer is closed by try-with-resources before the reader is opened.
  try (IndexWriter indexWriter = new IndexWriter(directory, writerConfig)) {
    addDocument(indexWriter, "How do you thin about this wonderful idea?");
    addDocument(indexWriter, "The are several grammar checkers for English, E.G. LanguageTool 123.");
  }

  reader = DirectoryReader.open(directory);
  searcher = newSearcher(reader);
}
use of org.languagetool.language.English in project languagetool by languagetool-org.
the class WikipediaSentenceSourceTest method testWikipediaSource.
/**
 * Reads the bundled {@code wikipedia-en.xml} test resource through a
 * {@code WikipediaSentenceSource} and asserts that it yields exactly five
 * sentences in the expected order, after which {@code hasNext()} is false.
 *
 * @throws XMLStreamException declared because the sentence source may raise it
 * @throws IOException if the resource stream cannot be read or closed
 */
@Test
public void testWikipediaSource() throws XMLStreamException, IOException {
  // try-with-resources guarantees the resource stream is closed even when an
  // assertion fails; a null resource is tolerated by the construct itself.
  try (InputStream stream = WikipediaSentenceSourceTest.class.getResourceAsStream("/org/languagetool/dev/wikipedia/wikipedia-en.xml")) {
    WikipediaSentenceSource source = new WikipediaSentenceSource(stream, new English());
    assertTrue(source.hasNext());
    assertThat(source.next().getText(), is("This is the first document."));
    assertThat(source.next().getText(), is("It has three sentences."));
    assertThat(source.next().getText(), is("Here's the last sentence."));
    assertThat(source.next().getText(), is("This is the second document."));
    assertThat(source.next().getText(), is("It has two sentences."));
    assertFalse(source.hasNext());
  }
}
use of org.languagetool.language.English in project languagetool by languagetool-org.
the class IndexerSearcherTest method testAllRules.
/**
 * Builds an index via {@code createIndex} for English, then, for every active
 * pattern rule that is not off by default, searches that index through
 * {@code errorSearcher} and checks whether at least one matching sentence
 * contains a rule match with the rule's own full id. Rules without such a match,
 * unsupported rules, and rules whose search throws are counted and reported on
 * standard output; the method itself makes no assertions.
 */
@Ignore("ignored as long as it doesn't work 100%")
public void testAllRules() throws Exception {
  final long begin = System.currentTimeMillis();
  // To test with an external index, set these up here instead:
  //directory = new SimpleFSDirectory(new File("/media/external-disk/corpus/languagetool/fast-rule-evaluation-de/"));
  //errorSearcher = new Searcher(directory);
  // TODO: make this work for all languages (e.g. French, Spanish, Polish, German)
  Language lang = new English();
  JLanguageTool languageTool = new JLanguageTool(lang);

  System.out.println("Creating index for " + lang + "...");
  int indexedRules = createIndex(languageTool);
  System.out.println("Index created with " + indexedRules + " rules");

  int testedRules = 0;
  int problemCount = 0;
  int errorCount = 0;

  for (Rule rule : languageTool.getAllActiveRules()) {
    // Only pattern rules that are enabled by default are evaluated.
    if (!(rule instanceof PatternRule) || rule.isDefaultOff()) {
      continue;
    }
    PatternRule patternRule = (PatternRule) rule;
    try {
      testedRules++;
      SearcherResult result = errorSearcher.findRuleMatchesOnIndex(patternRule, lang);
      List<MatchingSentence> sentences = result.getMatchingSentences();

      // TODO: there can be more than one expected match, can't it?
      boolean matched = false;
      for (MatchingSentence candidate : sentences) {
        List<String> matchIds = getRuleMatchIds(candidate.getRuleMatches());
        if (matchIds.contains(patternRule.getFullId())) {
          matched = true;
          break;
        }
      }

      if (!matched) {
        System.out.println("Error: No match found for " + patternRule);
        System.out.println("Query : " + result.getRelaxedQuery().toString(FIELD_NAME_LOWERCASE));
        System.out.println("Default field: " + FIELD_NAME_LOWERCASE);
        System.out.println("Lucene Hits: " + result.getLuceneMatchCount());
        System.out.println("Matches : " + sentences);
        System.out.println("Examples : " + rule.getIncorrectExamples());
        System.out.println();
        problemCount++;
      }
      // On success nothing is printed; per-rule timing output could be added here for profiling.
    } catch (UnsupportedPatternRuleException e) {
      // Unsupported rules count as problems, not as exceptions.
      System.out.println("UnsupportedPatternRuleException searching for rule " + patternRule.getFullId() + ": " + e.getMessage());
      problemCount++;
    } catch (Exception e) {
      System.out.println("Exception searching for rule " + patternRule.getFullId() + ": " + e.getMessage());
      e.printStackTrace(System.out);
      errorCount++;
    }
  }

  System.out.println(lang + ": problems: " + problemCount + ", total rules: " + testedRules);
  System.out.println(lang + ": exceptions: " + errorCount + " (including timeouts)");
  System.out.println("Total time: " + (System.currentTimeMillis() - begin) + "ms");
}
Aggregations