Use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
Class Searcher, method main.
/**
 * Command-line entry point: looks up the requested rules, runs each of them against
 * a Lucene index and prints every match together with timing information.
 *
 * <p>Arguments as read by this method: {@code args[0]} is a comma-separated list of
 * rule ids, {@code args[1]} a language short code, {@code args[2]} the path of the
 * index directory. If {@code args[3]} is present and equals {@code --no_limit},
 * {@code limitSearch} is turned off and the maximum number of hits is set to 100,000.
 *
 * <p>When {@code WIKITEXT_OUTPUT} is set, each match is printed as a wikitext list item
 * that links to a German Wikipedia search and to WikiBlame; otherwise a plain
 * numbered line with the sentence source is printed.
 *
 * @param args command-line arguments, first passed to {@code ensureCorrectUsageOrExit}
 * @throws Exception nothing is caught here, so any failure of the called code propagates
 */
public static void main(String[] args) throws Exception {
  ensureCorrectUsageOrExit(args);
  long startTime = System.currentTimeMillis();
  String[] ruleIds = args[0].split(",");
  String languageCode = args[1];
  Language language = Languages.getLanguageForShortCode(languageCode);
  File indexDir = new File(args[2]);
  boolean limitSearch = !(args.length > 3 && "--no_limit".equals(args[3]));
  Searcher searcher = new Searcher(new SimpleFSDirectory(indexDir.toPath()));
  if (!limitSearch) {
    searcher.setMaxHits(100_000);
  }
  searcher.limitSearch = limitSearch;
  ContextTools contextTools = getContextTools(140);
  // Only the wikitext branch needs the second ContextTools (argument 0 - presumably a
  // zero-width context so that just the marked match text remains; confirm against
  // getContextTools). It is loop-invariant, so create it once instead of per match.
  ContextTools coveredTextTools = WIKITEXT_OUTPUT ? getContextTools(0) : null;
  int totalMatches = 0;
  for (String ruleId : ruleIds) {
    long ruleStartTime = System.currentTimeMillis();
    for (PatternRule rule : searcher.getRuleById(ruleId, language)) {
      System.out.println("===== " + rule.getFullId() + " =========================================================");
      SearcherResult searcherResult = searcher.findRuleMatchesOnIndex(rule, language);
      List<MatchingSentence> matchingSentences = searcherResult.getMatchingSentences();
      if (matchingSentences.isEmpty()) {
        System.out.println("[no matches]");
      }
      // 'i' numbers matching sentences, not individual matches: all matches of one
      // sentence are printed with the same number.
      int i = 1;
      for (MatchingSentence ruleMatch : matchingSentences) {
        for (RuleMatch match : ruleMatch.getRuleMatches()) {
          String context = contextTools.getContext(match.getFromPos(), match.getToPos(), ruleMatch.getSentence());
          if (WIKITEXT_OUTPUT) {
            String coveredText = coveredTextTools.getContext(match.getFromPos(), match.getToPos(), ruleMatch.getSentence());
            // Strip a leading/trailing "..." and the "**" markers around the match.
            coveredText = coveredText.replaceFirst("^\\.\\.\\.", "").replaceFirst("\\.\\.\\.$", "");
            coveredText = coveredText.replaceFirst("^\\*\\*", "").replaceFirst("\\*\\*$", "");
            String encodedTextWithQuotes = URLEncoder.encode("\"" + coveredText + "\"", "UTF-8");
            String searchLink = "https://de.wikipedia.org/w/index.php?search=" + encodedTextWithQuotes + "&title=Spezial%3ASuche&go=Artikel";
            String wikiLink = "[" + searchLink + " " + coveredText + "]";
            // The covered text comes straight from the corpus and may contain '$' or '\',
            // which replaceAll() would interpret as group references/escapes in the
            // replacement string. Quote it so it is inserted literally.
            context = context.replaceAll("\\*\\*.*?\\*\\*", java.util.regex.Matcher.quoteReplacement(wikiLink));
            String encTitle = URLEncoder.encode(ruleMatch.getTitle(), "UTF-8");
            String encodedText = URLEncoder.encode(coveredText, "UTF-8");
            System.out.println("# [[" + ruleMatch.getTitle() + "]]: " + context + " ([http://wikipedia.ramselehof.de/wikiblame.php?user_lang=de&lang=de&project=wikipedia&article=" + encTitle + "&needle=" + encodedText + "&skipversions=0&ignorefirst=0&limit=500&searchmethod=int&order=desc&start=Start WikiBlame])");
          } else {
            System.out.println(i + ": " + context + " [" + ruleMatch.getSource() + "]");
          }
        }
        totalMatches += ruleMatch.getRuleMatches().size();
        i++;
      }
      // Measured from the start of the current rule id, so with several rules sharing
      // one id the reported time is cumulative.
      System.out.println("Time: " + (System.currentTimeMillis() - ruleStartTime) + "ms");
    }
  }
  System.out.println("Total time: " + (System.currentTimeMillis() - startTime) + "ms, " + totalMatches + " matches");
}
Use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
Class Searcher, method findMatchingSentences.
/**
 * Runs LanguageTool over the stored sentence of every search hit and collects the
 * sentences for which the check reports at least one rule match.
 *
 * @param indexSearcher searcher used to load the document behind each hit
 * @param topDocs search result whose {@code scoreDocs} are iterated
 * @param languageTool checker used both for finding matches and for analyzing the sentence
 * @return one {@code MatchingSentence} per hit with a non-empty match list, in hit order
 * @throws IOException declared by this method; no exception is caught here
 */
private List<MatchingSentence> findMatchingSentences(IndexSearcher indexSearcher, TopDocs topDocs, JLanguageTool languageTool) throws IOException {
  List<MatchingSentence> result = new ArrayList<>();
  for (ScoreDoc hit : topDocs.scoreDocs) {
    Document document = indexSearcher.doc(hit.doc);
    String text = document.get(FIELD_NAME);
    List<RuleMatch> found = languageTool.check(text);
    if (found.isEmpty()) {
      // The index query produced a candidate that the real check does not confirm.
      continue;
    }
    result.add(new MatchingSentence(
        text,
        document.get(SOURCE_FIELD_NAME),
        document.get(Indexer.TITLE_FIELD_NAME),
        languageTool.getAnalyzedSentence(text),
        found));
  }
  return result;
}
Use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
Class ToolsTest, method testBitextCheck.
/**
 * Checks English/Polish sentence pairs with the bitext rules: a correct pair must give
 * no match, and pairs containing "actual"/"aktualne" must give exactly one
 * {@code ACTUAL} match at the expected position in the target text.
 *
 * @param cache result cache handed to both {@code JLanguageTool} instances
 */
private void testBitextCheck(ResultCache cache) throws IOException, ParserConfigurationException, SAXException {
  Language sourceLanguage = Languages.getLanguageForShortCode("en");
  JLanguageTool sourceTool = new JLanguageTool(sourceLanguage, null, cache);
  Language targetLanguage = Languages.getLanguageForShortCode("pl");
  JLanguageTool targetTool = new JLanguageTool(targetLanguage, null, cache);
  List<BitextRule> bitextRules = Tools.getBitextRules(sourceLanguage, targetLanguage);

  // A correct translation pair yields nothing.
  List<RuleMatch> noMatches = Tools.checkBitext("This is a perfectly good sentence.", "To jest całkowicie prawidłowe zdanie.", sourceTool, targetTool, bitextRules);
  assertEquals(0, noMatches.size());

  // Single sentence.
  assertSingleActualMatch(
      Tools.checkBitext("This is not actual.", "To nie jest aktualne.", sourceTool, targetTool, bitextRules),
      12, 20);
  // Preceding sentences shift the expected positions.
  assertSingleActualMatch(
      Tools.checkBitext("A sentence. This is not actual.", "Zdanie. To nie jest aktualne.", sourceTool, targetTool, bitextRules),
      20, 28);
  assertSingleActualMatch(
      Tools.checkBitext("A new sentence. This is not actual.", "Nowa zdanie. To nie jest aktualne.", sourceTool, targetTool, bitextRules),
      25, 33);
}

/** Asserts that {@code matches} holds exactly one match of rule {@code ACTUAL} covering the given range. */
private void assertSingleActualMatch(List<RuleMatch> matches, int expectedFromPos, int expectedToPos) {
  assertEquals(1, matches.size());
  RuleMatch only = matches.get(0);
  assertThat(only.getRule().getId(), is("ACTUAL"));
  assertThat(only.getFromPos(), is(expectedFromPos));
  assertThat(only.getToPos(), is(expectedToPos));
}
Use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
Class IndexerSearcherTest, method testNegatedMatchAtSentenceStart.
/**
 * Indexes the single sentence "How to move?" and verifies that a rule made of a
 * negated token followed by "How" is found by the index search.
 * NOTE(review): presumably the negated token is satisfied by the sentence-start
 * position preceding "How" - confirm against the pattern matching code.
 */
public void testNegatedMatchAtSentenceStart() throws Exception {
  createIndex("How to move?");

  PatternToken anythingButNegated = new PatternToken("Negated", false, false, false);
  anythingButNegated.setNegation(true);
  PatternToken how = new PatternToken("How", false, false, false);
  List<PatternToken> tokens = Arrays.asList(anythingButNegated, how);

  PatternRule patternRule = new PatternRule("RULE1", new English(), tokens, "desc", "msg", "shortMsg");
  Searcher searcher = new Searcher(directory);
  SearcherResult result = searcher.findRuleMatchesOnIndex(patternRule, new English());

  // One sentence was checked and it is the one that matches.
  assertEquals(1, result.getCheckedSentences());
  assertEquals(1, result.getMatchingSentences().size());

  List<RuleMatch> matches = result.getMatchingSentences().get(0).getRuleMatches();
  assertEquals(1, matches.size());
  Rule matchedRule = matches.get(0).getRule();
  assertEquals("RULE1", matchedRule.getId());
}
Use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.
Class IndexerSearcherTest, method testWithRegexRule.
/**
 * Indexes one sentence containing "move back" and verifies that a rule consisting of
 * the token "move" followed by the token "forth|back" is found by the index search.
 * NOTE(review): the second token is created with its third constructor flag set to
 * true - presumably the regular-expression flag, as the test name suggests; confirm
 * against the PatternToken constructor.
 */
public void testWithRegexRule() throws Exception {
  createIndex("How to move back and fourth from linux to xmb?");

  PatternToken move = new PatternToken("move", false, false, false);
  PatternToken forthOrBack = new PatternToken("forth|back", false, true, false);
  List<PatternToken> tokens = Arrays.asList(move, forthOrBack);

  PatternRule patternRule = new PatternRule("RULE1", new English(), tokens, "desc", "msg", "shortMsg");
  Searcher searcher = new Searcher(directory);
  SearcherResult result = searcher.findRuleMatchesOnIndex(patternRule, new English());

  // One sentence was checked and it is the one that matches.
  assertEquals(1, result.getCheckedSentences());
  assertEquals(1, result.getMatchingSentences().size());

  List<RuleMatch> matches = result.getMatchingSentences().get(0).getRuleMatches();
  assertEquals(1, matches.size());
  Rule matchedRule = matches.get(0).getRule();
  assertEquals("RULE1", matchedRule.getId());
}
Aggregations