Search in sources:

Example 81 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

The method main of the class Searcher.

/**
 * Command-line entry point: looks up pattern rules by id, runs each of them against a
 * sentence index and prints every match, followed by timing information.
 *
 * <p>Arguments as read by this method:
 * <ul>
 *   <li>{@code args[0]} - comma-separated list of rule ids</li>
 *   <li>{@code args[1]} - language short code</li>
 *   <li>{@code args[2]} - path of the index directory</li>
 *   <li>{@code args[3]} (optional) - {@code --no_limit} turns off the search limit and
 *       raises the maximum number of hits to 100,000</li>
 * </ul>
 *
 * <p>When {@code WIKITEXT_OUTPUT} is set, each match is printed as a wiki list item with a
 * Wikipedia search link and a WikiBlame link; otherwise as a plain numbered line.
 *
 * @param args command-line arguments, see above
 * @throws Exception if searching the index or encoding a URL fails
 */
public static void main(String[] args) throws Exception {
    ensureCorrectUsageOrExit(args);
    long startTime = System.currentTimeMillis();
    String[] ruleIds = args[0].split(",");
    String languageCode = args[1];
    Language language = Languages.getLanguageForShortCode(languageCode);
    File indexDir = new File(args[2]);
    boolean limitSearch = !(args.length > 3 && "--no_limit".equals(args[3]));
    Searcher searcher = new Searcher(new SimpleFSDirectory(indexDir.toPath()));
    if (!limitSearch) {
        searcher.setMaxHits(100_000);
    }
    searcher.limitSearch = limitSearch;
    ContextTools contextTools = getContextTools(140);
    // Only needed for wikitext output; created once instead of once per match.
    ContextTools coveredTextTools = WIKITEXT_OUTPUT ? getContextTools(0) : null;
    int totalMatches = 0;
    for (String ruleId : ruleIds) {
        long ruleStartTime = System.currentTimeMillis();
        for (PatternRule rule : searcher.getRuleById(ruleId, language)) {
            System.out.println("===== " + rule.getFullId() + " =========================================================");
            SearcherResult searcherResult = searcher.findRuleMatchesOnIndex(rule, language);
            // Counts matching sentences (not individual matches): all matches of one sentence share a number.
            int i = 1;
            if (searcherResult.getMatchingSentences().isEmpty()) {
                System.out.println("[no matches]");
            }
            for (MatchingSentence ruleMatch : searcherResult.getMatchingSentences()) {
                for (RuleMatch match : ruleMatch.getRuleMatches()) {
                    String context = contextTools.getContext(match.getFromPos(), match.getToPos(), ruleMatch.getSentence());
                    if (WIKITEXT_OUTPUT) {
                        String coveredText = coveredTextTools.getContext(match.getFromPos(), match.getToPos(), ruleMatch.getSentence());
                        // Strip a leading/trailing "..." and the "**" markers around the covered text.
                        coveredText = coveredText.replaceFirst("^\\.\\.\\.", "").replaceFirst("\\.\\.\\.$", "");
                        coveredText = coveredText.replaceFirst("^\\*\\*", "").replaceFirst("\\*\\*$", "");
                        String encodedTextWithQuotes = URLEncoder.encode("\"" + coveredText + "\"", "UTF-8");
                        String searchLink = "https://de.wikipedia.org/w/index.php?search=" + encodedTextWithQuotes + "&title=Spezial%3ASuche&go=Artikel";
                        String wikiLink = "[" + searchLink + " " + coveredText + "]";
                        // The covered text is arbitrary sentence content: quote it so that '$' and '\'
                        // are inserted literally instead of being read as group references/escapes.
                        context = context.replaceAll("\\*\\*.*?\\*\\*", java.util.regex.Matcher.quoteReplacement(wikiLink));
                        String encTitle = URLEncoder.encode(ruleMatch.getTitle(), "UTF-8");
                        String encodedText = URLEncoder.encode(coveredText, "UTF-8");
                        System.out.println("# [[" + ruleMatch.getTitle() + "]]: " + context + " ([http://wikipedia.ramselehof.de/wikiblame.php?user_lang=de&lang=de&project=wikipedia&article=" + encTitle + "&needle=" + encodedText + "&skipversions=0&ignorefirst=0&limit=500&searchmethod=int&order=desc&start=Start WikiBlame])");
                    } else {
                        System.out.println(i + ": " + context + " [" + ruleMatch.getSource() + "]");
                    }
                }
                totalMatches += ruleMatch.getRuleMatches().size();
                i++;
            }
            System.out.println("Time: " + (System.currentTimeMillis() - ruleStartTime) + "ms");
        }
    }
    System.out.println("Total time: " + (System.currentTimeMillis() - startTime) + "ms, " + totalMatches + " matches");
}
Also used : PatternRule(org.languagetool.rules.patterns.PatternRule) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) ContextTools(org.languagetool.tools.ContextTools) RuleMatch(org.languagetool.rules.RuleMatch) Language(org.languagetool.Language) File(java.io.File)

Example 82 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

The method findMatchingSentences of the class Searcher.

/**
 * Checks the stored sentence of every hit in {@code topDocs} with the given LanguageTool
 * instance and keeps those sentences that yield at least one rule match.
 *
 * @param indexSearcher searcher used to load the document behind each hit
 * @param topDocs the hits whose sentences are to be checked
 * @param languageTool checker applied to each sentence
 * @return one entry per sentence with at least one rule match, in hit order
 * @throws IOException if loading a document or checking a sentence fails
 */
private List<MatchingSentence> findMatchingSentences(IndexSearcher indexSearcher, TopDocs topDocs, JLanguageTool languageTool) throws IOException {
    List<MatchingSentence> result = new ArrayList<>();
    for (ScoreDoc hit : topDocs.scoreDocs) {
        Document luceneDoc = indexSearcher.doc(hit.doc);
        String text = luceneDoc.get(FIELD_NAME);
        List<RuleMatch> found = languageTool.check(text);
        if (found.isEmpty()) {
            // Sentence is clean for the active rules - nothing to report.
            continue;
        }
        String origin = luceneDoc.get(SOURCE_FIELD_NAME);
        String articleTitle = luceneDoc.get(Indexer.TITLE_FIELD_NAME);
        AnalyzedSentence analysis = languageTool.getAnalyzedSentence(text);
        result.add(new MatchingSentence(text, origin, articleTitle, analysis, found));
    }
    return result;
}
Also used : RuleMatch(org.languagetool.rules.RuleMatch) AnalyzedSentence(org.languagetool.AnalyzedSentence) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document)

Example 83 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

The method testBitextCheck of the class ToolsTest.

/**
 * Runs the English-Polish bitext rules on a few sentence pairs and verifies that only the
 * "ACTUAL" rule fires, at the expected positions, whether the offending sentence comes
 * first or follows another sentence.
 *
 * @param cache result cache handed to both JLanguageTool instances
 */
private void testBitextCheck(ResultCache cache) throws IOException, ParserConfigurationException, SAXException {
    Language sourceLang = Languages.getLanguageForShortCode("en");
    JLanguageTool sourceTool = new JLanguageTool(sourceLang, null, cache);
    Language targetLang = Languages.getLanguageForShortCode("pl");
    JLanguageTool targetTool = new JLanguageTool(targetLang, null, cache);
    List<BitextRule> bitextRules = Tools.getBitextRules(sourceLang, targetLang);

    // A correct translation pair produces no matches.
    List<RuleMatch> cleanResult = Tools.checkBitext("This is a perfectly good sentence.", "To jest całkowicie prawidłowe zdanie.", sourceTool, targetTool, bitextRules);
    assertEquals(0, cleanResult.size());

    // Single sentence.
    List<RuleMatch> singleSentence = Tools.checkBitext("This is not actual.", "To nie jest aktualne.", sourceTool, targetTool, bitextRules);
    assertEquals(1, singleSentence.size());
    RuleMatch firstHit = singleSentence.get(0);
    assertThat(firstHit.getRule().getId(), is("ACTUAL"));
    assertThat(firstHit.getFromPos(), is(12));
    assertThat(firstHit.getToPos(), is(20));

    // Offending sentence preceded by a short sentence: positions shift accordingly.
    List<RuleMatch> afterShortSentence = Tools.checkBitext("A sentence. This is not actual.", "Zdanie. To nie jest aktualne.", sourceTool, targetTool, bitextRules);
    assertEquals(1, afterShortSentence.size());
    RuleMatch secondHit = afterShortSentence.get(0);
    assertThat(secondHit.getRule().getId(), is("ACTUAL"));
    assertThat(secondHit.getFromPos(), is(20));
    assertThat(secondHit.getToPos(), is(28));

    // Offending sentence preceded by a longer sentence.
    List<RuleMatch> afterLongerSentence = Tools.checkBitext("A new sentence. This is not actual.", "Nowa zdanie. To nie jest aktualne.", sourceTool, targetTool, bitextRules);
    assertEquals(1, afterLongerSentence.size());
    RuleMatch thirdHit = afterLongerSentence.get(0);
    assertThat(thirdHit.getRule().getId(), is("ACTUAL"));
    assertThat(thirdHit.getFromPos(), is(25));
    assertThat(thirdHit.getToPos(), is(33));
}
Also used : RuleMatch(org.languagetool.rules.RuleMatch) Language(org.languagetool.Language) JLanguageTool(org.languagetool.JLanguageTool) BitextRule(org.languagetool.rules.bitext.BitextRule)

Example 84 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

The method testNegatedMatchAtSentenceStart of the class IndexerSearcherTest.

/**
 * Indexes a single sentence and checks that a pattern consisting of a negated token
 * followed by "How" is found by the searcher and reported under the rule's id.
 */
public void testNegatedMatchAtSentenceStart() throws Exception {
    createIndex("How to move?");
    // First token: anything that is NOT "Negated".
    PatternToken notNegatedWord = new PatternToken("Negated", false, false, false);
    notNegatedWord.setNegation(true);
    PatternToken howToken = new PatternToken("How", false, false, false);
    List<PatternToken> pattern = Arrays.asList(notNegatedWord, howToken);
    Searcher searcher = new Searcher(directory);
    PatternRule patternRule = new PatternRule("RULE1", new English(), pattern, "desc", "msg", "shortMsg");
    SearcherResult result = searcher.findRuleMatchesOnIndex(patternRule, new English());
    assertEquals(1, result.getCheckedSentences());
    assertEquals(1, result.getMatchingSentences().size());
    List<RuleMatch> matches = result.getMatchingSentences().get(0).getRuleMatches();
    assertEquals(1, matches.size());
    Rule matchedRule = matches.get(0).getRule();
    assertEquals("RULE1", matchedRule.getId());
}
Also used : English(org.languagetool.language.English) PatternToken(org.languagetool.rules.patterns.PatternToken) RuleMatch(org.languagetool.rules.RuleMatch) PatternRule(org.languagetool.rules.patterns.PatternRule) PatternRule(org.languagetool.rules.patterns.PatternRule) Rule(org.languagetool.rules.Rule)

Example 85 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

The method testWithRegexRule of the class IndexerSearcherTest.

/**
 * Indexes a single sentence and checks that a pattern whose second token is the regular
 * expression {@code forth|back} is found by the searcher and reported under the rule's id.
 */
public void testWithRegexRule() throws Exception {
    createIndex("How to move back and fourth from linux to xmb?");
    PatternToken moveToken = new PatternToken("move", false, false, false);
    // Third constructor argument is true here, unlike for the plain "move" token.
    PatternToken directionToken = new PatternToken("forth|back", false, true, false);
    List<PatternToken> pattern = Arrays.asList(moveToken, directionToken);
    PatternRule patternRule = new PatternRule("RULE1", new English(), pattern, "desc", "msg", "shortMsg");
    Searcher searcher = new Searcher(directory);
    SearcherResult result = searcher.findRuleMatchesOnIndex(patternRule, new English());
    assertEquals(1, result.getCheckedSentences());
    assertEquals(1, result.getMatchingSentences().size());
    List<RuleMatch> matches = result.getMatchingSentences().get(0).getRuleMatches();
    assertEquals(1, matches.size());
    Rule matchedRule = matches.get(0).getRule();
    assertEquals("RULE1", matchedRule.getId());
}
Also used : English(org.languagetool.language.English) PatternToken(org.languagetool.rules.patterns.PatternToken) RuleMatch(org.languagetool.rules.RuleMatch) PatternRule(org.languagetool.rules.patterns.PatternRule) PatternRule(org.languagetool.rules.patterns.PatternRule) Rule(org.languagetool.rules.Rule)

Aggregations

RuleMatch (org.languagetool.rules.RuleMatch)144 Test (org.junit.Test)64 JLanguageTool (org.languagetool.JLanguageTool)54 ArrayList (java.util.ArrayList)30 AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings)14 Rule (org.languagetool.rules.Rule)14 Language (org.languagetool.Language)10 PatternRule (org.languagetool.rules.patterns.PatternRule)10 AnalyzedSentence (org.languagetool.AnalyzedSentence)8 Ukrainian (org.languagetool.language.Ukrainian)8 AbstractPatternRule (org.languagetool.rules.patterns.AbstractPatternRule)8 Matcher (java.util.regex.Matcher)7 English (org.languagetool.language.English)7 IOException (java.io.IOException)6 Catalan (org.languagetool.language.Catalan)6 Polish (org.languagetool.language.Polish)6 GermanyGerman (org.languagetool.language.GermanyGerman)5 AnnotatedText (org.languagetool.markup.AnnotatedText)5 PatternToken (org.languagetool.rules.patterns.PatternToken)5 AnalyzedToken (org.languagetool.AnalyzedToken)4