Search in sources :

Example 16 with PatternRule

use of org.languagetool.rules.patterns.PatternRule in project languagetool by languagetool-org.

the class IndexerSearcherTest method testWithNewRule.

/**
 * Indexes one sentence, searches it with a newly built two-token rule
 * ("move" followed by "back") and expects exactly one checked sentence,
 * one matching sentence and one rule match that reports the rule's id.
 */
public void testWithNewRule() throws Exception {
    createIndex("How to move back and fourth from linux to xmb?");

    // Build the rule under test from its individual tokens.
    PatternToken moveToken = new PatternToken("move", false, false, false);
    PatternToken backToken = new PatternToken("back", false, false, false);
    List<PatternToken> tokens = Arrays.asList(moveToken, backToken);
    PatternRule newRule = new PatternRule("RULE1", new English(), tokens, "desc", "msg", "shortMsg");

    SearcherResult result = new Searcher(directory).findRuleMatchesOnIndex(newRule, new English());

    assertEquals(1, result.getCheckedSentences());
    assertEquals(1, result.getMatchingSentences().size());
    List<RuleMatch> matches = result.getMatchingSentences().get(0).getRuleMatches();
    assertEquals(1, matches.size());
    // The match must point back at the rule that produced it.
    assertEquals("RULE1", matches.get(0).getRule().getId());
}
Also used : English(org.languagetool.language.English) PatternToken(org.languagetool.rules.patterns.PatternToken) RuleMatch(org.languagetool.rules.RuleMatch) PatternRule(org.languagetool.rules.patterns.PatternRule) Rule(org.languagetool.rules.Rule)

Example 17 with PatternRule

use of org.languagetool.rules.patterns.PatternRule in project languagetool by languagetool-org.

the class IndexerSearcherTest method testWithOneElementWithException.

/**
 * Builds a rule whose single token is created from an empty string and has
 * a string/POS exception attached, then expects the index search to reject
 * it with {@code UnsupportedPatternRuleException}.
 */
public void testWithOneElementWithException() throws Exception {
    createIndex("How to move back and fourth from linux to xmb?");

    PatternToken tokenWithException = new PatternToken("", false, true, false);
    tokenWithException.setStringPosException("exception", false, false, false, false, false, "POS", false, false, null);
    List<PatternToken> tokens = Arrays.asList(tokenWithException);
    PatternRule unsupportedRule = new PatternRule("RULE1", new English(), tokens, "desc", "msg", "shortMsg");

    Searcher searcher = new Searcher(directory);
    boolean rejected = false;
    try {
        searcher.findRuleMatchesOnIndex(unsupportedRule, new English());
    } catch (UnsupportedPatternRuleException expected) {
        // This is the outcome the test is looking for.
        rejected = true;
    }
    if (!rejected) {
        fail();
    }
}
Also used : English(org.languagetool.language.English) PatternToken(org.languagetool.rules.patterns.PatternToken) PatternRule(org.languagetool.rules.patterns.PatternRule)

Example 18 with PatternRule

use of org.languagetool.rules.patterns.PatternRule in project languagetool by languagetool-org.

the class IndexerSearcherTest method testWithException.

/**
 * Searches the index with a two-token rule: "move" followed by a
 * "forth|back" token that carries a string/POS exception. Expects one
 * checked sentence, one matching sentence and one rule match reporting
 * the rule's id.
 */
public void testWithException() throws Exception {
    createIndex("How to move back and fourth from linux to xmb?");

    PatternToken moveToken = new PatternToken("move", false, false, false);
    PatternToken tokenWithException = new PatternToken("forth|back", false, true, false);
    tokenWithException.setStringPosException("exception", false, false, false, false, false, "POS", false, false, null);
    List<PatternToken> tokens = Arrays.asList(moveToken, tokenWithException);
    PatternRule ruleUnderTest = new PatternRule("RULE1", new English(), tokens, "desc", "msg", "shortMsg");

    SearcherResult result = new Searcher(directory).findRuleMatchesOnIndex(ruleUnderTest, new English());

    assertEquals(1, result.getCheckedSentences());
    assertEquals(1, result.getMatchingSentences().size());
    List<RuleMatch> matches = result.getMatchingSentences().get(0).getRuleMatches();
    assertEquals(1, matches.size());
    // The match must point back at the rule that produced it.
    assertEquals("RULE1", matches.get(0).getRule().getId());
}
Also used : English(org.languagetool.language.English) PatternToken(org.languagetool.rules.patterns.PatternToken) RuleMatch(org.languagetool.rules.RuleMatch) PatternRule(org.languagetool.rules.patterns.PatternRule) Rule(org.languagetool.rules.Rule)

Example 19 with PatternRule

use of org.languagetool.rules.patterns.PatternRule in project languagetool by languagetool-org.

the class Searcher method main.

/**
 * Command-line entry point: runs one or more pattern rules against an
 * existing index and prints every match with its context.
 *
 * <p>Arguments, as read by this method:
 * <ul>
 *   <li>{@code args[0]} - comma-separated list of rule ids</li>
 *   <li>{@code args[1]} - short language code</li>
 *   <li>{@code args[2]} - path of the index directory</li>
 *   <li>{@code args[3]} (optional) - {@code --no_limit} to disable the
 *       search limit and raise the maximum number of hits to 100,000</li>
 * </ul>
 *
 * <p>When {@code WIKITEXT_OUTPUT} is set, each match is printed as a wiki
 * list item with a Wikipedia search link and a WikiBlame link; otherwise a
 * plain numbered line is printed.
 *
 * @param args command-line arguments, validated by {@code ensureCorrectUsageOrExit}
 * @throws Exception if looking up a rule, searching the index or URL-encoding fails
 */
public static void main(String[] args) throws Exception {
    ensureCorrectUsageOrExit(args);
    long startTime = System.currentTimeMillis();
    String[] ruleIds = args[0].split(",");
    String languageCode = args[1];
    Language language = Languages.getLanguageForShortCode(languageCode);
    File indexDir = new File(args[2]);
    boolean limitSearch = !(args.length > 3 && "--no_limit".equals(args[3]));
    Searcher searcher = new Searcher(new SimpleFSDirectory(indexDir.toPath()));
    if (!limitSearch) {
        searcher.setMaxHits(100_000);
    }
    searcher.limitSearch = limitSearch;
    ContextTools contextTools = getContextTools(140);
    int totalMatches = 0;
    for (String ruleId : ruleIds) {
        long ruleStartTime = System.currentTimeMillis();
        for (PatternRule rule : searcher.getRuleById(ruleId, language)) {
            System.out.println("===== " + rule.getFullId() + " =========================================================");
            SearcherResult searcherResult = searcher.findRuleMatchesOnIndex(rule, language);
            // Counts matching sentences, not individual matches: all matches
            // of one sentence are printed with the same number.
            int i = 1;
            if (searcherResult.getMatchingSentences().isEmpty()) {
                System.out.println("[no matches]");
            }
            for (MatchingSentence ruleMatch : searcherResult.getMatchingSentences()) {
                for (RuleMatch match : ruleMatch.getRuleMatches()) {
                    String context = contextTools.getContext(match.getFromPos(), match.getToPos(), ruleMatch.getSentence());
                    if (WIKITEXT_OUTPUT) {
                        // A second context with size 0 is used to get the covered text;
                        // leading/trailing "..." and "**" markers are stripped from it.
                        ContextTools contextTools2 = getContextTools(0);
                        String coveredText = contextTools2.getContext(match.getFromPos(), match.getToPos(), ruleMatch.getSentence());
                        coveredText = coveredText.replaceFirst("^\\.\\.\\.", "").replaceFirst("\\.\\.\\.$", "");
                        coveredText = coveredText.replaceFirst("^\\*\\*", "").replaceFirst("\\*\\*$", "");
                        String encodedTextWithQuotes = URLEncoder.encode("\"" + coveredText + "\"", "UTF-8");
                        String searchLink = "https://de.wikipedia.org/w/index.php?search=" + encodedTextWithQuotes + "&title=Spezial%3ASuche&go=Artikel";
                        // The covered text comes from the indexed sentences and may contain
                        // '$' or '\', which replaceAll() would interpret as group references
                        // or escapes. Quote the replacement so it is inserted literally.
                        String wikiLink = "[" + searchLink + " " + coveredText + "]";
                        context = context.replaceAll("\\*\\*.*?\\*\\*", java.util.regex.Matcher.quoteReplacement(wikiLink));
                        String encTitle = URLEncoder.encode(ruleMatch.getTitle(), "UTF-8");
                        String encodedText = URLEncoder.encode(coveredText, "UTF-8");
                        System.out.println("# [[" + ruleMatch.getTitle() + "]]: " + context + " ([http://wikipedia.ramselehof.de/wikiblame.php?user_lang=de&lang=de&project=wikipedia&article=" + encTitle + "&needle=" + encodedText + "&skipversions=0&ignorefirst=0&limit=500&searchmethod=int&order=desc&start=Start WikiBlame])");
                    } else {
                        System.out.println(i + ": " + context + " [" + ruleMatch.getSource() + "]");
                    }
                }
                totalMatches += ruleMatch.getRuleMatches().size();
                i++;
            }
            System.out.println("Time: " + (System.currentTimeMillis() - ruleStartTime) + "ms");
        }
    }
    System.out.println("Total time: " + (System.currentTimeMillis() - startTime) + "ms, " + totalMatches + " matches");
}
Also used : PatternRule(org.languagetool.rules.patterns.PatternRule) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) ContextTools(org.languagetool.tools.ContextTools) RuleMatch(org.languagetool.rules.RuleMatch) Language(org.languagetool.Language) File(java.io.File)

Example 20 with PatternRule

use of org.languagetool.rules.patterns.PatternRule in project languagetool by languagetool-org.

the class Searcher method getRuleById.

/**
 * Collects all rules of the given language that have the given id and are
 * {@link PatternRule} instances.
 *
 * @param ruleId the rule id to look for
 * @param language the language whose rules are searched
 * @return the matching pattern rules; never empty
 * @throws PatternRuleNotFoundException if no pattern rule has the given id
 */
List<PatternRule> getRuleById(String ruleId, Language language) throws IOException {
    JLanguageTool languageTool = new JLanguageTool(language);
    List<PatternRule> matchingRules = new ArrayList<>();
    for (Rule candidate : languageTool.getAllRules()) {
        boolean idMatches = candidate.getId().equals(ruleId);
        if (!idMatches || !(candidate instanceof PatternRule)) {
            continue;
        }
        matchingRules.add((PatternRule) candidate);
    }
    if (matchingRules.isEmpty()) {
        throw new PatternRuleNotFoundException(ruleId, language);
    }
    return matchingRules;
}
Also used : PatternRule(org.languagetool.rules.patterns.PatternRule) JLanguageTool(org.languagetool.JLanguageTool) ArrayList(java.util.ArrayList) Rule(org.languagetool.rules.Rule)

Aggregations

PatternRule (org.languagetool.rules.patterns.PatternRule)28 PatternToken (org.languagetool.rules.patterns.PatternToken)17 Rule (org.languagetool.rules.Rule)13 ArrayList (java.util.ArrayList)12 RuleMatch (org.languagetool.rules.RuleMatch)9 Test (org.junit.Test)8 English (org.languagetool.language.English)8 JLanguageTool (org.languagetool.JLanguageTool)6 File (java.io.File)4 FileReader (java.io.FileReader)3 IncorrectExample (org.languagetool.rules.IncorrectExample)3 Ignore (org.junit.Ignore)2 Language (org.languagetool.Language)2 AbstractPatternRule (org.languagetool.rules.patterns.AbstractPatternRule)2 BufferedReader (java.io.BufferedReader)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 InputStreamReader (java.io.InputStreamReader)1 Document (org.apache.lucene.document.Document)1 Field (org.apache.lucene.document.Field)1