Search in sources :

Example 1 with AbstractPatternRule

use of org.languagetool.rules.patterns.AbstractPatternRule in project languagetool by languagetool-org.

the class ExampleSentenceCorrectionCreator method addAttribute.

/**
 * Replaces the {@code type="incorrect"} (or {@code type='incorrect'}) attribute on the first
 * example line that belongs to the given rule with {@code newAttribute}.
 * <p>
 * Note: this is a bad hack, we just iterate through the file's lines instead of parsing the XML.
 * The "current rule" is tracked by remembering the value of the last {@code id=} attribute seen,
 * and sub-rules inside a {@code <rulegroup>} are counted by their {@code <rule>} opening tags.
 * A sub-id of {@code "0"} accepts the first incorrect example of the rule regardless of its
 * position in a rule group.
 * <p>
 * At most one line is changed. If a line is changed, {@code addedCorrectionsCount} is
 * incremented; otherwise a message is printed to {@code System.err}.
 *
 * @param rule the rule whose example should be changed; it is cast to {@link AbstractPatternRule}
 * @param newAttribute text inserted literally in place of the {@code type} attribute
 * @param xmlLines the lines of the rule file, modified in place
 */
private void addAttribute(Rule rule, String newAttribute, List<String> xmlLines) {
    Pattern idPattern = Pattern.compile(".*id=[\"'](.*?)[\"'].*");
    String expectedSubId = ((AbstractPatternRule) rule).getSubId();
    String currentRuleId = null;
    boolean inRuleGroup = false;
    int subRuleCount = 0;
    int lineToModify = -1;
    int lineIndex = -1;
    for (String xmlLine : xmlLines) {
        lineIndex++;
        if (xmlLine.contains("<rulegroup")) {
            subRuleCount = 0;
            inRuleGroup = true;
        } else if (xmlLine.contains("</rulegroup>")) {
            subRuleCount = 0;
            inRuleGroup = false;
        } else if ((xmlLine.contains("<rule ") || xmlLine.contains("<rule>")) && inRuleGroup) {
            subRuleCount++;
        }
        Matcher idMatcher = idPattern.matcher(xmlLine);
        if (idMatcher.matches()) {
            currentRuleId = idMatcher.group(1);
        }
        if (!xmlLine.contains("type=\"incorrect\"") && !xmlLine.contains("type='incorrect'")) {
            continue;
        }
        if (currentRuleId != null && !currentRuleId.equals(rule.getId())) {
            // incorrect example of some other rule
            continue;
        }
        if (!inRuleGroup) {
            // a stand-alone rule is treated as sub-rule number 1
            subRuleCount = 1;
        }
        if (!expectedSubId.equals("0") && !expectedSubId.equals(String.valueOf(subRuleCount))) {
            // right rule group, but not the sub-rule we are looking for
            continue;
        }
        lineToModify = lineIndex;
        break;
    }
    if (lineToModify < 0) {
        System.err.println("No line modified: " + rule + "[" + expectedSubId + "]");
        return;
    }
    // quoteReplacement: '$' and '\' in the new attribute must be inserted literally instead of
    // being interpreted as group references/escapes by replaceFirst
    String newLine = xmlLines.get(lineToModify)
            .replaceFirst("type=[\"']incorrect[\"']", Matcher.quoteReplacement(newAttribute));
    xmlLines.set(lineToModify, newLine);
    addedCorrectionsCount++;
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) ArrayList(java.util.ArrayList) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule)

Example 2 with AbstractPatternRule

use of org.languagetool.rules.patterns.AbstractPatternRule in project languagetool by languagetool-org.

the class MatchDatabase method add.

/**
 * Inserts one row describing the given rule match into the {@code feed_matches} table.
 * Title, description, message and category name are abbreviated to 255 characters and the
 * error context to 500 characters before being stored. Rules that are not an
 * {@link AbstractPatternRule} get a {@code null} sub id; rules without a category are stored
 * with the category name {@code "<no category>"}.
 *
 * @param ruleMatch the match to store
 * @throws RuntimeException if the insert fails with an {@link SQLException} whose string form
 *         does not contain "Incorrect string value"; that particular failure is only logged
 */
void add(WikipediaRuleMatch ruleMatch) {
    String insertSql = "INSERT INTO feed_matches "
            + "(title, language_code, rule_id, rule_sub_id, rule_description, rule_message, rule_category, error_context, edit_date, diff_id) "
            + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
    try (PreparedStatement statement = conn.prepareStatement(insertSql)) {
        statement.setString(1, StringUtils.abbreviate(ruleMatch.getTitle(), 255));
        statement.setString(2, ruleMatch.getLanguage().getShortCode());
        statement.setString(3, ruleMatch.getRule().getId());
        // only pattern rules have a sub id
        String subId = ruleMatch.getRule() instanceof AbstractPatternRule
                ? ((AbstractPatternRule) ruleMatch.getRule()).getSubId()
                : null;
        statement.setString(4, subId);
        statement.setString(5, StringUtils.abbreviate(ruleMatch.getRule().getDescription(), 255));
        statement.setString(6, StringUtils.abbreviate(ruleMatch.getMessage(), 255));
        String categoryName = ruleMatch.getRule().getCategory() != null
                ? StringUtils.abbreviate(ruleMatch.getRule().getCategory().getName(), 255)
                : "<no category>";
        statement.setString(7, categoryName);
        statement.setString(8, StringUtils.abbreviate(ruleMatch.getErrorContext(), 500));
        statement.setTimestamp(9, new Timestamp(ruleMatch.getEditDate().getTime()));
        statement.setLong(10, ruleMatch.getDiffId());
        statement.execute();
    } catch (SQLException e) {
        if (!e.toString().contains("Incorrect string value")) {
            throw new RuntimeException("Could not add rule match " + ruleMatch + " to database", e);
        }
        // Let's accept this - i.e. not crash - for now:
        // See http://stackoverflow.com/questions/1168036/ and http://stackoverflow.com/questions/10957238/
        System.err.println("Could not add rule match " + ruleMatch + " to database - stacktrace follows:");
        e.printStackTrace();
    }
}
Also used : SQLException(java.sql.SQLException) PreparedStatement(java.sql.PreparedStatement) Timestamp(java.sql.Timestamp) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule)

Example 3 with AbstractPatternRule

use of org.languagetool.rules.patterns.AbstractPatternRule in project languagetool by languagetool-org.

the class WikipediaQuickCheck method main.

/*public static void mainTest(String[] args) throws IOException {
      TextFilter filter = new SwebleWikipediaTextFilter();
      String plainText = filter.filter("hallo\n* eins\n* zwei");
      System.out.println(plainText);
  }*/
/**
 * Command-line entry point: checks the page at the URL given as the single argument and prints
 * every applied rule match as a numbered entry consisting of the message (with suggestion
 * markup replaced by single quotes), the rule id in parentheses, and the original error context.
 * Prints a usage line and exits with status 1 unless exactly one argument is given.
 *
 * @param args exactly one element: the URL of the page to check
 */
public static void main(String[] args) throws IOException, PageNotFoundException {
    if (args.length != 1) {
        System.out.println("Usage: " + WikipediaQuickCheck.class.getName() + " <url>");
        System.exit(1);
    }
    WikipediaQuickCheck quickCheck = new WikipediaQuickCheck();
    // URL examples:
    //   http://de.wikipedia.org/wiki/Angela_Merkel
    //   https://de.wikipedia.org/wiki/Benutzer_Diskussion:Dnaber
    //   https://secure.wikimedia.org/wikipedia/de/wiki/Gütersloh
    URL pageUrl = new URL(args[0]);
    ErrorMarker marker = new ErrorMarker("***", "***");
    MarkupAwareWikipediaResult checkResult = quickCheck.checkPage(pageUrl, marker);
    int matchNumber = 0;
    for (AppliedRuleMatch appliedMatch : checkResult.getAppliedRuleMatches()) {
        RuleMatchApplication firstApplication = appliedMatch.getRuleMatchApplications().get(0);
        RuleMatch ruleMatch = appliedMatch.getRuleMatch();
        Rule rule = ruleMatch.getRule();
        System.out.println("");
        String message = ruleMatch.getMessage().replace("<suggestion>", "'").replace("</suggestion>", "'");
        matchNumber++;
        System.out.print(matchNumber + ". " + message);
        // pattern rules are shown with their full id, all other rules with the plain id
        System.out.println(" ("
                + (rule instanceof AbstractPatternRule ? ((AbstractPatternRule) rule).getFullId() : rule.getId())
                + ")");
        String context = firstApplication.getOriginalErrorContext(50).replace("\n", "\\n");
        System.out.println("    ..." + context + "...");
    }
}
Also used : RuleMatch(org.languagetool.rules.RuleMatch) Rule(org.languagetool.rules.Rule) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule) URL(java.net.URL)

Example 4 with AbstractPatternRule

use of org.languagetool.rules.patterns.AbstractPatternRule in project languagetool by languagetool-org.

the class PatternRuleQueryBuilderTest method testCaseSensitive.

/**
 * Loads four single-token rules and checks the number of hits of the relaxed query built for
 * each of them: rules 1 and 2 are case-sensitive patterns for "How" and "how", rules 3 and 4
 * are the same tokens without the {@code case_sensitive} attribute. The expected hit counts
 * (1, 0, 1, 1) depend on the index behind {@code searcher}, which is set up outside this method.
 */
public void testCaseSensitive() throws Exception {
    String rulesXml = "<?xml version='1.0' encoding='UTF-8'?> <rules lang='en'> <category name='Test'>"
            + "<rule id='TEST_RULE_1' name='test_1'> <pattern case_sensitive='yes'><token>How</token></pattern> </rule>"
            + "<rule id='TEST_RULE_2' name='test_2'> <pattern case_sensitive='yes'><token>how</token>" + "</pattern> </rule>"
            + "<rule id='TEST_RULE_3' name='test_3'> <pattern><token>How</token></pattern> </rule>"
            + "<rule id='TEST_RULE_4' name='test_4'> <pattern><token>how</token></pattern> </rule>"
            + "</category> </rules>";
    // The XML declaration says UTF-8, so encode as UTF-8 instead of the platform default charset
    InputStream input = new ByteArrayInputStream(rulesXml.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    PatternRuleLoader ruleLoader = new PatternRuleLoader();
    List<AbstractPatternRule> rules = ruleLoader.getRules(input, "test.xml");
    PatternRuleQueryBuilder patternRuleQueryBuilder = new PatternRuleQueryBuilder(language, searcher);
    // case-sensitive "How"
    Query query1 = patternRuleQueryBuilder.buildRelaxedQuery(rules.get(0));
    assertEquals(1, searcher.search(query1, 1000).totalHits);
    // case-sensitive "how"
    Query query2 = patternRuleQueryBuilder.buildRelaxedQuery(rules.get(1));
    assertEquals(0, searcher.search(query2, 1000).totalHits);
    // "How" without case_sensitive
    Query query3 = patternRuleQueryBuilder.buildRelaxedQuery(rules.get(2));
    assertEquals(1, searcher.search(query3, 1000).totalHits);
    // "how" without case_sensitive
    Query query4 = patternRuleQueryBuilder.buildRelaxedQuery(rules.get(3));
    assertEquals(1, searcher.search(query4, 1000).totalHits);
}
Also used : Query(org.apache.lucene.search.Query) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) PatternRuleLoader(org.languagetool.rules.patterns.PatternRuleLoader) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule)

Example 5 with AbstractPatternRule

use of org.languagetool.rules.patterns.AbstractPatternRule in project languagetool by languagetool-org.

the class PatternRuleQueryBuilderTest method makeRule.

/**
 * Wraps the given pattern content in a minimal rule file (one category, one rule with id
 * {@code TEST_RULE}), loads it with a {@link PatternRuleLoader} and returns the single
 * resulting rule. Asserts that exactly one rule was loaded.
 *
 * @param ruleXml the XML placed between {@code <pattern>} and {@code </pattern>}
 * @param caseSensitive whether the pattern element gets {@code case_sensitive='yes'}
 * @return the one rule loaded from the generated XML
 * @throws IOException declared for the call to {@code getRules}
 */
private AbstractPatternRule makeRule(String ruleXml, boolean caseSensitive) throws IOException {
    String patternOpenTag = caseSensitive ? "<pattern case_sensitive='yes'>" : "<pattern>";
    String xml = "<?xml version='1.0' encoding='UTF-8'?>"
            + "<rules lang='en'> <category name='Test'> <rule id='TEST_RULE' name='test'>"
            + patternOpenTag
            + ruleXml
            + "</pattern> </rule> </category> </rules>";
    // The XML declaration says UTF-8, so the bytes must be UTF-8 as well; the platform default
    // charset would mis-encode non-ASCII characters in ruleXml on non-UTF-8 systems
    InputStream input = new ByteArrayInputStream(xml.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    PatternRuleLoader ruleLoader = new PatternRuleLoader();
    List<AbstractPatternRule> rules = ruleLoader.getRules(input, "test.xml");
    assertEquals(1, rules.size());
    return rules.get(0);
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) PatternRuleLoader(org.languagetool.rules.patterns.PatternRuleLoader) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule)

Aggregations

AbstractPatternRule (org.languagetool.rules.patterns.AbstractPatternRule): 22, RuleMatch (org.languagetool.rules.RuleMatch): 7, Rule (org.languagetool.rules.Rule): 5, ArrayList (java.util.ArrayList): 4, InputStream (java.io.InputStream): 3, JLanguageTool (org.languagetool.JLanguageTool): 3, PatternRuleLoader (org.languagetool.rules.patterns.PatternRuleLoader): 3, ByteArrayInputStream (java.io.ByteArrayInputStream): 2, IOException (java.io.IOException): 2, HashSet (java.util.HashSet): 2, Matcher (java.util.regex.Matcher): 2, Pattern (java.util.regex.Pattern): 2, Query (org.apache.lucene.search.Query): 2, Test (org.junit.Test): 2, Language (org.languagetool.Language): 2, BitextRule (org.languagetool.rules.bitext.BitextRule): 2, BufferedInputStream (java.io.BufferedInputStream): 1, BufferedReader (java.io.BufferedReader): 1, File (java.io.File): 1, FileInputStream (java.io.FileInputStream): 1