Search in sources :

Example 16 with PatternToken

use of org.languagetool.rules.patterns.PatternToken in project languagetool by languagetool-org.

the class IndexerSearcherTest method testApostropheElement.

/**
 * Indexes a sentence containing "Bleed's" and checks that a three-token rule
 * ("Bleed", "'", "s") is found on the index, while a rule whose last token
 * is "x" instead of "s" yields no matching sentence.
 */
public void testApostropheElement() throws Exception {
    createIndex("Daily Bleed's Anarchist Encyclopedia");

    // Rule expected to match: Bleed + ' + s
    List<PatternToken> matchingTokens = Arrays.asList(
        new PatternToken("Bleed", false, false, false),
        new PatternToken("'", false, false, false),
        new PatternToken("s", false, false, false));
    PatternRule matchingRule = new PatternRule("RULE1", new English(), matchingTokens, "desc", "msg", "shortMsg");

    // Rule expected not to match: Bleed + ' + x
    List<PatternToken> nonMatchingTokens = Arrays.asList(
        new PatternToken("Bleed", false, false, false),
        new PatternToken("'", false, false, false),
        new PatternToken("x", false, false, false));
    PatternRule nonMatchingRule = new PatternRule("RULE", new English(), nonMatchingTokens, "desc", "msg", "shortMsg");

    SearcherResult positiveResult = errorSearcher.findRuleMatchesOnIndex(matchingRule, new English());
    assertEquals(1, positiveResult.getMatchingSentences().size());
    List<RuleMatch> foundMatches = positiveResult.getMatchingSentences().get(0).getRuleMatches();
    assertEquals(1, foundMatches.size());
    Rule matchedRule = foundMatches.get(0).getRule();
    assertEquals("RULE1", matchedRule.getId());

    SearcherResult negativeResult = errorSearcher.findRuleMatchesOnIndex(nonMatchingRule, new English());
    assertEquals(0, negativeResult.getMatchingSentences().size());
}
Also used : English(org.languagetool.language.English) PatternToken(org.languagetool.rules.patterns.PatternToken) RuleMatch(org.languagetool.rules.RuleMatch) PatternRule(org.languagetool.rules.patterns.PatternRule) PatternRule(org.languagetool.rules.patterns.PatternRule) Rule(org.languagetool.rules.Rule)

Example 17 with PatternToken

use of org.languagetool.rules.patterns.PatternToken in project languagetool by languagetool-org.

the class SimpleRuleCounter method countForLanguage.

/**
 * Prints, for the given language, the percentage of rules in {@code allRules}
 * that are {@link PatternRule}s consisting solely of tokens accepted by
 * {@code isSimple(PatternToken)}.
 *
 * <p>Rules that are not pattern rules never count as simple. A pattern rule
 * without any tokens counts as simple. For an empty rule list 0% is printed
 * (instead of "NaN%", which the unguarded float division would produce).
 *
 * @param allRules the rules to inspect
 * @param language the language the rules belong to; only used for the output line
 */
private void countForLanguage(List<Rule> allRules, Language language) {
    int simpleCount = 0;
    for (Rule rule : allRules) {
        if (!(rule instanceof PatternRule)) {
            continue;
        }
        PatternRule patternRule = (PatternRule) rule;
        boolean allTokensSimple = true;
        for (PatternToken token : patternRule.getPatternTokens()) {
            if (!isSimple(token)) {
                allTokensSimple = false;
                break;
            }
        }
        if (allTokensSimple) {
            simpleCount++;
        }
    }
    // Guard against 0f / 0, which would yield NaN.
    float percent = allRules.isEmpty() ? 0f : (float) simpleCount / allRules.size() * 100;
    // Pass the language as an argument: concatenating it into the format string
    // would misinterpret any '%' in its string form as a format specifier.
    System.out.printf("%.0f%% for %s\n", percent, language);
}
Also used : PatternToken(org.languagetool.rules.patterns.PatternToken) PatternRule(org.languagetool.rules.patterns.PatternRule) Rule(org.languagetool.rules.Rule) PatternRule(org.languagetool.rules.patterns.PatternRule)

Example 18 with PatternToken

use of org.languagetool.rules.patterns.PatternToken in project languagetool by languagetool-org.

the class DashRule method loadCompoundFile.

/**
 * Reads the compound word list at {@code path} (UTF-8, one entry per line) and
 * adds one {@link PatternRule} per usable entry to {@code dashRules}.
 *
 * <p>Line handling:
 * <ul>
 *   <li>empty lines and lines starting with {@code #} are skipped;</li>
 *   <li>lines ending in {@code +} are skipped;</li>
 *   <li>a trailing {@code *} is removed before the line is processed.</li>
 * </ul>
 * Each remaining line is split at {@code -}; the resulting rule matches the parts
 * joined by an em or en dash and suggests the hyphenated form. The rule id is
 * {@code "DASH_RULE"} followed by the 1-based line number in the file (skipped
 * lines are counted too).
 *
 * @param path resource path of the compound file
 * @throws IOException if the resource cannot be read
 */
private void loadCompoundFile(String path) throws IOException {
    try (InputStream stream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(path);
        // Charset constant instead of a charset name string: no runtime name lookup.
        InputStreamReader reader = new InputStreamReader(stream, java.nio.charset.StandardCharsets.UTF_8);
        BufferedReader br = new BufferedReader(reader)) {
        String line;
        int counter = 0;
        while ((line = br.readLine()) != null) {
            counter++;
            if (line.isEmpty() || line.charAt(0) == '#') {
                // ignore comments
                continue;
            }
            if (line.endsWith("+")) {
                // skip non-hyphenated suggestions
                continue;
            } else if (line.endsWith("*")) {
                line = removeLastCharacter(line);
            }
            List<PatternToken> tokList = new ArrayList<>();
            String[] tokens = line.split("-");
            for (int i = 0; i < tokens.length; i++) {
                // token
                tokList.add(new PatternToken(tokens[i], true, false, false));
                if (i < tokens.length - 1) {
                    // add dash (em or en dash) between two consecutive parts
                    tokList.add(new PatternToken("[—–]", false, true, false));
                }
            }
            // The suggestion is the entry with any dash normalized to a plain hyphen.
            String suggestion = line.replaceAll("[–—]", "-");
            // Message: "Incorrect use of a dash instead of a hyphen. Correct: ..."
            // (previously read "dash instead of dash").
            PatternRule dashRule = new PatternRule("DASH_RULE" + counter, Languages.getLanguageForName("Polish"), tokList, "", "Błędne użycie myślnika zamiast łącznika. " + "Poprawnie: <suggestion>" + suggestion + "</suggestion>.", suggestion);
            dashRules.add(dashRule);
        }
    }
}
Also used : PatternToken(org.languagetool.rules.patterns.PatternToken) InputStreamReader(java.io.InputStreamReader) PatternRule(org.languagetool.rules.patterns.PatternRule) InputStream(java.io.InputStream) BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList)

Example 19 with PatternToken

use of org.languagetool.rules.patterns.PatternToken in project languagetool by languagetool-org.

the class SpellingCheckRule method getTokensForSentenceStart.

/**
 * Builds the token sequence for a phrase as it may appear at the start of a sentence.
 *
 * <p>For the first part, two tokens are produced: one built with
 * {@code posRegex(JLanguageTool.SENTENCE_START_TAGNAME)} and one case-sensitive token
 * for the part with its first character uppercased. Every following part becomes a
 * case-sensitive token for the part as given.
 *
 * @param parts the individual words of the phrase
 * @return the pattern tokens in order; empty if {@code parts} is empty
 */
private List<PatternToken> getTokensForSentenceStart(String[] parts) {
    List<PatternToken> result = new ArrayList<>();
    for (int idx = 0; idx < parts.length; idx++) {
        if (idx > 0) {
            result.add(new PatternTokenBuilder().csToken(parts[idx]).build());
            continue;
        }
        // at sentence start, we also need to accept a phrase that starts with an uppercase char:
        String capitalized = StringTools.uppercaseFirstChar(parts[idx]);
        result.add(new PatternTokenBuilder().posRegex(JLanguageTool.SENTENCE_START_TAGNAME).build());
        result.add(new PatternTokenBuilder().csToken(capitalized).build());
    }
    return result;
}
Also used : PatternToken(org.languagetool.rules.patterns.PatternToken) PatternTokenBuilder(org.languagetool.rules.patterns.PatternTokenBuilder)

Example 20 with PatternToken

use of org.languagetool.rules.patterns.PatternToken in project languagetool by languagetool-org.

the class SpellingCheckRule method acceptPhrases.

/**
   * Accept the given phrases even though they are not in the built-in dictionary.
   * Matching is case-sensitive, except that a phrase starting with a lowercase character
   * is also accepted with its first character uppercased at the start of a sentence.
   * Use this to avoid false alarms on e.g. names and technical terms. Unlike
   * {@link #addIgnoreTokens(List)} this can deal with phrases. Example call:
   * <code>rule.acceptPhrases(Arrays.asList("duodenal atresia"))</code>
   * With this, checking would not create an error for "duodenal atresia", but it would
   * still create an error for "duodenal" or "atresia" if they appear on their own.
   * Phrases are split into words at single spaces.
   * @param phrases the phrases to accept
   * @since 3.3
   */
public void acceptPhrases(List<String> phrases) {
    List<List<PatternToken>> collected = new ArrayList<>();
    for (String phrase : phrases) {
        String[] words = phrase.split(" ");
        // the phrase exactly as given, matched case-sensitively
        List<PatternToken> exactTokens = new ArrayList<>();
        for (String word : words) {
            exactTokens.add(new PatternTokenBuilder().csToken(word).build());
        }
        collected.add(exactTokens);
        // a phrase whose first word changes when its first char is uppercased
        // additionally gets a sentence-start variant
        boolean startsLowercase = words.length > 0
            && !StringTools.uppercaseFirstChar(words[0]).equals(words[0]);
        if (startsLowercase) {
            collected.add(getTokensForSentenceStart(words));
        }
    }
    this.antiPatterns = makeAntiPatterns(collected, language);
}
Also used : PatternToken(org.languagetool.rules.patterns.PatternToken) PatternTokenBuilder(org.languagetool.rules.patterns.PatternTokenBuilder)

Aggregations

PatternToken (org.languagetool.rules.patterns.PatternToken)21 PatternRule (org.languagetool.rules.patterns.PatternRule)17 ArrayList (java.util.ArrayList)9 Test (org.junit.Test)8 English (org.languagetool.language.English)7 Rule (org.languagetool.rules.Rule)6 RuleMatch (org.languagetool.rules.RuleMatch)5 PatternTokenBuilder (org.languagetool.rules.patterns.PatternTokenBuilder)2 BufferedReader (java.io.BufferedReader)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 InputStreamReader (java.io.InputStreamReader)1 AmericanEnglish (org.languagetool.language.AmericanEnglish)1 BritishEnglish (org.languagetool.language.BritishEnglish)1 SpellingCheckRule (org.languagetool.rules.spelling.SpellingCheckRule)1