use of org.languagetool.rules.patterns.PatternToken in project languagetool by languagetool-org.
the class IndexerSearcherTest method testApostropheElement.
/**
 * Indexes a sentence containing "Bleed's" and checks two pattern rules against it:
 * the token sequence "Bleed", "'", "s" must produce exactly one match (reported for
 * rule id "RULE1"), while "Bleed", "'", "x" must produce no matching sentence.
 */
public void testApostropheElement() throws Exception {
  createIndex("Daily Bleed's Anarchist Encyclopedia");

  // Rule that is expected to match: Bleed ' s
  PatternToken bleedForS = new PatternToken("Bleed", false, false, false);
  PatternToken apostropheForS = new PatternToken("'", false, false, false);
  PatternToken letterS = new PatternToken("s", false, false, false);
  List<PatternToken> matchingTokens = Arrays.asList(bleedForS, apostropheForS, letterS);
  PatternRule matchingRule = new PatternRule("RULE1", new English(), matchingTokens, "desc", "msg", "shortMsg");

  // Rule that is expected NOT to match: Bleed ' x
  PatternToken bleedForX = new PatternToken("Bleed", false, false, false);
  PatternToken apostropheForX = new PatternToken("'", false, false, false);
  PatternToken letterX = new PatternToken("x", false, false, false);
  List<PatternToken> nonMatchingTokens = Arrays.asList(bleedForX, apostropheForX, letterX);
  PatternRule nonMatchingRule = new PatternRule("RULE", new English(), nonMatchingTokens, "desc", "msg", "shortMsg");

  SearcherResult positiveResult = errorSearcher.findRuleMatchesOnIndex(matchingRule, new English());
  assertEquals(1, positiveResult.getMatchingSentences().size());
  List<RuleMatch> foundMatches = positiveResult.getMatchingSentences().get(0).getRuleMatches();
  assertEquals(1, foundMatches.size());
  Rule reportedRule = foundMatches.get(0).getRule();
  assertEquals("RULE1", reportedRule.getId());

  SearcherResult negativeResult = errorSearcher.findRuleMatchesOnIndex(nonMatchingRule, new English());
  assertEquals(0, negativeResult.getMatchingSentences().size());
}
use of org.languagetool.rules.patterns.PatternToken in project languagetool by languagetool-org.
the class SimpleRuleCounter method countForLanguage.
/**
 * Prints, for the given language, the percentage of rules in {@code allRules} that are
 * {@link PatternRule}s whose pattern tokens are all accepted by {@code isSimple(PatternToken)}.
 * Rules that are not pattern rules are never counted as simple, but they do count
 * towards the total used as the denominator.
 *
 * @param allRules the rules to inspect; an empty list is reported as 0%
 * @param language the language the rules belong to, used only for the printed label
 */
private void countForLanguage(List<Rule> allRules, Language language) {
  int simpleCount = 0;
  for (Rule rule : allRules) {
    if (!(rule instanceof PatternRule)) {
      continue;
    }
    PatternRule patternRule = (PatternRule) rule;
    boolean allTokensSimple = true;
    for (PatternToken token : patternRule.getPatternTokens()) {
      if (!isSimple(token)) {
        allTokensSimple = false;
        break;
      }
    }
    if (allTokensSimple) {
      simpleCount++;
    }
  }
  // Avoid 0/0 (NaN) when there are no rules at all.
  float percent = allRules.isEmpty() ? 0f : (float) simpleCount / allRules.size() * 100;
  // Pass the language as an argument rather than concatenating it into the format
  // string, so a '%' in its string form cannot be misread as a format specifier.
  System.out.printf("%.0f%% for %s\n", percent, language);
}
use of org.languagetool.rules.patterns.PatternToken in project languagetool by languagetool-org.
the class DashRule method loadCompoundFile.
/**
 * Reads a list of hyphenated compounds from the given resource and adds one
 * {@link PatternRule} per usable line to {@code dashRules}.
 *
 * <p>Line handling:
 * <ul>
 *   <li>empty lines and lines starting with {@code #} are skipped;</li>
 *   <li>lines ending with {@code +} are skipped (non-hyphenated suggestions);</li>
 *   <li>a trailing {@code *} is stripped before the line is processed;</li>
 *   <li>the remaining text is split on {@code -}, and a dash token ({@code [—–]}) is
 *       inserted between each pair of adjacent parts.</li>
 * </ul>
 * The rule id is {@code "DASH_RULE"} followed by the 1-based line number in the file
 * (skipped lines are counted too).
 *
 * @param path resource path handed to the data broker
 * @throws IOException if the resource cannot be read
 */
private void loadCompoundFile(String path) throws IOException {
  try (InputStream stream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(path);
       // Charset constant instead of a charset name; fully qualified so no new import is needed.
       InputStreamReader reader = new InputStreamReader(stream, java.nio.charset.StandardCharsets.UTF_8);
       BufferedReader br = new BufferedReader(reader)) {
    String line;
    int counter = 0;
    while ((line = br.readLine()) != null) {
      counter++;
      if (line.isEmpty() || line.charAt(0) == '#') {
        // ignore empty lines and comments
        continue;
      }
      if (line.endsWith("+")) {
        // skip non-hyphenated suggestions
        continue;
      }
      if (line.endsWith("*")) {
        line = removeLastCharacter(line);
      }
      List<PatternToken> tokList = new ArrayList<>();
      String[] tokens = line.split("-");
      for (int i = 0; i < tokens.length; i++) {
        // word part of the compound
        tokList.add(new PatternToken(tokens[i], true, false, false));
        if (i < tokens.length - 1) {
          // dash (em or en) between two parts; presumably flagged as a regex by the third argument
          tokList.add(new PatternToken("[—–]", false, true, false));
        }
      }
      // The hyphenated form serves both as the suggestion and as the short message.
      String hyphenated = line.replaceAll("[–—]", "-");
      // Message: "Incorrect use of a dash instead of a hyphen. Correct: ...".
      // The original text said "dash instead of dash" (myślnika zamiast myślnika).
      PatternRule dashRule = new PatternRule("DASH_RULE" + counter, Languages.getLanguageForName("Polish"), tokList, "",
          "Błędne użycie myślnika zamiast łącznika. " + "Poprawnie: <suggestion>" + hyphenated + "</suggestion>.",
          hyphenated);
      dashRules.add(dashRule);
    }
  }
}
use of org.languagetool.rules.patterns.PatternToken in project languagetool by languagetool-org.
the class SpellingCheckRule method getTokensForSentenceStart.
/**
 * Builds the token sequence for a phrase as it would appear at the beginning of a
 * sentence: a token matching the sentence-start tag, then the first part with its first
 * character uppercased, then the remaining parts unchanged. All word tokens are created
 * via {@code csToken}.
 *
 * @param parts the words of the phrase, in order
 * @return the pattern tokens; empty if {@code parts} is empty
 */
private List<PatternToken> getTokensForSentenceStart(String[] parts) {
  List<PatternToken> sentenceStartTokens = new ArrayList<>();
  for (int idx = 0; idx < parts.length; idx++) {
    String word = parts[idx];
    if (idx > 0) {
      sentenceStartTokens.add(new PatternTokenBuilder().csToken(word).build());
      continue;
    }
    // at sentence start, we also need to accept a phrase that starts with an uppercase char:
    String capitalized = StringTools.uppercaseFirstChar(word);
    PatternToken sentenceStart = new PatternTokenBuilder().posRegex(JLanguageTool.SENTENCE_START_TAGNAME).build();
    PatternToken firstWord = new PatternTokenBuilder().csToken(capitalized).build();
    sentenceStartTokens.add(sentenceStart);
    sentenceStartTokens.add(firstWord);
  }
  return sentenceStartTokens;
}
use of org.languagetool.rules.patterns.PatternToken in project languagetool by languagetool-org.
the class SpellingCheckRule method acceptPhrases.
/**
 * Accept the given phrases (case-sensitively, unless at the start of a sentence) even
 * though they are not in the built-in dictionary.
 * Use this to avoid false alarms on e.g. names and technical terms. Unlike
 * {@link #addIgnoreTokens(List)} this can deal with phrases. Example call:
 * <code>rule.acceptPhrases(Arrays.asList("duodenal atresia"))</code>
 * With this, checking would not create an error for "duodenal atresia", but it would
 * still create an error for "duodenal" or "atresia" if they appear on their own.
 * Each phrase is split on single spaces; a phrase whose first word changes when its
 * first character is uppercased additionally gets a sentence-start variant.
 * @since 3.3
 */
public void acceptPhrases(List<String> phrases) {
  List<List<PatternToken>> collected = new ArrayList<>();
  for (String phrase : phrases) {
    String[] words = phrase.split(" ");
    // split() can return an empty array (e.g. for " "), so guard the first-word check
    boolean startsLowercase = words.length > 0
        && !StringTools.uppercaseFirstChar(words[0]).equals(words[0]);
    List<PatternToken> exactTokens = new ArrayList<>();
    for (String word : words) {
      exactTokens.add(new PatternTokenBuilder().csToken(word).build());
    }
    collected.add(exactTokens);
    if (startsLowercase) {
      collected.add(getTokensForSentenceStart(words));
    }
  }
  this.antiPatterns = makeAntiPatterns(collected, language);
}
Aggregations