Use of org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule in project languagetool by languagetool-org.
Class PatternRuleTest, method runTestForLanguage.
/**
 * Runs the pattern rule tests for a single language.
 *
 * <p>The pattern file is validated first, then two language tool instances are created:
 * one used to collect and test the pattern rules (with spelling rules disabled when
 * {@code CHECK_WITH_SENTENCE_SPLITTING} is set) and one that is passed to the rule id
 * validation and to the XML grammar rule tests. For every pattern rule the regular
 * expression syntax of its tokens and of its antipattern tokens is checked, and the test
 * fails if the rule has neither a correct example nor an incorrect example that carries
 * at least one correction.
 *
 * <p>Both language tool instances are shut down in {@code finally} blocks so that they are
 * released even when a check fails or throws.
 *
 * @param lang the language whose pattern rules are tested
 * @throws IOException propagated from the validation and test helpers
 */
public void runTestForLanguage(Language lang) throws IOException {
  validatePatternFile(lang);
  System.out.print("Running pattern rule tests for " + lang.getName() + "... ");
  MultiThreadedJLanguageTool languageTool = new MultiThreadedJLanguageTool(lang);
  try {
    if (CHECK_WITH_SENTENCE_SPLITTING) {
      disableSpellingRules(languageTool);
    }
    MultiThreadedJLanguageTool allRulesLanguageTool = new MultiThreadedJLanguageTool(lang);
    try {
      validateRuleIds(lang, allRulesLanguageTool);
      List<AbstractPatternRule> rules = getAllPatternRules(lang, languageTool);
      for (AbstractPatternRule rule : rules) {
        // Test the rule pattern.
        /* check for useless 'marker' elements commented out - too slow to always run:
        PatternRuleXmlCreator creator = new PatternRuleXmlCreator();
        String xml = creator.toXML(rule.getPatternRuleId(), lang);
        if (PATTERN_MARKER_START.matcher(xml).matches() && PATTERN_MARKER_END.matcher(xml).matches()) {
          System.err.println("WARNING " + lang + ": useless <marker>: " + rule.getFullId());
        }*/
        // too aggressive for now:
        //PatternTestTools.failIfWhitespaceInToken(rule.getPatternTokens(), rule, lang);
        PatternTestTools.warnIfRegexpSyntaxNotKosher(rule.getPatternTokens(), rule.getId(), rule.getSubId(), lang);

        // Test the rule antipatterns.
        List<DisambiguationPatternRule> antiPatterns = rule.getAntiPatterns();
        for (DisambiguationPatternRule antiPattern : antiPatterns) {
          PatternTestTools.warnIfRegexpSyntaxNotKosher(antiPattern.getPatternTokens(), antiPattern.getId(), antiPattern.getSubId(), lang);
        }

        // A rule without any correct example must at least offer one correction
        // on one of its incorrect examples.
        if (rule.getCorrectExamples().isEmpty()) {
          boolean correctionExists = false;
          for (IncorrectExample incorrectExample : rule.getIncorrectExamples()) {
            if (!incorrectExample.getCorrections().isEmpty()) {
              correctionExists = true;
              break;
            }
          }
          if (!correctionExists) {
            fail("Rule " + rule.getFullId() + " in language " + lang + " needs at least one <example> with a 'correction' attribute" + " or one <example> of type='correct'.");
          }
        }
      }
      testGrammarRulesFromXML(rules, languageTool, allRulesLanguageTool, lang);
      System.out.println(rules.size() + " rules tested.");
    } finally {
      // Same shutdown order as on the success path: all-rules instance first.
      allRulesLanguageTool.shutdown();
    }
  } finally {
    languageTool.shutdown();
  }
}
Use of org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule in project languagetool by languagetool-org.
Class PatternRuleHandler, method endElement.
/**
 * Handles the end of an XML element by dispatching on its qualified name and
 * finalising or resetting the parser state that was collected for that element.
 *
 * <p>Simple elements only reset flags and counters inline; elements that need several
 * steps (rule, antipattern, example, match, rulegroup, marker, unify) are delegated to
 * private helpers. Element names not listed in the switch are ignored.
 *
 * @param namespaceURI not used by this implementation
 * @param sName not used by this implementation
 * @param qName the qualified element name that selects the action
 * @throws SAXException declared by the overridden callback; may be propagated from the
 *     finalisation helpers
 */
@Override
public void endElement(String namespaceURI, String sName, String qName) throws SAXException {
  switch (qName) {
    case "category":
      categoryIssueType = null;
      break;
    case "regexp":
      inRegex = false;
      break;
    case RULE:
      endRuleElement();
      break;
    case EXCEPTION:
      finalizeExceptions();
      break;
    case AND:
      inAndGroup = false;
      andGroupCounter = 0;
      tokenCounter++;
      break;
    case OR:
      inOrGroup = false;
      orGroupCounter = 0;
      tokenCounter++;
      break;
    case TOKEN:
      finalizeTokens();
      break;
    case PATTERN:
      inPattern = false;
      if (lastPhrase) {
        patternTokens.clear();
      }
      tokenCounter = 0;
      break;
    case ANTIPATTERN:
      endAntiPatternElement();
      break;
    case EXAMPLE:
      endExampleElement();
      break;
    case MESSAGE:
      suggestionMatches = addLegacyMatches(suggestionMatches, message.toString(), true);
      inMessage = false;
      break;
    case SUGGESTION:
      if (inMessage) {
        message.append("</suggestion>");
      } else {
        // suggestion outside message
        suggestionsOutMsg.append("</suggestion>");
      }
      inSuggestion = false;
      break;
    case "short":
      inShortMessage = false;
      inShortMessageForRuleGroup = false;
      break;
    case "url":
      inUrl = false;
      inUrlForRuleGroup = false;
      break;
    case MATCH:
      endMatchElement();
      break;
    case RULEGROUP:
      endRuleGroupElement();
      break;
    case MARKER:
      endMarkerElement();
      break;
    case "phrase":
      if (inPhrases) {
        finalizePhrase();
      }
      break;
    case "includephrases":
      patternTokens.clear();
      break;
    case PHRASES:
      inPhrases = false;
      break;
    case UNIFICATION:
      inUnificationDef = false;
      break;
    case FEATURE:
      equivalenceFeatures.put(uFeature, uTypeList);
      uTypeList = new ArrayList<>();
      break;
    case UNIFY:
      endUnifyElement();
      break;
    case UNIFY_IGNORE:
      inUnificationNeutral = false;
      break;
  }
}

/** Creates the rule(s) for the finished rule element and resets the per-rule state. */
private void endRuleElement() throws SAXException {
  suggestionMatchesOutMsg = addLegacyMatches(suggestionMatchesOutMsg, suggestionsOutMsg.toString(), false);
  if (relaxedMode && id == null) {
    id = "";
  }
  if (relaxedMode && name == null) {
    name = "";
  }
  if (phrasePatternTokens.isEmpty()) {
    // Elements contain information whether they are inside a <marker>...</marker>,
    // but for phraserefs this depends on the position where the phraseref is used
    // not where it's defined. Thus we have to copy the elements so each use of
    // the phraseref can carry their own information:
    List<PatternToken> tmpPatternTokens = new ArrayList<>();
    createRules(new ArrayList<>(patternTokens), tmpPatternTokens, 0);
  } else {
    if (!patternTokens.isEmpty()) {
      // Append the tokens of the pattern itself to every collected phrase.
      for (List<PatternToken> ph : phrasePatternTokens) {
        ph.addAll(new ArrayList<>(patternTokens));
      }
    }
    for (List<PatternToken> phrasePatternToken : phrasePatternTokens) {
      processElement(phrasePatternToken);
      List<PatternToken> tmpPatternTokens = new ArrayList<>();
      createRules(phrasePatternToken, tmpPatternTokens, 0);
    }
  }
  patternTokens.clear();
  // NOTE(review): phrasePatternTokens is already dereferenced above, so this guard only
  // matters if createRules/processElement can set the field to null - confirm and drop
  // the check if they cannot.
  if (phrasePatternTokens != null) {
    phrasePatternTokens.clear();
  }
  ruleIssueType = null;
  inRule = false;
  filterClassName = null;
  filterArgs = null;
}

/**
 * Builds an IMMUNIZE disambiguation rule from the collected antipattern tokens and
 * registers it with the current rule or, outside a rule, with the current rule group.
 */
private void endAntiPatternElement() throws SAXException {
  String antiId = id;
  if (inRuleGroup) {
    antiId = subId > 0 ? ruleGroupId + "[" + subId + "]" : ruleGroupId;
  }
  DisambiguationPatternRule rule = new DisambiguationPatternRule(antiId + "_antipattern:" + antiPatternCounter, "antipattern", language, patternTokens, null, null, DisambiguationPatternRule.DisambiguatorAction.IMMUNIZE);
  if (startPos != -1 && endPos != -1) {
    rule.setStartPositionCorrection(startPos);
    rule.setEndPositionCorrection(endPos - tokenCountForMarker);
  } else {
    // No marker positions recorded: treat every token as being inside the marker
    // (workaround for https://github.com/languagetool-org/languagetool/issues/189).
    for (PatternToken patternToken : patternTokens) {
      patternToken.setInsideMarker(true);
    }
  }
  patternTokens.clear();
  if (inRule) {
    ruleAntiPatterns.add(rule);
  } else {
    // a rulegroup shares all antipatterns not included in a single rule
    rulegroupAntiPatterns.add(rule);
  }
  tokenCounter = 0;
  inAntiPattern = false;
}

/**
 * Stores the finished example in the list matching its kind (correct, incorrect or
 * error-triggering) and resets all example flags and buffers.
 */
private void endExampleElement() throws SAXException {
  if (inCorrectExample) {
    correctExamples.add(new CorrectExample(correctExample.toString()));
  } else if (inIncorrectExample) {
    String correctionText = exampleCorrection.toString();
    List<String> corrections = new ArrayList<>(Arrays.asList(correctionText.split("\\|")));
    IncorrectExample example;
    if (!corrections.isEmpty()) {
      if (correctionText.endsWith("|")) {
        // split() will ignore trailing empty items
        corrections.add("");
      }
      example = new IncorrectExample(incorrectExample.toString(), corrections);
    } else {
      // NOTE(review): split() yields an empty array only when the text consists solely
      // of '|' separators; such input ends up here without any correction - confirm
      // that this is intended.
      example = new IncorrectExample(incorrectExample.toString());
    }
    incorrectExamples.add(example);
  } else if (inErrorTriggerExample) {
    errorTriggeringExamples.add(new ErrorTriggeringExample(errorTriggerExample.toString()));
  }
  inCorrectExample = false;
  inIncorrectExample = false;
  inErrorTriggerExample = false;
  correctExample = new StringBuilder();
  incorrectExample = new StringBuilder();
  errorTriggerExample = new StringBuilder();
  exampleCorrection = new StringBuilder();
}

/** Sets the collected match text as lemma string on the match currently being built. */
private void endMatchElement() throws SAXException {
  if (inMessage) {
    suggestionMatches.get(suggestionMatches.size() - 1).setLemmaString(match.toString());
  } else if (inSuggestion) {
    suggestionMatchesOutMsg.get(suggestionMatchesOutMsg.size() - 1).setLemmaString(match.toString());
  } else if (inToken) {
    tokenReference.setLemmaString(match.toString());
  }
  inMatch = false;
}

/** Resets all state that is scoped to a rule group. */
private void endRuleGroupElement() throws SAXException {
  urlForRuleGroup = new StringBuilder();
  shortMessageForRuleGroup = new StringBuilder();
  inRuleGroup = false;
  ruleGroupIssueType = null;
  rulegroupAntiPatterns.clear();
  antiPatternCounter = 0;
  ruleGroupDefaultOff = false;
  defaultOff = false;
}

/**
 * Closes a marker: inside an example the closing tag is appended to the example text,
 * inside a pattern or antipattern the marker end position is recorded.
 */
private void endMarkerElement() throws SAXException {
  if (inCorrectExample) {
    correctExample.append("</marker>");
  } else if (inIncorrectExample) {
    incorrectExample.append("</marker>");
  } else if (inErrorTriggerExample) {
    errorTriggerExample.append("</marker>");
  } else if (inPattern || inAntiPattern) {
    endPos = tokenCountForMarker;
    inMarker = false;
  }
}

/** Ends a unify block and flags the last collected pattern token accordingly. */
private void endUnifyElement() throws SAXException {
  inUnification = false;
  // clear the features...
  equivalenceFeatures = new HashMap<>();
  // set negation on the last token only!
  int lastElement = patternTokens.size() - 1;
  patternTokens.get(lastElement).setLastInUnification();
  if (uniNegation) {
    patternTokens.get(lastElement).setUniNegation();
  }
}
Use of org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule in project languagetool by languagetool-org.
Class TestFrenchDisambiguator, method disambiguate.
/**
 * Applies every disambiguation rule loaded from the classpath resource
 * {@code /disambiguator.xml} to the given sentence, one after another, each rule
 * working on the result of the previous one.
 *
 * @param input the sentence to disambiguate
 * @return the sentence after all loaded rules have been applied
 * @throws IOException declared by the overridden method
 * @throws RuntimeException wrapping any exception raised while loading or applying the rules
 */
@Override
public AnalyzedSentence disambiguate(AnalyzedSentence input) throws IOException {
  String filePath = "/disambiguator.xml";
  AnalyzedSentence current = input;
  try (InputStream rulesStream = getClass().getResourceAsStream(filePath)) {
    List<DisambiguationPatternRule> loadedRules = new DisambiguationRuleLoader().getRules(rulesStream);
    for (DisambiguationPatternRule loadedRule : loadedRules) {
      // Feed the output of each rule into the next one.
      current = loadedRule.replace(current);
    }
  } catch (Exception e) {
    throw new RuntimeException("Problems with loading disambiguation file: " + filePath, e);
  }
  return current;
}
Aggregations