Search in sources :

Example 1 with DisambiguationPatternRule

use of org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule in project languagetool by languagetool-org.

the class PatternRuleTest method runTestForLanguage.

/**
 * Runs the pattern-rule checks for one language: validates the pattern file and
 * the rule ids, checks the regular-expression syntax of every pattern rule and of
 * its antipatterns, verifies that every rule ships with a usable example, and
 * finally runs the XML-defined grammar rule tests.
 *
 * <p>Both {@link MultiThreadedJLanguageTool} instances are shut down in
 * {@code finally} blocks, so they are released even when a check fails or throws.
 *
 * @param lang the language whose pattern rules are tested
 * @throws IOException declared for the helper calls made by this method
 */
public void runTestForLanguage(Language lang) throws IOException {
    validatePatternFile(lang);
    System.out.print("Running pattern rule tests for " + lang.getName() + "... ");
    MultiThreadedJLanguageTool languageTool = new MultiThreadedJLanguageTool(lang);
    try {
        if (CHECK_WITH_SENTENCE_SPLITTING) {
            disableSpellingRules(languageTool);
        }
        MultiThreadedJLanguageTool allRulesLanguageTool = new MultiThreadedJLanguageTool(lang);
        try {
            validateRuleIds(lang, allRulesLanguageTool);
            List<AbstractPatternRule> rules = getAllPatternRules(lang, languageTool);
            for (AbstractPatternRule rule : rules) {
                // Test the rule pattern.
                /* check for useless 'marker' elements commented out - too slow to always run:
              PatternRuleXmlCreator creator = new PatternRuleXmlCreator();
              String xml = creator.toXML(rule.getPatternRuleId(), lang);
              if (PATTERN_MARKER_START.matcher(xml).matches() && PATTERN_MARKER_END.matcher(xml).matches()) {
                System.err.println("WARNING " + lang + ": useless <marker>: " + rule.getFullId());
              }*/
                // too aggressive for now:
                //PatternTestTools.failIfWhitespaceInToken(rule.getPatternTokens(), rule, lang);
                PatternTestTools.warnIfRegexpSyntaxNotKosher(rule.getPatternTokens(), rule.getId(), rule.getSubId(), lang);
                // Test the rule antipatterns.
                List<DisambiguationPatternRule> antiPatterns = rule.getAntiPatterns();
                for (DisambiguationPatternRule antiPattern : antiPatterns) {
                    PatternTestTools.warnIfRegexpSyntaxNotKosher(antiPattern.getPatternTokens(), antiPattern.getId(), antiPattern.getSubId(), lang);
                }
                // A rule without any correct example must at least provide one
                // incorrect example that carries a correction.
                if (rule.getCorrectExamples().isEmpty()) {
                    boolean correctionExists = false;
                    for (IncorrectExample incorrectExample : rule.getIncorrectExamples()) {
                        if (!incorrectExample.getCorrections().isEmpty()) {
                            correctionExists = true;
                            break;
                        }
                    }
                    if (!correctionExists) {
                        fail("Rule " + rule.getFullId() + " in language " + lang + " needs at least one <example> with a 'correction' attribute" + " or one <example> of type='correct'.");
                    }
                }
            }
            testGrammarRulesFromXML(rules, languageTool, allRulesLanguageTool, lang);
            System.out.println(rules.size() + " rules tested.");
        } finally {
            // Release this instance even when a check above failed.
            allRulesLanguageTool.shutdown();
        }
    } finally {
        languageTool.shutdown();
    }
}
Also used : MultiThreadedJLanguageTool(org.languagetool.MultiThreadedJLanguageTool) DisambiguationPatternRule(org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule) IncorrectExample(org.languagetool.rules.IncorrectExample)

Example 2 with DisambiguationPatternRule

use of org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule in project languagetool by languagetool-org.

the class PatternRuleHandler method endElement.

/**
 * SAX callback for a closing tag. Dispatches on {@code qName} and finalizes or
 * resets the handler state that was accumulated while the element was open
 * (rules, antipatterns, examples, messages, suggestions, unification blocks, ...).
 *
 * @param namespaceURI namespace URI of the element; not used by this method
 * @param sName local name of the element; not used by this method
 * @param qName qualified element name; selects the branch that is executed
 * @throws SAXException declared by the overridden callback; nothing in this
 *         method throws it directly
 */
@Override
public void endElement(String namespaceURI, String sName, String qName) throws SAXException {
    switch(qName) {
        case "category":
            categoryIssueType = null;
            break;
        case "regexp":
            inRegex = false;
            break;
        case RULE:
            // Fold the suggestions that were given outside of <message> into the match list.
            suggestionMatchesOutMsg = addLegacyMatches(suggestionMatchesOutMsg, suggestionsOutMsg.toString(), false);
            // In relaxed mode a missing id/name is replaced by an empty string.
            if (relaxedMode && id == null) {
                id = "";
            }
            if (relaxedMode && name == null) {
                name = "";
            }
            if (phrasePatternTokens.isEmpty()) {
                // Elements contain information whether they are inside a <marker>...</marker>,
                // but for phraserefs this depends on the position where the phraseref is used
                // not where it's defined. Thus we have to copy the elements so each use of
                // the phraseref can carry their own information:
                List<PatternToken> tmpPatternTokens = new ArrayList<>();
                createRules(new ArrayList<>(patternTokens), tmpPatternTokens, 0);
            } else {
                // Append the rule's own tokens to every phrase token list before creating rules.
                if (!patternTokens.isEmpty()) {
                    for (List<PatternToken> ph : phrasePatternTokens) {
                        ph.addAll(new ArrayList<>(patternTokens));
                    }
                }
                for (List<PatternToken> phrasePatternToken : phrasePatternTokens) {
                    processElement(phrasePatternToken);
                    List<PatternToken> tmpPatternTokens = new ArrayList<>();
                    createRules(phrasePatternToken, tmpPatternTokens, 0);
                }
            }
            // Reset the per-rule state.
            patternTokens.clear();
            // NOTE(review): this null check looks redundant - phrasePatternTokens was
            // already dereferenced by the isEmpty() call above.
            if (phrasePatternTokens != null) {
                phrasePatternTokens.clear();
            }
            ruleIssueType = null;
            inRule = false;
            filterClassName = null;
            filterArgs = null;
            break;
        case EXCEPTION:
            finalizeExceptions();
            break;
        case AND:
            inAndGroup = false;
            andGroupCounter = 0;
            tokenCounter++;
            break;
        case OR:
            inOrGroup = false;
            orGroupCounter = 0;
            tokenCounter++;
            break;
        case TOKEN:
            finalizeTokens();
            break;
        case PATTERN:
            inPattern = false;
            if (lastPhrase) {
                patternTokens.clear();
            }
            tokenCounter = 0;
            break;
        case ANTIPATTERN:
            // Inside a rule group the antipattern id is derived from the group id,
            // with the sub id appended in brackets when it is greater than zero.
            String antiId = id;
            if (inRuleGroup) {
                if (subId > 0) {
                    antiId = ruleGroupId + "[" + subId + "]";
                } else {
                    antiId = ruleGroupId;
                }
            }
            // NOTE(review): patternTokens is mutated/cleared below after being handed to the
            // rule; presumably the constructor copies the list - confirm.
            DisambiguationPatternRule rule = new DisambiguationPatternRule(antiId + "_antipattern:" + antiPatternCounter, "antipattern", language, patternTokens, null, null, DisambiguationPatternRule.DisambiguatorAction.IMMUNIZE);
            if (startPos != -1 && endPos != -1) {
                rule.setStartPositionCorrection(startPos);
                rule.setEndPositionCorrection(endPos - tokenCountForMarker);
            } else {
                // workaround for issue https://github.com/languagetool-org/languagetool/issues/189:
                for (PatternToken patternToken : patternTokens) {
                    patternToken.setInsideMarker(true);
                }
            }
            patternTokens.clear();
            if (inRule) {
                ruleAntiPatterns.add(rule);
            } else {
                // a rulegroup shares all antipatterns not included in a single rule
                rulegroupAntiPatterns.add(rule);
            }
            tokenCounter = 0;
            inAntiPattern = false;
            break;
        case EXAMPLE:
            // Store the example that was just closed, according to its kind.
            if (inCorrectExample) {
                correctExamples.add(new CorrectExample(correctExample.toString()));
            } else if (inIncorrectExample) {
                IncorrectExample example;
                // Alternative corrections are separated by '|'.
                List<String> corrections = new ArrayList<>();
                corrections.addAll(Arrays.asList(exampleCorrection.toString().split("\\|")));
                // NOTE(review): String.split() returns a one-element array for an empty input, so
                // the list is empty only when the correction text consists solely of '|' characters.
                if (corrections.size() > 0) {
                    if (exampleCorrection.toString().endsWith("|")) {
                        // split() will ignore trailing empty items
                        corrections.add("");
                    }
                    example = new IncorrectExample(incorrectExample.toString(), corrections);
                } else {
                    example = new IncorrectExample(incorrectExample.toString());
                }
                incorrectExamples.add(example);
            } else if (inErrorTriggerExample) {
                errorTriggeringExamples.add(new ErrorTriggeringExample(errorTriggerExample.toString()));
            }
            // Reset the example flags and text buffers for the next <example>.
            inCorrectExample = false;
            inIncorrectExample = false;
            inErrorTriggerExample = false;
            correctExample = new StringBuilder();
            incorrectExample = new StringBuilder();
            errorTriggerExample = new StringBuilder();
            exampleCorrection = new StringBuilder();
            break;
        case MESSAGE:
            suggestionMatches = addLegacyMatches(suggestionMatches, message.toString(), true);
            inMessage = false;
            break;
        case SUGGESTION:
            // Re-emit the closing tag into whichever buffer the suggestion belongs to.
            if (inMessage) {
                message.append("</suggestion>");
            } else {
                //suggestion outside message
                suggestionsOutMsg.append("</suggestion>");
            }
            inSuggestion = false;
            break;
        case "short":
            inShortMessage = false;
            inShortMessageForRuleGroup = false;
            break;
        case "url":
            inUrl = false;
            inUrlForRuleGroup = false;
            break;
        case MATCH:
            // Attach the collected match text as lemma string to the most recent match
            // of the current context (message, suggestion outside message, or token).
            if (inMessage) {
                suggestionMatches.get(suggestionMatches.size() - 1).setLemmaString(match.toString());
            } else if (inSuggestion) {
                suggestionMatchesOutMsg.get(suggestionMatchesOutMsg.size() - 1).setLemmaString(match.toString());
            } else if (inToken) {
                tokenReference.setLemmaString(match.toString());
            }
            inMatch = false;
            break;
        case RULEGROUP:
            // Reset everything that is scoped to the rule group.
            urlForRuleGroup = new StringBuilder();
            shortMessageForRuleGroup = new StringBuilder();
            inRuleGroup = false;
            ruleGroupIssueType = null;
            rulegroupAntiPatterns.clear();
            antiPatternCounter = 0;
            ruleGroupDefaultOff = false;
            defaultOff = false;
            break;
        case MARKER:
            // Inside examples the closing tag is kept in the example text; inside a
            // pattern or antipattern it records the end position of the marked tokens.
            if (inCorrectExample) {
                correctExample.append("</marker>");
            } else if (inIncorrectExample) {
                incorrectExample.append("</marker>");
            } else if (inErrorTriggerExample) {
                errorTriggerExample.append("</marker>");
            } else if (inPattern || inAntiPattern) {
                endPos = tokenCountForMarker;
                inMarker = false;
            }
            break;
        case "phrase":
            if (inPhrases) {
                finalizePhrase();
            }
            break;
        case "includephrases":
            patternTokens.clear();
            break;
        case PHRASES:
            if (inPhrases) {
                inPhrases = false;
            }
            break;
        case UNIFICATION:
            inUnificationDef = false;
            break;
        case FEATURE:
            // Register the collected types for the feature and start a fresh type list.
            equivalenceFeatures.put(uFeature, uTypeList);
            uTypeList = new ArrayList<>();
            break;
        case UNIFY:
            inUnification = false;
            //clear the features...
            equivalenceFeatures = new HashMap<>();
            //set negation on the last token only!
            // NOTE(review): if patternTokens is empty here, lastElement is -1 and get() will
            // presumably throw - confirm an empty <unify> cannot reach this point.
            int lastElement = patternTokens.size() - 1;
            patternTokens.get(lastElement).setLastInUnification();
            if (uniNegation) {
                patternTokens.get(lastElement).setUniNegation();
            }
            break;
        case UNIFY_IGNORE:
            inUnificationNeutral = false;
            break;
    }
}
Also used : ArrayList(java.util.ArrayList) DisambiguationPatternRule(org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule) List(java.util.List)

Example 3 with DisambiguationPatternRule

use of org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule in project languagetool by languagetool-org.

the class TestFrenchDisambiguator method disambiguate.

/**
 * Applies every rule loaded from the {@code /disambiguator.xml} classpath
 * resource to the given sentence, in the order the loader returns them, and
 * returns the result. The rules are loaded again on each call.
 *
 * @param input the analyzed sentence to disambiguate
 * @return the sentence after all loaded rules have been applied
 * @throws IOException declared by the overridden method; any exception raised
 *         in the body is rethrown wrapped in a {@link RuntimeException}
 */
@Override
public AnalyzedSentence disambiguate(AnalyzedSentence input) throws IOException {
    final String rulesResource = "/disambiguator.xml";
    AnalyzedSentence result = input;
    try (InputStream rulesStream = getClass().getResourceAsStream(rulesResource)) {
        List<DisambiguationPatternRule> loadedRules = new DisambiguationRuleLoader().getRules(rulesStream);
        // Each rule works on the output of the previous one.
        for (DisambiguationPatternRule current : loadedRules) {
            result = current.replace(result);
        }
    } catch (Exception cause) {
        throw new RuntimeException("Problems with loading disambiguation file: " + rulesResource, cause);
    }
    return result;
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence) InputStream(java.io.InputStream) DisambiguationPatternRule(org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule) DisambiguationRuleLoader(org.languagetool.tagging.disambiguation.rules.DisambiguationRuleLoader) IOException(java.io.IOException)

Aggregations

DisambiguationPatternRule (org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule): 3, IOException (java.io.IOException): 1, InputStream (java.io.InputStream): 1, ArrayList (java.util.ArrayList): 1, List (java.util.List): 1, AnalyzedSentence (org.languagetool.AnalyzedSentence): 1, MultiThreadedJLanguageTool (org.languagetool.MultiThreadedJLanguageTool): 1, IncorrectExample (org.languagetool.rules.IncorrectExample): 1, DisambiguationRuleLoader (org.languagetool.tagging.disambiguation.rules.DisambiguationRuleLoader): 1