Search in sources :

Example 6 with IncorrectExample

use of org.languagetool.rules.IncorrectExample in project languagetool by languagetool-org.

the class UselessExampleFinder method run.

/**
 * Reads the grammar.xml of the given language, passes every incorrect example of
 * every active {@link PatternRule} to {@code checkCorrections}, then prints the
 * counters to stderr and the XML lines to stdout.
 *
 * @param lang the language whose grammar.xml is loaded (located via its short code)
 * @throws IOException if the grammar file cannot be read
 * @throws RuntimeException if the hard-coded base path does not exist
 */
private void run(Language lang) throws IOException {
    File basePath = new File("/lt/git/languagetool/languagetool-language-modules");
    if (!basePath.exists()) {
        throw new RuntimeException("basePath does not exist: " + basePath);
    }
    String langCode = lang.getShortCode();
    File xml = new File(basePath, "/" + langCode + "/src/main/resources/org/languagetool/rules/" + langCode + "/grammar.xml");
    List<String> xmlLines;
    // IOUtils.readLines(Reader) does not close the reader it is given, so close it
    // here to avoid leaking the file handle.
    try (FileReader reader = new FileReader(xml)) {
        xmlLines = IOUtils.readLines(reader);
    }
    JLanguageTool tool = new JLanguageTool(lang);
    for (Rule rule : tool.getAllActiveRules()) {
        // only XML pattern rules are examined
        if (!(rule instanceof PatternRule)) {
            continue;
        }
        List<CorrectExample> correctExamples = rule.getCorrectExamples();
        List<IncorrectExample> incorrectExamples = rule.getIncorrectExamples();
        for (IncorrectExample incorrectExample : incorrectExamples) {
            // NOTE(review): checkCorrections presumably updates xmlLines and the two
            // counters printed below - confirm against its implementation
            checkCorrections(rule, correctExamples, incorrectExample, xmlLines);
        }
    }
    // statistics go to stderr so that stdout contains only the XML lines
    System.err.println("Useless examples: " + uselessExampleCount);
    System.err.println("Removed lines: " + removedLinesCount);
    for (String xmlLine : xmlLines) {
        System.out.println(xmlLine);
    }
}
Also used : PatternRule(org.languagetool.rules.patterns.PatternRule) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule) CorrectExample(org.languagetool.rules.CorrectExample) JLanguageTool(org.languagetool.JLanguageTool) FileReader(java.io.FileReader) Rule(org.languagetool.rules.Rule) PatternRule(org.languagetool.rules.patterns.PatternRule) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule) IncorrectExample(org.languagetool.rules.IncorrectExample) File(java.io.File)

Example 7 with IncorrectExample

use of org.languagetool.rules.IncorrectExample in project languagetool by languagetool-org.

the class FalseFriendRuleHandler method endElement.

/**
 * SAX callback for a closing tag: finishes whatever element {@code qName} names
 * (rule, token, pattern, translation, example, message or rule group) and resets
 * the per-element parser state.
 */
@Override
public void endElement(String namespaceURI, String sName, String qName) throws SAXException {
    switch(qName) {
        case RULE: {
            boolean textLanguageMatches = language.equalsConsiderVariantsIfSpecified(textLanguage);
            // a rule is only created if a translation into the mother tongue was collected
            if (textLanguageMatches
                    && translationLanguage != null
                    && translationLanguage.equalsConsiderVariantsIfSpecified(motherTongue)
                    && language != motherTongue
                    && !translations.isEmpty()) {
                formatter.applyPattern(messages.getString("false_friend_hint"));
                String tokensAsString = StringUtils.join(patternTokens, " ").replace('|', '/');
                Object[] hintArgs = {
                    tokensAsString,
                    messages.getString(textLanguage.getShortCode()),
                    formatTranslations(translations),
                    messages.getString(motherTongue.getShortCode())
                };
                String description = formatter.format(hintArgs);
                String ruleDescription = messages.getString("false_friend_desc") + " " + tokensAsString;
                String shortMessage = messages.getString("false_friend");
                PatternRule falseFriendRule = new FalseFriendPatternRule(id, language, patternTokens, ruleDescription, description, shortMessage);
                falseFriendRule.setCorrectExamples(correctExamples);
                falseFriendRule.setIncorrectExamples(incorrectExamples);
                falseFriendRule.setCategory(Categories.FALSE_FRIENDS.getCategory(messages));
                if (defaultOff) {
                    falseFriendRule.setDefaultOff();
                }
                rules.add(falseFriendRule);
            }
            // the token list is cleared whether or not a rule was created
            if (patternTokens != null) {
                patternTokens.clear();
            }
            break;
        }
        case TOKEN: {
            finalizeTokens();
            break;
        }
        case PATTERN: {
            inPattern = false;
            break;
        }
        case TRANSLATION: {
            // currentTranslationLanguage can be null if the language is not supported
            if (currentTranslationLanguage != null) {
                if (currentTranslationLanguage.equalsConsiderVariantsIfSpecified(motherTongue)) {
                    translations.add(translation);
                }
                if (currentTranslationLanguage.equalsConsiderVariantsIfSpecified(textLanguage)
                        && language.equalsConsiderVariantsIfSpecified(motherTongue)) {
                    suggestions.add(translation.toString());
                }
            }
            translation = new StringBuilder();
            inTranslation = false;
            currentTranslationLanguage = null;
            break;
        }
        case EXAMPLE: {
            if (inCorrectExample) {
                CorrectExample finished = new CorrectExample(correctExample.toString());
                correctExamples.add(finished);
            } else if (inIncorrectExample) {
                IncorrectExample finished = new IncorrectExample(incorrectExample.toString());
                incorrectExamples.add(finished);
            }
            // reset the example state for the next <example> element
            correctExample = new StringBuilder();
            incorrectExample = new StringBuilder();
            inCorrectExample = false;
            inIncorrectExample = false;
            break;
        }
        case MESSAGE: {
            inMessage = false;
            break;
        }
        case RULEGROUP: {
            if (!suggestions.isEmpty()) {
                // store a copy, because the shared list is cleared right afterwards
                List<String> suggestionsCopy = new ArrayList<>(suggestions);
                suggestionMap.put(id, suggestionsCopy);
                suggestions.clear();
            }
            inRuleGroup = false;
            break;
        }
    }
}
Also used : CorrectExample(org.languagetool.rules.CorrectExample) IncorrectExample(org.languagetool.rules.IncorrectExample)

Example 8 with IncorrectExample

use of org.languagetool.rules.IncorrectExample in project languagetool by languagetool-org.

the class BitextPatternRuleHandler method setExample.

/**
 * Builds an example object from the text buffered for the current example element
 * and resets the example buffers.
 *
 * @return the example built from the correct or incorrect buffer, or {@code null}
 *         if the parser is in neither a correct nor an incorrect example
 * @throws RuntimeException if the parser is only in a 'triggers_error' example
 */
private IncorrectExample setExample() {
    if (!inCorrectExample && !inIncorrectExample && inErrorTriggerExample) {
        throw new RuntimeException("'triggers_error' is not supported for bitext XML");
    }
    IncorrectExample result = null;
    if (inCorrectExample) {
        // correct examples are also wrapped in IncorrectExample here
        result = new IncorrectExample(correctExample.toString());
    } else if (inIncorrectExample) {
        String[] parts = exampleCorrection.toString().split("\\|");
        boolean hasCorrections = parts.length > 0 && !parts[0].isEmpty();
        result = hasCorrections
                ? new IncorrectExample(incorrectExample.toString(), Arrays.asList(parts))
                : new IncorrectExample(incorrectExample.toString());
    }
    // start with fresh buffers for the next example
    correctExample = new StringBuilder();
    incorrectExample = new StringBuilder();
    exampleCorrection = new StringBuilder();
    return result;
}
Also used : IncorrectExample(org.languagetool.rules.IncorrectExample)

Example 9 with IncorrectExample

use of org.languagetool.rules.IncorrectExample in project languagetool by languagetool-org.

the class ExampleSentenceProvider method initExampleSentences.

/**
 * Collects the incorrect example sentences of all active pattern rules of the
 * given language that are not off by default, and stores them in
 * {@code languageToExamples} under that language.
 *
 * @param language the language whose rules are scanned
 * @throws IOException declared by the signature; not thrown directly by the visible code
 */
private void initExampleSentences(Language language) throws IOException {
    JLanguageTool lt = new JLanguageTool(language);
    List<ExampleSentence> collected = new ArrayList<>();
    for (Rule candidate : lt.getAllActiveRules()) {
        boolean isPatternRule = candidate instanceof AbstractPatternRule;
        if (!isPatternRule || candidate.isDefaultOff()) {
            continue;
        }
        for (IncorrectExample wrong : candidate.getIncorrectExamples()) {
            collected.add(new ExampleSentence(wrong.getExample(), candidate.getId()));
        }
    }
    languageToExamples.put(language, collected);
}
Also used : JLanguageTool(org.languagetool.JLanguageTool) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule) Rule(org.languagetool.rules.Rule) IncorrectExample(org.languagetool.rules.IncorrectExample) AbstractPatternRule(org.languagetool.rules.patterns.AbstractPatternRule)

Example 10 with IncorrectExample

use of org.languagetool.rules.IncorrectExample in project languagetool by languagetool-org.

the class IndexerSearcherTest method createIndex.

/**
 * Indexes one document per active, not-default-off {@link PatternRule}: the rule's
 * full id plus each incorrect example (marker tags stripped). Afterwards a new
 * {@code Searcher} on the same directory is assigned to {@code errorSearcher}.
 *
 * @param lt the tool whose language and active rules are indexed
 * @return the number of rules that were added to the index
 * @throws IOException declared by the signature, e.g. for failures while indexing
 */
private int createIndex(JLanguageTool lt) throws IOException {
    int indexedRules = 0;
    try (Indexer indexer = new Indexer(directory, lt.getLanguage())) {
        for (Rule candidate : lt.getAllActiveRules()) {
            if (!(candidate instanceof PatternRule) || candidate.isDefaultOff()) {
                continue;
            }
            PatternRule patternRule = (PatternRule) candidate;
            List<IncorrectExample> wrongExamples = candidate.getIncorrectExamples();
            Document ruleDoc = new Document();
            // the rule id is stored as-is, without tokenization
            FieldType idFieldType = new FieldType();
            idFieldType.setStored(true);
            idFieldType.setTokenized(false);
            ruleDoc.add(new Field("ruleId", patternRule.getFullId(), idFieldType));
            for (IncorrectExample wrong : wrongExamples) {
                String plainText = wrong.getExample().replaceAll("</?marker>", "");
                FieldType textFieldType = new FieldType();
                textFieldType.setStored(true);
                textFieldType.setTokenized(true);
                textFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
                ruleDoc.add(new Field(FIELD_NAME, plainText, textFieldType));
                // no lowercase here, it would lowercase the input to the LT analysis, leading to wrong POS tags:
                ruleDoc.add(new Field(FIELD_NAME_LOWERCASE, plainText, textFieldType));
            }
            indexer.add(ruleDoc);
            indexedRules++;
        }
    }
    errorSearcher = new Searcher(directory);
    return indexedRules;
}
Also used : Field(org.apache.lucene.document.Field) PatternRule(org.languagetool.rules.patterns.PatternRule) PatternRule(org.languagetool.rules.patterns.PatternRule) Rule(org.languagetool.rules.Rule) IncorrectExample(org.languagetool.rules.IncorrectExample) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType)

Aggregations

IncorrectExample (org.languagetool.rules.IncorrectExample)12 Rule (org.languagetool.rules.Rule)8 JLanguageTool (org.languagetool.JLanguageTool)4 File (java.io.File)3 AbstractPatternRule (org.languagetool.rules.patterns.AbstractPatternRule)3 PatternRule (org.languagetool.rules.patterns.PatternRule)3 FileReader (java.io.FileReader)2 Test (org.junit.Test)2 CorrectExample (org.languagetool.rules.CorrectExample)2 DisambiguationPatternRule (org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule)2 ArrayList (java.util.ArrayList)1 Document (org.apache.lucene.document.Document)1 Field (org.apache.lucene.document.Field)1 FieldType (org.apache.lucene.document.FieldType)1 AnalyzedSentence (org.languagetool.AnalyzedSentence)1 AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings)1 MultiThreadedJLanguageTool (org.languagetool.MultiThreadedJLanguageTool)1 ChunkTag (org.languagetool.chunking.ChunkTag)1 RuleMatch (org.languagetool.rules.RuleMatch)1 SpellingCheckRule (org.languagetool.rules.spelling.SpellingCheckRule)1