Use of org.languagetool.rules.IncorrectExample in project languagetool by languagetool-org.
Class UselessExampleFinder, method run.
/**
 * Loads the {@code grammar.xml} of the given language from a fixed local checkout,
 * passes every incorrect example of each active {@link PatternRule} to
 * {@code checkCorrections}, and finally prints two counters to stderr and all
 * XML lines to stdout.
 *
 * <p>NOTE(review): {@code uselessExampleCount}, {@code removedLinesCount} and the
 * contents of {@code xmlLines} are presumably updated by {@code checkCorrections};
 * that method is not visible here - confirm.
 *
 * @param lang the language whose grammar file and rules are inspected
 * @throws IOException if the grammar file cannot be read
 * @throws RuntimeException if the hard-coded base path does not exist
 */
private void run(Language lang) throws IOException {
  File basePath = new File("/lt/git/languagetool/languagetool-language-modules");
  if (!basePath.exists()) {
    throw new RuntimeException("basePath does not exist: " + basePath);
  }
  String langCode = lang.getShortCode();
  File xml = new File(basePath, "/" + langCode + "/src/main/resources/org/languagetool/rules/" + langCode + "/grammar.xml");
  List<String> xmlLines;
  // IOUtils.readLines(Reader) leaves the reader open, so close it here to avoid
  // leaking the file handle. The reader still uses the platform default charset,
  // exactly as before.
  try (FileReader reader = new FileReader(xml)) {
    xmlLines = IOUtils.readLines(reader);
  }
  JLanguageTool tool = new JLanguageTool(lang);
  for (Rule rule : tool.getAllActiveRules()) {
    // Only pattern rules are examined; everything else is skipped.
    if (!(rule instanceof PatternRule)) {
      continue;
    }
    List<CorrectExample> correctExamples = rule.getCorrectExamples();
    List<IncorrectExample> incorrectExamples = rule.getIncorrectExamples();
    for (IncorrectExample incorrectExample : incorrectExamples) {
      checkCorrections(rule, correctExamples, incorrectExample, xmlLines);
    }
  }
  // Statistics go to stderr so that stdout carries nothing but the XML lines.
  System.err.println("Useless examples: " + uselessExampleCount);
  System.err.println("Removed lines: " + removedLinesCount);
  for (String xmlLine : xmlLines) {
    System.out.println(xmlLine);
  }
}
Use of org.languagetool.rules.IncorrectExample in project languagetool by languagetool-org.
Class FalseFriendRuleHandler, method endElement.
/**
 * SAX callback for a closing tag. Dispatches on the qualified element name and
 * finishes whatever state was being collected for that element: a complete
 * false-friend rule, a token, a pattern, a translation, an example, a message
 * or a rule group. Element names not listed in the switch are ignored.
 *
 * @param namespaceURI namespace URI of the element (unused)
 * @param sName local name of the element (unused)
 * @param qName qualified name of the element that is being closed
 * @throws SAXException declared by the SAX handler contract
 */
@Override
public void endElement(String namespaceURI, String sName, String qName) throws SAXException {
  switch (qName) {
    case RULE: {
      // A rule is only created when the rule language matches the text language,
      // a translation into the user's mother tongue exists, and the rule language
      // is not the very same object as the mother tongue.
      boolean createRule = language.equalsConsiderVariantsIfSpecified(textLanguage)
          && translationLanguage != null
          && translationLanguage.equalsConsiderVariantsIfSpecified(motherTongue)
          && language != motherTongue
          && !translations.isEmpty();
      if (createRule) {
        String hintPattern = messages.getString("false_friend_hint");
        formatter.applyPattern(hintPattern);
        String joinedTokens = StringUtils.join(patternTokens, " ").replace('|', '/');
        Object[] hintArguments = {
          joinedTokens,
          messages.getString(textLanguage.getShortCode()),
          formatTranslations(translations),
          messages.getString(motherTongue.getShortCode())
        };
        String hintText = formatter.format(hintArguments);
        String ruleDescription = messages.getString("false_friend_desc") + " " + joinedTokens;
        String falseFriendLabel = messages.getString("false_friend");
        PatternRule falseFriendRule =
            new FalseFriendPatternRule(id, language, patternTokens, ruleDescription, hintText, falseFriendLabel);
        falseFriendRule.setCorrectExamples(correctExamples);
        falseFriendRule.setIncorrectExamples(incorrectExamples);
        falseFriendRule.setCategory(Categories.FALSE_FRIENDS.getCategory(messages));
        if (defaultOff) {
          falseFriendRule.setDefaultOff();
        }
        rules.add(falseFriendRule);
      }
      // The collected tokens are dropped whether or not a rule was created.
      if (patternTokens != null) {
        patternTokens.clear();
      }
      break;
    }
    case TOKEN: {
      finalizeTokens();
      break;
    }
    case PATTERN: {
      inPattern = false;
      break;
    }
    case TRANSLATION: {
      // currentTranslationLanguage can be null if the language is not supported
      if (currentTranslationLanguage != null) {
        if (currentTranslationLanguage.equalsConsiderVariantsIfSpecified(motherTongue)) {
          translations.add(translation);
        }
        if (currentTranslationLanguage.equalsConsiderVariantsIfSpecified(textLanguage)
            && language.equalsConsiderVariantsIfSpecified(motherTongue)) {
          suggestions.add(translation.toString());
        }
      }
      // Reset the per-translation state for the next <translation> element.
      translation = new StringBuilder();
      inTranslation = false;
      currentTranslationLanguage = null;
      break;
    }
    case EXAMPLE: {
      if (inCorrectExample) {
        correctExamples.add(new CorrectExample(correctExample.toString()));
      } else if (inIncorrectExample) {
        incorrectExamples.add(new IncorrectExample(incorrectExample.toString()));
      }
      // Reset the per-example state, regardless of which kind was just closed.
      inCorrectExample = false;
      inIncorrectExample = false;
      correctExample = new StringBuilder();
      incorrectExample = new StringBuilder();
      break;
    }
    case MESSAGE: {
      inMessage = false;
      break;
    }
    case RULEGROUP: {
      if (!suggestions.isEmpty()) {
        // Store a copy, because the shared suggestion list is cleared right after.
        List<String> suggestionsCopy = new ArrayList<>(suggestions);
        suggestionMap.put(id, suggestionsCopy);
        suggestions.clear();
      }
      inRuleGroup = false;
      break;
    }
  }
}
Use of org.languagetool.rules.IncorrectExample in project languagetool by languagetool-org.
Class BitextPatternRuleHandler, method setExample.
/**
 * Turns the example text collected so far into an {@link IncorrectExample} and
 * resets the three collecting buffers.
 *
 * <p>A correct example is wrapped in an {@code IncorrectExample} without
 * corrections. An incorrect example gets the {@code |}-separated corrections
 * attached, but only if the first correction is non-empty.
 *
 * @return the example that was built, or {@code null} if no example flag is set
 * @throws RuntimeException if a 'triggers_error' example is being processed;
 *         in that case the buffers are not reset
 */
private IncorrectExample setExample() {
  final IncorrectExample result;
  if (inCorrectExample) {
    result = new IncorrectExample(correctExample.toString());
  } else if (inIncorrectExample) {
    String[] correctionParts = exampleCorrection.toString().split("\\|");
    String sentence = incorrectExample.toString();
    boolean hasCorrections = correctionParts.length > 0 && correctionParts[0].length() > 0;
    result = hasCorrections
        ? new IncorrectExample(sentence, Arrays.asList(correctionParts))
        : new IncorrectExample(sentence);
  } else if (inErrorTriggerExample) {
    throw new RuntimeException("'triggers_error' is not supported for bitext XML");
  } else {
    result = null;
  }
  // Start with fresh buffers for the next example element.
  correctExample = new StringBuilder();
  incorrectExample = new StringBuilder();
  exampleCorrection = new StringBuilder();
  return result;
}
Use of org.languagetool.rules.IncorrectExample in project languagetool by languagetool-org.
Class ExampleSentenceProvider, method initExampleSentences.
/**
 * Collects the incorrect example sentences of all active, not-default-off
 * {@link AbstractPatternRule}s of the given language and stores them in
 * {@code languageToExamples} under that language.
 *
 * @param language the language whose rule examples are collected
 * @throws IOException declared by the method; not thrown directly by the code shown here
 */
private void initExampleSentences(Language language) throws IOException {
  JLanguageTool lt = new JLanguageTool(language);
  List<ExampleSentence> collected = new ArrayList<>();
  for (Rule activeRule : lt.getAllActiveRules()) {
    // Skip anything that is not a pattern rule or that is switched off by default.
    if (!(activeRule instanceof AbstractPatternRule) || activeRule.isDefaultOff()) {
      continue;
    }
    String ruleId = activeRule.getId();
    for (IncorrectExample wrongExample : activeRule.getIncorrectExamples()) {
      collected.add(new ExampleSentence(wrongExample.getExample(), ruleId));
    }
  }
  languageToExamples.put(language, collected);
}
Use of org.languagetool.rules.IncorrectExample in project languagetool by languagetool-org.
Class IndexerSearcherTest, method createIndex.
/**
 * Indexes the incorrect example sentences of every active, not-default-off
 * {@link PatternRule} of the given tool instance, one document per rule, and
 * then points {@code errorSearcher} at the resulting index.
 *
 * @param lt the tool whose language and active rules are used
 * @return the number of rules for which a document was added
 * @throws IOException if indexing or opening the searcher fails
 */
private int createIndex(JLanguageTool lt) throws IOException {
  int indexedRules = 0;
  try (Indexer indexer = new Indexer(directory, lt.getLanguage())) {
    for (Rule candidate : lt.getAllActiveRules()) {
      if (!(candidate instanceof PatternRule) || candidate.isDefaultOff()) {
        continue;
      }
      PatternRule patternRule = (PatternRule) candidate;
      Document ruleDoc = new Document();
      // The rule id is stored as-is (not tokenized).
      FieldType ruleIdType = new FieldType();
      ruleIdType.setStored(true);
      ruleIdType.setTokenized(false);
      ruleDoc.add(new Field("ruleId", patternRule.getFullId(), ruleIdType));
      for (IncorrectExample wrongExample : candidate.getIncorrectExamples()) {
        // Strip the <marker> tags so that only the plain sentence is indexed.
        String sentence = wrongExample.getExample().replaceAll("</?marker>", "");
        FieldType sentenceType = new FieldType();
        sentenceType.setStored(true);
        sentenceType.setTokenized(true);
        sentenceType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        ruleDoc.add(new Field(FIELD_NAME, sentence, sentenceType));
        // no lowercase here, it would lowercase the input to the LT analysis, leading to wrong POS tags:
        ruleDoc.add(new Field(FIELD_NAME_LOWERCASE, sentence, sentenceType));
      }
      indexer.add(ruleDoc);
      indexedRules++;
    }
  }
  // The searcher is opened only after the indexer has been closed.
  errorSearcher = new Searcher(directory);
  return indexedRules;
}
Aggregations