use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.
the class WordRepeatBeginningRule method match.
/**
 * Looks for consecutive sentences that begin with the same word.
 *
 * <p>Only sentences with more than three non-whitespace tokens are examined; for
 * those, the token at index 1 is treated as the sentence's first word (presumably
 * index 0 is the sentence-start token - confirm against the tokenizer). A match is
 * reported when that word equals the first word of the previously examined
 * sentence, none of the tokens at index 1 to 3 is an exception, and either the word
 * is an adverb or it also equals the first word of the sentence examined before
 * the previous one.
 *
 * @param sentences the analyzed sentences of the text, in order
 * @return the matches found, with positions relative to the whole text
 * @throws IOException declared by the overridden method
 */
@Override
public RuleMatch[] match(List<AnalyzedSentence> sentences) throws IOException {
  List<RuleMatch> found = new ArrayList<>();
  // First words of the two most recently examined sentences.
  String previousWord = "";
  String secondPreviousWord = "";
  // Running character offset of the current sentence within the full text.
  int offset = 0;
  for (AnalyzedSentence sentence : sentences) {
    AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
    if (tokens.length > 3) {
      AnalyzedTokenReadings firstWord = tokens[1];
      String word = firstWord.getToken();
      // avoid "..." etc. to be matched: a single non-letter character is not a word
      boolean looksLikeWord = word.length() != 1 || Character.isLetter(word.charAt(0));
      boolean repeatsPrevious = looksLikeWord
          && previousWord.equals(word)
          && !isException(word)
          && !isException(tokens[2].getToken())
          && !isException(tokens[3].getToken());
      if (repeatsPrevious) {
        // Adverbs are reported on the first repetition, other words only on the second.
        String shortMsg = isAdverb(firstWord)
            ? messages.getString("desc_repetition_beginning_adv")
            : (secondPreviousWord.equals(word)
                ? messages.getString("desc_repetition_beginning_word")
                : "");
        if (!shortMsg.isEmpty()) {
          String msg = shortMsg + " " + messages.getString("desc_repetition_beginning_thesaurus");
          int from = offset + firstWord.getStartPos();
          int to = from + word.length();
          found.add(new RuleMatch(this, from, to, msg, shortMsg));
        }
      }
      secondPreviousWord = previousWord;
      previousWord = word;
    }
    offset += sentence.getText().length();
  }
  return toRuleMatchArray(found);
}
use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.
the class FalseFriendsAsBitextLoaderTest method match.
/**
 * Analyzes the source and target sentence with their respective language tools
 * and runs the given bitext rule on the resulting pair.
 *
 * @param rule the bitext rule to apply
 * @param src the source-language sentence
 * @param trg the target-language sentence
 * @param srcLanguageTool tool used to analyze {@code src}
 * @param trgLanguageTool tool used to analyze {@code trg}
 * @return whatever {@code rule.match} returns for the analyzed pair
 * @throws IOException if analysis or matching fails with an I/O error
 */
private RuleMatch[] match(BitextPatternRule rule, String src, String trg, JLanguageTool srcLanguageTool, JLanguageTool trgLanguageTool) throws IOException {
  // Arguments are evaluated left to right, so the source is still analyzed first.
  return rule.match(
      srcLanguageTool.getAnalyzedSentence(src),
      trgLanguageTool.getAnalyzedSentence(trg));
}
use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.
the class PatternRuleTest method testBadSentences.
/**
 * Verifies the incorrect examples of a pattern rule: each example must carry
 * {@code <marker>...</marker>} markup, must be matched by the rule at exactly the
 * marked position, and applying any suggested replacement must not trigger the
 * rule again.
 *
 * <p>Rules that are not regex rules and not simple pattern rules (i.e. those built
 * from complex phrases) are handled leniently: an example without a match is
 * recorded in {@code complexRules} under the key {@code rule id + sentence}, and
 * the entry is set to {@code null} once a match for that key is seen.
 *
 * @param languageTool tool used to analyze and check the example sentences
 * @param allRulesLanguageTool tool used to look up all rules sharing the rule's id and sub-id
 * @param lang language under test, used in failure messages
 * @param complexRules bookkeeping map for complex-phrase rules, updated by this method
 * @param rule the rule whose incorrect examples are tested
 * @throws IOException if checking a sentence fails with an I/O error
 */
private void testBadSentences(JLanguageTool languageTool, JLanguageTool allRulesLanguageTool, Language lang, Map<String, AbstractPatternRule> complexRules, AbstractPatternRule rule) throws IOException {
  List<IncorrectExample> badSentences = rule.getIncorrectExamples();
  if (badSentences.isEmpty()) {
    fail("No incorrect examples found for rule " + rule.getFullId());
  }
  // necessary for XML Pattern rules containing <or>
  List<AbstractPatternRule> rules = allRulesLanguageTool.getPatternRulesByIdAndSubId(rule.getId(), rule.getSubId());
  for (IncorrectExample origBadExample : badSentences) {
    // enable indentation use
    String origBadSentence = origBadExample.getExample().replaceAll("[\\n\\t]+", "");
    List<String> expectedCorrections = origBadExample.getCorrections();
    int expectedMatchStart = origBadSentence.indexOf("<marker>");
    int closingMarkerIndex = origBadSentence.indexOf("</marker>");
    // Validate the raw indices: subtracting the marker length first would turn a
    // missing "</marker>" (-1) into -9 and let it slip past a "== -1" check.
    if (expectedMatchStart == -1 || closingMarkerIndex == -1 || closingMarkerIndex < expectedMatchStart) {
      fail(lang + ": No error position markup ('<marker>...</marker>') in bad example in rule " + rule.getFullId());
    }
    // The end position refers to the sentence with the opening marker removed.
    int expectedMatchEnd = closingMarkerIndex - "<marker>".length();
    String badSentence = cleanXML(origBadSentence);
    assertTrue(badSentence.trim().length() > 0);
    // necessary for XML Pattern rules containing <or>
    List<RuleMatch> matches = new ArrayList<>();
    for (Rule auxRule : rules) {
      matches.addAll(getMatches(auxRule, badSentence, languageTool));
    }
    if (rule instanceof RegexPatternRule || (rule instanceof PatternRule && !((PatternRule) rule).isWithComplexPhrase())) {
      if (matches.size() != 1) {
        AnalyzedSentence analyzedSentence = languageTool.getAnalyzedSentence(badSentence);
        StringBuilder sb = new StringBuilder("Analyzed token readings:");
        for (AnalyzedTokenReadings atr : analyzedSentence.getTokens()) {
          sb.append(" ").append(atr);
        }
        String info = "";
        if (rule instanceof RegexPatternRule) {
          info = "\nRegexp: " + ((RegexPatternRule) rule).getPattern().toString();
        }
        fail(lang + " rule " + rule.getFullId() + ":\n\"" + badSentence + "\"\n" + "Errors expected: 1\n" + "Errors found : " + matches.size() + "\n" + "Message: " + rule.getMessage() + "\n" + sb + "\nMatches: " + matches + info);
      }
      assertEquals(lang + ": Incorrect match position markup (start) for rule " + rule.getFullId() + ", sentence: " + badSentence, expectedMatchStart, matches.get(0).getFromPos());
      assertEquals(lang + ": Incorrect match position markup (end) for rule " + rule.getFullId() + ", sentence: " + badSentence, expectedMatchEnd, matches.get(0).getToPos());
      // make sure suggestion is what we expect it to be
      assertSuggestions(badSentence, lang, expectedCorrections, rule, matches);
      // make sure the suggested correction doesn't produce an error:
      if (matches.get(0).getSuggestedReplacements().size() > 0) {
        int fromPos = matches.get(0).getFromPos();
        int toPos = matches.get(0).getToPos();
        for (String replacement : matches.get(0).getSuggestedReplacements()) {
          String fixedSentence = badSentence.substring(0, fromPos) + replacement + badSentence.substring(toPos);
          // Keep the re-check result separate so 'matches' still refers to the
          // original match while its replacements are being iterated.
          List<RuleMatch> matchesAfterFix = getMatches(rule, fixedSentence, languageTool);
          if (!matchesAfterFix.isEmpty()) {
            fail("Incorrect input:\n" + " " + badSentence + "\nCorrected sentence:\n" + " " + fixedSentence + "\nBy Rule:\n" + " " + rule.getFullId() + "\nThe correction triggered an error itself:\n" + " " + matchesAfterFix.get(0) + "\n");
          }
        }
      }
    } else {
      // for multiple rules created with complex phrases
      matches = getMatches(rule, badSentence, languageTool);
      String complexRuleKey = rule.getId() + badSentence;
      if (matches.isEmpty() && !complexRules.containsKey(complexRuleKey)) {
        complexRules.put(complexRuleKey, rule);
      }
      if (!matches.isEmpty()) {
        complexRules.put(complexRuleKey, null);
        assertTrue(lang + ": Did expect one error in: \"" + badSentence + "\" (Rule: " + rule.getFullId() + "), got " + matches.size(), matches.size() == 1);
        assertEquals(lang + ": Incorrect match position markup (start) for rule " + rule.getFullId(), expectedMatchStart, matches.get(0).getFromPos());
        assertEquals(lang + ": Incorrect match position markup (end) for rule " + rule.getFullId(), expectedMatchEnd, matches.get(0).getToPos());
        assertSuggestions(badSentence, lang, expectedCorrections, rule, matches);
        assertSuggestionsDoNotCreateErrors(badSentence, languageTool, rule, matches);
      }
    }
    // check for overlapping rules
    /*matches = getMatches(rule, badSentence, languageTool);
    List<RuleMatch> matchesAllRules = allRulesLanguageTool.check(badSentence);
    for (RuleMatch match : matchesAllRules) {
      if (!match.getRule().getId().equals(rule.getId()) && !matches.isEmpty()
          && rangeIsOverlapping(matches.get(0).getFromPos(), matches.get(0).getToPos(), match.getFromPos(), match.getToPos()))
        System.err.println("WARN: " + lang.getShortCode() + ": '" + badSentence + "' in "
                + rule.getId() + " also matched " + match.getRule().getId());
    }*/
  }
}
use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.
the class PatternRuleTest method match.
/**
 * Tells whether the rule produces at least one match for the sentence.
 *
 * @param rule the rule to apply
 * @param sentence the plain-text sentence to analyze
 * @param languageTool tool used to analyze the sentence
 * @return {@code true} if {@code rule.match} returns a non-empty array
 * @throws IOException if analysis or matching fails with an I/O error
 */
private boolean match(Rule rule, String sentence, JLanguageTool languageTool) throws IOException {
  return rule.match(languageTool.getAnalyzedSentence(sentence)).length != 0;
}
use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.
the class TestFrenchDisambiguator method disambiguate.
/**
 * Applies every rule from the classpath resource {@code /disambiguator.xml} to the
 * sentence, feeding the output of each rule into the next.
 *
 * <p>The rule file is loaded anew on each call. Any exception raised while loading
 * or applying the rules is wrapped in a {@link RuntimeException}.
 *
 * @param input the sentence to disambiguate
 * @return the sentence after all rules have been applied in order
 * @throws IOException declared by the overridden method
 */
@Override
public AnalyzedSentence disambiguate(AnalyzedSentence input) throws IOException {
  String rulesResource = "/disambiguator.xml";
  AnalyzedSentence result = input;
  try (InputStream rulesStream = getClass().getResourceAsStream(rulesResource)) {
    List<DisambiguationPatternRule> loadedRules = new DisambiguationRuleLoader().getRules(rulesStream);
    for (DisambiguationPatternRule loadedRule : loadedRules) {
      result = loadedRule.replace(result);
    }
  } catch (Exception e) {
    throw new RuntimeException("Problems with loading disambiguation file: " + rulesResource, e);
  }
  return result;
}
Aggregations