Use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
Class CaseRule, method isProbablyCity.
/**
 * Heuristic check whether the token at position {@code i} starts a
 * multi-word city name such as "Neu ..." or "Groß ...".
 *
 * @param i index of the token to inspect
 * @param tokens the tokens of the sentence
 * @return {@code true} if the token is one of "Klein", "Groß" or "Neu" and it
 *         is followed by a token that is either untagged or carries a POS tag
 *         containing "EIG"; {@code false} otherwise
 */
private boolean isProbablyCity(int i, AnalyzedTokenReadings[] tokens) {
  String word = tokens[i].getToken();
  if (!"Klein".equals(word) && !"Groß".equals(word) && !"Neu".equals(word)) {
    // not one of the known city prefixes
    return false;
  }
  if (i >= tokens.length - 1) {
    // the prefix is the last token, so nothing follows it
    return false;
  }
  AnalyzedTokenReadings following = tokens[i + 1];
  if (following == null) {
    return false;
  }
  return !following.isTagged() || following.hasPartialPosTag("EIG");
}
Use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
Class CaseRule, method isPrevProbablyRelativePronoun.
/**
 * Detects the pattern "&lt;singular neuter noun&gt; , das" directly before
 * position {@code i}, e.g. "Ein Kaninchen, das zaubern kann". Used to avoid
 * a false alarm in such sentences.
 *
 * @param tokens the tokens of the sentence
 * @param i index of the token currently being checked
 * @return {@code true} if the three tokens before {@code i} are a token whose
 *         POS tag matches {@code SUB:...:SIN:NEU}, a comma, and "das"
 */
private boolean isPrevProbablyRelativePronoun(AnalyzedTokenReadings[] tokens, int i) {
  if (i < 3) {
    // fewer than three tokens precede position i
    return false;
  }
  return tokens[i - 1].getToken().equals("das")
      && tokens[i - 2].getToken().equals(",")
      && tokens[i - 3].matchesPosTagRegex("SUB:...:SIN:NEU");
}
Use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
Class PatternRuleMatcher, method createRuleMatch.
/**
 * Builds the {@link RuleMatch} for one successful pattern match, or returns
 * {@code null} if no match can be created.
 *
 * <p>The method formats the rule's message, short message and
 * "suggestions out" message, decides whether suggestions should start with
 * an uppercase letter, computes the character span of the match from the
 * marker tokens, and finally hands the match to the rule's filter if the
 * rule has one.
 *
 * @param tokenPositions per-pattern-element position values; passed to
 *        {@code formatMatches} and summed for the start position correction
 * @param tokens the tokens of the sentence being checked
 * @param firstMatchToken index into {@code tokens} of the first matched token
 * @param lastMatchToken index into {@code tokens} of the last matched token
 * @param firstMarkerMatchToken index of the first marked token, or -1 to
 *        fall back to {@code firstMatchToken}
 * @param lastMarkerMatchToken index of the last marked token, or -1 to
 *        fall back to {@code lastMatchToken}
 * @return the rule match (or whatever the rule's filter returns for it), or
 *         {@code null} if the span is empty or the message contains both the
 *         {@code PLEASE_SPELL_ME} and {@code MISTAKE} markers
 * @throws IOException declared by this method; presumably propagated from
 *         message formatting or the filter - TODO confirm
 */
@Nullable
private RuleMatch createRuleMatch(List<Integer> tokenPositions, AnalyzedTokenReadings[] tokens, int firstMatchToken, int lastMatchToken, int firstMarkerMatchToken, int lastMarkerMatchToken) throws IOException {
PatternRule rule = (PatternRule) this.rule;
String errMessage = formatMatches(tokens, tokenPositions, firstMatchToken, rule.getMessage(), rule.getSuggestionMatches());
String shortErrMessage = formatMatches(tokens, tokenPositions, firstMatchToken, rule.getShortMessage(), rule.getSuggestionMatches());
String suggestionsOutMsg = formatMatches(tokens, tokenPositions, firstMatchToken, rule.getSuggestionsOutMsg(), rule.getSuggestionMatchesOutMsg());
// Offset (in tokens) from firstMatchToken to the token used for the
// uppercase check; stays 0 unless the rule has a start position correction.
int correctedStPos = 0;
if (rule.startPositionCorrection > 0) {
// Sum the position values of elements 0..startPositionCorrection (clamped
// to the list size), then subtract one.
for (int l = 0; l <= Math.min(rule.startPositionCorrection, tokenPositions.size() - 1); l++) {
correctedStPos += tokenPositions.get(l);
}
correctedStPos--;
}
int idx = firstMatchToken + correctedStPos;
if (idx >= tokens.length) {
// TODO: hacky workaround, find a proper solution. See EnglishPatternRuleTest.testBug()
// This is important when the reference points to a token with min="0", which has not been
// matched... the subsequent match elements need to be renumbered, I guess, and that one
// silently discarded
idx = tokens.length - 1;
}
AnalyzedTokenReadings firstMatchTokenObj = tokens[idx];
// Uppercase only if the token itself is uppercase AND both message variants
// pass matchPreservesCase.
boolean startsWithUppercase = StringTools.startsWithUppercase(firstMatchTokenObj.getToken()) && matchPreservesCase(rule.getSuggestionMatches(), rule.getMessage()) && matchPreservesCase(rule.getSuggestionMatchesOutMsg(), rule.getSuggestionsOutMsg());
if (firstMatchTokenObj.isSentenceStart() && tokens.length > firstMatchToken + correctedStPos + 1) {
// make uppercasing work also at sentence start:
// (look at the token after the sentence-start token instead; note that the
// matchPreservesCase checks above are not re-applied on this path)
firstMatchTokenObj = tokens[firstMatchToken + correctedStPos + 1];
startsWithUppercase = StringTools.startsWithUppercase(firstMatchTokenObj.getToken());
}
// -1 means "no explicit marker": the marked span starts at the first matched token.
if (firstMarkerMatchToken == -1) {
firstMarkerMatchToken = firstMatchToken;
}
int fromPos = tokens[firstMarkerMatchToken].getStartPos();
// FIXME: this is fishy, assumes that comma should always come before whitespace:
// (when a suggestion starts with a comma, the span is moved back to begin
// right after the previous token's text)
if (errMessage.contains(SUGGESTION_START_TAG + ",") && firstMarkerMatchToken >= 1) {
fromPos = tokens[firstMarkerMatchToken - 1].getStartPos() + tokens[firstMarkerMatchToken - 1].getToken().length();
}
// -1 means "no explicit marker": the marked span ends at the last matched token.
if (lastMarkerMatchToken == -1) {
lastMarkerMatchToken = lastMatchToken;
}
// Clamp to the last token so an out-of-range marker index cannot throw here.
AnalyzedTokenReadings token = tokens[Math.min(lastMarkerMatchToken, tokens.length - 1)];
int toPos = token.getEndPos();
// Only a non-empty span can yield a match.
if (fromPos < toPos) {
//now do some spell-checking:
// No match is created when the message still contains BOTH the
// PLEASE_SPELL_ME and the MISTAKE marker.
// NOTE(review): presumably formatMatches leaves MISTAKE in place when a
// suggestion was rejected by spell-checking - confirm.
if (!(errMessage.contains(PatternRuleHandler.PLEASE_SPELL_ME) && errMessage.contains(MISTAKE))) {
// NOTE(review): String.replaceAll treats its first argument as a regular
// expression; this assumes neither constant contains regex metacharacters - confirm.
String clearMsg = errMessage.replaceAll(PatternRuleHandler.PLEASE_SPELL_ME, "").replaceAll(MISTAKE, "");
RuleMatch ruleMatch = new RuleMatch(rule, fromPos, toPos, clearMsg, shortErrMessage, startsWithUppercase, suggestionsOutMsg);
if (rule.getFilter() != null) {
RuleFilterEvaluator evaluator = new RuleFilterEvaluator(rule.getFilter());
// The filter sees the matched tokens only: firstMatchToken..lastMatchToken, inclusive.
AnalyzedTokenReadings[] patternTokens = Arrays.copyOfRange(tokens, firstMatchToken, lastMatchToken + 1);
// The filter's result is returned as-is in place of ruleMatch.
return evaluator.runFilter(rule.getFilterArguments(), ruleMatch, patternTokens, tokenPositions);
} else {
return ruleMatch;
}
}
}
// failed to create any rule match...
return null;
}
Use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
Class Unifier, method addTokenToSequence.
/**
 * Adds {@code token} to the sequence: as an additional reading of the entry
 * at {@code pos} if such an entry exists, otherwise as a new entry appended
 * at the end of the list.
 *
 * @param tokenSequence the sequence to modify
 * @param token the reading to add
 * @param pos index of the entry the reading belongs to
 */
private void addTokenToSequence(List<AnalyzedTokenReadings> tokenSequence, AnalyzedToken token, int pos) {
  boolean entryExists = !tokenSequence.isEmpty() && pos < tokenSequence.size();
  if (entryExists) {
    tokenSequence.get(pos).addReading(token);
    return;
  }
  // no entry at pos yet: start a new one (second constructor argument is 0)
  tokenSequence.add(new AnalyzedTokenReadings(token, 0));
}
Use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
Class WordRepeatBeginningRule, method match.
/**
 * Reports sentences that begin with the same word as the preceding
 * sentence(s).
 *
 * <p>Only sentences with more than three non-whitespace tokens are
 * considered. The first word (token index 1) is compared with the first word
 * of the previous considered sentence. A match is reported if that word is an
 * adverb, or if it also equals the first word of the considered sentence
 * before that.
 *
 * @param sentences the sentences of the text, in order
 * @return the matches found, with positions relative to the whole text
 * @throws IOException declared by the overridden method
 */
@Override
public RuleMatch[] match(List<AnalyzedSentence> sentences) throws IOException {
  List<RuleMatch> ruleMatches = new ArrayList<>();
  String previousStart = "";
  String secondPreviousStart = "";
  int textOffset = 0;
  for (AnalyzedSentence sentence : sentences) {
    AnalyzedTokenReadings[] words = sentence.getTokensWithoutWhitespace();
    if (words.length > 3) {
      AnalyzedTokenReadings firstWord = words[1];
      String current = firstWord.getToken();
      // avoid "..." etc. to be matched: a single non-letter character is not a word
      boolean looksLikeWord = current.length() != 1 || Character.isLetter(current.charAt(0));
      boolean repeatsPrevious = looksLikeWord
          && previousStart.equals(current)
          && !isException(current)
          && !isException(words[2].getToken())
          && !isException(words[3].getToken());
      if (repeatsPrevious) {
        String shortMsg = "";
        if (isAdverb(firstWord)) {
          shortMsg = messages.getString("desc_repetition_beginning_adv");
        } else if (secondPreviousStart.equals(current)) {
          shortMsg = messages.getString("desc_repetition_beginning_word");
        }
        if (!shortMsg.isEmpty()) {
          String msg = shortMsg + " " + messages.getString("desc_repetition_beginning_thesaurus");
          int from = textOffset + firstWord.getStartPos();
          int to = from + current.length();
          ruleMatches.add(new RuleMatch(this, from, to, msg, shortMsg));
        }
      }
      // shift the history; sentences that are too short do not touch it
      secondPreviousStart = previousStart;
      previousStart = current;
    }
    textOffset += sentence.getText().length();
  }
  return toRuleMatchArray(ruleMatches);
}
Aggregations