Search in sources :

Example 76 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class CaseRule method isProbablyCity.

/**
 * Heuristic check whether the token at index {@code i} starts a two-part city name.
 *
 * <p>The token must be exactly one of the prefixes "Klein", "Groß" or "Neu", and it must be
 * followed by a token that is either untagged or carries a POS tag containing "EIG"
 * (presumably the proper-noun tag of the German tagset - confirm against the tagger).
 *
 * @param i      index of the candidate prefix token
 * @param tokens tokens of the sentence being checked
 * @return {@code true} if the prefix is followed by such a token, {@code false} otherwise
 *         (including when the prefix is the last token)
 */
private boolean isProbablyCity(int i, AnalyzedTokenReadings[] tokens) {
    String prefix = tokens[i].getToken();
    if (!"Klein".equals(prefix) && !"Groß".equals(prefix) && !"Neu".equals(prefix)) {
        return false;
    }
    int followingIndex = i + 1;
    if (followingIndex >= tokens.length) {
        // the prefix is the last token, so there is no name part after it
        return false;
    }
    AnalyzedTokenReadings following = tokens[followingIndex];
    if (following == null) {
        return false;
    }
    return !following.isTagged() || following.hasPartialPosTag("EIG");
}
Also used : AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 77 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class CaseRule method isPrevProbablyRelativePronoun.

/**
 * Checks whether the token before position {@code i} is probably a relative pronoun,
 * e.g. "Ein Kaninchen, das zaubern kann" with {@code i} pointing at "zaubern".
 * Used to avoid a false alarm in that situation.
 *
 * <p>The pattern recognised is: a token matching the POS tag regex
 * {@code SUB:...:SIN:NEU}, followed by ",", followed by "das", followed by token {@code i}.
 *
 * @param tokens tokens of the sentence being checked
 * @param i      index of the token under inspection
 * @return {@code true} if the three preceding tokens match the pattern; {@code false}
 *         otherwise, including when fewer than three tokens precede {@code i}
 */
private boolean isPrevProbablyRelativePronoun(AnalyzedTokenReadings[] tokens, int i) {
    if (i < 3) {
        return false;
    }
    AnalyzedTokenReadings pronoun = tokens[i - 1];
    AnalyzedTokenReadings comma = tokens[i - 2];
    AnalyzedTokenReadings noun = tokens[i - 3];
    return pronoun.getToken().equals("das")
            && comma.getToken().equals(",")
            && noun.matchesPosTagRegex("SUB:...:SIN:NEU");
}
Also used : AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 78 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class PatternRuleMatcher method createRuleMatch.

/**
 * Builds the {@link RuleMatch} for one match of this matcher's pattern rule.
 *
 * <p>The rule's message, short message and "suggestions out" message are expanded via
 * {@code formatMatches}; the character span is taken from the marker tokens (falling back to
 * the first/last matched token when a marker index is {@code -1}). If the rule has a filter,
 * the match is passed through it and the filter's result is returned.
 *
 * @param tokenPositions        per-element token counts/offsets as consumed by {@code formatMatches}
 * @param tokens                tokens of the sentence
 * @param firstMatchToken       index of the first matched token
 * @param lastMatchToken        index of the last matched token
 * @param firstMarkerMatchToken index of the first marked token, or {@code -1} for "same as first match"
 * @param lastMarkerMatchToken  index of the last marked token, or {@code -1} for "same as last match"
 * @return the rule match (possibly replaced by the rule filter's result), or {@code null} if the
 *         span is empty or the message contains both the "please spell me" and "mistake" markers
 * @throws IOException declared for the helpers called here
 */
@Nullable
private RuleMatch createRuleMatch(List<Integer> tokenPositions, AnalyzedTokenReadings[] tokens, int firstMatchToken, int lastMatchToken, int firstMarkerMatchToken, int lastMarkerMatchToken) throws IOException {
    PatternRule patternRule = (PatternRule) this.rule;
    String message = formatMatches(tokens, tokenPositions, firstMatchToken, patternRule.getMessage(), patternRule.getSuggestionMatches());
    String shortMessage = formatMatches(tokens, tokenPositions, firstMatchToken, patternRule.getShortMessage(), patternRule.getSuggestionMatches());
    String suggestionsOutMsg = formatMatches(tokens, tokenPositions, firstMatchToken, patternRule.getSuggestionsOutMsg(), patternRule.getSuggestionMatchesOutMsg());

    // Token offset from firstMatchToken to the token used for the uppercase decision.
    int startOffset = 0;
    if (patternRule.startPositionCorrection > 0) {
        int lastPositionIndex = Math.min(patternRule.startPositionCorrection, tokenPositions.size() - 1);
        for (int p = 0; p <= lastPositionIndex; p++) {
            startOffset += tokenPositions.get(p);
        }
        startOffset--;
    }

    int uncappedStart = firstMatchToken + startOffset;
    // TODO: hacky workaround, find a proper solution. See EnglishPatternRuleTest.testBug()
    // This is important when the reference points to a token with min="0", which has not been
    // matched... the subsequent match elements need to be renumbered, I guess, and that one
    // silently discarded
    int caseTokenIndex = Math.min(uncappedStart, tokens.length - 1);
    AnalyzedTokenReadings caseToken = tokens[caseTokenIndex];
    boolean startsWithUppercase = StringTools.startsWithUppercase(caseToken.getToken())
            && matchPreservesCase(patternRule.getSuggestionMatches(), patternRule.getMessage())
            && matchPreservesCase(patternRule.getSuggestionMatchesOutMsg(), patternRule.getSuggestionsOutMsg());
    int tokenAfterStart = uncappedStart + 1;
    if (caseToken.isSentenceStart() && tokens.length > tokenAfterStart) {
        // make uppercasing work also at sentence start:
        caseToken = tokens[tokenAfterStart];
        startsWithUppercase = StringTools.startsWithUppercase(caseToken.getToken());
    }

    int markerFirst = firstMarkerMatchToken == -1 ? firstMatchToken : firstMarkerMatchToken;
    int fromPos = tokens[markerFirst].getStartPos();
    // FIXME: this is fishy, assumes that comma should always come before whitespace:
    if (message.contains(SUGGESTION_START_TAG + ",") && markerFirst >= 1) {
        AnalyzedTokenReadings precedingToken = tokens[markerFirst - 1];
        fromPos = precedingToken.getStartPos() + precedingToken.getToken().length();
    }

    int markerLast = lastMarkerMatchToken == -1 ? lastMatchToken : lastMarkerMatchToken;
    int toPos = tokens[Math.min(markerLast, tokens.length - 1)].getEndPos();

    if (fromPos >= toPos) {
        // failed to create any rule match...
        return null;
    }
    // now do some spell-checking: no match if both markers are present in the message
    if (message.contains(PatternRuleHandler.PLEASE_SPELL_ME) && message.contains(MISTAKE)) {
        return null;
    }

    String clearMsg = message.replaceAll(PatternRuleHandler.PLEASE_SPELL_ME, "").replaceAll(MISTAKE, "");
    RuleMatch ruleMatch = new RuleMatch(patternRule, fromPos, toPos, clearMsg, shortMessage, startsWithUppercase, suggestionsOutMsg);
    if (patternRule.getFilter() == null) {
        return ruleMatch;
    }
    // The filter sees only the tokens covered by the pattern, not the whole sentence.
    RuleFilterEvaluator evaluator = new RuleFilterEvaluator(patternRule.getFilter());
    AnalyzedTokenReadings[] patternTokens = Arrays.copyOfRange(tokens, firstMatchToken, lastMatchToken + 1);
    return evaluator.runFilter(patternRule.getFilterArguments(), ruleMatch, patternTokens, tokenPositions);
}
Also used : RuleMatch(org.languagetool.rules.RuleMatch) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Nullable(org.jetbrains.annotations.Nullable)

Example 79 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class Unifier method addTokenToSequence.

/**
 * Adds {@code token} to the sequence: as an extra reading of the entry at {@code pos} if that
 * entry exists, otherwise as a new entry appended to the end of the list.
 *
 * @param tokenSequence sequence to modify in place
 * @param token         reading to add
 * @param pos           index of the entry the reading belongs to
 */
private void addTokenToSequence(List<AnalyzedTokenReadings> tokenSequence, AnalyzedToken token, int pos) {
    boolean entryExists = !tokenSequence.isEmpty() && tokenSequence.size() > pos;
    if (entryExists) {
        tokenSequence.get(pos).addReading(token);
        return;
    }
    // No entry at pos yet: append a new one (always at the end, regardless of pos).
    tokenSequence.add(new AnalyzedTokenReadings(token, 0));
}
Also used : AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 80 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class WordRepeatBeginningRule method match.

/**
 * Reports sentences that begin with the same word as the preceding sentence(s).
 *
 * <p>Only sentences with more than three non-whitespace tokens are considered; the word
 * inspected is the token at index 1 (index 0 is presumably the sentence-start marker - confirm).
 * A match is created when that word equals the remembered start of the previous considered
 * sentence, none of the tokens at indexes 1-3 is an exception, and either the word is an
 * adverb or the sentence before the previous one started with it as well.
 *
 * @param sentences the analyzed sentences of the text, in order
 * @return the matches found, with positions offset by the summed text length of the
 *         preceding sentences
 * @throws IOException declared by the overridden method
 */
@Override
public RuleMatch[] match(List<AnalyzedSentence> sentences) throws IOException {
    List<RuleMatch> found = new ArrayList<>();
    String previousStart = "";
    String secondPreviousStart = "";
    int sentenceOffset = 0;
    for (AnalyzedSentence sentence : sentences) {
        AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
        if (tokens.length > 3) {
            AnalyzedTokenReadings startReadings = tokens[1];
            String startWord = startReadings.getToken();
            // avoid "..." etc. to be matched: a single non-letter character is not a word
            boolean isWord = startWord.length() != 1 || Character.isLetter(startWord.charAt(0));
            boolean repeatsPreviousStart = isWord
                    && previousStart.equals(startWord)
                    && !isException(startWord)
                    && !isException(tokens[2].getToken())
                    && !isException(tokens[3].getToken());
            if (repeatsPreviousStart) {
                String shortMsg = "";
                if (isAdverb(startReadings)) {
                    shortMsg = messages.getString("desc_repetition_beginning_adv");
                } else if (secondPreviousStart.equals(startWord)) {
                    shortMsg = messages.getString("desc_repetition_beginning_word");
                }
                if (!shortMsg.isEmpty()) {
                    String msg = shortMsg + " " + messages.getString("desc_repetition_beginning_thesaurus");
                    int from = sentenceOffset + startReadings.getStartPos();
                    int to = from + startWord.length();
                    found.add(new RuleMatch(this, from, to, msg, shortMsg));
                }
            }
            // Short sentences (<= 3 tokens) do not update the remembered sentence starts.
            secondPreviousStart = previousStart;
            previousStart = startWord;
        }
        sentenceOffset += sentence.getText().length();
    }
    return toRuleMatchArray(found);
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence) ArrayList(java.util.ArrayList) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Aggregations

- AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 116
- AnalyzedToken (org.languagetool.AnalyzedToken): 48
- ArrayList (java.util.ArrayList): 47
- AnalyzedSentence (org.languagetool.AnalyzedSentence): 21
- Test (org.junit.Test): 16
- RuleMatch (org.languagetool.rules.RuleMatch): 14
- Matcher (java.util.regex.Matcher): 13
- IOException (java.io.IOException): 7
- Nullable (org.jetbrains.annotations.Nullable): 6
- JLanguageTool (org.languagetool.JLanguageTool): 6
- Pattern (java.util.regex.Pattern): 5
- ChunkTag (org.languagetool.chunking.ChunkTag): 5
- English (org.languagetool.language.English): 3
- TaggedWord (org.languagetool.tagging.TaggedWord): 3
- InputStream (java.io.InputStream): 2
- HashMap (java.util.HashMap): 2
- List (java.util.List): 2
- Scanner (java.util.Scanner): 2
- TreeSet (java.util.TreeSet): 2
- ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2