Search in sources :

Example 46 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class AbstractSimpleReplaceRule method match.

/**
 * Checks every non-whitespace token of the sentence against the map returned by
 * {@code getWrongWords()} and creates one rule match per token that has at least
 * one replacement differing from the token itself.
 *
 * <p>Lookup order per token: the token text as written, then its {@code cleanup(...)}
 * form, and finally (only when {@code checkLemmas} is set) the lemmas of its readings.
 *
 * @param sentence the analyzed sentence to check
 * @return the rule matches found, converted via {@code toRuleMatchArray}
 */
@Override
public RuleMatch[] match(AnalyzedSentence sentence) {
    List<RuleMatch> matches = new ArrayList<>();
    for (AnalyzedTokenReadings current : sentence.getTokensWithoutWhitespace()) {
        // Skip the sentence-start marker as well as immunized and speller-ignored tokens.
        boolean sentenceStart = JLanguageTool.SENTENCE_START_TAGNAME.equals(current.getAnalyzedToken(0).getPOSTag());
        if (sentenceStart || current.isImmunized() || current.isIgnoredBySpeller()) {
            continue;
        }
        String surface = current.getToken();
        if (ignoreTaggedWords && isTagged(current)) {
            continue;
        }
        String cleaned = cleanup(surface);

        // First try the word exactly as written, then fall back to its cleaned-up form.
        List<String> candidates = getWrongWords().get(surface);
        if (candidates == null) {
            candidates = getWrongWords().get(cleaned);
        }

        // Last resort: look the token up through the lemmas of its readings.
        if (candidates == null && checkLemmas) {
            List<String> lemmaKeys = new ArrayList<>();
            for (AnalyzedToken reading : current.getReadings()) {
                String lemma = reading.getLemma();
                if (lemma == null || !getWrongWords().containsKey(lemma) || lemmaKeys.contains(lemma)) {
                    continue;
                }
                lemmaKeys.add(cleanup(lemma));
            }
            List<String> collected = new ArrayList<>();
            for (String key : lemmaKeys) {
                List<String> forLemma = getWrongWords().get(key);
                if (forLemma != null) {
                    collected.addAll(forLemma);
                }
            }
            // Several lemmas may contribute the same suggestion; keep each one once.
            candidates = collected.stream().distinct().collect(Collectors.toList());
        }

        if (candidates == null || candidates.isEmpty()) {
            continue;
        }
        // Work on a copy so the shared map entry is never modified.
        List<String> suggestions = new ArrayList<>(candidates);
        // Suggesting the word the user already typed would be pointless.
        suggestions.remove(surface);
        if (!suggestions.isEmpty()) {
            matches.add(createRuleMatch(current, suggestions));
        }
    }
    return toRuleMatchArray(matches);
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) ArrayList(java.util.ArrayList) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 47 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class AbstractSpaceBeforeRule method match.

/**
 * Reports every token that fully matches the pattern from {@code getConjunctions()}
 * and whose directly preceding token is neither a single space nor an opening
 * parenthesis. The suggested replacement is the token prefixed with one space.
 *
 * @param sentence the analyzed sentence to check (whitespace tokens included)
 * @return the rule matches found, converted via {@code toRuleMatchArray}
 */
@Override
public final RuleMatch[] match(AnalyzedSentence sentence) {
    AnalyzedTokenReadings[] tokens = sentence.getTokens();
    List<RuleMatch> found = new ArrayList<>();
    // Start at index 1 so that a preceding token always exists.
    for (int idx = 1; idx < tokens.length; idx++) {
        AnalyzedTokenReadings current = tokens[idx];
        String word = current.getToken();
        if (!getConjunctions().matcher(word).matches()) {
            continue;
        }
        String before = tokens[idx - 1].getToken();
        boolean properlySeparated = before.equals(" ") || before.equals("(");
        if (properlySeparated) {
            continue;
        }
        String message = getSuggestion();
        int start = current.getStartPos();
        RuleMatch spaceMissing = new RuleMatch(this, start, start + word.length(), message, getShort());
        spaceMissing.setSuggestedReplacement(" " + word);
        found.add(spaceMissing);
    }
    return toRuleMatchArray(found);
}
Also used : Matcher(java.util.regex.Matcher) ArrayList(java.util.ArrayList) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 48 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class AbstractWordCoherencyRule method match.

/**
 * Checks a list of sentences for words whose alternative spelling (per
 * {@code getWordMap()}) has already been seen earlier in the text.
 *
 * <p>When a word from the map is seen, its counterpart is registered as "should not
 * appear"; any later occurrence of that counterpart produces a rule match suggesting
 * the spelling seen first. Tokens are compared by the lemma of their first reading
 * when one is available, otherwise by their surface text.
 *
 * @param sentences the analyzed sentences, in text order
 * @return the rule matches found, converted via {@code toRuleMatchArray}
 */
@Override
public RuleMatch[] match(List<AnalyzedSentence> sentences) {
    List<RuleMatch> result = new ArrayList<>();
    // Maps a spelling that must not show up anymore to a placeholder match whose
    // message holds the spelling seen first (e.g. aufwändig -> match of aufwendig).
    Map<String, RuleMatch> forbidden = new HashMap<>();
    // Sum of the text lengths of all sentences processed so far.
    int offset = 0;
    for (AnalyzedSentence sentence : sentences) {
        for (AnalyzedTokenReadings current : sentence.getTokensWithoutWhitespace()) {
            String key = current.getToken();
            List<AnalyzedToken> readings = current.getReadings();
            // TODO: in theory the other readings matter too (affects e.g. German "Schenke" as a noun)
            String lemma = readings.isEmpty() ? null : readings.get(0).getLemma();
            if (lemma != null) {
                key = lemma;
            }
            if (forbidden.containsKey(key)) {
                RuleMatch firstSeen = forbidden.get(key);
                String preferred = firstSeen.getMessage();
                String text = getMessage(key, preferred);
                RuleMatch incoherent = new RuleMatch(this, offset + current.getStartPos(), offset + current.getEndPos(), text);
                incoherent.setSuggestedReplacement(preferred);
                result.add(incoherent);
            } else if (getWordMap().containsKey(key)) {
                String counterpart = getWordMap().get(key);
                // The placeholder's message slot carries the spelling that was used.
                RuleMatch placeholder = new RuleMatch(this, offset + current.getStartPos(), offset + current.getEndPos(), key);
                forbidden.put(counterpart, placeholder);
            }
        }
        offset += sentence.getText().length();
    }
    return toRuleMatchArray(result);
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence) AnalyzedToken(org.languagetool.AnalyzedToken) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 49 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class GermanChunker method getBasicChunks.

/**
 * Wraps every non-whitespace token in a {@code ChunkTaggedToken} carrying the single
 * initial chunk tag "O", then applies each expression in {@code REGEXES1} to the
 * resulting list.
 *
 * @param tokenReadings the tokens to chunk; whitespace tokens are skipped
 * @return the list of chunk-tagged tokens after all {@code REGEXES1} have been applied
 */
List<ChunkTaggedToken> getBasicChunks(List<AnalyzedTokenReadings> tokenReadings) {
    List<ChunkTaggedToken> result = new ArrayList<>();
    for (AnalyzedTokenReadings readings : tokenReadings) {
        if (readings.isWhitespace()) {
            continue;
        }
        // Every token starts out with the "O" tag (presumably "outside any chunk" - confirm).
        List<ChunkTag> initialTags = Collections.singletonList(new ChunkTag("O"));
        result.add(new ChunkTaggedToken(readings.getToken(), initialTags, readings));
    }
    if (debug) {
        System.out.println("=============== CHUNKER INPUT ===============");
        System.out.println(getDebugString(result));
    }
    for (RegularExpressionWithPhraseType expression : REGEXES1) {
        apply(expression, result);
    }
    return result;
}
Also used : AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 50 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class AgreementRule method isRelevantPronoun.

/**
 * Tells whether the token at {@code pos} should be treated as a pronoun by this rule.
 *
 * <p>The token is not relevant when it is "allem" directly preceded by "vor"
 * (compared case-insensitively) or when its lower-cased form is contained in
 * {@code PRONOUNS_TO_BE_IGNORED}; both exclusions avoid false alarms. Otherwise the
 * result is whether the token has a reading of type {@code POSType.PRONOMEN}.
 *
 * @param tokens the tokens of the sentence
 * @param pos index of the token to inspect; must be a valid index into {@code tokens}
 * @return {@code true} if the token has a pronoun reading and is not excluded
 */
private boolean isRelevantPronoun(AnalyzedTokenReadings[] tokens, int pos) {
    AnalyzedTokenReadings current = tokens[pos];
    String token = current.getToken();
    // avoid false alarms: "vor allem" is a fixed expression
    if (pos > 0 && tokens[pos - 1].getToken().equalsIgnoreCase("vor") && token.equalsIgnoreCase("allem")) {
        return false;
    }
    // Lower-case with a fixed locale: with the JVM default locale set to e.g. Turkish,
    // "I" would become a dotless "ı" and words such as "Ich" would miss the ignore set.
    if (PRONOUNS_TO_BE_IGNORED.contains(token.toLowerCase(java.util.Locale.ROOT))) {
        return false;
    }
    return GermanHelper.hasReadingOfType(current, POSType.PRONOMEN);
}
Also used : AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Aggregations

- AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 116
- AnalyzedToken (org.languagetool.AnalyzedToken): 48
- ArrayList (java.util.ArrayList): 47
- AnalyzedSentence (org.languagetool.AnalyzedSentence): 21
- Test (org.junit.Test): 16
- RuleMatch (org.languagetool.rules.RuleMatch): 14
- Matcher (java.util.regex.Matcher): 13
- IOException (java.io.IOException): 7
- Nullable (org.jetbrains.annotations.Nullable): 6
- JLanguageTool (org.languagetool.JLanguageTool): 6
- Pattern (java.util.regex.Pattern): 5
- ChunkTag (org.languagetool.chunking.ChunkTag): 5
- English (org.languagetool.language.English): 3
- TaggedWord (org.languagetool.tagging.TaggedWord): 3
- InputStream (java.io.InputStream): 2
- HashMap (java.util.HashMap): 2
- List (java.util.List): 2
- Scanner (java.util.Scanner): 2
- TreeSet (java.util.TreeSet): 2
- ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2