Search in sources :

Example 1 with IncludeRange

use of org.languagetool.rules.patterns.Match.IncludeRange in project languagetool by languagetool-org.

In the class MatchState, the method toFinalString:

/**
   * Produces every string that this match element yields for the current token.
   * <p>
   * The token text is first passed through the match's regular-expression replacement
   * (if one is set). When the match names a POS tag and a synthesizer is available, the
   * result is replaced by the synthesized word forms, in the sorted order of a
   * {@code TreeSet}. Afterwards each string is case-converted via {@code convertCase},
   * the skipped tokens are appended when the match asks for them, and, for matches that
   * check spelling, every string whose first tagger reading has neither lemma nor tag is
   * replaced by the empty string.
   *
   * @param lang language handed to {@code convertCase}; when non-null and the match checks
   *             spelling, its tagger is also used to filter the results
   * @return the formatted strings; individual entries may be empty
   * @throws IOException presumably raised by the synthesizer or tagger calls (their
   *                     declarations are not visible here)
   */
public final String[] toFinalString(Language lang) throws IOException {
    String[] results = new String[1];
    if (formattedToken != null) {
        final int numReadings = formattedToken.getReadingsLength();
        results[0] = formattedToken.getToken();
        final Pattern regex = match.getRegexMatch();
        final String replacement = match.getRegexReplace();
        if (regex != null) {
            results[0] = regex.matcher(results[0]).replaceAll(replacement);
        }
        final String posTag = match.getPosTag();
        if (posTag != null && synthesizer == null) {
            // Without a synthesizer the plain token text is used (any regex result is dropped).
            results[0] = formattedToken.getToken();
        } else if (posTag != null && match.isPostagRegexp()) {
            final TreeSet<String> forms = new TreeSet<>();
            // Tracks whether the most recent lemma-less reading forces the token to be
            // emitted as-is; the value left after the last reading decides about synthesis.
            boolean singleForm = false;
            for (int r = 0; r < numReadings; r++) {
                final AnalyzedToken reading = formattedToken.getAnalyzedToken(r);
                if (reading.getLemma() != null) {
                    continue;
                }
                final String tag = reading.getPOSTag();
                if (tag == null) {
                    forms.add(formattedToken.getToken());
                    singleForm = true;
                } else if (SENTENCE_START_TAGNAME.equals(tag) || SENTENCE_END_TAGNAME.equals(tag) || PARAGRAPH_END_TAGNAME.equals(tag)) {
                    if (!singleForm) {
                        forms.add(formattedToken.getToken());
                    }
                    singleForm = true;
                } else {
                    singleForm = false;
                }
            }
            final String targetPosTag = getTargetPosTag();
            if (!singleForm) {
                for (int r = 0; r < numReadings; r++) {
                    final String[] synthesized = synthesizer.synthesize(formattedToken.getAnalyzedToken(r), targetPosTag, true);
                    if (synthesized != null) {
                        for (String form : synthesized) {
                            forms.add(form);
                        }
                    }
                }
            }
            if (!forms.isEmpty()) {
                results = forms.toArray(new String[0]);
            } else {
                // Nothing could be produced: empty for spell-checked matches,
                // otherwise the original token wrapped in parentheses.
                results[0] = match.checksSpelling() ? "" : "(" + formattedToken.getToken() + ")";
            }
        } else if (posTag != null) {
            final TreeSet<String> forms = new TreeSet<>();
            for (int r = 0; r < numReadings; r++) {
                final String[] synthesized = synthesizer.synthesize(formattedToken.getAnalyzedToken(r), posTag);
                if (synthesized != null) {
                    for (String form : synthesized) {
                        forms.add(form);
                    }
                }
            }
            // May legitimately be an empty array when no form could be synthesized.
            results = forms.toArray(new String[0]);
        }
    }
    // The token whose casing is copied onto the results.
    final String caseReference = match.isStaticLemma()
            ? (matchedToken == null ? "" : matchedToken.getToken())
            : (formattedToken == null ? "" : formattedToken.getToken());
    for (int i = 0; i < results.length; i++) {
        results[i] = convertCase(results[i], caseReference, lang);
    }
    // TODO should case conversion happen before or after including skipped tokens?
    final IncludeRange includeSkipped = match.getIncludeSkipped();
    if (includeSkipped != IncludeRange.NONE && skippedTokens != null && !skippedTokens.isEmpty()) {
        final String[] withSkipped = new String[results.length];
        for (int i = 0; i < results.length; i++) {
            final String base = results[i] == null ? "" : results[i];
            withSkipped[i] = base + skippedTokens;
        }
        results = withSkipped;
    }
    if (match.checksSpelling() && lang != null) {
        // tagger-based speller: blank out every string the tagger knows nothing about
        final List<AnalyzedTokenReadings> tagged = lang.getTagger().tag(Arrays.asList(results));
        for (int i = 0; i < results.length; i++) {
            final AnalyzedToken firstReading = tagged.get(i).getAnalyzedToken(0);
            if (firstReading.getLemma() == null && firstReading.hasNoTag()) {
                results[i] = "";
            }
        }
    }
    return results;
}
Also used : IncludeRange(org.languagetool.rules.patterns.Match.IncludeRange) Pattern(java.util.regex.Pattern) AnalyzedToken(org.languagetool.AnalyzedToken) TreeSet(java.util.TreeSet) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 2 with IncludeRange

use of org.languagetool.rules.patterns.Match.IncludeRange in project languagetool by languagetool-org.

In the class MatchState, the method setToken:

/**
   * Sets the token to be formatted and, when the match is configured to include
   * skipped tokens, collects the text of the tokens that were skipped after it.
   * <p>
   * With {@code IncludeRange.FOLLOWING} the formatted token itself is discarded
   * (set to {@code null}) and no leading space is emitted before the first skipped token.
   *
   * @param tokens Array of tokens
   * @param index Index of the token to be formatted; an index past the end of the
   *              array falls back to the last token
   * @param next Offset, relative to {@code index}, of the next matched token: the skipped
   *             tokens are {@code tokens[index + 1]} up to, but not including,
   *             {@code tokens[index + next]}. Positions beyond the end of the array are ignored.
   */
public final void setToken(AnalyzedTokenReadings[] tokens, int index, int next) {
    int idx = index;
    if (index >= tokens.length) {
        // TODO: hacky workaround, find a proper solution. See EnglishPatternRuleTest.testBug()
        idx = tokens.length - 1;
    }
    setToken(tokens[idx]);
    IncludeRange includeSkipped = match.getIncludeSkipped();
    if (next > 1 && includeSkipped != IncludeRange.NONE) {
        StringBuilder sb = new StringBuilder();
        if (includeSkipped == IncludeRange.FOLLOWING) {
            formattedToken = null;
        }
        int firstSkipped = index + 1;
        // Stay inside the array: the index workaround above only protects the
        // setToken(tokens[idx]) call, so the skipped range must be bounded as well,
        // otherwise an out-of-range index/next pair would still throw here.
        int end = Math.min(index + next, tokens.length);
        for (int k = firstSkipped; k < end; k++) {
            boolean suppressLeadingSpace = k == firstSkipped && includeSkipped == IncludeRange.FOLLOWING;
            if (tokens[k].isWhitespaceBefore() && !suppressLeadingSpace) {
                sb.append(' ');
            }
            sb.append(tokens[k].getToken());
        }
        skippedTokens = sb.toString();
    } else {
        skippedTokens = "";
    }
}
Also used : IncludeRange(org.languagetool.rules.patterns.Match.IncludeRange)

Aggregations

IncludeRange (org.languagetool.rules.patterns.Match.IncludeRange)2 TreeSet (java.util.TreeSet)1 Pattern (java.util.regex.Pattern)1 AnalyzedToken (org.languagetool.AnalyzedToken)1 AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings)1