Search in sources:

Example 1 with AnalyzedSentence

use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.

the class ConfusionRuleEvaluator method evaluate.

/**
 * Runs the confusion rule over the given sentences for every evaluation factor and
 * updates the per-factor counters in {@code evalValues}.
 *
 * <p>All currently active rules of the freshly created {@code JLanguageTool} are disabled
 * first; only the {@code rule} field is then applied to each analyzed sentence.
 *
 * <p>When {@code isCorrect} is {@code true} the sentence text is analyzed unchanged.
 * Otherwise the first whole-word, case-insensitive occurrence of {@code homophoneToken}
 * is replaced by {@code token} before analysis. The tokens are matched literally:
 * regex metacharacters in them have no special meaning.
 *
 * @param sentences      sentences to evaluate
 * @param isCorrect      whether the sentences are to be treated as correct as given
 * @param token          the token of the confusion pair under evaluation
 * @param homophoneToken the other token of the confusion pair
 * @param evalFactors    factors for which a {@code ConfusionSet} is built and evaluated
 * @throws IOException declared by the analysis and rule-matching calls
 */
@SuppressWarnings("ConstantConditions")
private void evaluate(List<Sentence> sentences, boolean isCorrect, String token, String homophoneToken, List<Long> evalFactors) throws IOException {
    println("======================");
    printf("Starting evaluation on " + sentences.size() + " sentences with %s/%s:\n", token, homophoneToken);
    JLanguageTool lt = new JLanguageTool(language);
    List<Rule> allActiveRules = lt.getAllActiveRules();
    for (Rule activeRule : allActiveRules) {
        lt.disableRule(activeRule.getId());
    }
    // The token to look for does not depend on the sentence, so resolve it once.
    String textToken = isCorrect ? token : homophoneToken;
    // Quote the token so that regex metacharacters in it (e.g. '.') are matched literally.
    java.util.regex.Pattern tokenPattern =
            java.util.regex.Pattern.compile("(?i)\\b" + java.util.regex.Pattern.quote(textToken) + "\\b");
    for (Sentence sentence : sentences) {
        String plainText = sentence.getText();
        // Case-sensitive check: only a sentence that literally starts with textToken gets an uppercased replacement.
        String replacement = plainText.startsWith(textToken) ? StringTools.uppercaseFirstChar(token) : token;
        // quoteReplacement keeps '$' and '\' in the token from being read as group references/escapes.
        String replacedTokenSentence = isCorrect
                ? plainText
                : tokenPattern.matcher(plainText).replaceFirst(java.util.regex.Matcher.quoteReplacement(replacement));
        // Sentence with the evaluated token highlighted; identical for all factors, so built once.
        String displayStr = tokenPattern.matcher(plainText)
                .replaceFirst(java.util.regex.Matcher.quoteReplacement("**" + replacement + "**"));
        AnalyzedSentence analyzedSentence = lt.getAnalyzedSentence(replacedTokenSentence);
        for (Long factor : evalFactors) {
            rule.setConfusionSet(new ConfusionSet(factor, homophoneToken, token));
            RuleMatch[] matches = rule.match(analyzedSentence);
            boolean consideredCorrect = matches.length == 0;
            if (consideredCorrect && isCorrect) {
                evalValues.get(factor).trueNegatives++;
            } else if (!consideredCorrect && isCorrect) {
                evalValues.get(factor).falsePositives++;
                println("false positive with factor " + factor + ": " + displayStr);
            } else if (consideredCorrect && !isCorrect) {
                evalValues.get(factor).falseNegatives++;
            } else {
                evalValues.get(factor).truePositives++;
            }
        }
    }
}
Also used : ConfusionSet(org.languagetool.rules.ConfusionSet) AnalyzedSentence(org.languagetool.AnalyzedSentence) RuleMatch(org.languagetool.rules.RuleMatch) JLanguageTool(org.languagetool.JLanguageTool) ConfusionProbabilityRule(org.languagetool.rules.ngrams.ConfusionProbabilityRule) Rule(org.languagetool.rules.Rule) AnalyzedSentence(org.languagetool.AnalyzedSentence)

Example 2 with AnalyzedSentence

use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.

the class GermanUppercasePhraseFinder method isRelevant.

/**
 * Tells whether the first analyzed sentence of {@code term} consists of exactly two
 * non-whitespace tokens (plus the sentence-start token) where the first has a POS tag
 * containing {@code "ADJ:"} and the second one containing {@code "SUB:"}.
 *
 * @param lt   tool used to analyze the term
 * @param term text to analyze
 * @return {@code true} if the token count and both partial POS tags match
 * @throws IOException declared by the analysis call
 */
private static boolean isRelevant(JLanguageTool lt, String term) throws IOException {
    AnalyzedTokenReadings[] readings = lt.analyzeText(term).get(0).getTokensWithoutWhitespace();
    // index 0 is the sentence start, so two real tokens mean a length of 3
    return readings.length == 1 + 2
            && readings[1].hasPartialPosTag("ADJ:")
            && readings[2].hasPartialPosTag("SUB:");
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 3 with AnalyzedSentence

use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.

the class UkrainianDisambiguationRuleTest method testChunker.

/**
 * Disambiguates a short Ukrainian sentence with {@code chunker} and checks that the
 * readings of token 1 contain {@code <adv>} and those of token 4 contain {@code </adv>}.
 */
@Test
public void testChunker() throws Exception {
    JLanguageTool langTool = new JLanguageTool(new Ukrainian());
    AnalyzedSentence input = langTool.getAnalyzedSentence("Для  годиться.");
    AnalyzedTokenReadings[] readings = chunker.disambiguate(input).getTokens();
    String openingReadings = readings[1].getReadings().toString();
    assertTrue(openingReadings.contains("<adv>"));
    String closingReadings = readings[4].getReadings().toString();
    assertTrue(closingReadings.contains("</adv>"));
}
Also used : Ukrainian(org.languagetool.language.Ukrainian) AnalyzedSentence(org.languagetool.AnalyzedSentence) JLanguageTool(org.languagetool.JLanguageTool) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test) DisambiguationRuleTest(org.languagetool.tagging.disambiguation.rules.DisambiguationRuleTest)

Example 4 with AnalyzedSentence

use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.

the class RussianWordCoherencyRuleTest method assertError.

/**
 * Asserts that a fresh {@code RussianWordCoherencyRule} reports exactly one match
 * for the given text.
 *
 * @param s text to analyze and check
 * @throws IOException declared by the analysis and rule-matching calls
 */
private void assertError(String s) throws IOException {
    RussianWordCoherencyRule coherencyRule = new RussianWordCoherencyRule(TestTools.getEnglishMessages());
    List<AnalyzedSentence> singleSentence = Collections.singletonList(lt.getAnalyzedSentence(s));
    int matchCount = coherencyRule.match(singleSentence).length;
    assertEquals(1, matchCount);
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence)

Example 5 with AnalyzedSentence

use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.

the class UppercaseSentenceStartRule method match.

/**
 * Reports sentences whose first relevant token starts with a lowercase letter.
 *
 * <p>For each sentence the token to check is the first token after the sentence start,
 * or the token after it when the first one matches {@code QUOTE_START}, or the value
 * returned by {@code dutchSpecialCase} when that is non-null. No match is created when
 * any of the suppression conditions below holds. Processing stops at the first sentence
 * with fewer than two non-whitespace tokens.
 *
 * @param sentences analyzed sentences, in text order
 * @return the matches collected so far, converted with {@code toRuleMatchArray}
 * @throws IOException declared by the signature
 */
@Override
public RuleMatch[] match(List<AnalyzedSentence> sentences) throws IOException {
    List<RuleMatch> found = new ArrayList<>();
    // last token of the previous sentence; empty before the first sentence
    String previousEnd = "";
    // running character offset of the current sentence within the whole text
    int offset = 0;
    for (AnalyzedSentence sentence : sentences) {
        AnalyzedTokenReadings[] tokens = getSentenceWithImmunization(sentence).getTokensWithoutWhitespace();
        if (tokens.length < 2) {
            return toRuleMatchArray(found);
        }

        // tokens[0] is SENT_START, so the first real token sits at index 1
        AnalyzedTokenReadings firstReading = tokens[1];
        String firstToken = firstReading.getToken();
        int checkedIndex = 1;

        // skip a leading quote character
        String afterQuote = null;
        if (tokens.length >= 3 && QUOTE_START.matcher(firstToken).matches()) {
            checkedIndex = 2;
            afterQuote = tokens[checkedIndex].getToken();
        }

        String dutchToken = dutchSpecialCase(firstToken, afterQuote, tokens);
        if (dutchToken != null) {
            checkedIndex = 3;
        }

        // precedence: Dutch special case, then token after the quote, then first token
        String checkToken = dutchToken != null
                ? dutchToken
                : (afterQuote != null ? afterQuote : firstToken);

        int lastIndex = tokens.length - 1;
        String lastToken = tokens[lastIndex].getToken();
        if (WHITESPACE_OR_QUOTE.matcher(lastToken).matches()) {
            // ignore trailing whitespace or quote
            lastToken = tokens[lastIndex - 1].getToken();
        }

        // previous sentence ended with a comma or semicolon
        boolean suppress = previousEnd.equals(",") || previousEnd.equals(";");
        // neither the previous sentence end nor this sentence's last token matches the sentence-end patterns
        suppress |= (!SENTENCE_END1.matcher(previousEnd).matches() && !SENTENCE_END2.matcher(lastToken).matches());
        previousEnd = lastToken;

        // allows enumeration with lowercase letters: a), iv., etc.
        if (checkedIndex + 1 < tokens.length && NUMERALS_EN.matcher(tokens[checkedIndex].getToken()).matches()) {
            suppress |= (tokens[checkedIndex + 1].getToken().equals(".") || tokens[checkedIndex + 1].getToken().equals(")"));
        }

        suppress |= (isUrl(checkToken) || isEMail(checkToken) || firstReading.isImmunized());

        if (!suppress && checkToken.length() > 0 && Character.isLowerCase(checkToken.charAt(0))) {
            AnalyzedTokenReadings flagged = tokens[checkedIndex];
            RuleMatch ruleMatch = new RuleMatch(this, offset + flagged.getStartPos(), offset + flagged.getEndPos(), messages.getString("incorrect_case"));
            ruleMatch.setSuggestedReplacement(StringTools.uppercaseFirstChar(checkToken));
            found.add(ruleMatch);
        }

        offset += sentence.getText().length();
    }
    return toRuleMatchArray(found);
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence) ArrayList(java.util.ArrayList) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Aggregations

AnalyzedSentence (org.languagetool.AnalyzedSentence): 40; AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 21; ArrayList (java.util.ArrayList): 8; Test (org.junit.Test): 8; JLanguageTool (org.languagetool.JLanguageTool): 8; RuleMatch (org.languagetool.rules.RuleMatch): 8; Rule (org.languagetool.rules.Rule): 5; IOException (java.io.IOException): 4; DisambiguationPatternRule (org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule): 4; English (org.languagetool.language.English): 3; SpellingCheckRule (org.languagetool.rules.spelling.SpellingCheckRule): 3; AnalyzedToken (org.languagetool.AnalyzedToken): 2; Ukrainian (org.languagetool.language.Ukrainian): 2; InputStream (java.io.InputStream): 1; Document (org.apache.lucene.document.Document): 1; ConfusionSet (org.languagetool.rules.ConfusionSet): 1; CorrectExample (org.languagetool.rules.CorrectExample): 1; IncorrectExample (org.languagetool.rules.IncorrectExample): 1; BitextRule (org.languagetool.rules.bitext.BitextRule): 1; ConfusionProbabilityRule (org.languagetool.rules.ngrams.ConfusionProbabilityRule): 1