use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.
the class ConfusionRuleEvaluator method evaluate.
/**
 * Runs the confusion rule over every sentence once per evaluation factor and
 * updates the per-factor counters in {@code evalValues}.
 *
 * <p>When {@code isCorrect} is {@code true} the sentences are analyzed unchanged.
 * Otherwise the first case-insensitive, word-bounded occurrence of
 * {@code homophoneToken} in each sentence is replaced by {@code token} before
 * analysis, and a rule match on the modified sentence counts as a true positive.
 *
 * @param sentences      sentences to evaluate
 * @param isCorrect      whether the sentences are checked as-is (a rule match is then a false positive)
 * @param token          the word that is written into the sentence when {@code isCorrect} is {@code false}
 * @param homophoneToken the word that is searched for when {@code isCorrect} is {@code false}
 * @param evalFactors    factors to evaluate; each one is looked up in {@code evalValues}
 * @throws IOException declared by this method; propagated from the analysis or rule calls
 */
@SuppressWarnings("ConstantConditions")
private void evaluate(List<Sentence> sentences, boolean isCorrect, String token, String homophoneToken, List<Long> evalFactors) throws IOException {
  println("======================");
  printf("Starting evaluation on " + sentences.size() + " sentences with %s/%s:\n", token, homophoneToken);
  JLanguageTool lt = new JLanguageTool(language);
  // 'lt' is only used for sentence analysis below; the rule under test is invoked directly.
  // NOTE(review): presumably all rules are disabled so that nothing else interferes - confirm.
  List<Rule> allActiveRules = lt.getAllActiveRules();
  for (Rule activeRule : allActiveRules) {
    lt.disableRule(activeRule.getId());
  }
  // Neither value depends on the sentence, so compute them once.
  String textToken = isCorrect ? token : homophoneToken;
  // Quote the token: it is a literal word, not a regex. Without quoting, a token that
  // contains a regex metacharacter (e.g. '.') would match the wrong text or fail to compile.
  String tokenRegex = "(?i)\\b" + java.util.regex.Pattern.quote(textToken) + "\\b";
  for (Sentence sentence : sentences) {
    String plainText = sentence.getText();
    // Keep the sentence capitalized if the searched token is its very first word.
    String replacement = plainText.startsWith(textToken) ? StringTools.uppercaseFirstChar(token) : token;
    // quoteReplacement() keeps '$' and '\' in the replacement from being read as group references.
    String replacedTokenSentence = isCorrect
        ? plainText
        : plainText.replaceFirst(tokenRegex, java.util.regex.Matcher.quoteReplacement(replacement));
    AnalyzedSentence analyzedSentence = lt.getAnalyzedSentence(replacedTokenSentence);
    // Sentence with the affected token highlighted, used for log output only.
    String displayStr = plainText.replaceFirst(tokenRegex, java.util.regex.Matcher.quoteReplacement("**" + replacement + "**"));
    for (Long factor : evalFactors) {
      rule.setConfusionSet(new ConfusionSet(factor, homophoneToken, token));
      RuleMatch[] matches = rule.match(analyzedSentence);
      boolean consideredCorrect = matches.length == 0;
      if (consideredCorrect && isCorrect) {
        evalValues.get(factor).trueNegatives++;
      } else if (!consideredCorrect && isCorrect) {
        evalValues.get(factor).falsePositives++;
        println("false positive with factor " + factor + ": " + displayStr);
      } else if (consideredCorrect && !isCorrect) {
        //println("false negative: " + displayStr);
        evalValues.get(factor).falseNegatives++;
      } else {
        evalValues.get(factor).truePositives++;
        //System.out.println("true positive: " + displayStr);
      }
    }
  }
}
use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.
the class GermanUppercasePhraseFinder method isRelevant.
/**
 * Tells whether the first analyzed sentence of {@code term} consists of exactly two
 * non-whitespace tokens where the first has a POS tag containing {@code "ADJ:"} and
 * the second has one containing {@code "SUB:"}.
 *
 * @param lt   the tool used to analyze {@code term}
 * @param term the text to inspect
 * @return {@code true} only for the two-token pattern described above
 * @throws IOException declared by this method; propagated from the analysis call
 */
private static boolean isRelevant(JLanguageTool lt, String term) throws IOException {
  AnalyzedTokenReadings[] readings = lt.analyzeText(term).get(0).getTokensWithoutWhitespace();
  // Index 0 is the sentence start, so two real tokens mean a length of 1 + 2.
  boolean hasExactlyTwoTokens = readings.length == 1 + 2;
  return hasExactlyTwoTokens
      && readings[1].hasPartialPosTag("ADJ:")
      && readings[2].hasPartialPosTag("SUB:");
}
use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.
the class UkrainianDisambiguationRuleTest method testChunker.
/**
 * Disambiguates a short Ukrainian sentence with {@code chunker} and checks that the
 * readings of token 1 contain {@code "<adv>"} and those of token 4 contain {@code "</adv>"}.
 */
@Test
public void testChunker() throws Exception {
  JLanguageTool langTool = new JLanguageTool(new Ukrainian());
  AnalyzedSentence input = langTool.getAnalyzedSentence("Для годиться.");
  AnalyzedTokenReadings[] chunked = chunker.disambiguate(input).getTokens();

  String openingReadings = chunked[1].getReadings().toString();
  assertTrue(openingReadings.contains("<adv>"));

  String closingReadings = chunked[4].getReadings().toString();
  assertTrue(closingReadings.contains("</adv>"));
}
use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.
the class RussianWordCoherencyRuleTest method assertError.
/**
 * Asserts that a fresh {@code RussianWordCoherencyRule} reports exactly one match
 * for the given text.
 *
 * @param s the text to analyze and check
 * @throws IOException declared by this method; propagated from the rule's match call
 */
private void assertError(String s) throws IOException {
  RussianWordCoherencyRule coherencyRule = new RussianWordCoherencyRule(TestTools.getEnglishMessages());
  // The rule takes a list of sentences; wrap the single analyzed sentence.
  int matchCount = coherencyRule.match(Collections.singletonList(lt.getAnalyzedSentence(s))).length;
  assertEquals(1, matchCount);
}
use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.
the class UppercaseSentenceStartRule method match.
/**
 * Reports sentences whose first relevant token starts with a lowercase letter.
 *
 * <p>The checked token is normally the first token after the sentence start; it moves
 * to the second token when the first one matches {@code QUOTE_START}, and to the value
 * returned by {@code dutchSpecialCase} when that is non-null. Several conditions
 * (see the inline comments) suppress the error. Match positions are offset by the
 * accumulated text length of the preceding sentences.
 *
 * @param sentences the analyzed sentences, in text order
 * @return the matches found; processing stops at the first sentence with fewer than two tokens
 * @throws IOException declared by the overridden method
 */
@Override
public RuleMatch[] match(List<AnalyzedSentence> sentences) throws IOException {
  List<RuleMatch> found = new ArrayList<>();
  // Last token of the previously processed sentence, carried across iterations.
  String prevSentenceEnd = "";
  int offset = 0;
  for (AnalyzedSentence sentence : sentences) {
    AnalyzedTokenReadings[] tokens = getSentenceWithImmunization(sentence).getTokensWithoutWhitespace();
    if (tokens.length < 2) {
      return toRuleMatchArray(found);
    }

    // tokens[0] is SENT_START, so the first real token sits at index 1
    AnalyzedTokenReadings firstTokenObj = tokens[1];
    String firstToken = firstTokenObj.getToken();
    int matchTokenPos = 1;

    // ignore quote characters:
    String secondToken = null;
    if (tokens.length >= 3 && QUOTE_START.matcher(firstToken).matches()) {
      matchTokenPos = 2;
      secondToken = tokens[2].getToken();
    }

    // Pick the token to check: Dutch special case first, then the token after a quote, then the first token.
    String dutchToken = dutchSpecialCase(firstToken, secondToken, tokens);
    String checkToken;
    if (dutchToken != null) {
      checkToken = dutchToken;
      matchTokenPos = 3;
    } else if (secondToken != null) {
      checkToken = secondToken;
    } else {
      checkToken = firstToken;
    }

    int lastIdx = tokens.length - 1;
    String lastToken = tokens[lastIdx].getToken();
    if (WHITESPACE_OR_QUOTE.matcher(lastToken).matches()) {
      // ignore trailing whitespace or quote
      lastToken = tokens[lastIdx - 1].getToken();
    }

    boolean afterCommaOrSemicolon = prevSentenceEnd.equals(",") || prevSentenceEnd.equals(";");
    boolean noSentenceEnd = !(SENTENCE_END1.matcher(prevSentenceEnd).matches()
        || SENTENCE_END2.matcher(lastToken).matches());
    // Must happen after the two reads above and before the next iteration.
    prevSentenceEnd = lastToken;

    // allows enumeration with lowercase letters: a), iv., etc.
    boolean isEnumeration = false;
    if (matchTokenPos + 1 < tokens.length && NUMERALS_EN.matcher(tokens[matchTokenPos].getToken()).matches()) {
      String following = tokens[matchTokenPos + 1].getToken();
      isEnumeration = following.equals(".") || following.equals(")");
    }

    boolean isExempt = isUrl(checkToken) || isEMail(checkToken) || firstTokenObj.isImmunized();
    boolean preventError = afterCommaOrSemicolon || noSentenceEnd || isEnumeration || isExempt;

    if (!checkToken.isEmpty() && !preventError && Character.isLowerCase(checkToken.charAt(0))) {
      AnalyzedTokenReadings matchToken = tokens[matchTokenPos];
      RuleMatch ruleMatch = new RuleMatch(this, offset + matchToken.getStartPos(),
          offset + matchToken.getEndPos(), messages.getString("incorrect_case"));
      ruleMatch.setSuggestedReplacement(StringTools.uppercaseFirstChar(checkToken));
      found.add(ruleMatch);
    }
    offset += sentence.getText().length();
  }
  return toRuleMatchArray(found);
}
Aggregations