use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class SubjectVerbAgreementRule method getPluralMatchOrNull.
/**
 * Builds a match for the token at index {@code i} when {@code tokenStr} is contained in
 * {@code plural} and the preceding token is chunk-tagged {@code NPS} (but neither
 * {@code NPP} nor {@code PP}), subject to the exclusions checked below.
 *
 * @param tokens the tokens of the sentence being checked
 * @param i index of {@code token} in {@code tokens}; {@code tokens[i - 1]} is read, so the
 *          caller is expected to pass {@code i >= 1}
 * @param token the token whose start/end positions delimit the match
 * @param tokenStr the string looked up in {@code plural} and passed to {@code getSingularFor}
 * @return a match whose message suggests {@code getSingularFor(tokenStr)}, or {@code null}
 *         if any of the conditions is not met
 */
@Nullable
private RuleMatch getPluralMatchOrNull(AnalyzedTokenReadings[] tokens, int i, AnalyzedTokenReadings token, String tokenStr) {
  if (!plural.contains(tokenStr)) {
    return null;
  }
  AnalyzedTokenReadings previous = tokens[i - 1];
  List<ChunkTag> previousChunkTags = previous.getChunkTags();
  boolean taggedNpsOnly = previousChunkTags.contains(NPS)
      && !previousChunkTags.contains(NPP)
      && !previousChunkTags.contains(PP);
  if (!taggedNpsOnly || isCurrency(previous) || !prevChunkIsNominative(tokens, i - 1)) {
    return null;
  }
  if (hasUnknownTokenToTheLeft(tokens, i) || hasUnknownTokenToTheRight(tokens, i + 1)) {
    return null;
  }
  // e.g. "Die Zielgruppe sind Männer." - both are nominative, but 'Männer' is the subject
  if (isFollowedByNominativePlural(tokens, i + 1)) {
    return null;
  }
  String message = "Bitte prüfen, ob hier <suggestion>" + getSingularFor(tokenStr) + "</suggestion> stehen sollte.";
  return new RuleMatch(this, token.getStartPos(), token.getEndPos(), message);
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class UppercaseNounReadingFilter method acceptRuleMatch.
/**
 * Accepts the match only if the {@code token} argument, with its first character
 * uppercased, is tagged with a reading that contains {@code "SUB:"} and does not
 * contain {@code "ADJ"}.
 *
 * @param match the candidate match, returned unchanged when accepted
 * @param arguments filter arguments; must contain the key {@code "token"}
 * @param patternTokens not used by this filter
 * @return {@code match} if such a reading exists, otherwise {@code null}
 * @throws RuntimeException if {@code "token"} is missing, or wrapping an
 *         {@link IOException} raised while tagging
 */
@Nullable
@Override
public RuleMatch acceptRuleMatch(RuleMatch match, Map<String, String> arguments, AnalyzedTokenReadings[] patternTokens) {
  String token = arguments.get("token");
  if (token == null) {
    throw new RuntimeException("Set 'token' for filter " + UppercaseNounReadingFilter.class.getName() + " in rule " + match.getRule().getId());
  }
  try {
    List<AnalyzedTokenReadings> readingsList = tagger.tag(Collections.singletonList(StringTools.uppercaseFirstChar(token)));
    for (AnalyzedTokenReadings readings : readingsList) {
      boolean subTagged = readings.hasPartialPosTag("SUB:");
      if (subTagged && !readings.hasPartialPosTag("ADJ")) {
        // first qualifying reading is enough to keep the match
        return match;
      }
    }
    return null;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class VerbAgreementRule method match.
/**
 * Looks for disagreement between the pronouns "ich", "du", "er", "wir" and verb forms
 * in one sentence.
 *
 * <p>The non-whitespace tokens of the immunized sentence are scanned once, recording the
 * index of each pronoun and of verb readings per person/number. Because each position
 * variable is simply overwritten, only the LAST occurrence of each pronoun / verb kind
 * in the sentence is remembered. Afterwards, for each person/number, a match is added
 * either when an unambiguous verb form occurs without its pronoun anywhere in the
 * sentence, or when the pronoun occurs without a possibly-agreeing verb near it.
 *
 * @param sentence the sentence to check
 * @param pos passed through unchanged to {@code ruleMatchWrongVerb} and
 *            {@code ruleMatchWrongVerbSubject} (presumably the sentence's offset in the
 *            text - TODO confirm against those helpers)
 * @return the matches found; empty when the sentence has fewer than 4 tokens
 */
private List<RuleMatch> match(AnalyzedSentence sentence, int pos) {
// NOTE(review): never reassigned in this method, so every verbDoesMatchPersonAndNumber
// call below receives null for this argument.
AnalyzedTokenReadings finiteVerb = null;
List<RuleMatch> ruleMatches = new ArrayList<>();
AnalyzedTokenReadings[] tokens = getSentenceWithImmunization(sentence).getTokensWithoutWhitespace();
if (tokens.length < 4) {
// ignore one-word sentences (3 tokens: SENT_START, one word, SENT_END)
return ruleMatches;
}
// position of the pronouns:
int posIch = -1;
int posDu = -1;
int posEr = -1;
int posWir = -1;
// positions of verbs which do match in person and number, and do not match any other person nor number:
int posVer1Sin = -1;
int posVer2Sin = -1;
int posVer1Plu = -1;
/*int posVer2Plu = -1;*/
// positions of verbs which do match in person and number:
int posPossibleVer1Sin = -1;
int posPossibleVer2Sin = -1;
int posPossibleVer3Sin = -1;
int posPossibleVer1Plu = -1;
// Single pass over the tokens, starting at index 1 (index 0 is skipped).
for (int i = 1; i < tokens.length; ++i) {
if (tokens[i].isImmunized()) {
continue;
}
String strToken = tokens[i].getToken().toLowerCase();
// strip any "‚" characters before comparing against the pronouns
strToken = strToken.replace("‚", "");
switch(strToken) {
case "ich":
posIch = i;
break;
case "du":
posDu = i;
break;
case "er":
posEr = i;
break;
case "wir":
posWir = i;
break;
}
// Only tokens with a VER reading that start lowercase, or that sit at index 1, count as verbs here.
if (tokens[i].hasPartialPosTag("VER") && (Character.isLowerCase(tokens[i].getToken().charAt(0)) || i == 1)) {
// "bin" is not recorded when the previous token is in BIN_IGNORE or the next token starts with "Laden"
if (hasUnambiguouslyPersonAndNumber(tokens[i], "1", "SIN") && !(strToken.equals("bin") && (BIN_IGNORE.contains(tokens[i - 1].getToken()) || (tokens.length != i + 1 && tokens[i + 1].getToken().startsWith("Laden"))))) {
posVer1Sin = i;
} else if (hasUnambiguouslyPersonAndNumber(tokens[i], "2", "SIN") && !"Probst".equals(tokens[i].getToken())) {
posVer2Sin = i;
} else if (hasUnambiguouslyPersonAndNumber(tokens[i], "1", "PLU")) {
posVer1Plu = i;
// } else if (hasUnambiguouslyPersonAndNumber(tokens[i], "2", "PLU")) {
// posVer2Plu = i;
}
// These checks are independent of the chain above: one token may set several "possible" positions.
if (tokens[i].hasPartialPosTag(":1:SIN")) {
posPossibleVer1Sin = i;
}
if (tokens[i].hasPartialPosTag(":2:SIN")) {
posPossibleVer2Sin = i;
}
if (tokens[i].hasPartialPosTag(":3:SIN")) {
posPossibleVer3Sin = i;
}
if (tokens[i].hasPartialPosTag(":1:PLU")) {
posPossibleVer1Plu = i;
}
// if (tokens[i].hasPartialPosTag(":2:PLU"))
// posPossibleVer2Plu = i;
}
}
// --- 1st person singular ("ich") ---
if (posVer1Sin != -1 && posIch == -1 && !isQuotationMark(tokens[posVer1Sin - 1])) {
// 1st pers sg verb but no "ich"
ruleMatches.add(ruleMatchWrongVerb(tokens[posVer1Sin], pos));
} else if (// check whether verb next to "ich" is 1st pers sg
posIch > 0 && !isNear(posPossibleVer1Sin, posIch) && // ignore "lyrisches Ich" etc.
(tokens[posIch].getToken().equals("ich") || tokens[posIch].getStartPos() == 0) && !isQuotationMark(tokens[posIch - 1])) {
// prevent posIch+1 segfault
// (i.e. when the pronoun is the last token, the pronoun itself is passed as the "next" token)
int plus1 = ((posIch + 1) == tokens.length) ? 0 : +1;
BooleanAndFiniteVerb check = verbDoesMatchPersonAndNumber(tokens[posIch - 1], tokens[posIch + plus1], "1", "SIN", finiteVerb);
if (!check.verbDoesMatchPersonAndNumber) {
// NOTE(review): check.finiteVerb is dereferenced here - assumes it is non-null whenever
// the check fails; confirm against verbDoesMatchPersonAndNumber.
if (!nextButOneIsModal(tokens, posIch) && !"äußerst".equals(check.finiteVerb.getToken())) {
ruleMatches.add(ruleMatchWrongVerbSubject(tokens[posIch], check.finiteVerb, "1:SIN", pos));
}
}
}
// --- 2nd person singular ("du") ---
if (posVer2Sin != -1 && posDu == -1 && !isQuotationMark(tokens[posVer2Sin - 1])) {
ruleMatches.add(ruleMatchWrongVerb(tokens[posVer2Sin], pos));
} else if (posDu > 0 && !isNear(posPossibleVer2Sin, posDu) && !isQuotationMark(tokens[posDu - 1])) {
// same end-of-array guard as for "ich"
int plus1 = ((posDu + 1) == tokens.length) ? 0 : +1;
BooleanAndFiniteVerb check = verbDoesMatchPersonAndNumber(tokens[posDu - 1], tokens[posDu + plus1], "2", "SIN", finiteVerb);
if (!check.verbDoesMatchPersonAndNumber && // "Wenn ich du wäre"
!tokens[posDu + plus1].hasPartialPosTag("VER:1:SIN:KJ2") && // "dass du billige Klamotten..."
!tokens[posDu + plus1].hasPartialPosTag("ADJ:") && !tokens[posDu - 1].hasPartialPosTag("VER:1:SIN:KJ2")) {
if (!nextButOneIsModal(tokens, posDu)) {
ruleMatches.add(ruleMatchWrongVerbSubject(tokens[posDu], check.finiteVerb, "2:SIN", pos));
}
}
}
// --- 3rd person singular ("er") ---
// Unlike the other persons there is no "verb without pronoun" branch here.
if (posEr > 0 && !isNear(posPossibleVer3Sin, posEr) && !isQuotationMark(tokens[posEr - 1])) {
int plus1 = ((posEr + 1) == tokens.length) ? 0 : +1;
BooleanAndFiniteVerb check = verbDoesMatchPersonAndNumber(tokens[posEr - 1], tokens[posEr + plus1], "3", "SIN", finiteVerb);
if (!check.verbDoesMatchPersonAndNumber && !nextButOneIsModal(tokens, posEr) && !"äußerst".equals(check.finiteVerb.getToken()) && !"regen".equals(check.finiteVerb.getToken())) {
// "wo er regen Anteil nahm"
ruleMatches.add(ruleMatchWrongVerbSubject(tokens[posEr], check.finiteVerb, "3:SIN", pos));
}
}
// --- 1st person plural ("wir") ---
if (posVer1Plu != -1 && posWir == -1 && !isQuotationMark(tokens[posVer1Plu - 1])) {
ruleMatches.add(ruleMatchWrongVerb(tokens[posVer1Plu], pos));
} else if (posWir > 0 && !isNear(posPossibleVer1Plu, posWir) && !isQuotationMark(tokens[posWir - 1])) {
int plus1 = ((posWir + 1) == tokens.length) ? 0 : +1;
BooleanAndFiniteVerb check = verbDoesMatchPersonAndNumber(tokens[posWir - 1], tokens[posWir + plus1], "1", "PLU", finiteVerb);
if (!check.verbDoesMatchPersonAndNumber && !nextButOneIsModal(tokens, posWir)) {
ruleMatches.add(ruleMatchWrongVerbSubject(tokens[posWir], check.finiteVerb, "1:PLU", pos));
}
}
return ruleMatches;
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class GermanTagger method tag.
/**
 * Tags every word of {@code sentenceTokens} and returns one {@code AnalyzedTokenReadings}
 * per word, in input order.
 *
 * <p>Words the word tagger does not know are split with {@code compoundTokenizer}; if that
 * yields more than one part, the last part is tagged instead. Words for which nothing
 * is found receive the token produced by {@code getNoInfoToken}.
 *
 * @param sentenceTokens the words to tag
 * @param ignoreCase if {@code true}, the lowercased form is also looked up for words at
 *                   the start of the sentence (see the conditions in the loop)
 * @return the readings, each created with the running sum of the preceding words' lengths
 *         as its position
 * @throws IOException declared by this method; presumably raised by the tagging calls
 */
public List<AnalyzedTokenReadings> tag(List<String> sentenceTokens, boolean ignoreCase) throws IOException {
  initializeIfRequired();
  List<AnalyzedTokenReadings> result = new ArrayList<>();
  boolean firstWord = true;
  int offset = 0;
  for (String word : sentenceTokens) {
    List<TaggedWord> tagged = getWordTagger().tag(word);
    if (firstWord && tagged.isEmpty() && ignoreCase) {
      // e.g. "Das" -> "das" at start of sentence
      tagged = getWordTagger().tag(word.toLowerCase());
      firstWord = word.matches("^\\W?$");
    } else if (offset == 0 && ignoreCase) {
      // "Haben", "Sollen", "Können", "Gerade" etc. at start of sentence
      tagged.addAll(getWordTagger().tag(word.toLowerCase()));
    }

    List<AnalyzedToken> readings = new ArrayList<>();
    if (!tagged.isEmpty()) {
      readings.addAll(getAnalyzedTokens(tagged, word));
    } else if (StringTools.isEmpty(word.trim())) {
      readings.add(getNoInfoToken(word));
    } else {
      // word not known, try to decompose it and use the last part for POS tagging
      List<String> parts = compoundTokenizer.tokenize(word);
      if (parts.size() > 1) {
        // last part governs a word's POS
        String head = parts.get(parts.size() - 1);
        if (StringTools.startsWithUppercase(word)) {
          head = StringTools.uppercaseFirstChar(head);
        }
        List<TaggedWord> headTagged = getWordTagger().tag(head);
        if (headTagged.isEmpty()) {
          readings.add(getNoInfoToken(word));
        } else {
          readings.addAll(getAnalyzedTokens(headTagged, word, parts));
        }
      } else {
        readings.add(getNoInfoToken(word));
      }
    }

    result.add(new AnalyzedTokenReadings(readings.toArray(new AnalyzedToken[readings.size()]), offset));
    offset += word.length();
  }
  return result;
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class GermanChunkerTest method assertBasicChunks.
/**
 * Runs the basic chunker on the plain form of {@code input} and hands the result,
 * together with the expected chunks derived from {@code input}, to {@code assertChunks}.
 *
 * @param input the annotated test input; {@code getPlainInput} and
 *              {@code getExpectedChunks} are both derived from it
 * @throws Exception declared by this method; propagated from the calls it makes
 */
private void assertBasicChunks(String input) throws Exception {
  String plainInput = getPlainInput(input);
  AnalyzedTokenReadings[] tokens = lt.getAnalyzedSentence(plainInput).getTokensWithoutWhitespace();
  List<ChunkTaggedToken> actualChunks = chunker.getBasicChunks(Arrays.asList(tokens));
  assertChunks(input, plainInput, actualChunks, getExpectedChunks(input));
}
Aggregations