Use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
From the class AbstractSimpleReplaceRule, the method match:
@Override
public RuleMatch[] match(AnalyzedSentence sentence) {
  List<RuleMatch> ruleMatches = new ArrayList<>();
  AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
  for (AnalyzedTokenReadings tokenReadings : tokens) {
    // skip the artificial sentence-start token (POS tag SENT_START)
    if (JLanguageTool.SENTENCE_START_TAGNAME.equals(tokenReadings.getAnalyzedToken(0).getPOSTag())) {
      continue;
    }
    // skip immunized tokens and tokens the spell checker is told to ignore
    if (tokenReadings.isImmunized() || tokenReadings.isIgnoredBySpeller()) {
      continue;
    }
    String originalTokenStr = tokenReadings.getToken();
    if (ignoreTaggedWords && isTagged(tokenReadings)) {
      continue;
    }
    String tokenString = cleanup(originalTokenStr);
    // try the original word first, then the cleaned-up (lower-case) version
    List<String> possibleReplacements = getWrongWords().get(originalTokenStr);
    if (possibleReplacements == null) {
      possibleReplacements = getWrongWords().get(tokenString);
    }
    if (possibleReplacements == null && checkLemmas) {
      possibleReplacements = new ArrayList<>();
      List<String> lemmas = new ArrayList<>();
      for (AnalyzedToken analyzedToken : tokenReadings.getReadings()) {
        String lemma = analyzedToken.getLemma();
        if (lemma != null && getWrongWords().containsKey(lemma) && !lemmas.contains(lemma)) {
          lemmas.add(cleanup(lemma));
        }
      }
      for (String lemma : lemmas) {
        List<String> replacements = getWrongWords().get(lemma);
        if (replacements != null) {
          possibleReplacements.addAll(replacements);
        }
      }
      possibleReplacements = possibleReplacements.stream().distinct().collect(Collectors.toList());
    }
    if (possibleReplacements != null && possibleReplacements.size() > 0) {
      List<String> replacements = new ArrayList<>();
      replacements.addAll(possibleReplacements);
      if (replacements.contains(originalTokenStr)) {
        replacements.remove(originalTokenStr);
      }
      if (replacements.size() > 0) {
        RuleMatch potentialRuleMatch = createRuleMatch(tokenReadings, replacements);
        ruleMatches.add(potentialRuleMatch);
      }
    }
  }
  return toRuleMatchArray(ruleMatches);
}
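The lookup order in this method is: the token as written, then its cleaned-up form, and finally the lemmas of the token's readings, always against the map returned by getWrongWords(). The following standalone sketch illustrates that order with a plain Map; the class, the dictionary entries, and the findReplacements helper are invented for illustration and do not use the LanguageTool API.

import java.util.*;

// Hypothetical, simplified illustration of the lookup order used above;
// it does not extend AbstractSimpleReplaceRule or call the LanguageTool API.
public class SimpleReplaceLookupSketch {

  private static final Map<String, List<String>> WRONG_WORDS = Map.of(
      "recieve", List.of("receive"),
      "teh", List.of("the"));

  static List<String> findReplacements(String token, List<String> lemmas) {
    // 1) try the token exactly as written
    List<String> found = WRONG_WORDS.get(token);
    // 2) fall back to a cleaned-up (here: lower-case) form
    if (found == null) {
      found = WRONG_WORDS.get(token.toLowerCase(Locale.ROOT));
    }
    // 3) finally, collect replacements for any known lemma, without duplicates
    if (found == null) {
      Set<String> collected = new LinkedHashSet<>();
      for (String lemma : lemmas) {
        List<String> forLemma = WRONG_WORDS.get(lemma);
        if (forLemma != null) {
          collected.addAll(forLemma);
        }
      }
      found = new ArrayList<>(collected);
    }
    // never suggest the token itself
    List<String> replacements = new ArrayList<>(found);
    replacements.remove(token);
    return replacements;
  }

  public static void main(String[] args) {
    System.out.println(findReplacements("Recieve", List.of()));  // prints [receive]
  }
}

Using a LinkedHashSet in the lemma step keeps suggestions in first-seen order while removing duplicates, which is what the distinct() call achieves in the original method.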
Use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
From the class AbstractSpaceBeforeRule, the method match:
@Override
public final RuleMatch[] match(AnalyzedSentence sentence) {
  List<RuleMatch> ruleMatches = new ArrayList<>();
  AnalyzedTokenReadings[] tokens = sentence.getTokens();
  for (int i = 1; i < tokens.length; i++) {
    String token = tokens[i].getToken();
    Matcher matcher = getConjunctions().matcher(token);
    if (matcher.matches()) {
      String previousToken = tokens[i - 1].getToken();
      if (!(previousToken.equals(" ") || previousToken.equals("("))) {
        String replacement = " " + token;
        String msg = getSuggestion();
        int pos = tokens[i].getStartPos();
        RuleMatch potentialRuleMatch = new RuleMatch(this, pos, pos + token.length(), msg, getShort());
        potentialRuleMatch.setSuggestedReplacement(replacement);
        ruleMatches.add(potentialRuleMatch);
      }
    }
  }
  return toRuleMatchArray(ruleMatches);
}
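Note that this rule iterates over sentence.getTokens(), the token array that still contains whitespace tokens, which is why comparing the previous token with " " or "(" is enough to detect a missing space. A minimal standalone sketch of the same check, using plain strings and an invented conjunction pattern rather than the LanguageTool classes:

import java.util.*;
import java.util.regex.*;

// Hypothetical sketch: flag conjunctions that are not preceded by a space or "(".
public class SpaceBeforeSketch {

  // invented pattern; the real rule gets a language-specific one from getConjunctions()
  private static final Pattern CONJUNCTIONS = Pattern.compile("und|oder|aber");

  // tokens as produced by a whitespace-preserving tokenizer
  static List<Integer> findMissingSpaces(List<String> tokens) {
    List<Integer> positions = new ArrayList<>();
    for (int i = 1; i < tokens.size(); i++) {
      if (CONJUNCTIONS.matcher(tokens.get(i)).matches()) {
        String previous = tokens.get(i - 1);
        if (!previous.equals(" ") && !previous.equals("(")) {
          positions.add(i);  // conjunction glued to the previous token
        }
      }
    }
    return positions;
  }

  public static void main(String[] args) {
    // "Kaffee" directly followed by "oder" with no space token in between -> index 1 is flagged
    System.out.println(findMissingSpaces(List.of("Kaffee", "oder", " ", "Tee")));  // prints [1]
  }
}

In the real rule, the flagged span is computed from getStartPos() of the offending token and the suggested fix is simply the conjunction with a leading space.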
Use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
From the class AbstractWordCoherencyRule, the method match:
@Override
public RuleMatch[] match(List<AnalyzedSentence> sentences) {
  List<RuleMatch> ruleMatches = new ArrayList<>();
  // e.g. aufwändig -> RuleMatch of aufwendig
  Map<String, RuleMatch> shouldNotAppearWord = new HashMap<>();
  int pos = 0;
  for (AnalyzedSentence sentence : sentences) {
    AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
    for (AnalyzedTokenReadings tmpToken : tokens) {
      String token = tmpToken.getToken();
      List<AnalyzedToken> readings = tmpToken.getReadings();
      // TODO: in theory we need to care about the other readings, too (affects e.g. German "Schenke" as a noun):
      if (readings.size() > 0) {
        String baseform = readings.get(0).getLemma();
        if (baseform != null) {
          token = baseform;
        }
      }
      if (shouldNotAppearWord.containsKey(token)) {
        RuleMatch otherMatch = shouldNotAppearWord.get(token);
        String otherSpelling = otherMatch.getMessage();
        String msg = getMessage(token, otherSpelling);
        RuleMatch ruleMatch = new RuleMatch(this, pos + tmpToken.getStartPos(), pos + tmpToken.getEndPos(), msg);
        ruleMatch.setSuggestedReplacement(otherSpelling);
        ruleMatches.add(ruleMatch);
      } else if (getWordMap().containsKey(token)) {
        String shouldNotAppear = getWordMap().get(token);
        RuleMatch potentialRuleMatch = new RuleMatch(this, pos + tmpToken.getStartPos(), pos + tmpToken.getEndPos(), token);
        shouldNotAppearWord.put(shouldNotAppear, potentialRuleMatch);
      }
    }
    pos += sentence.getText().length();
  }
  return toRuleMatchArray(ruleMatches);
}
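The bookkeeping here is symmetric: the first spelling variant that occurs fixes the preferred form, and each later occurrence of the competing variant (looked up via getWordMap()) is flagged with the earlier spelling as the suggestion. A condensed standalone sketch of that logic, with an invented variant map and plain strings instead of AnalyzedTokenReadings:

import java.util.*;

// Hypothetical sketch of the coherency bookkeeping: whichever spelling
// variant appears first becomes the preferred one for the whole text.
public class WordCoherencySketch {

  // pairs of interchangeable spellings, listed in both directions
  private static final Map<String, String> VARIANTS = Map.of(
      "aufwendig", "aufwändig",
      "aufwändig", "aufwendig");

  static List<String> findIncoherentUses(List<String> words) {
    List<String> messages = new ArrayList<>();
    // maps a spelling that should no longer appear -> the spelling seen first
    Map<String, String> shouldNotAppear = new HashMap<>();
    for (String word : words) {
      if (shouldNotAppear.containsKey(word)) {
        messages.add("'" + word + "' conflicts with earlier '" + shouldNotAppear.get(word) + "'");
      } else if (VARIANTS.containsKey(word)) {
        shouldNotAppear.put(VARIANTS.get(word), word);
      }
    }
    return messages;
  }

  public static void main(String[] args) {
    System.out.println(findIncoherentUses(
        List.of("sehr", "aufwendig", "und", "aufwändig")));
  }
}

In the original method, the RuleMatch stored for the first occurrence carries that spelling as its message, which is why otherMatch.getMessage() can later serve directly as the suggested replacement.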
Use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
From the class GermanChunker, the method getBasicChunks:
List<ChunkTaggedToken> getBasicChunks(List<AnalyzedTokenReadings> tokenReadings) {
  List<ChunkTaggedToken> chunkTaggedTokens = new ArrayList<>();
  for (AnalyzedTokenReadings tokenReading : tokenReadings) {
    if (!tokenReading.isWhitespace()) {
      List<ChunkTag> chunkTags = Collections.singletonList(new ChunkTag("O"));
      ChunkTaggedToken chunkTaggedToken = new ChunkTaggedToken(tokenReading.getToken(), chunkTags, tokenReading);
      chunkTaggedTokens.add(chunkTaggedToken);
    }
  }
  if (debug) {
    System.out.println("=============== CHUNKER INPUT ===============");
    System.out.println(getDebugString(chunkTaggedTokens));
  }
  for (RegularExpressionWithPhraseType regex : REGEXES1) {
    apply(regex, chunkTaggedTokens);
  }
  return chunkTaggedTokens;
}
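The method follows a common chunking pattern: every non-whitespace token starts with the "outside" tag "O", and a series of pattern rules (REGEXES1) then overwrites those tags. The toy sketch below mirrors that two-step structure; the Token class and the single pattern rule are invented for illustration and are not part of GermanChunker:

import java.util.*;

// Toy sketch of the two-step chunking idea: start with every token tagged "O"
// and then let pattern passes overwrite that tag. The noun-phrase rule below
// is invented; it is not one of GermanChunker's REGEXES1 patterns.
public class BasicChunkSketch {

  static final class Token {
    final String text;
    String chunkTag = "O";
    Token(String text) { this.text = text; }
    @Override public String toString() { return text + "/" + chunkTag; }
  }

  static List<Token> getBasicChunks(List<String> words) {
    List<Token> tokens = new ArrayList<>();
    for (String word : words) {
      if (!word.isBlank()) {          // skip whitespace tokens
        tokens.add(new Token(word));  // every real token starts as "O"
      }
    }
    // one invented pattern pass: article followed by a capitalized word -> noun phrase
    for (int i = 0; i + 1 < tokens.size(); i++) {
      if (tokens.get(i).text.matches("(?i)der|die|das")
          && Character.isUpperCase(tokens.get(i + 1).text.charAt(0))) {
        tokens.get(i).chunkTag = "B-NP";
        tokens.get(i + 1).chunkTag = "I-NP";
      }
    }
    return tokens;
  }

  public static void main(String[] args) {
    // prints [das/B-NP, Haus/I-NP, brennt/O]
    System.out.println(getBasicChunks(List.of("das", " ", "Haus", "brennt")));
  }
}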
Use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
From the class AgreementRule, the method isRelevantPronoun:
private boolean isRelevantPronoun(AnalyzedTokenReadings[] tokens, int pos) {
  AnalyzedTokenReadings analyzedToken = tokens[pos];
  boolean relevantPronoun = GermanHelper.hasReadingOfType(analyzedToken, POSType.PRONOMEN);
  // avoid false alarms:
  String token = tokens[pos].getToken();
  if (pos > 0 && tokens[pos - 1].getToken().equalsIgnoreCase("vor") && token.equalsIgnoreCase("allem")) {
    relevantPronoun = false;
  } else if (PRONOUNS_TO_BE_IGNORED.contains(token.toLowerCase())) {
    relevantPronoun = false;
  }
  return relevantPronoun;
}
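The helper combines a part-of-speech test (does the token have a pronoun reading?) with two exceptions: the fixed phrase "vor allem" and a list of pronouns to ignore. A standalone sketch of that filter; the ignore list and the hasPronounReading flag are invented stand-ins for GermanHelper.hasReadingOfType and the real PRONOUNS_TO_BE_IGNORED set:

import java.util.*;

// Hypothetical sketch of the pronoun filter: a token counts as a relevant
// pronoun unless it is on an ignore list or forms the fixed phrase "vor allem".
public class RelevantPronounSketch {

  // invented subset for illustration only
  private static final Set<String> PRONOUNS_TO_BE_IGNORED = Set.of("man", "nichts", "wer");

  static boolean isRelevantPronoun(List<String> tokens, int pos, boolean hasPronounReading) {
    if (!hasPronounReading) {
      return false;
    }
    String token = tokens.get(pos);
    // "vor allem" is a fixed expression, not a pronoun worth checking
    if (pos > 0 && tokens.get(pos - 1).equalsIgnoreCase("vor") && token.equalsIgnoreCase("allem")) {
      return false;
    }
    return !PRONOUNS_TO_BE_IGNORED.contains(token.toLowerCase(Locale.GERMAN));
  }

  public static void main(String[] args) {
    List<String> tokens = List.of("vor", "allem", "er");
    System.out.println(isRelevantPronoun(tokens, 1, true));  // false ("vor allem")
    System.out.println(isRelevantPronoun(tokens, 2, true));  // true
  }
}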