use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class NumeralStressRule method match.
/**
 * Checks every non-whitespace token of the sentence against the {@code numeral}
 * pattern and reports those whose suffix stress does not agree with the number part.
 *
 * <p>A token is reported when exactly one of the following holds: the first captured
 * group matches {@code stressedNumber}, or the second captured group matches
 * {@code stressedSuffix}. The suggested replacement is the first group followed by
 * the {@code suffixMap} entry for the second group.
 *
 * @param sentence the analyzed sentence to inspect
 * @return the matches found, converted with {@code toRuleMatchArray}
 * @throws IOException declared by the overridden method
 */
@Override
public RuleMatch[] match(AnalyzedSentence sentence) throws IOException {
  List<RuleMatch> found = new ArrayList<>();
  for (AnalyzedTokenReadings reading : sentence.getTokensWithoutWhitespace()) {
    Matcher numeralMatcher = numeral.matcher(reading.getToken());
    if (!numeralMatcher.matches()) {
      // not a numeral-with-suffix token, nothing to check
      continue;
    }
    String numberPart = numeralMatcher.group(1);
    String ending = numeralMatcher.group(2);
    boolean stressExpected = stressedNumber.matcher(numberPart).matches();
    boolean stressPresent = stressedSuffix.matcher(ending).matches();
    if (stressExpected == stressPresent) {
      // stress on the suffix already agrees with what the number requires
      continue;
    }
    String correctedEnding = suffixMap.get(ending);
    String replacement = numberPart + correctedEnding;
    String message = "<suggestion>" + replacement + "</suggestion>";
    found.add(new RuleMatch(this, reading.getStartPos(), reading.getEndPos(), message, "Πρόβλημα ορθογραφίας"));
  }
  return toRuleMatchArray(found);
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class EnglishChunker method getAnalyzedTokenReadingsFor.
// Exact-position lookup only: returns the token reading whose span, measured in
// non-whitespace characters from the start of the list, is exactly [startPos, endPos).
// This is a trivial mapping - tokens that do not line up exactly are skipped. For
// example, the tokens of "I'll" ([I] ['ll] vs [I]['][ll]) cannot be mapped with this.
// Returns null when no token occupies exactly that span.
@Nullable
private AnalyzedTokenReadings getAnalyzedTokenReadingsFor(int startPos, int endPos, List<AnalyzedTokenReadings> tokenReadings) {
  int offset = 0;
  for (AnalyzedTokenReadings candidate : tokenReadings) {
    String text = candidate.getToken();
    // the OpenNLP result has no whitespace, so blank tokens must not advance the offset
    if (!text.trim().isEmpty()) {
      int candidateEnd = offset + text.length();
      if (offset == startPos && candidateEnd == endPos) {
        return candidate;
      }
      offset = candidateEnd;
    }
  }
  return null;
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class EnglishChunker method getTokensWithTokenReadings.
/**
 * Builds one {@code ChunkTaggedToken} per chunk tag, pairing the token at the same
 * index with its chunk tag and with the token reading found at the same position.
 *
 * <p>Positions are accumulated from the lengths of {@code tokens} alone, i.e. without
 * any whitespace between them. The reading is {@code null} when
 * {@code getAnalyzedTokenReadingsFor} finds no exact positional match.
 *
 * @param tokenReadings readings to search for positional matches
 * @param tokens token strings, indexed in parallel with {@code chunkTags}
 * @param chunkTags one chunk tag per token; drives the number of results
 * @return the combined tokens, in input order
 */
private List<ChunkTaggedToken> getTokensWithTokenReadings(List<AnalyzedTokenReadings> tokenReadings, String[] tokens, String[] chunkTags) {
  List<ChunkTaggedToken> tagged = new ArrayList<>();
  int offset = 0;
  for (int idx = 0; idx < chunkTags.length; idx++) {
    String tokenText = tokens[idx];
    int tokenEnd = offset + tokenText.length();
    AnalyzedTokenReadings matched = getAnalyzedTokenReadingsFor(offset, tokenEnd, tokenReadings);
    List<ChunkTag> tagList = Collections.singletonList(new ChunkTag(chunkTags[idx]));
    tagged.add(new ChunkTaggedToken(tokenText, tagList, matched));
    // the next token starts right where this one ended
    offset = tokenEnd;
  }
  return tagged;
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class SubjectVerbAgreementRule method prevChunkIsNominative.
/**
 * Walks backwards from {@code startPos} through the run of tokens carrying the
 * {@code NPS} or {@code NPP} chunk tag and reports whether one of them has a POS tag
 * containing "NOM".
 *
 * <p>The walk stops with {@code false} at the first token that has neither chunk tag.
 * The token at index 0 is never inspected.
 *
 * @param tokens the sentence tokens
 * @param startPos index of the token to start from, moving towards the sentence start
 * @return {@code true} if a token with a "NOM" POS tag is found before the chunk run ends
 */
boolean prevChunkIsNominative(AnalyzedTokenReadings[] tokens, int startPos) {
  int idx = startPos;
  while (idx > 0) {
    AnalyzedTokenReadings current = tokens[idx];
    List<ChunkTag> tags = current.getChunkTags();
    boolean inChunk = tags.contains(NPS) || tags.contains(NPP);
    if (!inChunk) {
      // left the NPS/NPP run without seeing a nominative token
      return false;
    }
    if (current.hasPartialPosTag("NOM")) {
      return true;
    }
    idx--;
  }
  return false;
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class GermanTagger method lookup.
/**
 * Tags the given word on its own and returns the resulting token readings, or
 * {@code null} if the first reading has no POS tag.
 *
 * @param word the single word to tag
 * @return the readings for {@code word}, or {@code null} when its first reading is untagged
 * @throws IOException if tagging fails
 */
@Nullable
public AnalyzedTokenReadings lookup(String word) throws IOException {
  AnalyzedTokenReadings readings = tag(Collections.singletonList(word), false).get(0);
  boolean untagged = readings.getAnalyzedToken(0).getPOSTag() == null;
  return untagged ? null : readings;
}
Aggregations