use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.
the class GermanTaggerTest method toSortedString.
/**
 * Returns a string representation like {@code toString()}, but sorts
 * the elements alphabetically.
 *
 * <p>The result is the token text followed by the distinct {@code toString()}
 * values of its readings, comma-separated inside square brackets.
 *
 * @param tokenReadings the readings to render
 * @return the token followed by its sorted, de-duplicated readings in brackets
 */
private String toSortedString(AnalyzedTokenReadings tokenReadings) {
  StringBuilder sb = new StringBuilder(tokenReadings.getToken());
  // A TreeSet sorts and de-duplicates on its own, so add() needs no contains() guard.
  Set<String> elements = new TreeSet<>();
  for (AnalyzedToken reading : tokenReadings) {
    elements.add(reading.toString());
  }
  sb.append('[').append(String.join(", ", elements)).append(']');
  return sb.toString();
}
use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.
the class GreekTagger method additionalTags.
/**
 * Builds additional readings for {@code word} from the lemmas returned by
 * the {@code tagger} field.
 *
 * @param word the word to look up
 * @param wordTagger not used by this implementation
 * @return one {@link AnalyzedToken} per lemma, in the order the lemmas were
 *         returned; empty if there are none
 */
@Override
protected List<AnalyzedToken> additionalTags(String word, WordTagger wordTagger) {
  List<AnalyzedToken> result = new ArrayList<>();
  for (Lemma entry : tagger.getLemma(word, false)) {
    // every reading keeps the surface form and takes tag and lemma from the entry
    result.add(new AnalyzedToken(word, entry.getTag(), entry.getLemma()));
  }
  return result;
}
use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.
the class AbstractEnglishSpellerRule method getIrregularFormsOrNull.
/**
 * Looks up the accepted irregular forms for a word that ends in a regular suffix.
 *
 * <p>If {@code word} ends with {@code wordSuffix}, each entry of {@code suffixes}
 * is tried in order: that many characters are cut off the end of the word to get
 * a candidate base form, the forms for {@code posTag} are synthesized from it, and
 * only forms that {@code speller1} does not consider misspelled are kept. The first
 * suffix that leaves at least one form wins.
 *
 * @param word the word to analyze
 * @param wordSuffix the ending {@code word} must have for any lookup to happen
 * @param suffixes the endings to strip from {@code word} to get candidate base forms
 * @param posTag the POS tag passed to the synthesizer
 * @param posName passed through to the returned {@code IrregularForms}
 * @param formName passed through to the returned {@code IrregularForms}
 * @return the irregular forms for the first suffix that produced any, or {@code null}
 * @throws RuntimeException wrapping an {@link IOException} thrown during synthesis
 */
@Nullable
private IrregularForms getIrregularFormsOrNull(String word, String wordSuffix, List<String> suffixes, String posTag, String posName, String formName) {
  // This check does not depend on the suffix being tried, so do it once up front.
  if (!word.endsWith(wordSuffix)) {
    return null;
  }
  try {
    for (String suffix : suffixes) {
      if (suffix.length() > word.length()) {
        // Stripping more characters than the word has would make substring() throw.
        continue;
      }
      String baseForm = word.substring(0, word.length() - suffix.length());
      String[] forms = synthesizer.synthesize(new AnalyzedToken(word, null, baseForm), posTag);
      List<String> result = new ArrayList<>();
      for (String form : forms) {
        if (!speller1.isMisspelled(form)) {
          // only accept suggestions that the spellchecker will accept
          result.add(form);
        }
      }
      // the internal dict might contain forms that the spell checker doesn't accept (e.g. 'criterions'),
      // but we trust the spell checker in this case:
      result.remove(word);
      // non-standard usage
      result.remove("badder");
      // non-standard usage
      result.remove("baddest");
      // can be removed after dict update
      result.remove("spake");
      if (!result.isEmpty()) {
        return new IrregularForms(baseForm, posName, formName, result);
      }
    }
    return null;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.
the class VerbAgreementRule method getVerbSuggestions.
/**
 * Synthesizes the forms of a verb that match the expected person and number.
 *
 * @param verb the readings of the verb; the first reading whose POS tag starts
 *             with {@code "VER:"} is used, or an empty token if there is none
 * @param expectedVerbPOS the expected person and number (person:number)
 * @param toUppercase true when the suggestions should be capitalized
 * @return the distinct matching forms, sorted
 * @throws RuntimeException wrapping an {@link IOException} thrown during synthesis
 */
private List<String> getVerbSuggestions(AnalyzedTokenReadings verb, String expectedVerbPOS, boolean toUppercase) {
  // find the first verb reading
  AnalyzedToken verbToken = new AnalyzedToken("", "", "");
  for (AnalyzedToken token : verb.getReadings()) {
    String posTag = token.getPOSTag();
    // a reading may have no POS tag, so guard before calling startsWith()
    if (posTag != null && posTag.startsWith("VER:")) {
      verbToken = token;
      break;
    }
  }
  try {
    String[] synthesized = language.getSynthesizer().synthesize(verbToken, "VER.*:" + expectedVerbPOS + ".*", true);
    // Capitalize before de-duplicating: forms that differ only in the case of
    // their first letter would otherwise end up as duplicates in the result.
    Set<String> suggestionSet = new HashSet<>();
    for (String form : synthesized) {
      suggestionSet.add(toUppercase ? StringTools.uppercaseFirstChar(form) : form);
    }
    List<String> suggestions = new ArrayList<>(suggestionSet);
    Collections.sort(suggestions);
    return suggestions;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.
the class GermanSynthesizer method getCompoundForms.
/**
 * Synthesizes forms of a compound by inflecting only its last part.
 *
 * <p>The token text is split with {@code splitter}. All parts except the last are
 * joined unchanged. The last part, with its first character uppercased, is
 * synthesized for {@code posTag}, and each resulting form is appended to the
 * joined prefix with its first character lowercased.
 *
 * @param token the token whose text is split into parts
 * @param posTag the POS tag (or POS tag regular expression) to synthesize for
 * @param posTagRegExp whether {@code posTag} is passed to the synthesizer as a regular expression
 * @return the distinct forms in the order they were synthesized; empty if the
 *         splitter returns no parts
 * @throws IOException if synthesis fails
 */
@NotNull
private String[] getCompoundForms(AnalyzedToken token, String posTag, boolean posTagRegExp) throws IOException {
  List<String> parts = splitter.tokenize(token.getToken());
  if (parts.isEmpty()) {
    // With no parts there is no last part to inflect; indexing at size() - 1 would throw.
    return new String[0];
  }
  int lastIndex = parts.size() - 1;
  String firstPart = String.join("", parts.subList(0, lastIndex));
  String lastPart = StringTools.uppercaseFirstChar(parts.get(lastIndex));
  AnalyzedToken lastPartToken = new AnalyzedToken(lastPart, posTag, lastPart);
  String[] lastPartForms;
  if (posTagRegExp) {
    lastPartForms = super.synthesize(lastPartToken, posTag, true);
  } else {
    lastPartForms = super.synthesize(lastPartToken, posTag);
  }
  // a LinkedHashSet avoids dupes while keeping the order of the synthesized forms
  Set<String> results = new LinkedHashSet<>();
  for (String form : lastPartForms) {
    results.add(firstPart + StringTools.lowercaseFirstChar(form));
  }
  return results.toArray(new String[0]);
}
Aggregations