Use of morfologik.stemming.IStemmer in project languagetool by languagetool-org.
The class CatalanTagger, method tag.
/**
 * Builds one {@link AnalyzedTokenReadings} per input token.
 *
 * <p>For every token the readings are gathered in this order: tags for the word
 * as written, tags for its lowercase form (only when the word is neither already
 * lowercase nor mixed-case), then the result of {@code additionalTags} when
 * nothing was found and the word is not mixed-case. A token that still has no
 * reading gets a single reading with no lemma and no POS tag.
 *
 * @param sentenceTokens the tokens of one sentence, in order
 * @return the readings, one entry per token, each carrying the running start
 *         offset computed from the lengths of the preceding tokens
 * @throws IOException as declared by the overridden method; presumably raised
 *         by the dictionary or tagger lookups
 */
@Override
public List<AnalyzedTokenReadings> tag(final List<String> sentenceTokens) throws IOException {
  final List<AnalyzedTokenReadings> result = new ArrayList<>();
  int offset = 0;
  final IStemmer stemmer = new DictionaryLookup(getDictionary());

  for (final String rawToken : sentenceTokens) {
    // Hack so that all rules and dictionary entries work with the typewriter
    // apostrophe: remember whether the token already had one, then map the
    // typographic apostrophe onto it. Single-character tokens are left alone.
    final boolean longerThanOneChar = rawToken.length() > 1;
    final boolean hasTypewriterApostrophe = longerThanOneChar && rawToken.contains("'");
    final String word = longerThanOneChar ? rawToken.replace("’", "'") : rawToken;

    final List<AnalyzedToken> readings = new ArrayList<>();
    final String lowercased = word.toLowerCase(conversionLocale);
    final boolean alreadyLowercase = word.equals(lowercased);
    final boolean mixedCase = StringTools.isMixedCase(word);

    // Readings for the word exactly as written.
    addTokens(asAnalyzedTokenListForTaggedWords(word, getWordTagger().tag(word)), readings);

    // Readings found under the lowercase form, still reported for the original word.
    if (!(alreadyLowercase || mixedCase)) {
      addTokens(asAnalyzedTokenListForTaggedWords(word, getWordTagger().tag(lowercased)), readings);
    }

    // Fallback: additional tagging with prefixes.
    if (readings.isEmpty() && !mixedCase) {
      addTokens(additionalTags(word, stemmer), readings);
    }

    // Last resort: an untagged reading so every token is represented.
    if (readings.isEmpty()) {
      readings.add(new AnalyzedToken(word, null, null));
    }

    final AnalyzedTokenReadings tokenReadings = new AnalyzedTokenReadings(readings, offset);
    if (hasTypewriterApostrophe) {
      final List<ChunkTag> apostropheMarker = new ArrayList<>();
      apostropheMarker.add(new ChunkTag("containsTypewriterApostrophe"));
      tokenReadings.setChunkTags(apostropheMarker);
    }
    result.add(tokenReadings);

    offset += word.length();
  }
  return result;
}
Use of morfologik.stemming.IStemmer in project languagetool by languagetool-org.
The class PolishSynthesizer, method synthesize.
/**
 * Produces the word forms of {@code token} for the requested POS tag.
 *
 * @param token the token to inflect; its own POS tag, when present, takes part
 *        in deciding whether the form is treated as negated
 * @param posTag the target POS tag
 * @return {@code null} when {@code posTag} is {@code null}; the result of the
 *         three-argument overload when {@code posTag} contains {@code '+'}
 *         after its first character; otherwise the forms returned by
 *         {@code getWordForms}, as an array
 * @throws IOException as declared by the overridden method; presumably raised
 *         by the dictionary lookup
 */
@Override
public final String[] synthesize(final AnalyzedToken token, final String posTag) throws IOException {
  if (posTag == null) {
    return null;
  }
  final IStemmer lookup = new DictionaryLookup(getDictionary());

  // Negation is only considered when the token itself carries a POS tag.
  // All index checks are deliberately "> 0": a match at position 0 does not count.
  boolean negated = false;
  if (token.getPOSTag() != null) {
    if (posTag.indexOf(NEGATION_TAG) > 0) {
      // The requested tag asks for negation outright.
      negated = true;
    } else if (token.getPOSTag().indexOf(NEGATION_TAG) > 0) {
      // The token is negated; keep that unless the requested tag carries
      // COMP_TAG or SUP_TAG.
      negated = posTag.indexOf(COMP_TAG) <= 0 && posTag.indexOf(SUP_TAG) <= 0;
    }
  }

  // Tags containing '+' are handed to the three-argument overload.
  if (posTag.indexOf('+') > 0) {
    return synthesize(token, posTag, true);
  }

  final List<String> wordForms = getWordForms(token, posTag, negated, lookup);
  return wordForms.toArray(new String[0]);
}
Aggregations