Use of morfologik.stemming.WordData in project languagetool by languagetool-org.
Class TestTools, method testDictionary:
/**
 * Walks every entry of the tagger's dictionary and reports entries without a POS tag.
 *
 * <p>The dictionary is loaded from the resource directory using the path supplied by
 * {@code tagger.getDictionaryPath()}. For each entry whose tag is {@code null} or empty,
 * a warning naming the language, the word and its stem is printed to {@code System.err}.
 *
 * @param tagger   tagger whose dictionary path is read
 * @param language language included in the warning text
 * @throws IOException declared by the signature; presumably raised when the dictionary cannot be read
 */
public static void testDictionary(BaseTagger tagger, Language language) throws IOException {
  DictionaryLookup entries = new DictionaryLookup(
      Dictionary.read(JLanguageTool.getDataBroker().getFromResourceDirAsUrl(tagger.getDictionaryPath())));
  for (WordData entry : entries) {
    CharSequence posTag = entry.getTag();
    boolean tagged = posTag != null && posTag.length() != 0;
    if (tagged) {
      continue;
    }
    // Only untagged entries reach this point.
    System.err.println("**** Warning: " + language + ": the word " + entry.getWord() + "/" + entry.getStem() + " lacks a POS tag in the dictionary.");
  }
}
Use of morfologik.stemming.WordData in project languagetool by languagetool-org.
Class PolishSynthesizer, method getWordForms:
/**
 * Collects the word forms the synthesizer returns for the token's lemma and a POS tag.
 *
 * <p>The synthesizer is queried with {@code lemma + "|" + tag}. When {@code isNegated} is set,
 * the first match of {@code NEGATION_TAG} in the tag is replaced by
 * {@code POTENTIAL_NEGATION_TAG} before the lookup and each returned form is prefixed with
 * {@code "nie"}.
 *
 * <p>Both variants skip a {@code null} lookup result and entries whose stem is {@code null},
 * so neither a {@code NullPointerException} nor a bogus {@code "nienull"} form can be produced.
 *
 * @param token       token whose lemma is used as the lookup key
 * @param posTag      POS tag appended to the lemma in the lookup key
 * @param isNegated   whether the negated variant is requested
 * @param synthesizer stemmer that performs the lookup
 * @return the forms found, in lookup order; empty (never {@code null}) when there are none
 */
private List<String> getWordForms(final AnalyzedToken token, final String posTag, final boolean isNegated, final IStemmer synthesizer) {
  final String lookupTag = isNegated ? posTag.replaceFirst(NEGATION_TAG, POTENTIAL_NEGATION_TAG) : posTag;
  final String prefix = isNegated ? "nie" : "";
  final List<String> forms = new ArrayList<>();
  final List<WordData> wordForms = synthesizer.lookup(token.getLemma() + "|" + lookupTag);
  if (wordForms == null) {
    return forms;
  }
  for (WordData wd : wordForms) {
    final CharSequence stem = wd.getStem();
    // An entry without a stem carries no usable form.
    if (stem != null) {
      forms.add(prefix + stem);
    }
  }
  return forms;
}
Use of morfologik.stemming.WordData in project lucene-solr by apache.
Class MorfologikFilter, method popNextLemma:
/**
 * Takes the next entry from {@code lemmaList}, advances {@code lemmaListIndex}, and publishes
 * the entry's stem and tags through the term and tag attributes.
 *
 * <p>The term attribute is cleared and filled with the stem. If the entry has a tag, it is
 * split with {@code lemmaSplitter}; each piece is written into a {@code StringBuilder} held in
 * {@code tagsList} (existing builders are reset and reused, new ones are added only when the
 * list is too short). The tag attribute then receives the leading sub-list that holds exactly
 * the pieces of this entry. Without a tag, the tag attribute receives an empty list.
 */
private void popNextLemma() {
  // One tag (concatenated) per lemma.
  final WordData current = lemmaList.get(lemmaListIndex++);
  termAtt.setEmpty().append(current.getStem());

  final CharSequence rawTag = current.getTag();
  if (rawTag == null) {
    tagsAtt.setTags(Collections.<StringBuilder>emptyList());
    return;
  }

  final String[] pieces = lemmaSplitter.split(rawTag.toString());
  int slot = 0;
  for (String piece : pieces) {
    // Grow the builder pool only when this entry has more pieces than any seen so far.
    if (slot >= tagsList.size()) {
      tagsList.add(new StringBuilder());
    }
    final StringBuilder reused = tagsList.get(slot);
    reused.setLength(0);
    reused.append(piece);
    slot++;
  }
  tagsAtt.setTags(tagsList.subList(0, pieces.length));
}
Aggregations