Use of morfologik.stemming.Dictionary in the project languagetool by languagetool-org.
Class GermanTaggerEnhancer, method run.
/**
 * Prints candidate adjective readings for capitalized entries of the German dictionary that end in "er".
 *
 * <p>An entry is reported when {@code hasAdjReading} is false for it, {@code isEigenname} is true
 * for the entry without its last two characters, and it differs from the most recently reported
 * entry. For every element of {@code ADJ_READINGS} three tab-separated lines are written to
 * standard output, ending in ":DEF", ":IND" and ":SOL" respectively.
 *
 * @throws IOException if one of the calls made here reports an I/O failure
 */
private void run() throws IOException {
  final Dictionary germanDict = Dictionary.read(JLanguageTool.getDataBroker().getFromResourceDirAsUrl("/de/german.dict"));
  final DictionaryLookup lookup = new DictionaryLookup(germanDict);
  final Tagger germanTagger = new German().getTagger();
  final String[] suffixes = {":DEF", ":IND", ":SOL"};
  String lastReported = null;
  for (WordData entry : lookup) {
    final String form = entry.getWord().toString();
    // Only capitalized forms ending in "er" are candidates.
    if (!form.endsWith("er") || !StringTools.startsWithUppercase(form)) {
      continue;
    }
    if (hasAdjReading(germanTagger, form)) {
      continue;
    }
    // Strip the trailing "er" before asking whether the remainder is a proper noun.
    final String base = form.substring(0, form.length() - 2);
    if (!isEigenname(germanTagger, base) || form.equals(lastReported)) {
      continue;
    }
    final String linePrefix = form + "\t" + form + "\t";
    for (String reading : ADJ_READINGS) {
      for (String suffix : suffixes) {
        System.out.println(linePrefix + reading + suffix);
      }
    }
    lastReported = form;
  }
}
Use of morfologik.stemming.Dictionary in the project languagetool by languagetool-org.
Class GermanTaggerTest, method testDictionary.
/**
 * Iterates over every entry of the German dictionary and prints a warning to standard error
 * for each entry whose tag is {@code null} or empty. The method itself never fails on such entries.
 *
 * @throws IOException if one of the dictionary calls made here reports an I/O failure
 */
@Test
public void testDictionary() throws IOException {
  DictionaryLookup lookup = new DictionaryLookup(
      Dictionary.read(JLanguageTool.getDataBroker().getFromResourceDirAsUrl("/de/german.dict")));
  for (WordData entry : lookup) {
    CharSequence posTag = entry.getTag();
    boolean hasTag = posTag != null && posTag.length() > 0;
    if (!hasTag) {
      System.err.println("**** Warning: the word " + entry.getWord() + "/" + entry.getStem() + " lacks a POS tag in the dictionary.");
    }
  }
}
Use of morfologik.stemming.Dictionary in the project languagetool by languagetool-org.
Class MorfologikMultiSpeller, method getPlainTextDictSpellerOrNull.
/**
 * Creates a speller backed by the entries read from {@code plainTextReader}.
 *
 * @param plainTextReader source handed to {@code getLines}
 * @param dictPath path passed on to {@code getDictionary}
 * @param maxEditDistance edit distance passed to the {@code MorfologikSpeller} constructor
 * @return the new speller, or {@code null} when {@code getLines} returns an empty list
 * @throws IOException if one of the helper calls made here reports an I/O failure
 */
@Nullable
private MorfologikSpeller getPlainTextDictSpellerOrNull(BufferedReader plainTextReader, String dictPath, int maxEditDistance) throws IOException {
  List<byte[]> entries = getLines(plainTextReader);
  return entries.isEmpty()
      ? null
      : new MorfologikSpeller(getDictionary(entries, dictPath), maxEditDistance);
}
Use of morfologik.stemming.Dictionary in the project languagetool by languagetool-org.
Class TestTools, method testDictionary.
/**
 * Iterates over every entry of the tagger's dictionary and prints a warning to standard error
 * for each entry whose tag is {@code null} or empty.
 *
 * @param tagger supplies the dictionary path via {@code getDictionaryPath()}
 * @param language only used as part of the warning text
 * @throws IOException if one of the dictionary calls made here reports an I/O failure
 */
public static void testDictionary(BaseTagger tagger, Language language) throws IOException {
  DictionaryLookup entries = new DictionaryLookup(
      Dictionary.read(JLanguageTool.getDataBroker().getFromResourceDirAsUrl(tagger.getDictionaryPath())));
  for (WordData entry : entries) {
    CharSequence posTag = entry.getTag();
    if (posTag != null && posTag.length() != 0) {
      continue;
    }
    System.err.println("**** Warning: " + language + ": the word " + entry.getWord() + "/" + entry.getStem() + " lacks a POS tag in the dictionary.");
  }
}
Use of morfologik.stemming.Dictionary in the project languagetool by languagetool-org.
Class MorfologikMultiSpeller, method getDictionary.
/**
 * Returns the dictionary stored in {@code dicPathToDict} for {@code dictPath}, building and
 * storing it first when no entry exists yet.
 *
 * <p>On a miss the given lines are sorted with {@code FSABuilder.LEXICAL_ORDERING}, compiled
 * into an FSA, serialized with {@code CFSA2Serializer} and read back together with the
 * {@code .info} resource derived from {@code dictPath}.
 *
 * @param lines dictionary entries as byte arrays; on a cache miss this list is sorted in place
 * @param dictPath cache key; each {@code ".dict"} in it is replaced by {@code ".info"} to find
 *     the metadata resource
 * @return the cached or newly built dictionary
 * @throws IOException if building, serializing or reading the dictionary, or closing the
 *     metadata stream, fails
 */
private Dictionary getDictionary(List<byte[]> lines, String dictPath) throws IOException {
  Dictionary dictFromCache = dicPathToDict.get(dictPath);
  if (dictFromCache != null) {
    return dictFromCache;
  }
  // Creating the dictionary at runtime can easily take 50ms for spelling.txt files
  // that are ~50KB. We don't want that overhead for every check of a short sentence,
  // so we cache the result:
  Collections.sort(lines, FSABuilder.LEXICAL_ORDERING);
  FSA fsa = FSABuilder.build(lines);
  ByteArrayOutputStream fsaOutStream = new CFSA2Serializer().serialize(fsa, new ByteArrayOutputStream());
  ByteArrayInputStream fsaInStream = new ByteArrayInputStream(fsaOutStream.toByteArray());
  String infoFile = dictPath.replace(".dict", ".info");
  // The metadata stream is opened here, so it is closed here as well; previously it was
  // never closed. The type is fully qualified so that no additional import is required.
  try (java.io.InputStream infoStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(infoFile)) {
    Dictionary dict = Dictionary.read(fsaInStream, infoStream);
    dicPathToDict.put(dictPath, dict);
    return dict;
  }
}
Aggregations