use of morfologik.stemming.Dictionary in project languagetool by languagetool-org.
the class BaseSynthesizer method getDictionary.
/**
 * Provides the {@link Dictionary} backing this synthesizer, reading it on first use.
 * The dictionary file is the one configured through the
 * {@link #BaseSynthesizer(String, String) constructor}. Once read, the instance is kept
 * in the {@code dictionary} field and handed out again on subsequent calls.
 * @return the dictionary, either previously cached or freshly read
 * @throws IOException In case the dictionary cannot be loaded.
 */
protected Dictionary getDictionary() throws IOException {
  // Fast path: no locking when the dictionary has already been read.
  // NOTE(review): this lock-free read relies on the field being safely published
  // (e.g. declared volatile) -- confirm at the field declaration.
  Dictionary cached = this.dictionary;
  if (cached != null) {
    return cached;
  }
  synchronized (this) {
    // Re-check under the lock: another thread may have loaded it in the meantime.
    cached = this.dictionary;
    if (cached == null) {
      URL dictUrl = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(resourceFileName);
      cached = Dictionary.read(dictUrl);
      this.dictionary = cached;
    }
    return cached;
  }
}
use of morfologik.stemming.Dictionary in project languagetool by languagetool-org.
the class MorfologikGermanyGermanSpellerRuleTest method testCommonMisspellings.
/**
 * Manual helper test (ignored by default): reads the {@code de/hunspell/de_DE.dict}
 * resource, builds a {@link Speller} on top of it and passes a list of
 * misspelled German words, one by one, to {@code check(word, speller)}.
 * @throws IOException if the dictionary resource cannot be read
 */
@Test
@Ignore("help testing for https://github.com/morfologik/morfologik-stemming/issues/34")
public void testCommonMisspellings() throws IOException {
  URL dictUrl = JLanguageTool.getDataBroker().getFromResourceDirAsUrl("de/hunspell/de_DE.dict");
  // second argument is presumably the maximum edit distance -- see the Speller API
  Speller speller = new Speller(Dictionary.read(dictUrl), 2);

  // tiny subset from https://de.wikipedia.org/wiki/Wikipedia:Liste_von_Tippfehlern
  String wikipediaTypos =
      "Abenteur Abhängikeit abzuschliessen agerufen Aktivitiäten Aktzeptanz "
      + "Algorhitmus Algoritmus aliiert allgmein Amtsitz änlich Anstoss atakieren begrüsst Bezeichnug chinesiche "
      + "dannach Frima Fahrad Gebaüde gesammt Schrifsteller seperat Septmber Staddteil Rhytmen rhytmisch Maschiene "
      + "Lebensmittelgäschefte enstand großmutter Rytmus ";
  // from user feedback:
  String userFeedbackTypos =
      "Vorstelungsgespräch Heißhunge-Attakcen evntl. langwalig Selbstportät Erdgeshoss "
      + "kommmischeweise gegensatz Gesichte Suedkaukasus Englisch-sprachigige ";
  // from gutefrage.net:
  String forumTypos =
      "gerägelt Aufjedenfall ivh hällt daß muß woeder oderso anwalt";

  // Every group except the last ends with a blank, so the groups join into one
  // space-separated word list.
  String[] misspellings = (wikipediaTypos + userFeedbackTypos + forumTypos).split(" ");
  for (String misspelling : misspellings) {
    check(misspelling, speller);
  }
}
use of morfologik.stemming.Dictionary in project languagetool by languagetool-org.
the class GermanSpellerRuleTest method testMorfologikSpeller.
/**
 * Manual helper test (ignored by default) for investigating a potential Morfologik bug:
 * builds a two-word dictionary ("die", "ist") via {@code getDictionary(lines, info)},
 * creates a {@link Speller} on it and prints the replacements suggested for "is".
 * Nothing is asserted; the output is meant to be inspected by hand.
 * @throws Exception if building the dictionary or querying the speller fails
 */
@Test
@Ignore("testing a potential bug in Morfologik")
public void testMorfologikSpeller() throws Exception {
  // Encode explicitly as UTF-8 instead of relying on the platform default charset:
  // the metadata below declares fsa.dict.encoding=utf-8, so the bytes must agree with it.
  java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
  List<byte[]> lines = new ArrayList<>();
  lines.add("die".getBytes(utf8));
  lines.add("ist".getBytes(utf8));
  byte[] info = "fsa.dict.separator=+\nfsa.dict.encoding=utf-8\nfsa.dict.frequency-included=true".getBytes(utf8);
  Dictionary dict = getDictionary(lines, new ByteArrayInputStream(info));
  // second argument is presumably the maximum edit distance -- see the Speller API
  Speller speller = new Speller(dict, 2);
  // why do both "die" and "ist" have a distance of 1 in the CandidateData constructor?
  System.out.println(speller.findReplacements("is"));
}
use of morfologik.stemming.Dictionary in project languagetool by languagetool-org.
the class MorfologikGermanyGermanSpellerRuleTest method testFrequency.
/**
 * Manual helper test (ignored by default): reads the {@code de/hunspell/de_DE.dict}
 * resource and asserts the values {@link Speller#getFrequency} reports for a few
 * German words, including one made-up word that is expected to yield 0.
 * @throws IOException if the dictionary resource cannot be read
 */
@Test
@Ignore("testing for https://github.com/languagetool-org/languagetool/issues/236")
public void testFrequency() throws IOException {
  URL dictUrl = JLanguageTool.getDataBroker().getFromResourceDirAsUrl("de/hunspell/de_DE.dict");
  // second argument is presumably the maximum edit distance -- see the Speller API
  Speller germanSpeller = new Speller(Dictionary.read(dictUrl), 2);

  // words expected to carry a non-zero frequency value
  assertThat(germanSpeller.getFrequency("der"), is(25));
  assertThat(germanSpeller.getFrequency("Haus"), is(11));
  assertThat(germanSpeller.getFrequency("schön"), is(9));
  // made-up word: expected to report 0
  assertThat(germanSpeller.getFrequency("gippsnicht"), is(0));
}
Aggregations