Use of zemberek.morphology.lexicon.tr.TurkishSuffixes in the project lucene-solr-analysis-turkish by iorixxx.
The method inform of the class Zemberek3StemFilterFactory.
/**
 * Initializes {@code parser}, either from the configured dictionary files or,
 * when none yield any lines, from the defaults.
 *
 * @param loader resource loader handed to {@code getLines} for each dictionary file
 * @throws IOException declared by the signature; presumably raised when a
 *     dictionary file cannot be read (the throwing code is not visible here)
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final boolean dictionariesConfigured =
      dictionaryFiles != null && !dictionaryFiles.trim().isEmpty();

  // Gather every line from every configured dictionary file, in order.
  final List<String> dictionaryLines = new ArrayList<>();
  if (dictionariesConfigured) {
    for (String fileName : splitFileNames(dictionaryFiles)) {
      dictionaryLines.addAll(getLines(loader, fileName.trim()));
    }
  }

  if (dictionaryLines.isEmpty()) {
    // Nothing configured (or nothing loaded): fall back to the default
    // dictionaries shipped with Zemberek3.
    this.parser = TurkishWordParserGenerator.createWithDefaults().getParser();
    return;
  }

  // Build a lexicon from the custom lines and wrap its graph in a parser.
  final SuffixProvider suffixes = new TurkishSuffixes();
  final RootLexicon customLexicon = new TurkishDictionaryLoader(suffixes).load(dictionaryLines);
  final DynamicLexiconGraph lexiconGraph = new DynamicLexiconGraph(suffixes);
  lexiconGraph.addDictionaryItems(customLexicon);
  parser = new WordParser(lexiconGraph);
}
Use of zemberek.morphology.lexicon.tr.TurkishSuffixes in the project zemberek-nlp by ahmetaa.
The method generateSuffixNames of the class ZemberekNlpScripts.
/**
 * Utility script (not a real test): writes the ids of all suffix forms that
 * are not {@code NullSuffixForm} instances, ordered by id, one per line, to
 * the file {@code suffix-list}.
 *
 * @throws IOException if the output file cannot be written
 */
@Test
@Ignore("Not a Test.")
public void generateSuffixNames() throws IOException {
  TurkishSuffixes turkishSuffixes = new TurkishSuffixes();

  // Keep every form except the NullSuffixForm instances.
  List<SuffixForm> keptForms = new ArrayList<>();
  for (SuffixForm candidate : turkishSuffixes.getAllForms()) {
    if (!(candidate instanceof NullSuffixForm)) {
      keptForms.add(candidate);
    }
  }

  // Order by id, then project each form onto its id string.
  List<String> sortedIds = keptForms.stream()
      .sorted(Comparator.comparing(SuffixForm::getId))
      .map(form -> form.id)
      .collect(Collectors.toList());

  Files.write(Paths.get("suffix-list"), sortedIds);
}
Aggregations