use of zemberek.morphology.lexicon.graph.DynamicLexiconGraph in project lucene-solr-analysis-turkish by iorixxx.
the class Zemberek3StemFilterFactory method inform.
/**
 * Initializes {@code parser} once resources are available.
 *
 * <p>When {@code dictionaryFiles} is unset or blank, or when the listed files
 * contribute no lines at all, the parser built from Zemberek3's default
 * dictionaries is used. Otherwise the collected lines are loaded into a
 * lexicon and a parser is built on top of a graph populated from it.
 *
 * @param loader resource loader handed to {@code getLines} for each file name
 * @throws IOException declared by the overridden method; presumably raised when
 *                     a dictionary file cannot be read (verify against {@code getLines})
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    final boolean dictionariesConfigured = dictionaryFiles != null && !dictionaryFiles.trim().isEmpty();

    // Gather every line of every configured dictionary file, in order.
    final List<String> dictionaryLines = new ArrayList<>();
    if (dictionariesConfigured) {
        for (String fileName : splitFileNames(dictionaryFiles)) {
            dictionaryLines.addAll(getLines(loader, fileName.trim()));
        }
    }

    if (dictionaryLines.isEmpty()) {
        // Use default dictionaries shipped with Zemberek3.
        this.parser = TurkishWordParserGenerator.createWithDefaults().getParser();
        return;
    }

    // The same suffix provider is shared by the dictionary loader and the graph.
    final SuffixProvider suffixes = new TurkishSuffixes();
    final RootLexicon customLexicon = new TurkishDictionaryLoader(suffixes).load(dictionaryLines);
    final DynamicLexiconGraph lexiconGraph = new DynamicLexiconGraph(suffixes);
    lexiconGraph.addDictionaryItems(customLexicon);
    this.parser = new WordParser(lexiconGraph);
}
use of zemberek.morphology.lexicon.graph.DynamicLexiconGraph in project zemberek-nlp by ahmetaa.
the class SimpleGeneratorTest method regenerateTest3.
/**
 * Analyzes the word "elmada", removes the {@code A3sg} and {@code Pnon}
 * suffixes from each analysis' suffix list, and prints the analysis followed
 * by the surface forms regenerated from the reduced suffix list.
 *
 * <p>This test only prints; it makes no assertions.
 */
@Test
public void regenerateTest3() throws IOException {
    final DynamicLexiconGraph lexiconGraph = getLexicon();
    final WordAnalyzer analyzer = new WordAnalyzer(lexiconGraph);
    final SimpleGenerator wordGenerator = new SimpleGenerator(lexiconGraph);

    final List<WordAnalysis> analyses = analyzer.analyze("elmada");
    for (WordAnalysis analysis : analyses) {
        final List<Suffix> remainingSuffixes = analysis.getSuffixes();
        remainingSuffixes.remove(suffixProvider.A3sg);
        remainingSuffixes.remove(suffixProvider.Pnon);

        System.out.println(analysis);
        final String[] regenerated = wordGenerator.generate(analysis.dictionaryItem, remainingSuffixes);
        System.out.println(Arrays.toString(regenerated));
    }
}
use of zemberek.morphology.lexicon.graph.DynamicLexiconGraph in project zemberek-nlp by ahmetaa.
the class SimpleGeneratorTest method regenerateTest2.
/**
 * Analyzes the word "elmada" and, for every analysis, prints the analysis and
 * the surface forms regenerated from its dictionary item and full suffix list.
 *
 * <p>This test only prints; it makes no assertions.
 */
@Test
public void regenerateTest2() throws IOException {
    final DynamicLexiconGraph lexiconGraph = getLexicon();
    final WordAnalyzer analyzer = new WordAnalyzer(lexiconGraph);
    final SimpleGenerator wordGenerator = new SimpleGenerator(lexiconGraph);

    final List<WordAnalysis> analyses = analyzer.analyze("elmada");
    for (WordAnalysis analysis : analyses) {
        System.out.println(analysis);
        final String[] regenerated = wordGenerator.generate(analysis.dictionaryItem, analysis.getSuffixes());
        final String printable = Arrays.toString(regenerated);
        System.out.println(printable);
    }
}
use of zemberek.morphology.lexicon.graph.DynamicLexiconGraph in project zemberek-nlp by ahmetaa.
the class SimpleGeneratorTest method morphemeGenerationTest.
/**
 * Checks morpheme generation against the expectations listed in
 * {@code separate-morphemes.txt}.
 *
 * <p>Each line of the file is read as {@code word = form1, form2, ...}: the
 * text before the first "=" is the word to analyze and the comma-separated
 * entries after it are the accepted "-"-joined morpheme sequences. Every
 * analysis of every word must generate one of the accepted sequences.
 */
@Test
public void morphemeGenerationTest() throws IOException {
    final DynamicLexiconGraph lexiconGraph = getLexicon();
    final WordAnalyzer analyzer = new WordAnalyzer(lexiconGraph);
    final SimpleGenerator morphemeGenerator = new SimpleGenerator(lexiconGraph);

    final File expectationFile = new File(Resources.getResource("separate-morphemes.txt").getFile());
    final List<String> expectationLines = SimpleTextReader.trimmingUTF8Reader(expectationFile).asStringList();

    // word -> every accepted morpheme sequence for that word
    final ArrayListMultimap<String, String> expectedByWord = ArrayListMultimap.create(100, 2);
    for (String line : expectationLines) {
        final String acceptedPart = Strings.subStringAfterFirst(line, "=");
        final Iterable<String> acceptedForms = Splitter.on(",").trimResults().split(acceptedPart);
        final String word = Strings.subStringUntilFirst(line, "=").trim();
        for (String form : acceptedForms) {
            expectedByWord.put(word, form);
        }
    }

    for (String word : expectedByWord.keySet()) {
        final List<String> accepted = expectedByWord.get(word);
        for (WordAnalysis analysis : analyzer.analyze(word)) {
            final String[] morphemes = morphemeGenerator.generateMorphemes(analysis.dictionaryItem, analysis.getSuffixes());
            final String joined = Joiner.on("-").join(morphemes);
            Assert.assertTrue("Error in:" + word, accepted.contains(joined));
        }
    }
}
use of zemberek.morphology.lexicon.graph.DynamicLexiconGraph in project zemberek-nlp by ahmetaa.
the class SimpleGeneratorTest method speedTest.
/**
 * Manual throughput measurement for {@code SimpleGenerator.generate}.
 *
 * <p>All words in {@code parseable.txt} are analyzed up front; generation is
 * then repeated {@code iteration} times over every analysis and the elapsed
 * time and words-per-second rate are printed. Results of the first pass are
 * also printed so the output can be inspected by eye.
 *
 * <p>Timing uses {@link System#nanoTime()}, which is intended for measuring
 * elapsed time, and the divisor is clamped to at least one nanosecond so an
 * empty input file or a very fast run cannot trigger a division by zero.
 */
@Test
@Ignore("Performance Test")
public void speedTest() throws IOException {
    DynamicLexiconGraph graph = getLexicon();
    WordAnalyzer parser = new WordAnalyzer(graph);
    SimpleGenerator generator = new SimpleGenerator(graph);
    List<String> parseables = SimpleTextReader.trimmingUTF8Reader(new File(Resources.getResource("parseable.txt").getFile())).asStringList();
    List<WordAnalysis> parses = new ArrayList<WordAnalysis>();
    for (String word : parseables) {
        parses.addAll(parser.analyze(word));
    }
    final long iteration = 1000;
    long start = System.nanoTime();
    for (int i = 0; i < iteration; i++) {
        for (WordAnalysis parseToken : parses) {
            String[] result = generator.generate(parseToken.dictionaryItem, parseToken.getSuffixes());
            // Only the first pass prints, so console I/O does not dominate the remaining passes.
            if (i == 0) {
                System.out.println(parseToken + " = " + Arrays.toString(result));
            }
        }
    }
    long elapsedNanos = System.nanoTime() - start;
    long elapsedMillis = elapsedNanos / 1000000L;
    System.out.println("Elapsed:" + elapsedMillis + " ms.");
    // Clamp the divisor: with no analyses (or a coarse clock) the elapsed time may be zero.
    long safeNanos = Math.max(1L, elapsedNanos);
    long generatedWords = iteration * parses.size();
    // Floating-point arithmetic avoids overflowing long when scaling to per-second units.
    long wordsPerSecond = (long) (generatedWords * 1.0e9 / safeNanos);
    System.out.println("Speed:" + wordsPerSecond + " words/second");
}
Aggregations