Search in sources :

Example 1 with DynamicLexiconGraph

use of zemberek.morphology.lexicon.graph.DynamicLexiconGraph in project lucene-solr-analysis-turkish by iorixxx.

the class Zemberek3StemFilterFactory method inform.

/**
 * Builds the parser used by this factory.
 *
 * <p>If {@code dictionaryFiles} is unset or blank, or the listed files yield no
 * lines at all, the parser is obtained from
 * {@code TurkishWordParserGenerator.createWithDefaults()}. Otherwise the lines of
 * every listed file are loaded into a {@code RootLexicon} and a {@code WordParser}
 * is created over a {@code DynamicLexiconGraph} populated from that lexicon.
 *
 * @param loader resource loader handed to {@code getLines} for each dictionary file
 * @throws IOException declared by the overridden method; presumably raised when a
 *         dictionary file cannot be read (confirm against {@code getLines})
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    final List<String> entries = new ArrayList<>();
    final boolean hasCustomFiles = dictionaryFiles != null && !dictionaryFiles.trim().isEmpty();
    if (hasCustomFiles) {
        for (String fileName : splitFileNames(dictionaryFiles)) {
            entries.addAll(getLines(loader, fileName.trim()));
        }
    }

    if (entries.isEmpty()) {
        // Nothing custom to load: use default dictionaries shipped with Zemberek3.
        this.parser = TurkishWordParserGenerator.createWithDefaults().getParser();
        return;
    }

    // Same suffix provider instance is shared by the dictionary loader and the graph.
    final SuffixProvider suffixes = new TurkishSuffixes();
    final RootLexicon customLexicon = new TurkishDictionaryLoader(suffixes).load(entries);
    final DynamicLexiconGraph lexiconGraph = new DynamicLexiconGraph(suffixes);
    lexiconGraph.addDictionaryItems(customLexicon);
    this.parser = new WordParser(lexiconGraph);
}
Also used : SuffixProvider(zemberek.morphology.lexicon.SuffixProvider) TurkishDictionaryLoader(zemberek.morphology.lexicon.tr.TurkishDictionaryLoader) TurkishSuffixes(zemberek.morphology.lexicon.tr.TurkishSuffixes) ArrayList(java.util.ArrayList) RootLexicon(zemberek.morphology.lexicon.RootLexicon) DynamicLexiconGraph(zemberek.morphology.lexicon.graph.DynamicLexiconGraph) WordParser(zemberek.morphology.parser.WordParser)

Example 2 with DynamicLexiconGraph

use of zemberek.morphology.lexicon.graph.DynamicLexiconGraph in project zemberek-nlp by ahmetaa.

the class SimpleGeneratorTest method regenerateTest3.

/**
 * Analyzes the word "elmada" and, for every analysis, regenerates surface forms
 * after removing the {@code A3sg} and {@code Pnon} suffixes from the suffix list.
 *
 * <p>There are no assertions; the analysis and the generated forms are only
 * printed to standard output.
 */
@Test
public void regenerateTest3() throws IOException {
    final DynamicLexiconGraph lexiconGraph = getLexicon();
    final WordAnalyzer analyzer = new WordAnalyzer(lexiconGraph);
    final SimpleGenerator wordGenerator = new SimpleGenerator(lexiconGraph);

    for (WordAnalysis analysis : analyzer.analyze("elmada")) {
        final List<Suffix> remaining = analysis.getSuffixes();
        remaining.remove(suffixProvider.A3sg);
        remaining.remove(suffixProvider.Pnon);
        // Printed after the removals, exactly as before.
        System.out.println(analysis);
        final String[] generated = wordGenerator.generate(analysis.dictionaryItem, remaining);
        System.out.println(Arrays.toString(generated));
    }
}
Also used : WordAnalyzer(zemberek.morphology.analysis.WordAnalyzer) Suffix(zemberek.morphology.lexicon.Suffix) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) DynamicLexiconGraph(zemberek.morphology.lexicon.graph.DynamicLexiconGraph) SimpleGenerator(zemberek.morphology.generator.SimpleGenerator) Test(org.junit.Test)

Example 3 with DynamicLexiconGraph

use of zemberek.morphology.lexicon.graph.DynamicLexiconGraph in project zemberek-nlp by ahmetaa.

the class SimpleGeneratorTest method regenerateTest2.

/**
 * Analyzes the word "elmada" and regenerates surface forms from each analysis
 * using its dictionary item and its full suffix list.
 *
 * <p>There are no assertions; each analysis and its generated forms are only
 * printed to standard output.
 */
@Test
public void regenerateTest2() throws IOException {
    final DynamicLexiconGraph lexiconGraph = getLexicon();
    final WordAnalyzer analyzer = new WordAnalyzer(lexiconGraph);
    final SimpleGenerator wordGenerator = new SimpleGenerator(lexiconGraph);

    for (WordAnalysis analysis : analyzer.analyze("elmada")) {
        System.out.println(analysis);
        final String[] generated =
                wordGenerator.generate(analysis.dictionaryItem, analysis.getSuffixes());
        System.out.println(Arrays.toString(generated));
    }
}
Also used : WordAnalyzer(zemberek.morphology.analysis.WordAnalyzer) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) DynamicLexiconGraph(zemberek.morphology.lexicon.graph.DynamicLexiconGraph) SimpleGenerator(zemberek.morphology.generator.SimpleGenerator) Test(org.junit.Test)

Example 4 with DynamicLexiconGraph

use of zemberek.morphology.lexicon.graph.DynamicLexiconGraph in project zemberek-nlp by ahmetaa.

the class SimpleGeneratorTest method morphemeGenerationTest.

/**
 * Checks morpheme generation against the {@code separate-morphemes.txt} fixture.
 *
 * <p>Each fixture line is split at the first {@code "="}: the trimmed left side is
 * the word to analyze, the right side is a comma-separated list of accepted
 * results. For every analysis of a word, the generated morphemes joined with
 * {@code "-"} must be one of the accepted results for that word.
 */
@Test
public void morphemeGenerationTest() throws IOException {
    final DynamicLexiconGraph lexiconGraph = getLexicon();
    final WordAnalyzer analyzer = new WordAnalyzer(lexiconGraph);
    final SimpleGenerator morphemeGenerator = new SimpleGenerator(lexiconGraph);

    final File fixture = new File(Resources.getResource("separate-morphemes.txt").getFile());
    final List<String> fixtureLines = SimpleTextReader.trimmingUTF8Reader(fixture).asStringList();

    // word -> every accepted "-"-joined morpheme sequence listed for it
    final ArrayListMultimap<String, String> expected = ArrayListMultimap.create(100, 2);
    for (String line : fixtureLines) {
        final String alternatives = Strings.subStringAfterFirst(line, "=");
        for (String alternative : Splitter.on(",").trimResults().split(alternatives)) {
            expected.put(Strings.subStringUntilFirst(line, "=").trim(), alternative);
        }
    }

    for (String word : expected.keySet()) {
        for (WordAnalysis analysis : analyzer.analyze(word)) {
            final String[] morphemes =
                    morphemeGenerator.generateMorphemes(analysis.dictionaryItem, analysis.getSuffixes());
            final String joined = Joiner.on("-").join(morphemes);
            Assert.assertTrue("Error in:" + word, expected.get(word).contains(joined));
        }
    }
}
Also used : WordAnalyzer(zemberek.morphology.analysis.WordAnalyzer) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) DynamicLexiconGraph(zemberek.morphology.lexicon.graph.DynamicLexiconGraph) File(java.io.File) SimpleGenerator(zemberek.morphology.generator.SimpleGenerator) Test(org.junit.Test)

Example 5 with DynamicLexiconGraph

use of zemberek.morphology.lexicon.graph.DynamicLexiconGraph in project zemberek-nlp by ahmetaa.

the class SimpleGeneratorTest method speedTest.

/**
 * Manual throughput measurement for {@code SimpleGenerator.generate}; ignored in
 * normal test runs.
 *
 * <p>All words in {@code parseable.txt} are analyzed once up front, then every
 * resulting analysis is regenerated 1000 times. The generated forms are printed
 * during the first pass only, followed by the elapsed time and the resulting
 * words-per-second figure.
 */
@Test
@Ignore("Performance Test")
public void speedTest() throws IOException {
    DynamicLexiconGraph graph = getLexicon();
    WordAnalyzer parser = new WordAnalyzer(graph);
    SimpleGenerator generator = new SimpleGenerator(graph);
    List<String> parseables = SimpleTextReader.trimmingUTF8Reader(new File(Resources.getResource("parseable.txt").getFile())).asStringList();
    List<WordAnalysis> parses = new ArrayList<WordAnalysis>();
    for (String word : parseables) {
        parses.addAll(parser.analyze(word));
    }

    final long iteration = 1000;
    // nanoTime is monotonic, so the measurement cannot be skewed by wall-clock adjustments.
    final long start = System.nanoTime();
    for (int i = 0; i < iteration; i++) {
        for (WordAnalysis parseToken : parses) {
            String[] result = generator.generate(parseToken.dictionaryItem, parseToken.getSuffixes());
            if (i == 0) {
                // Print each generated form once, not on every timing pass.
                System.out.println(parseToken + " = " + Arrays.toString(result));
            }
        }
    }
    final long elapsed = (System.nanoTime() - start) / 1000000L;
    System.out.println("Elapsed:" + elapsed + " ms.");
    // A run shorter than one millisecond (e.g. an empty word list) would otherwise
    // divide by zero; clamp the divisor to 1 ms.
    final long divisor = Math.max(1L, elapsed);
    System.out.println("Speed:" + (iteration * 1000 * parses.size() / divisor) + " words/second");
}
Also used : WordAnalyzer(zemberek.morphology.analysis.WordAnalyzer) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) ArrayList(java.util.ArrayList) DynamicLexiconGraph(zemberek.morphology.lexicon.graph.DynamicLexiconGraph) File(java.io.File) SimpleGenerator(zemberek.morphology.generator.SimpleGenerator) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

DynamicLexiconGraph (zemberek.morphology.lexicon.graph.DynamicLexiconGraph): 20, Test (org.junit.Test): 14, File (java.io.File): 5, WordAnalysis (zemberek.morphology.analysis.WordAnalysis): 5, WordAnalyzer (zemberek.morphology.analysis.WordAnalyzer): 5, SimpleGenerator (zemberek.morphology.generator.SimpleGenerator): 5, TurkishDictionaryLoader (zemberek.morphology.lexicon.tr.TurkishDictionaryLoader): 5, RootLexicon (zemberek.morphology.lexicon.RootLexicon): 3, SuffixProvider (zemberek.morphology.lexicon.SuffixProvider): 3, ArrayList (java.util.ArrayList): 2, Ignore (org.junit.Ignore): 1, DictionaryItem (zemberek.morphology.lexicon.DictionaryItem): 1, Suffix (zemberek.morphology.lexicon.Suffix): 1, DynamicSuffixProvider (zemberek.morphology.lexicon.graph.DynamicSuffixProvider): 1, TurkishSuffixes (zemberek.morphology.lexicon.tr.TurkishSuffixes): 1, WordParser (zemberek.morphology.parser.WordParser): 1