Search in sources:

Example 1 with WordAnalyzer

Use of zemberek.morphology.analysis.WordAnalyzer in project zemberek-nlp by ahmetaa.

In the class SimpleGeneratorTest, method regenerateTest3.

/**
 * Analyzes the word "elmada", removes the A3sg and Pnon suffixes from each analysis'
 * suffix list, and prints the analysis followed by whatever the generator produces
 * from the dictionary item and the remaining suffixes.
 *
 * <p>This test only prints; it makes no assertions on the generated forms.
 */
@Test
public void regenerateTest3() throws IOException {
    DynamicLexiconGraph lexicon = getLexicon();
    WordAnalyzer analyzer = new WordAnalyzer(lexicon);
    SimpleGenerator wordGenerator = new SimpleGenerator(lexicon);
    String input = "elmada";
    for (WordAnalysis analysis : analyzer.analyze(input)) {
        // Work on the list handed back by the analysis and strip the two suffixes.
        List<Suffix> remaining = analysis.getSuffixes();
        remaining.remove(suffixProvider.A3sg);
        remaining.remove(suffixProvider.Pnon);
        System.out.println(analysis);
        String[] generated = wordGenerator.generate(analysis.dictionaryItem, remaining);
        System.out.println(Arrays.toString(generated));
    }
}
Also used : WordAnalyzer(zemberek.morphology.analysis.WordAnalyzer) Suffix(zemberek.morphology.lexicon.Suffix) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) DynamicLexiconGraph(zemberek.morphology.lexicon.graph.DynamicLexiconGraph) SimpleGenerator(zemberek.morphology.generator.SimpleGenerator) Test(org.junit.Test)

Example 2 with WordAnalyzer

Use of zemberek.morphology.analysis.WordAnalyzer in project zemberek-nlp by ahmetaa.

In the class SimpleGeneratorTest, method regenerateTest2.

/**
 * Analyzes the word "elmada" and, for every analysis, prints the analysis and the
 * forms the generator produces from its dictionary item and suffix list.
 *
 * <p>This test only prints; it makes no assertions on the generated forms.
 */
@Test
public void regenerateTest2() throws IOException {
    DynamicLexiconGraph lexicon = getLexicon();
    WordAnalyzer analyzer = new WordAnalyzer(lexicon);
    SimpleGenerator wordGenerator = new SimpleGenerator(lexicon);
    String input = "elmada";
    for (WordAnalysis analysis : analyzer.analyze(input)) {
        System.out.println(analysis);
        String[] generated = wordGenerator.generate(analysis.dictionaryItem, analysis.getSuffixes());
        System.out.println(Arrays.toString(generated));
    }
}
Also used : WordAnalyzer(zemberek.morphology.analysis.WordAnalyzer) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) DynamicLexiconGraph(zemberek.morphology.lexicon.graph.DynamicLexiconGraph) SimpleGenerator(zemberek.morphology.generator.SimpleGenerator) Test(org.junit.Test)

Example 3 with WordAnalyzer

Use of zemberek.morphology.analysis.WordAnalyzer in project zemberek-nlp by ahmetaa.

In the class SimpleGeneratorTest, method morphemeGenerationTest.

/**
 * Checks morpheme generation against the expectations listed in the
 * "separate-morphemes.txt" resource.
 *
 * <p>For each line, the text before the first "=" is used (trimmed) as the word to
 * analyze, and the text after it is split on "," into the accepted results. Every
 * analysis of the word must generate morphemes which, joined with "-", appear among
 * the accepted results for that word.
 */
@Test
public void morphemeGenerationTest() throws IOException {
    DynamicLexiconGraph lexicon = getLexicon();
    WordAnalyzer analyzer = new WordAnalyzer(lexicon);
    SimpleGenerator morphemeGenerator = new SimpleGenerator(lexicon);

    File expectationFile = new File(Resources.getResource("separate-morphemes.txt").getFile());
    List<String> lines = SimpleTextReader.trimmingUTF8Reader(expectationFile).asStringList();

    // word -> accepted "-"-joined morpheme sequences
    ArrayListMultimap<String, String> expected = ArrayListMultimap.create(100, 2);
    for (String line : lines) {
        String alternatives = Strings.subStringAfterFirst(line, "=");
        for (String alternative : Splitter.on(",").trimResults().split(alternatives)) {
            String word = Strings.subStringUntilFirst(line, "=").trim();
            expected.put(word, alternative);
        }
    }

    for (String word : expected.keySet()) {
        for (WordAnalysis analysis : analyzer.analyze(word)) {
            String[] morphemes = morphemeGenerator.generateMorphemes(analysis.dictionaryItem, analysis.getSuffixes());
            String joined = Joiner.on("-").join(morphemes);
            Assert.assertTrue("Error in:" + word, expected.get(word).contains(joined));
        }
    }
}
Also used : WordAnalyzer(zemberek.morphology.analysis.WordAnalyzer) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) DynamicLexiconGraph(zemberek.morphology.lexicon.graph.DynamicLexiconGraph) File(java.io.File) SimpleGenerator(zemberek.morphology.generator.SimpleGenerator) Test(org.junit.Test)

Example 4 with WordAnalyzer

Use of zemberek.morphology.analysis.WordAnalyzer in project zemberek-nlp by ahmetaa.

In the class SimpleGeneratorTest, method speedTest.

/**
 * Rough throughput measurement for {@code SimpleGenerator.generate}; ignored by default.
 *
 * <p>All words in the "parseable.txt" resource are analyzed up front, outside the timed
 * section. Generation is then run 1000 times over every collected analysis, and the
 * elapsed time and the resulting words-per-second figure are printed. The generated
 * forms are printed during the first pass only.
 *
 * @throws IOException declared for reading the lexicon or the word list
 */
@Test
@Ignore("Performance Test")
public void speedTest() throws IOException {
    DynamicLexiconGraph graph = getLexicon();
    WordAnalyzer parser = new WordAnalyzer(graph);
    SimpleGenerator generator = new SimpleGenerator(graph);
    List<String> parseables = SimpleTextReader.trimmingUTF8Reader(new File(Resources.getResource("parseable.txt").getFile())).asStringList();
    List<WordAnalysis> parses = new ArrayList<WordAnalysis>();
    for (String word : parseables) {
        parses.addAll(parser.analyze(word));
    }
    // nanoTime is monotonic, so the measurement is not skewed by wall-clock adjustments.
    long start = System.nanoTime();
    final long iteration = 1000;
    for (int i = 0; i < iteration; i++) {
        for (WordAnalysis parseToken : parses) {
            String[] result = generator.generate(parseToken.dictionaryItem, parseToken.getSuffixes());
            if (i == 0) {
                System.out.println(parseToken + " = " + Arrays.toString(result));
            }
        }
    }
    long elapsed = (System.nanoTime() - start) / 1000000L;
    System.out.println("Elapsed:" + elapsed + " ms.");
    // A sub-millisecond run (e.g. an empty word list) would otherwise divide by zero.
    long divisor = Math.max(elapsed, 1L);
    System.out.println("Speed:" + (iteration * 1000 * parses.size() / divisor) + " words/second");
}
Also used : WordAnalyzer(zemberek.morphology.analysis.WordAnalyzer) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) ArrayList(java.util.ArrayList) DynamicLexiconGraph(zemberek.morphology.lexicon.graph.DynamicLexiconGraph) File(java.io.File) SimpleGenerator(zemberek.morphology.generator.SimpleGenerator) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 5 with WordAnalyzer

Use of zemberek.morphology.analysis.WordAnalyzer in project zemberek-nlp by ahmetaa.

In the class SimpleGeneratorTest, method regenerateTest.

/**
 * Round-trip check over the words listed in the "parseable.txt" resource.
 *
 * <p>Each word is analyzed, and for every analysis the generator is run on its
 * dictionary item and suffixes. The test fails unless the original word is among the
 * generated forms. The word, each analysis and the generated forms are printed along
 * the way.
 */
@Test
public void regenerateTest() throws IOException {
    DynamicLexiconGraph lexicon = getLexicon();
    WordAnalyzer analyzer = new WordAnalyzer(lexicon);
    SimpleGenerator wordGenerator = new SimpleGenerator(lexicon);
    File wordFile = new File(Resources.getResource("parseable.txt").getFile());
    List<String> words = SimpleTextReader.trimmingUTF8Reader(wordFile).asStringList();
    for (String word : words) {
        System.out.println(word);
        for (WordAnalysis analysis : analyzer.analyze(word)) {
            System.out.println(analysis);
            String[] generated = wordGenerator.generate(analysis.dictionaryItem, analysis.getSuffixes());
            System.out.println(Arrays.toString(generated));
            // Scan every generated form (no short-circuit) and remember whether any matched.
            boolean regenerated = false;
            for (String form : generated) {
                regenerated |= form.equals(word);
            }
            Assert.assertTrue("Error in:" + word + " with parse:" + analysis, regenerated);
        }
    }
}
Also used : WordAnalyzer(zemberek.morphology.analysis.WordAnalyzer) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) DynamicLexiconGraph(zemberek.morphology.lexicon.graph.DynamicLexiconGraph) File(java.io.File) SimpleGenerator(zemberek.morphology.generator.SimpleGenerator) Test(org.junit.Test)

Aggregations

Test (org.junit.Test): 5, WordAnalysis (zemberek.morphology.analysis.WordAnalysis): 5, WordAnalyzer (zemberek.morphology.analysis.WordAnalyzer): 5, SimpleGenerator (zemberek.morphology.generator.SimpleGenerator): 5, DynamicLexiconGraph (zemberek.morphology.lexicon.graph.DynamicLexiconGraph): 5, File (java.io.File): 3, ArrayList (java.util.ArrayList): 1, Ignore (org.junit.Ignore): 1, Suffix (zemberek.morphology.lexicon.Suffix): 1