Search in sources :

Example 6 with WordAnalysis

use of zemberek.morphology.analysis.WordAnalysis in project zemberek-nlp by ahmetaa.

the class SimpleGeneratorTest method regenerateTest3.

/**
 * Analyzes the word "elmada", drops the A3sg and Pnon suffixes from every
 * analysis and prints what the generator produces from the remaining suffixes.
 * This is a print-only smoke test; it makes no assertions.
 *
 * @throws IOException declared by the test; presumably raised while loading the lexicon
 */
@Test
public void regenerateTest3() throws IOException {
    DynamicLexiconGraph lexicon = getLexicon();
    WordAnalyzer analyzer = new WordAnalyzer(lexicon);
    SimpleGenerator wordGenerator = new SimpleGenerator(lexicon);
    for (WordAnalysis analysis : analyzer.analyze("elmada")) {
        // The list handed back by getSuffixes() is modified in place.
        // NOTE(review): if it is the analysis' own backing list, the analysis
        // printed below already reflects the removals - confirm.
        List<Suffix> remaining = analysis.getSuffixes();
        remaining.remove(suffixProvider.A3sg);
        remaining.remove(suffixProvider.Pnon);
        System.out.println(analysis);
        String[] generated = wordGenerator.generate(analysis.dictionaryItem, remaining);
        System.out.println(Arrays.toString(generated));
    }
}
Also used : WordAnalyzer(zemberek.morphology.analysis.WordAnalyzer) Suffix(zemberek.morphology.lexicon.Suffix) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) DynamicLexiconGraph(zemberek.morphology.lexicon.graph.DynamicLexiconGraph) SimpleGenerator(zemberek.morphology.generator.SimpleGenerator) Test(org.junit.Test)

Example 7 with WordAnalysis

use of zemberek.morphology.analysis.WordAnalysis in project zemberek-nlp by ahmetaa.

the class SimpleGeneratorTest method regenerateTest2.

/**
 * Analyzes the word "elmada" and, for every analysis found, prints the analysis
 * followed by the surface forms the generator builds from its dictionary item
 * and suffix list. This is a print-only smoke test; it makes no assertions.
 *
 * @throws IOException declared by the test; presumably raised while loading the lexicon
 */
@Test
public void regenerateTest2() throws IOException {
    DynamicLexiconGraph lexicon = getLexicon();
    WordAnalyzer analyzer = new WordAnalyzer(lexicon);
    SimpleGenerator wordGenerator = new SimpleGenerator(lexicon);
    for (WordAnalysis analysis : analyzer.analyze("elmada")) {
        System.out.println(analysis);
        String[] generated = wordGenerator.generate(analysis.dictionaryItem, analysis.getSuffixes());
        System.out.println(Arrays.toString(generated));
    }
}
Also used : WordAnalyzer(zemberek.morphology.analysis.WordAnalyzer) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) DynamicLexiconGraph(zemberek.morphology.lexicon.graph.DynamicLexiconGraph) SimpleGenerator(zemberek.morphology.generator.SimpleGenerator) Test(org.junit.Test)

Example 8 with WordAnalysis

use of zemberek.morphology.analysis.WordAnalysis in project zemberek-nlp by ahmetaa.

the class SimpleGeneratorTest method morphemeGenerationTest.

/**
 * Checks morpheme generation against the expectations stored in the
 * "separate-morphemes.txt" resource.
 *
 * <p>Each line of the resource is cut at "=": the trimmed part before it is the
 * word to analyze, and the part after it is a comma-separated list of accepted
 * morpheme sequences. For every analysis of every word, the generated morphemes
 * joined with "-" must be one of the accepted sequences for that word.
 *
 * @throws IOException if the resource file cannot be read
 */
@Test
public void morphemeGenerationTest() throws IOException {
    DynamicLexiconGraph lexicon = getLexicon();
    WordAnalyzer analyzer = new WordAnalyzer(lexicon);
    SimpleGenerator wordGenerator = new SimpleGenerator(lexicon);

    File expectationFile = new File(Resources.getResource("separate-morphemes.txt").getFile());
    List<String> lines = SimpleTextReader.trimmingUTF8Reader(expectationFile).asStringList();

    // word -> every morpheme sequence accepted for it
    ArrayListMultimap<String, String> expected = ArrayListMultimap.create(100, 2);
    for (String line : lines) {
        Iterable<String> alternatives =
                Splitter.on(",").trimResults().split(Strings.subStringAfterFirst(line, "="));
        for (String alternative : alternatives) {
            expected.put(Strings.subStringUntilFirst(line, "=").trim(), alternative);
        }
    }

    for (String word : expected.keySet()) {
        for (WordAnalysis analysis : analyzer.analyze(word)) {
            String[] morphemes = wordGenerator.generateMorphemes(analysis.dictionaryItem, analysis.getSuffixes());
            String joined = Joiner.on("-").join(morphemes);
            Assert.assertTrue("Error in:" + word, expected.get(word).contains(joined));
        }
    }
}
Also used : WordAnalyzer(zemberek.morphology.analysis.WordAnalyzer) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) DynamicLexiconGraph(zemberek.morphology.lexicon.graph.DynamicLexiconGraph) File(java.io.File) SimpleGenerator(zemberek.morphology.generator.SimpleGenerator) Test(org.junit.Test)

Example 9 with WordAnalysis

use of zemberek.morphology.analysis.WordAnalysis in project zemberek-nlp by ahmetaa.

the class SimpleGeneratorTest method speedTest.

/**
 * Rough throughput measurement for {@code SimpleGenerator.generate}; ignored
 * by default because it is a performance test, not a correctness test.
 *
 * <p>All analyses of the words in the "parseable.txt" resource are collected
 * first, outside the timed section. The timed section then regenerates every
 * analysis 1000 times; the results of the first pass are printed. Finally the
 * elapsed wall-clock time and the derived words/second figure are printed.
 *
 * @throws IOException if the resource file cannot be read
 */
@Test
@Ignore("Performance Test")
public void speedTest() throws IOException {
    DynamicLexiconGraph graph = getLexicon();
    WordAnalyzer parser = new WordAnalyzer(graph);
    SimpleGenerator generator = new SimpleGenerator(graph);
    List<String> parseables = SimpleTextReader.trimmingUTF8Reader(new File(Resources.getResource("parseable.txt").getFile())).asStringList();
    List<WordAnalysis> parses = new ArrayList<WordAnalysis>();
    for (String word : parseables) {
        parses.addAll(parser.analyze(word));
    }
    long start = System.currentTimeMillis();
    final long iteration = 1000;
    for (int i = 0; i < iteration; i++) {
        for (WordAnalysis parseToken : parses) {
            String[] result = generator.generate(parseToken.dictionaryItem, parseToken.getSuffixes());
            // Print the generated forms once only, during the first pass.
            if (i == 0) {
                System.out.println(parseToken + " = " + Arrays.toString(result));
            }
        }
    }
    long elapsed = System.currentTimeMillis() - start;
    System.out.println("Elapsed:" + elapsed + " ms.");
    // The millisecond clock can report 0 elapsed time (empty input or a very
    // fast run); long division by zero would throw ArithmeticException, so
    // treat anything below 1 ms as 1 ms for the rate computation.
    long elapsedForRate = Math.max(elapsed, 1L);
    System.out.println("Speed:" + (iteration * 1000 * parses.size() / elapsedForRate) + " words/second");
}
Also used : WordAnalyzer(zemberek.morphology.analysis.WordAnalyzer) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) ArrayList(java.util.ArrayList) DynamicLexiconGraph(zemberek.morphology.lexicon.graph.DynamicLexiconGraph) File(java.io.File) SimpleGenerator(zemberek.morphology.generator.SimpleGenerator) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 10 with WordAnalysis

use of zemberek.morphology.analysis.WordAnalysis in project zemberek-nlp by ahmetaa.

the class SimpleGeneratorTest method regenerateTest.

/**
 * Round-trip test: every word listed in the "parseable.txt" resource is
 * analyzed, and each analysis is fed back into the generator. The test fails
 * if, for any analysis, none of the generated forms equals the original word.
 * The word, each analysis and each generated array are also printed.
 *
 * @throws IOException if the resource file cannot be read
 */
@Test
public void regenerateTest() throws IOException {
    DynamicLexiconGraph lexicon = getLexicon();
    WordAnalyzer analyzer = new WordAnalyzer(lexicon);
    SimpleGenerator wordGenerator = new SimpleGenerator(lexicon);

    File wordFile = new File(Resources.getResource("parseable.txt").getFile());
    List<String> words = SimpleTextReader.trimmingUTF8Reader(wordFile).asStringList();

    for (String word : words) {
        System.out.println(word);
        for (WordAnalysis analysis : analyzer.analyze(word)) {
            System.out.println(analysis);
            String[] generated = wordGenerator.generate(analysis.dictionaryItem, analysis.getSuffixes());
            System.out.println(Arrays.toString(generated));

            // Scan every generated form (no early exit) for the original word.
            boolean regenerated = false;
            for (int i = 0; i < generated.length; i++) {
                if (generated[i].equals(word)) {
                    regenerated = true;
                }
            }
            Assert.assertTrue("Error in:" + word + " with parse:" + analysis, regenerated);
        }
    }
}
Also used : WordAnalyzer(zemberek.morphology.analysis.WordAnalyzer) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) DynamicLexiconGraph(zemberek.morphology.lexicon.graph.DynamicLexiconGraph) File(java.io.File) SimpleGenerator(zemberek.morphology.generator.SimpleGenerator) Test(org.junit.Test)

Aggregations

WordAnalysis (zemberek.morphology.analysis.WordAnalysis): 96, Test (org.junit.Test): 42, SingleAnalysis (zemberek.morphology.analysis.SingleAnalysis): 36, TurkishMorphology (zemberek.morphology.TurkishMorphology): 22, ArrayList (java.util.ArrayList): 21, SentenceAnalysis (zemberek.morphology.analysis.SentenceAnalysis): 19, LinkedHashSet (java.util.LinkedHashSet): 13, Ignore (org.junit.Ignore): 13, Histogram (zemberek.core.collections.Histogram): 12, Path (java.nio.file.Path): 11, PrintWriter (java.io.PrintWriter): 10, SentenceWordAnalysis (zemberek.morphology.analysis.SentenceWordAnalysis): 10, IOException (java.io.IOException): 6, HashSet (java.util.HashSet): 6, List (java.util.List): 6, WordAnalyzer (zemberek.morphology.analysis.WordAnalyzer): 6, SimpleGenerator (zemberek.morphology.generator.SimpleGenerator): 6, DictionaryItem (zemberek.morphology.lexicon.DictionaryItem): 6, DynamicLexiconGraph (zemberek.morphology.lexicon.graph.DynamicLexiconGraph): 6, Log (zemberek.core.logging.Log): 5