Search in sources :

Example 6 with RootLexicon

use of zemberek.morphology.lexicon.RootLexicon in project zemberek-nlp by ahmetaa.

the class TurkishDictionaryLoaderTest method referenceTest2.

/**
 * Builds a lexicon from four dictionary lines: "ad", a second "ad" entry that carries the
 * Doubling and InverseHarmony attributes, "soy", and the compound "soyadı" whose roots are
 * declared as soy-ad. Verifies that the "soyadı" noun is present in the lexicon and that it
 * does not carry the Doubling attribute.
 */
@Test
public void referenceTest2() {
    final String[] dictionaryLines = new String[] {
        "ad",
        "ad [A:Doubling,InverseHarmony;Index:1]",
        "soy",
        "soyadı [A:CompoundP3sg; Roots:soy-ad]"
    };
    final RootLexicon loaded = TurkishDictionaryLoader.load(dictionaryLines);

    final DictionaryItem compound = loaded.getItemById("soyadı_Noun");
    Assert.assertNotNull(compound);

    // The Doubling attribute appears only on the indexed "ad" entry in the input lines.
    final boolean hasDoubling = compound.attributes.contains(RootAttribute.Doubling);
    Assert.assertFalse(hasDoubling);
}
Also used : DictionaryItem(zemberek.morphology.lexicon.DictionaryItem) RootLexicon(zemberek.morphology.lexicon.RootLexicon) Test(org.junit.Test)

Example 7 with RootLexicon

use of zemberek.morphology.lexicon.RootLexicon in project zemberek-nlp by ahmetaa.

the class TurkishDictionaryLoaderTest method loadNounsFromFileTest.

/**
 * Loads the {@code test-lexicon-nouns.txt} classpath resource through
 * {@code TurkishDictionaryLoader.load(File)} and verifies that the resulting lexicon is not
 * empty and that the primary part of speech of every item is {@code Noun}.
 *
 * @throws IOException declared for the dictionary load; presumably raised when the resource
 *     file cannot be read (confirm against the loader)
 */
@Test
public void loadNounsFromFileTest() throws IOException {
    RootLexicon items = TurkishDictionaryLoader.load(new File(Resources.getResource("test-lexicon-nouns.txt").getFile()));
    Assert.assertFalse(items.isEmpty());
    for (DictionaryItem item : items) {
        // JUnit's signature is assertSame(expected, actual); keeping the expected constant
        // first makes a failure message label the two values correctly.
        Assert.assertSame(Noun, item.primaryPos);
    }
}
Also used : DictionaryItem(zemberek.morphology.lexicon.DictionaryItem) RootLexicon(zemberek.morphology.lexicon.RootLexicon) File(java.io.File) Test(org.junit.Test)

Example 8 with RootLexicon

use of zemberek.morphology.lexicon.RootLexicon in project zemberek-nlp by ahmetaa.

the class StemTransitionTrieBasedTest method testPrefix.

/**
 * Exercises {@code StemTransitionsTrieBased.getPrefixMatches} on three inputs and checks, for
 * each one, the number of returned stem transitions and that the set of lemmas of the matched
 * items contains the expected entries:
 * "kabağa" gives 3 matches including "kaba", "kabağ" and "kabak";
 * "kabak" gives 2 matches including "kaba" and "kabak";
 * "kapak" gives 3 matches including "kapak".
 */
@Test
public void testPrefix() {
    final RootLexicon rootLexicon = getLexicon();
    final TurkishMorphotactics morphotactics = new TurkishMorphotactics(rootLexicon);
    final StemTransitionsTrieBased trie = new StemTransitionsTrieBased(rootLexicon, morphotactics);

    // Case 1: "kabağa"
    final List<StemTransition> forKabaga = trie.getPrefixMatches("kabağa", false);
    Assert.assertEquals(3, forKabaga.size());
    final Set<String> kabagaLemmas = forKabaga.stream()
            .map(transition -> transition.item.lemma)
            .collect(Collectors.toSet());
    Assert.assertTrue(TestUtil.containsAll(kabagaLemmas, "kaba", "kabağ", "kabak"));

    // Case 2: "kabak"
    final List<StemTransition> forKabak = trie.getPrefixMatches("kabak", false);
    Assert.assertEquals(2, forKabak.size());
    final Set<String> kabakLemmas = forKabak.stream()
            .map(transition -> transition.item.lemma)
            .collect(Collectors.toSet());
    Assert.assertTrue(TestUtil.containsAll(kabakLemmas, "kaba", "kabak"));

    // Case 3: "kapak" - three transitions are expected, but only the "kapak" lemma is checked.
    final List<StemTransition> forKapak = trie.getPrefixMatches("kapak", false);
    Assert.assertEquals(3, forKapak.size());
    final Set<String> kapakLemmas = forKapak.stream()
            .map(transition -> transition.item.lemma)
            .collect(Collectors.toSet());
    Assert.assertTrue(TestUtil.containsAll(kapakLemmas, "kapak"));
}
Also used : List(java.util.List) TurkishMorphotactics(zemberek.morphology.morphotactics.TurkishMorphotactics) Set(java.util.Set) Test(org.junit.Test) TestUtil(zemberek.core.io.TestUtil) RootLexicon(zemberek.morphology.lexicon.RootLexicon) Assert(org.junit.Assert) Collectors(java.util.stream.Collectors) TurkishDictionaryLoader(zemberek.morphology.lexicon.tr.TurkishDictionaryLoader) StemTransition(zemberek.morphology.morphotactics.StemTransition) DictionaryItem(zemberek.morphology.lexicon.DictionaryItem)

Example 9 with RootLexicon

use of zemberek.morphology.lexicon.RootLexicon in project zemberek-nlp by ahmetaa.

the class StemTransitionTrieBasedTest method testItem.

/**
 * Looks up the "kapak_Noun" dictionary item and checks that
 * {@code StemTransitionsTrieBased.getTransitions} returns exactly two stem transitions for it,
 * whose surface forms include "kapak" and "kapağ".
 */
@Test
public void testItem() {
    final RootLexicon rootLexicon = getLexicon();
    final TurkishMorphotactics morphotactics = new TurkishMorphotactics(rootLexicon);
    final StemTransitionsTrieBased trie = new StemTransitionsTrieBased(rootLexicon, morphotactics);

    final DictionaryItem kapak = rootLexicon.getItemById("kapak_Noun");
    final List<StemTransition> stemTransitions = trie.getTransitions(kapak);
    Assert.assertEquals(2, stemTransitions.size());

    final Set<String> surfaceForms = stemTransitions.stream()
            .map(transition -> transition.surface)
            .collect(Collectors.toSet());
    final boolean hasBothSurfaces = TestUtil.containsAll(surfaceForms, "kapak", "kapağ");
    Assert.assertTrue(hasBothSurfaces);
}
Also used : List(java.util.List) TurkishMorphotactics(zemberek.morphology.morphotactics.TurkishMorphotactics) Set(java.util.Set) Test(org.junit.Test) TestUtil(zemberek.core.io.TestUtil) RootLexicon(zemberek.morphology.lexicon.RootLexicon) Assert(org.junit.Assert) Collectors(java.util.stream.Collectors) TurkishDictionaryLoader(zemberek.morphology.lexicon.tr.TurkishDictionaryLoader) StemTransition(zemberek.morphology.morphotactics.StemTransition) DictionaryItem(zemberek.morphology.lexicon.DictionaryItem)

Example 10 with RootLexicon

use of zemberek.morphology.lexicon.RootLexicon in project zemberek-nlp by ahmetaa.

the class DictionaryOperations method saveProperNouns.

/**
 * Writes the proper-noun lemmas of the default dictionaries to the file
 * {@code zemberek.proper.vocab} (a relative path).
 *
 * <p>Every item of the lexicon returned by
 * {@code TurkishDictionaryLoader.loadDefaultDictionaries()} is inspected; an item contributes
 * its lemma only when it does not carry the {@code Dummy} root attribute and its secondary
 * part of speech is {@code ProperNoun}. Duplicate lemmas are collapsed, the remaining ones are
 * ordered with {@code Turkish.STRING_COMPARATOR_ASC} and written one per line.
 *
 * @throws IOException if writing the output file fails (the dictionary load may presumably
 *     raise it as well)
 */
public static void saveProperNouns() throws IOException {
    RootLexicon defaultLexicon = TurkishDictionaryLoader.loadDefaultDictionaries();

    // A set is used first so that lemmas shared by several items are written only once.
    Set<String> properNounLemmas = new HashSet<>();
    for (DictionaryItem entry : defaultLexicon) {
        String lemma = entry.lemma;
        boolean isDummy = entry.attributes.contains(RootAttribute.Dummy);
        if (!isDummy && entry.secondaryPos == SecondaryPos.ProperNoun) {
            properNounLemmas.add(lemma);
        }
    }

    List<String> sortedLemmas = new ArrayList<>(properNounLemmas);
    sortedLemmas.sort(Turkish.STRING_COMPARATOR_ASC);
    Files.write(Paths.get("zemberek.proper.vocab"), sortedLemmas);
}
Also used : DictionaryItem(zemberek.morphology.lexicon.DictionaryItem) ArrayList(java.util.ArrayList) RootLexicon(zemberek.morphology.lexicon.RootLexicon) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Aggregations

- RootLexicon (zemberek.morphology.lexicon.RootLexicon): 21
- DictionaryItem (zemberek.morphology.lexicon.DictionaryItem): 12
- Test (org.junit.Test): 10
- ArrayList (java.util.ArrayList): 9
- TurkishDictionaryLoader (zemberek.morphology.lexicon.tr.TurkishDictionaryLoader): 6
- File (java.io.File): 4
- LinkedHashSet (java.util.LinkedHashSet): 4
- Stopwatch (com.google.common.base.Stopwatch): 3
- HashSet (java.util.HashSet): 3
- Ignore (org.junit.Ignore): 3
- TurkishMorphology (zemberek.morphology.TurkishMorphology): 3
- DynamicLexiconGraph (zemberek.morphology.lexicon.graph.DynamicLexiconGraph): 3
- TurkishMorphotactics (zemberek.morphology.morphotactics.TurkishMorphotactics): 3
- Path (java.nio.file.Path): 2
- List (java.util.List): 2
- Set (java.util.Set): 2
- ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService): 2
- ExecutorService (java.util.concurrent.ExecutorService): 2
- Collectors (java.util.stream.Collectors): 2
- Assert (org.junit.Assert): 2