Use of zemberek.morphology.lexicon.RootLexicon in project zemberek-nlp by ahmetaa.
Class TurkishDictionaryLoaderTest, method referenceTest2.
/**
 * Loads a four-line in-memory dictionary in which "ad" appears twice (the second entry declares
 * the Doubling and InverseHarmony attributes with Index 1) and verifies that the item with id
 * "soyadı_Noun" exists and does not carry the Doubling attribute.
 */
@Test
public void referenceTest2() {
  String[] dictionaryLines = {
      "ad",
      "ad [A:Doubling,InverseHarmony;Index:1]",
      "soy",
      "soyadı [A:CompoundP3sg; Roots:soy-ad]"
  };
  RootLexicon loaded = TurkishDictionaryLoader.load(dictionaryLines);

  DictionaryItem compound = loaded.getItemById("soyadı_Noun");
  Assert.assertNotNull(compound);

  // The Doubling attribute is declared on one of the "ad" entries; it must not show up here.
  boolean hasDoubling = compound.attributes.contains(RootAttribute.Doubling);
  Assert.assertFalse(hasDoubling);
}
Use of zemberek.morphology.lexicon.RootLexicon in project zemberek-nlp by ahmetaa.
Class TurkishDictionaryLoaderTest, method loadNounsFromFileTest.
/**
 * Loads the {@code test-lexicon-nouns.txt} classpath resource through
 * {@code TurkishDictionaryLoader.load(File)} and verifies that the resulting lexicon is not
 * empty and that the primary POS of every item is {@code Noun}.
 *
 * @throws IOException propagated from the dictionary loader
 */
@Test
public void loadNounsFromFileTest() throws IOException {
  RootLexicon items = TurkishDictionaryLoader.load(
      new File(Resources.getResource("test-lexicon-nouns.txt").getFile()));
  Assert.assertFalse(items.isEmpty());
  for (DictionaryItem item : items) {
    // JUnit's signature is assertSame(expected, actual); the expected value goes first so that
    // a failure message reports "expected Noun but was ..." rather than the reverse.
    Assert.assertSame(Noun, item.primaryPos);
  }
}
Use of zemberek.morphology.lexicon.RootLexicon in project zemberek-nlp by ahmetaa.
Class StemTransitionTrieBasedTest, method testPrefix.
/**
 * Exercises {@code getPrefixMatches} of the trie-based stem transitions for three inputs
 * ("kabağa", "kabak", "kapak"). For each input the number of matches is checked first, then the
 * set of lemmas of the matched transitions is checked via {@code TestUtil.containsAll}.
 */
@Test
public void testPrefix() {
  RootLexicon lexicon = getLexicon();
  TurkishMorphotactics morphotactics = new TurkishMorphotactics(lexicon);
  StemTransitionsTrieBased trie = new StemTransitionsTrieBased(lexicon, morphotactics);

  // Input "kabağa"
  List<StemTransition> kabagaMatches = trie.getPrefixMatches("kabağa", false);
  Assert.assertEquals(3, kabagaMatches.size());
  Set<String> kabagaLemmas = kabagaMatches.stream()
      .map(transition -> transition.item.lemma)
      .collect(Collectors.toSet());
  Assert.assertTrue(TestUtil.containsAll(kabagaLemmas, "kaba", "kabağ", "kabak"));

  // Input "kabak"
  List<StemTransition> kabakMatches = trie.getPrefixMatches("kabak", false);
  Assert.assertEquals(2, kabakMatches.size());
  Set<String> kabakLemmas = kabakMatches.stream()
      .map(transition -> transition.item.lemma)
      .collect(Collectors.toSet());
  Assert.assertTrue(TestUtil.containsAll(kabakLemmas, "kaba", "kabak"));

  // Input "kapak"
  List<StemTransition> kapakMatches = trie.getPrefixMatches("kapak", false);
  Assert.assertEquals(3, kapakMatches.size());
  Set<String> kapakLemmas = kapakMatches.stream()
      .map(transition -> transition.item.lemma)
      .collect(Collectors.toSet());
  Assert.assertTrue(TestUtil.containsAll(kapakLemmas, "kapak"));
}
Use of zemberek.morphology.lexicon.RootLexicon in project zemberek-nlp by ahmetaa.
Class StemTransitionTrieBasedTest, method testItem.
/**
 * Looks up the dictionary item with id "kapak_Noun" and verifies that the trie-based stem
 * transitions return exactly two transitions for it, whose surface forms include "kapak" and
 * "kapağ".
 */
@Test
public void testItem() {
  RootLexicon lexicon = getLexicon();
  TurkishMorphotactics morphotactics = new TurkishMorphotactics(lexicon);
  StemTransitionsTrieBased trie = new StemTransitionsTrieBased(lexicon, morphotactics);

  DictionaryItem kapak = lexicon.getItemById("kapak_Noun");
  List<StemTransition> kapakTransitions = trie.getTransitions(kapak);
  Assert.assertEquals(2, kapakTransitions.size());

  Set<String> surfaceForms = kapakTransitions.stream()
      .map(transition -> transition.surface)
      .collect(Collectors.toSet());
  Assert.assertTrue(TestUtil.containsAll(surfaceForms, "kapak", "kapağ"));
}
Use of zemberek.morphology.lexicon.RootLexicon in project zemberek-nlp by ahmetaa.
Class DictionaryOperations, method saveProperNouns.
/**
 * Writes the distinct lemmas of all proper-noun items in the default dictionaries to the file
 * {@code zemberek.proper.vocab} (path relative to the working directory), one lemma per line.
 *
 * <p>Items carrying the {@code Dummy} root attribute are skipped, as are items whose secondary
 * POS is not {@code ProperNoun}. The lemmas are sorted with
 * {@code Turkish.STRING_COMPARATOR_ASC} before being written.
 *
 * @throws IOException if loading the dictionaries or writing the output file fails
 */
public static void saveProperNouns() throws IOException {
  RootLexicon defaultLexicon = TurkishDictionaryLoader.loadDefaultDictionaries();

  // A set is used so that a lemma shared by several dictionary items is written only once.
  Set<String> properNounLemmas = new HashSet<>();
  for (DictionaryItem entry : defaultLexicon) {
    boolean dummy = entry.attributes.contains(RootAttribute.Dummy);
    if (!dummy && entry.secondaryPos == SecondaryPos.ProperNoun) {
      properNounLemmas.add(entry.lemma);
    }
  }

  List<String> sortedLemmas = new ArrayList<>(properNounLemmas);
  sortedLemmas.sort(Turkish.STRING_COMPARATOR_ASC);
  Files.write(Paths.get("zemberek.proper.vocab"), sortedLemmas);
}
Aggregations