Search in sources:

Example 1 with DictionaryItem

use of zemberek.morphology.lexicon.DictionaryItem in project zemberek-nlp by ahmetaa.

In class DictionaryOperations, method saveProperNouns.

/**
 * Collects the lemmas of every proper-noun entry in the default dictionaries and writes them to
 * the file {@code zemberek.proper.vocab} (relative path), one lemma per line.
 *
 * <p>Entries flagged with {@code RootAttribute.Dummy} are skipped. Lemmas are de-duplicated
 * through a set and then ordered with {@code Turkish.STRING_COMPARATOR_ASC} before being written.
 *
 * @throws IOException if an I/O error occurs, e.g. while writing the output file
 */
public static void saveProperNouns() throws IOException {
    RootLexicon defaultLexicon = TurkishDictionaryLoader.loadDefaultDictionaries();
    Set<String> properNounLemmas = new HashSet<>();
    for (DictionaryItem entry : defaultLexicon) {
        boolean isDummy = entry.attributes.contains(RootAttribute.Dummy);
        boolean isProperNoun = entry.secondaryPos == SecondaryPos.ProperNoun;
        if (!isDummy && isProperNoun) {
            properNounLemmas.add(entry.lemma);
        }
    }
    // Copy into a list because the hash set has no defined order.
    List<String> sortedLemmas = new ArrayList<>(properNounLemmas);
    sortedLemmas.sort(Turkish.STRING_COMPARATOR_ASC);
    Files.write(Paths.get("zemberek.proper.vocab"), sortedLemmas);
}
Also used : DictionaryItem(zemberek.morphology.lexicon.DictionaryItem) ArrayList(java.util.ArrayList) RootLexicon(zemberek.morphology.lexicon.RootLexicon) HashSet(java.util.HashSet)

Example 2 with DictionaryItem

use of zemberek.morphology.lexicon.DictionaryItem in project zemberek-nlp by ahmetaa.

In class ChangeStem, method main.

/**
 * Demo entry point: builds a default {@code TurkishMorphology}, takes the first lexicon item
 * matching "poğaça", and passes it together with the word "simidime" to
 * {@code ChangeStem.regenerate}.
 *
 * @param args command-line arguments (not used)
 * @throws IOException declared by the method; presumably raised while the morphology resources
 *     are loaded (verify against {@code TurkishMorphology.createWithDefaults})
 */
public static void main(String[] args) throws IOException {
    TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
    // Only the first match is used; get(0) throws if the lexicon has no such item.
    DictionaryItem replacementStem = morphology.getLexicon().getMatchingItems("poğaça").get(0);
    ChangeStem stemChanger = new ChangeStem(morphology);
    stemChanger.regenerate("simidime", replacementStem);
}
Also used : DictionaryItem(zemberek.morphology.lexicon.DictionaryItem) TurkishMorphology(zemberek.morphology.analysis.tr.TurkishMorphology)

Example 3 with DictionaryItem

use of zemberek.morphology.lexicon.DictionaryItem in project zemberek-nlp by ahmetaa.

In class StemNodeGeneratorTest, method empty.

/**
 * Checks how many stem nodes {@code StemNodeGenerator} produces for two dictionary items:
 * two nodes are expected for "kitap" and one for "odun".
 */
@Test
public void empty() {
    StemNodeGenerator nodeGenerator = new StemNodeGenerator(suffixes);

    StemNode[] kitapNodes = nodeGenerator.generate(getDictionaryItem("kitap"));
    Assert.assertEquals(2, kitapNodes.length);

    StemNode[] odunNodes = nodeGenerator.generate(getDictionaryItem("odun"));
    Assert.assertEquals(1, odunNodes.length);
}
Also used : DictionaryItem(zemberek.morphology.lexicon.DictionaryItem) StemNodeGenerator(zemberek.morphology.lexicon.tr.StemNodeGenerator) Test(org.junit.Test)

Example 4 with DictionaryItem

use of zemberek.morphology.lexicon.DictionaryItem in project zemberek-nlp by ahmetaa.

In class WordAnalyzerTest, method getItems.

/**
 * Converts dictionary lines into dictionary items using a fresh {@code TurkishDictionaryLoader}.
 *
 * @param lines dictionary entries, one per array element, in the format accepted by
 *     {@code TurkishDictionaryLoader.loadFromString}
 * @return the loaded items, in the same order as {@code lines}
 */
private List<DictionaryItem> getItems(String[] lines) {
    TurkishDictionaryLoader dictionaryLoader = new TurkishDictionaryLoader();
    List<DictionaryItem> loaded = new ArrayList<>(lines.length);
    for (int i = 0; i < lines.length; i++) {
        loaded.add(dictionaryLoader.loadFromString(lines[i]));
    }
    return loaded;
}
Also used : DictionaryItem(zemberek.morphology.lexicon.DictionaryItem) TurkishDictionaryLoader(zemberek.morphology.lexicon.tr.TurkishDictionaryLoader) ArrayList(java.util.ArrayList)

Example 5 with DictionaryItem

use of zemberek.morphology.lexicon.DictionaryItem in project zemberek-nlp by ahmetaa.

In class ZemberekNlpScripts, method findZemberekMissingOrDifferent.

/**
 * Utility script (ignored as a test) that compares an externally produced dictionary listing
 * with the entries of the default Zemberek lexicon.
 *
 * <p>Reads {@code dictionary-from-analysis.txt} from the {@code out} directory under
 * {@code DATA_PATH}, dropping lines that contain "Prop". Every lexicon item is rendered as
 * {@code "<lemma> [P:<primary>]"} or {@code "<lemma> [P:<primary>,<secondary>]"}; plain nouns are
 * additionally listed as adjectives and plain adjectives as nouns. The sorted rendering is
 * written to {@code found-in-zemberek}, and the input lines that have no counterpart in it are
 * written to {@code not-found-in-zemberek}.
 *
 * @throws IOException if reading the input or writing either output file fails
 */
@Test
@Ignore("Not a Test.")
public void findZemberekMissingOrDifferent() throws IOException {
    Path outDir = DATA_PATH.resolve("out");

    List<String> nonProperLines = TextUtil.loadLinesWithText(outDir.resolve("dictionary-from-analysis.txt"))
        .stream()
        .filter(s -> !s.contains("Prop"))
        .collect(Collectors.toList());
    LinkedHashSet<String> oSet = new LinkedHashSet<>(nonProperLines);

    TurkishMorphology parser = TurkishMorphology.createWithDefaults();
    List<String> zemberekTypes = new ArrayList<>(parser.getLexicon().size());
    for (DictionaryItem item : parser.getLexicon()) {
        String lemma = TurkishAlphabet.INSTANCE.normalizeCircumflex(item.lemma);
        String primaryString = item.primaryPos.shortForm;

        // A missing, unknown or "None" secondary POS is left out of the tag.
        SecondaryPos secondary = item.secondaryPos;
        String pos;
        if (secondary == null || secondary == SecondaryPos.UnknownSec || secondary == SecondaryPos.None) {
            pos = "[P:" + primaryString + "]";
        } else {
            pos = "[P:" + primaryString + "," + secondary.shortForm + "]";
        }

        zemberekTypes.add(lemma + " " + pos);
        // Plain nouns and plain adjectives are also emitted under the other tag.
        if (pos.equals("[P:Noun]")) {
            zemberekTypes.add(lemma + " [P:Adj]");
        } else if (pos.equals("[P:Adj]")) {
            zemberekTypes.add(lemma + " [P:Noun]");
        }
    }

    zemberekTypes.sort(turkishCollator::compare);
    Files.write(outDir.resolve("found-in-zemberek"), zemberekTypes);

    // Whatever is left in oSet after removing the Zemberek entries was not found in Zemberek.
    LinkedHashSet<String> zSet = new LinkedHashSet<>(zemberekTypes);
    oSet.removeAll(zSet);
    Files.write(outDir.resolve("not-found-in-zemberek"), oSet);
}
Also used : Path(java.nio.file.Path) LinkedHashSet(java.util.LinkedHashSet) DictionaryItem(zemberek.morphology.lexicon.DictionaryItem) ArrayList(java.util.ArrayList) TurkishMorphology(zemberek.morphology.analysis.tr.TurkishMorphology) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

DictionaryItem (zemberek.morphology.lexicon.DictionaryItem): 18, ArrayList (java.util.ArrayList): 8, TurkishMorphology (zemberek.morphology.analysis.tr.TurkishMorphology): 8, Test (org.junit.Test): 4, Path (java.nio.file.Path): 3, HashSet (java.util.HashSet): 3, LinkedHashSet (java.util.LinkedHashSet): 3, Ignore (org.junit.Ignore): 3, WordAnalysis (zemberek.morphology.analysis.WordAnalysis): 3, TurkishDictionaryLoader (zemberek.morphology.lexicon.tr.TurkishDictionaryLoader): 3, PrintWriter (java.io.PrintWriter): 2, ContainsMorpheme (zemberek.morphology._morphotactics.Conditions.ContainsMorpheme): 2, RootLexicon (zemberek.morphology.lexicon.RootLexicon): 2, StemAndEnding (zemberek.morphology.structure.StemAndEnding): 2, List (java.util.List): 1, Matcher (java.util.regex.Matcher): 1, PrimaryPos (zemberek.core.turkish.PrimaryPos): 1, CurrentGroupContainsAny (zemberek.morphology._morphotactics.Conditions.CurrentGroupContainsAny): 1, HasTailSequence (zemberek.morphology._morphotactics.Conditions.HasTailSequence): 1, NoSurfaceAfterDerivation (zemberek.morphology._morphotactics.Conditions.NoSurfaceAfterDerivation): 1