Use of zemberek.morphology.lexicon.DictionaryItem in the project zemberek-nlp by ahmetaa.
From the class DictionaryOperations, method saveProperNouns.
/**
 * Writes the distinct lemmas of all proper-noun items found in the default dictionaries to the
 * file {@code zemberek.proper.vocab}, sorted with {@code Turkish.STRING_COMPARATOR_ASC}.
 *
 * <p>Items flagged with {@code RootAttribute.Dummy} are left out.
 *
 * @throws IOException if writing the output file fails (possibly also raised while loading the
 *     dictionaries)
 */
public static void saveProperNouns() throws IOException {
  RootLexicon defaultLexicon = TurkishDictionaryLoader.loadDefaultDictionaries();

  // A set collapses items that share the same lemma.
  Set<String> properNounLemmas = new HashSet<>();
  for (DictionaryItem entry : defaultLexicon) {
    boolean dummy = entry.attributes.contains(RootAttribute.Dummy);
    if (!dummy && entry.secondaryPos == SecondaryPos.ProperNoun) {
      properNounLemmas.add(entry.lemma);
    }
  }

  List<String> sortedLemmas = new ArrayList<>(properNounLemmas);
  sortedLemmas.sort(Turkish.STRING_COMPARATOR_ASC);
  Files.write(Paths.get("zemberek.proper.vocab"), sortedLemmas);
}
Use of zemberek.morphology.lexicon.DictionaryItem in the project zemberek-nlp by ahmetaa.
From the class ChangeStem, method main.
/**
 * Demo entry point: takes the first lexicon item matching "poğaça" and passes it, together with
 * the word "simidime", to {@code ChangeStem.regenerate}.
 *
 * @param args command line arguments; not used
 * @throws IOException declared for the calls made here
 */
public static void main(String[] args) throws IOException {
  TurkishMorphology analyzer = TurkishMorphology.createWithDefaults();
  // The first matching lexicon entry is used as the replacement stem.
  DictionaryItem replacementStem = analyzer.getLexicon().getMatchingItems("poğaça").get(0);
  ChangeStem stemChanger = new ChangeStem(analyzer);
  stemChanger.regenerate("simidime", replacementStem);
}
Use of zemberek.morphology.lexicon.DictionaryItem in the project zemberek-nlp by ahmetaa.
From the class StemNodeGeneratorTest, method empty.
/**
 * Verifies the number of stem nodes the generator produces: two for "kitap" and one for "odun".
 */
@Test
public void empty() {
  StemNodeGenerator stemNodeGenerator = new StemNodeGenerator(suffixes);

  StemNode[] kitapNodes = stemNodeGenerator.generate(getDictionaryItem("kitap"));
  Assert.assertEquals(2, kitapNodes.length);

  StemNode[] odunNodes = stemNodeGenerator.generate(getDictionaryItem("odun"));
  Assert.assertEquals(1, odunNodes.length);
}
Use of zemberek.morphology.lexicon.DictionaryItem in the project zemberek-nlp by ahmetaa.
From the class WordAnalyzerTest, method getItems.
/**
 * Converts dictionary lines into dictionary items, one item per line and in the same order.
 *
 * @param lines dictionary lines, each passed to {@code TurkishDictionaryLoader.loadFromString}
 * @return a new list holding the item loaded from each line
 */
private List<DictionaryItem> getItems(String[] lines) {
  TurkishDictionaryLoader dictionaryLoader = new TurkishDictionaryLoader();
  List<DictionaryItem> parsedItems = new ArrayList<>(lines.length);
  for (String dictionaryLine : lines) {
    DictionaryItem parsed = dictionaryLoader.loadFromString(dictionaryLine);
    parsedItems.add(parsed);
  }
  return parsedItems;
}
Use of zemberek.morphology.lexicon.DictionaryItem in the project zemberek-nlp by ahmetaa.
From the class ZemberekNlpScripts, method findZemberekMissingOrDifferent.
/**
 * Script-style routine (ignored as a test) that compares the entries of
 * {@code dictionary-from-analysis.txt} with the default Zemberek lexicon.
 *
 * <p>Steps:
 * <ol>
 *   <li>Load the analysis file from {@code DATA_PATH/out}, dropping lines that contain "Prop".
 *   <li>For every lexicon item build a {@code "lemma [P:primary]"} or
 *       {@code "lemma [P:primary,secondary]"} string, using the circumflex-normalized lemma.
 *       Plain nouns additionally get an {@code [P:Adj]} entry and plain adjectives a
 *       {@code [P:Noun]} entry.
 *   <li>Write the sorted lexicon strings to {@code found-in-zemberek}.
 *   <li>Write the analysis entries that have no lexicon counterpart to
 *       {@code not-found-in-zemberek}.
 * </ol>
 *
 * @throws IOException if reading the input or writing either output file fails
 */
@Test
@Ignore("Not a Test.")
public void findZemberekMissingOrDifferent() throws IOException {
  Path outDir = DATA_PATH.resolve("out");

  // Entries taken from the analysis output; anything mentioning "Prop" is skipped.
  LinkedHashSet<String> fromAnalysis =
      TextUtil.loadLinesWithText(outDir.resolve("dictionary-from-analysis.txt"))
          .stream()
          .filter(line -> !line.contains("Prop"))
          .collect(Collectors.toCollection(LinkedHashSet::new));

  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  List<String> lexiconEntries = new ArrayList<>(morphology.getLexicon().size());
  for (DictionaryItem entry : morphology.getLexicon()) {
    String normalizedLemma = TurkishAlphabet.INSTANCE.normalizeCircumflex(entry.lemma);
    String primary = entry.primaryPos.shortForm;

    SecondaryPos secondary = entry.secondaryPos;
    boolean noSecondary =
        secondary == null
            || secondary == SecondaryPos.UnknownSec
            || secondary == SecondaryPos.None;
    String posTag;
    if (noSecondary) {
      posTag = "[P:" + primary + "]";
    } else {
      posTag = "[P:" + primary + "," + secondary.shortForm + "]";
    }

    lexiconEntries.add(normalizedLemma + " " + posTag);
    // Plain nouns and plain adjectives are also registered under the other category.
    if (posTag.equals("[P:Noun]")) {
      lexiconEntries.add(normalizedLemma + " [P:Adj]");
    } else if (posTag.equals("[P:Adj]")) {
      lexiconEntries.add(normalizedLemma + " [P:Noun]");
    }
  }

  lexiconEntries.sort(turkishCollator::compare);
  Files.write(outDir.resolve("found-in-zemberek"), lexiconEntries);

  // What remains after removing every lexicon entry is missing from, or different in, Zemberek.
  LinkedHashSet<String> inLexicon = new LinkedHashSet<>(lexiconEntries);
  fromAnalysis.removeAll(inLexicon);
  Files.write(outDir.resolve("not-found-in-zemberek"), fromAnalysis);
}
Aggregations