use of zemberek.morphology._morphotactics.Condition in project zemberek-nlp by ahmetaa.
the class TurkishMorphotactics method connectNounStates.
/**
* Connects the states of the noun part of the morphotactics graph by registering suffix
* transitions (state to state, with a surface template and an optional condition).
*
* <p>Turkish nouns always have Noun-Person-Possession-Case morphemes, even when there are no
* suffix characters. elma -> Noun:elma - A3sg:ε - Pnon:ε - Nom:ε (Third person singular, No
* possession, Nominal Case)
*
* <p>NOTE(review): transitions are registered one by one on their source state; the registration
* order presumably influences the order in which analyses are produced - confirm before
* reordering any statement here.
*/
public void connectNounStates() {
// ev-ε-?-?
noun_S.addEmpty(a3sg_S, notHave(RootAttribute.ImplicitPlural));
// ev-ler-?-?.
noun_S.add(a3pl_S, "lAr", notHave(RootAttribute.ImplicitPlural).and(notHave(RootAttribute.CompoundP3sg)));
// Allow only implicit plural `hayvanat`.
noun_S.addEmpty(a3pl_S, has(RootAttribute.ImplicitPlural));
// --- Compound Handling ---------
// for compound roots like "zeytinyağ-" generate two transitions
// NounCompound--(ε)--> a3sgCompound --(ε)--> pNonCompound_S --> Nom_S
nounCompoundRoot_S.addEmpty(a3sgCompound_S, has(RootAttribute.CompoundP3sgRoot));
a3sgCompound_S.addEmpty(pnonCompound_S);
a3sgCompound_S.add(p3pl_S, "lArI");
// ---- Proper noun handling -------
// TODO: consider adding single quote after an overhaul.
// nounProper_S.add(puncProperSeparator_S, "'");
nounProper_S.addEmpty(a3sg_S);
nounProper_S.add(a3pl_S, "lAr");
puncProperSeparator_S.addEmpty(a3sg_S);
puncProperSeparator_S.add(a3pl_S, "lAr");
// ---- For compound derivations -----------------
// nom_S is the non-terminal nominative state reached only through the compound root path above.
pnonCompound_S.addEmpty(nom_S);
nom_S.add(become_S, "lAş");
nom_S.add(acquire_S, "lAn");
// for "zeytinyağlı"
nom_S.add(with_S, "lI", new ContainsMorpheme(with, without).not());
// for "zeytinyağsız"
nom_S.add(without_S, "sIz", new ContainsMorpheme(with, without).not());
// for "zeytinyağlık"
nom_S.add(ness_S, "lI~k", not(new ContainsMorpheme(ness)));
nom_S.add(ness_S, "lI!ğ", not(new ContainsMorpheme(ness)));
// for "zeytinyağcı"
nom_S.add(agt_S, ">cI", not(new ContainsMorpheme(agt)));
// for "zeytinyağsı"
nom_S.add(justLike_S, "+msI", not(new ContainsMorpheme(justLike)));
// for "zeytinyağcık"
nom_S.add(dim_S, ">cI~k", Conditions.HAS_NO_SURFACE.andNot(new ContainsMorpheme(dim)));
nom_S.add(dim_S, ">cI!ğ", Conditions.HAS_NO_SURFACE.andNot(new ContainsMorpheme(dim)));
// "zeytinyağcağız"
nom_S.add(dim_S, "cAğIz", Conditions.HAS_NO_SURFACE);
// for compound roots like "zeytinyağ-lar-ı" generate two transitions
// NounCompound--(lAr)--> a3plCompound ---> p3sg_S, P1sg etc.
nounCompoundRoot_S.add(a3plCompound_S, "lAr", has(RootAttribute.CompoundP3sgRoot));
// but for pnon connection, we use lArI
nounCompoundRoot_S.add(a3plCompound2_S, "lArI", has(RootAttribute.CompoundP3sgRoot));
a3plCompound_S.add(p3sg_S, "I").add(p2sg_S, "In").add(p1sg_S, "Im").add(p1pl_S, "ImIz").add(p2pl_S, "InIz").add(p3pl_S, "I");
// this path is used for plural analysis (A3pl+Pnon+Nom) of compound words.
a3plCompound2_S.addEmpty(pnonCompound2_S);
pnonCompound2_S.addEmpty(nom_ST);
// ------
// Shared guard: the root must not carry the FamilyMember attribute.
Condition noFamily = notHave(RootAttribute.FamilyMember);
// ev-ε-ε-? Reject "annemler" etc.
a3sg_S.addEmpty(pnon_S, // ev
noFamily).add(p1sg_S, "Im", // evim
noFamily).add(p2sg_S, "In", // evin
noFamily).add(p3sg_S, "+sI", // evi, odası
noFamily).addEmpty(p3sg_S, // "zeytinyağı" has two analyses. Pnon and P3sg.
has(RootAttribute.CompoundP3sg)).add(p1pl_S, "ImIz", // evimiz
noFamily).add(p2pl_S, "InIz", // eviniz
noFamily).add(p3pl_S, "lArI", // evleri
noFamily);
// ev-ler-ε-?
a3pl_S.addEmpty(pnon_S, noFamily);
// ev-ler-im-?
a3pl_S.add(p1sg_S, "Im", noFamily).add(p2sg_S, "In", noFamily).addEmpty(p1sg_S, // for words like "annemler"
has(RootAttribute.ImplicitP1sg)).addEmpty(p2sg_S, // for words like "annenler"
has(RootAttribute.ImplicitP2sg)).add(p3sg_S, "I", noFamily).add(p1pl_S, "ImIz", noFamily).add(p2pl_S, "InIz", noFamily).add(p3pl_S, "I", noFamily);
// --- handle su - akarsu roots. ----
// These roots use fixed surface forms ("yum", "yun", "yu", ...) for the possessive suffixes.
nounSuRoot_S.addEmpty(a3sgSu_S);
nounSuRoot_S.add(a3pl_S, "lar");
a3sgSu_S.addEmpty(pnon_S).add(p1sg_S, "yum").add(p2sg_S, "yun").add(p3sg_S, "yu").add(p1pl_S, "yumuz").add(p2pl_S, "yunuz").add(p3pl_S, "lArI");
// ev-?-ε-ε (ev, evler).
pnon_S.addEmpty(nom_ST, notHave(RootAttribute.FamilyMember));
// Guard for the Equ (-cA) case: negated ContainsMorpheme over adj and the participle morphemes,
// OR-ed with the able-verb-pastPart sequence so that `yapabildiğince` is still accepted.
Condition equCond = new Conditions.ContainsMorpheme(adj, futPart, presPart, narrPart, pastPart).not().or(new Conditions.ContainsMorphemeSequence(able, verb, // allow `yapabildiğince`
pastPart));
// Do not allow "zeytinyağı-ya" etc.
pnon_S.add(dat_ST, "+yA", // ev-e
notHave(RootAttribute.CompoundP3sg)).add(abl_ST, ">dAn", // ev-den
notHave(RootAttribute.CompoundP3sg)).add(loc_ST, ">dA", // evde
notHave(RootAttribute.CompoundP3sg)).add(acc_ST, "+yI", // evi
notHave(RootAttribute.CompoundP3sg)).add(gen_ST, "+nIn", // evin, zeytinyağının
previousStateIsNot(a3sgSu_S)).add(gen_ST, "yIn", // suyun
previousStateIs(a3sgSu_S)).add(equ_ST, ">cA", // evce
notHave(RootAttribute.CompoundP3sg).and(equCond)).add(ins_ST, // evle, zeytinyağıyla
"+ylA");
// zeytinyağı-na
pnon_S.add(dat_ST, "+nA", has(RootAttribute.CompoundP3sg)).add(abl_ST, "+ndAn", // zeytinyağı-ndan
has(RootAttribute.CompoundP3sg)).add(loc_ST, "+ndA", // zeytinyağı-nda
has(RootAttribute.CompoundP3sg)).add(equ_ST, "+ncA", // zeytinyağı-nca
has(RootAttribute.CompoundP3sg).and(equCond)).add(acc_ST, "+nI", // zeytinyağı-nı
has(RootAttribute.CompoundP3sg));
// This transition is for words like "içeri" or "dışarı".
// Those words implicitly contain the Dative suffix.
// But it is also possible to add dative suffix +yA to those words such as "içeri-ye".
pnon_S.addEmpty(dat_ST, has(RootAttribute.ImplicitDative));
// --- Case suffixes after each possessive state ---
p1sg_S.addEmpty(// evim
nom_ST).add(dat_ST, // evime
"A").add(loc_ST, // evimde
"dA").add(abl_ST, // evimden
"dAn").add(ins_ST, // evimle
"lA").add(gen_ST, // evimin
"In").add(equ_ST, "cA", // evimce
equCond.or(new Conditions.ContainsMorpheme(pastPart))).add(acc_ST, // evimi
"I");
p2sg_S.addEmpty(// evin
nom_ST).add(dat_ST, // evine
"A").add(loc_ST, // evinde
"dA").add(abl_ST, // evinden
"dAn").add(ins_ST, // evinle
"lA").add(gen_ST, // evinin
"In").add(equ_ST, "cA", // evince
equCond.or(new Conditions.ContainsMorpheme(pastPart))).add(acc_ST, // evini
"I");
p3sg_S.addEmpty(// evi
nom_ST).add(dat_ST, // evine
"nA").add(loc_ST, // evinde
"ndA").add(abl_ST, // evinden
"ndAn").add(ins_ST, // eviyle
"ylA").add(gen_ST, // evinin
"nIn").add(equ_ST, "ncA", // evince
equCond.or(new Conditions.ContainsMorpheme(pastPart))).add(acc_ST, // evini
"nI");
p1pl_S.addEmpty(// evimiz
nom_ST).add(dat_ST, // evimize
"A").add(loc_ST, // evimizde
"dA").add(abl_ST, // evimizden
"dAn").add(ins_ST, // evimizle
"lA").add(gen_ST, // evimizin
"In").add(equ_ST, "cA", // evimizce
equCond.or(new Conditions.ContainsMorpheme(pastPart))).add(acc_ST, // evimizi
"I");
p2pl_S.addEmpty(// eviniz
nom_ST).add(dat_ST, // evinize
"A").add(loc_ST, // evinizde
"dA").add(abl_ST, // evinizden
"dAn").add(ins_ST, // evinizle
"lA").add(gen_ST, // evinizin
"In").add(equ_ST, "cA", // evinizce
equCond.or(new Conditions.ContainsMorpheme(pastPart))).add(acc_ST, // evinizi
"I");
// NOTE(review): unlike the other possessive states, the Equ transition below has no condition
// and uses the "+ncA" template - confirm this is intentional.
p3pl_S.addEmpty(// evleri
nom_ST).add(dat_ST, // evlerine
"nA").add(loc_ST, // evlerinde
"ndA").add(abl_ST, // evlerinden
"ndAn").add(ins_ST, // evleriyle
"ylA").add(gen_ST, // evlerinin
"nIn").add(equ_ST, // evlerince.
"+ncA").add(acc_ST, // evlerini
"nI");
// ev-ε-ε-ε-cik (evcik). Disallow this path if visitor contains any non empty surface suffix.
// There are two almost identical suffix transitions with templates ">cI~k" and ">cI!ğ"
// This was necessary for some simplification during analysis. This way there will be only one
// surface form generated for each transition.
nom_ST.add(dim_S, ">cI~k", Conditions.HAS_NO_SURFACE);
nom_ST.add(dim_S, ">cI!ğ", Conditions.HAS_NO_SURFACE);
// ev-ε-ε-ε-ceğiz (evceğiz)
nom_ST.add(dim_S, "cAğIz", Conditions.HAS_NO_SURFACE);
// connect dim to the noun root.
dim_S.addEmpty(noun_S);
nom_ST.add(ness_S, "lI~k", Conditions.CURRENT_GROUP_EMPTY.andNot(new ContainsMorpheme(ness)));
nom_ST.add(ness_S, "lI!ğ", Conditions.CURRENT_GROUP_EMPTY.andNot(new ContainsMorpheme(ness)));
// connect `ness` to the noun root.
ness_S.addEmpty(noun_S);
nom_ST.add(agt_S, ">cI", Conditions.CURRENT_GROUP_EMPTY.andNot(new ContainsMorpheme(adj, agt)));
// connect `agt` to the noun root.
agt_S.addEmpty(noun_S);
// Here we do not allow an adjective to pass.
// For example, adj->zero->noun->ε-ε-ε->zero->Verb is not acceptable because there is already an
// adj->zero->Verb path.
Condition noun2VerbZeroDerivationCondition = Conditions.HAS_TAIL.andNot(Conditions.CURRENT_GROUP_EMPTY.and(new Conditions.LastDerivationIs(adjZeroDeriv_S)));
nom_ST.addEmpty(nounZeroDeriv_S, noun2VerbZeroDerivationCondition);
// elma-ya-yım elma-ya-ydı
dat_ST.addEmpty(nounZeroDeriv_S, noun2VerbZeroDerivationCondition);
// elma-dan-ım elma-dan-dı
abl_ST.addEmpty(nounZeroDeriv_S, noun2VerbZeroDerivationCondition);
// elma-da-yım elma-da-ydı
loc_ST.addEmpty(nounZeroDeriv_S, noun2VerbZeroDerivationCondition);
// elma-yla-yım elma-yla-ydı
ins_ST.addEmpty(nounZeroDeriv_S, noun2VerbZeroDerivationCondition);
// elma-nın-ım elma-nın-dı
gen_ST.addEmpty(nounZeroDeriv_S, noun2VerbZeroDerivationCondition);
nounZeroDeriv_S.addEmpty(nVerb_S);
// meyve-li
Condition noSurfaceAfterDerivation = new NoSurfaceAfterDerivation();
nom_ST.add(with_S, "lI", noSurfaceAfterDerivation.andNot(new ContainsMorpheme(with, without)));
nom_ST.add(without_S, "sIz", noSurfaceAfterDerivation.andNot(new ContainsMorpheme(with, without, inf1)));
nom_ST.add(justLike_S, "+msI", noSurfaceAfterDerivation.andNot(new ContainsMorpheme(justLike, futPart, pastPart, presPart, adj)));
nom_ST.add(justLike_S, "ImsI", notHave(PhoneticAttribute.LastLetterVowel).and(noSurfaceAfterDerivation).andNot(new ContainsMorpheme(justLike, futPart, pastPart, presPart, adj)));
nom_ST.add(related_S, "sAl", noSurfaceAfterDerivation.andNot(new ContainsMorpheme(with, without, related)));
// connect With, Without, Related and JustLike to the Adjective root.
with_S.addEmpty(adj_ST);
without_S.addEmpty(adj_ST);
related_S.addEmpty(adj_ST);
justLike_S.addEmpty(adj_ST);
// meyve-de-ki
Condition notRelRepetition = new HasTailSequence(rel, adj, zero, noun, a3sg, pnon, loc).not();
loc_ST.add(rel_S, "ki", notRelRepetition.andNot(new Conditions.SecondaryRootIs(SecondaryPos.Time)));
rel_S.addEmpty(adj_ST);
// for covering dünkü, anki, yarınki etc.
// TODO: Use a more general grouping, not using Secondary Pos
Condition time = Conditions.CURRENT_GROUP_EMPTY.and(new Conditions.SecondaryRootIs(SecondaryPos.Time));
DictionaryItem dun = lexicon.getItemById("dün_Noun_Time");
DictionaryItem gun = lexicon.getItemById("gün_Noun_Time");
DictionaryItem bugun = lexicon.getItemById("bugün_Noun_Time");
DictionaryItem ileri = lexicon.getItemById("ileri_Noun");
DictionaryItem geri = lexicon.getItemById("geri_Noun");
DictionaryItem ote = lexicon.getItemById("öte_Noun");
DictionaryItem beri = lexicon.getItemById("beri_Noun");
// The roots dün, gün and bugün take the "kü" surface; other time roots take "ki".
Condition time2 = Conditions.rootIsAny(dun, gun, bugun);
nom_ST.add(rel_S, "ki", time.andNot(time2));
nom_ST.add(rel_S, "ki", Conditions.rootIsAny(ileri, geri, ote, beri));
nom_ST.add(rel_S, "kü", time2.and(time));
// After Genitive suffix, Rel suffix makes a Pronoun derivation.
gen_ST.add(relToPron_S, "ki");
relToPron_S.addEmpty(pronAfterRel_S);
// Become (lAş) and Acquire (lAn) derive verbs; both are guarded against adj and verbDeriv morphemes.
ContainsMorpheme verbDeriv = new ContainsMorpheme(inf1, inf2, inf3, pastPart, futPart);
nom_ST.add(become_S, "lAş", noSurfaceAfterDerivation.andNot(new ContainsMorpheme(adj)).andNot(verbDeriv));
become_S.addEmpty(verbRoot_S);
nom_ST.add(acquire_S, "lAn", noSurfaceAfterDerivation.andNot(new ContainsMorpheme(adj)).andNot(verbDeriv));
acquire_S.addEmpty(verbRoot_S);
// Inf1 mak makes noun derivation. However, it cannot get any possessive or plural suffix.
// Also cannot be followed by Dat, Gen, Acc case suffixes.
// So we create a path only for it.
nounInf1Root_S.addEmpty(a3sgInf1_S);
a3sgInf1_S.addEmpty(pnonInf1_S);
pnonInf1_S.addEmpty(nom_ST);
pnonInf1_S.add(abl_ST, "tAn");
pnonInf1_S.add(loc_ST, "tA");
pnonInf1_S.add(ins_ST, "lA");
// Dedicated path for the ActOf noun root: singular or plural ("lar"), no possession, nominative.
nounActOfRoot_S.addEmpty(a3sgActOf_S);
nounActOfRoot_S.add(a3plActOf_S, "lar");
a3sgActOf_S.addEmpty(pnonActOf);
a3plActOf_S.addEmpty(pnonActOf);
pnonActOf.addEmpty(nom_ST);
}
use of zemberek.morphology._morphotactics.Condition in project zemberek-nlp by ahmetaa.
the class TurkishMorphotactics method connectVerbAfterNounAdjStates.
/**
 * Connects the copular verb suffixes that can follow a noun or adjective after zero derivation
 * (e.g. "elma-yım", "elma-ydı-m", "elma-ymış-lar", "elma-yım-dır").
 *
 * <p>Layout: {@code nVerb_S} fans out to the tense/mood states (present, past, narrative,
 * conditional, while); each of those connects to the person agreement states; finally the
 * copula "dIr" may follow some agreement states.
 *
 * <p>Every person agreement transition is guarded by the condition of its own person
 * ({@code allowA1sgTrans} for A1sg, {@code allowA2sgTrans} for A2sg, {@code allowA1plTrans} for
 * A1pl, {@code allowA2plTrans} for A2pl, {@code allowA3plTrans} for A3pl). The guards reject an
 * agreement suffix that directly repeats the matching possessive (for preventing "elmamım",
 * "elmamdım").
 */
private void connectVerbAfterNounAdjStates() {
  // elma-..-ε-yım
  nVerb_S.addEmpty(nPresent_S);
  // elma-ydı, çorap-tı
  nVerb_S.add(nPast_S, "+y>dI");
  // elma-ymış
  nVerb_S.add(nNarr_S, "+ymIş");
  nVerb_S.add(nCond_S, "+ysA");
  nVerb_S.add(vWhile_S, "+yken");

  // word "değil" is special. It contains negative suffix implicitly. Also it behaves like
  // noun->Verb Zero morpheme derivation, because it cannot have most Verb suffixes.
  // So we connect it to a separate root state "nVerbDegil" instead of Verb
  DictionaryItem degilRoot = lexicon.getItemById("değil_Verb");
  nVerbDegil_S.addEmpty(nNeg_S, rootIs(degilRoot));
  // copy transitions from nVerb_S
  nNeg_S.copyOutgoingTransitionsFrom(nVerb_S);

  Condition noFamily = notHave(RootAttribute.FamilyMember);
  // for preventing elmamım, elmamdım
  // pP1sg_S, pDat_ST, pA1sg_S, pA1pl_S, pA3pl_S, pP2sg_S, pP1pl_S, pP3sg_S, pP1sg_S
  // TODO: below causes "beklemedeyiz" to fail.
  ContainsMorpheme verbDeriv = new ContainsMorpheme(inf1, inf2, inf3, pastPart, futPart);
  Condition allowA1sgTrans = noFamily
      .andNot(new Conditions.ContainsMorphemeSequence(p1sg, nom))
      .andNot(verbDeriv);
  Condition allowA2sgTrans = noFamily
      .andNot(new Conditions.ContainsMorphemeSequence(p2sg, nom))
      .andNot(verbDeriv);
  Condition allowA3plTrans = noFamily
      .andNot(new Conditions.PreviousGroupContains(a3pl_S))
      .andNot(new Conditions.ContainsMorphemeSequence(p3pl, nom))
      .andNot(verbDeriv);
  Condition allowA2plTrans = noFamily
      .andNot(new Conditions.ContainsMorphemeSequence(p2pl, nom))
      .andNot(verbDeriv);
  Condition allowA1plTrans = noFamily
      .andNot(new Conditions.ContainsMorphemeSequence(p1sg, nom))
      .andNot(new Conditions.ContainsMorphemeSequence(p1pl, nom))
      .andNot(verbDeriv);

  // elma-yım
  nPresent_S.add(nA1sg_ST, "+yIm", allowA1sgTrans);
  // elma-sın. Guarded by the second person singular condition, like the past, narrative and
  // conditional A2sg transitions below.
  nPresent_S.add(nA2sg_ST, "sIn", allowA2sgTrans);
  // elma-ε-ε-dır to non terminal A3sg. We do not allow ending with A3sg from empty Present tense.
  nPresent_S.addEmpty(nA3sg_S);
  // we allow `değil` to end with terminal A3sg from Present tense.
  nPresent_S.addEmpty(nA3sg_ST, rootIs(degilRoot));
  // elma-lar, elma-da-lar as Verb.
  // TODO: consider disallowing this for "elmalar" case.
  nPresent_S.add(nA3pl_ST, "lAr", notHave(RootAttribute.CompoundP3sg).andNot(new Conditions.PreviousGroupContainsMorpheme(inf1)).and(allowA3plTrans));

  // elma-ydı-m. Do not allow "elmaya-yım" (Oflazer accepts this)
  nPast_S.add(nA1sg_ST, "m", allowA1sgTrans);
  nNarr_S.add(nA1sg_ST, "Im", allowA1sgTrans);
  nPast_S.add(nA2sg_ST, "n", allowA2sgTrans);
  nNarr_S.add(nA2sg_ST, "sIn", allowA2sgTrans);
  nPast_S.add(nA1pl_ST, "k", allowA1plTrans);
  nNarr_S.add(nA1pl_ST, "Iz", allowA1plTrans);
  nPresent_S.add(nA1pl_ST, "+yIz", allowA1plTrans);
  // Second person plural agreement is guarded by the second person plural condition, in line
  // with the conditional A2pl transition further down.
  nPast_S.add(nA2pl_ST, "InIz", allowA2plTrans);
  nNarr_S.add(nA2pl_ST, "sInIz", allowA2plTrans);
  nPresent_S.add(nA2pl_ST, "sInIz", allowA2plTrans);
  // elma-ydı-lar.
  nPast_S.add(nA3pl_ST, "lAr", notHave(RootAttribute.CompoundP3sg).and(allowA3plTrans));
  // elma-ymış-lar.
  nNarr_S.add(nA3pl_ST, "lAr", notHave(RootAttribute.CompoundP3sg).and(allowA3plTrans));
  // elma-ydı-ε
  nPast_S.addEmpty(nA3sg_ST);
  // elma-ymış-ε
  nNarr_S.addEmpty(nA3sg_ST);

  // narr+cond is allowed but not past+cond
  nNarr_S.add(nCond_S, "sA");
  nCond_S.add(nA1sg_ST, "m", allowA1sgTrans);
  nCond_S.add(nA2sg_ST, "n", allowA2sgTrans);
  nCond_S.add(nA1pl_ST, "k", allowA1plTrans);
  nCond_S.add(nA2pl_ST, "nIz", allowA2plTrans);
  nCond_S.addEmpty(nA3sg_ST);
  nCond_S.add(nA3pl_ST, "lAr");

  // for not allowing "elma-ydı-m-dır": the copula is rejected when the current group already
  // contains the past, conditional or copula-before-A3pl state.
  Condition rejectNoCopula = new CurrentGroupContainsAny(nPast_S, nCond_S, nCopBeforeA3pl_S).not();
  // elma-yım-dır
  nA1sg_ST.add(nCop_ST, "dIr", rejectNoCopula);
  nA1pl_ST.add(nCop_ST, "dIr", rejectNoCopula);
  nA3sg_S.add(nCop_ST, ">dIr", rejectNoCopula);
  nA3pl_ST.add(nCop_ST, "dIr", rejectNoCopula);

  // Copula can come before A3pl.
  nPresent_S.add(nCopBeforeA3pl_S, ">dIr");
  nCopBeforeA3pl_S.add(nA3pl_ST, "lAr");
}
use of zemberek.morphology._morphotactics.Condition in project zemberek-nlp by ahmetaa.
the class TurkishMorphotactics method connectVerbAfterPronoun.
/**
 * Connects the copular verb suffixes that can follow a pronoun: the tense/mood states are
 * attached to {@code pvVerbRoot_S} first, then person agreement is attached to each of them, and
 * finally the copula "dIr".
 */
private void connectVerbAfterPronoun() {
  // Tense / mood layer: empty present, past, narrative and conditional.
  pvVerbRoot_S
      .addEmpty(pvPresent_S)
      .add(pvPast_S, "+ydI")
      .add(pvNarr_S, "+ymIş")
      .add(pvCond_S, "+ysA");

  // Guards on person agreement, based on the states seen in the previous group.
  Condition firstSingularOk = new Conditions.PreviousGroupContains(pP1sg_S).not();
  Condition firstPluralOk = new Conditions.PreviousGroupContains(pP1sg_S, pP2sg_S).not();
  Condition secondSingularOk = new Conditions.PreviousGroupContains(pP2sg_S).not();
  Condition secondPluralOk = new Conditions.PreviousGroupContains(pP2pl_S).not();

  // Present tense. Ending with A3sg from the empty Present tense is not allowed, hence the
  // non terminal A3sg target.
  // NOTE(review): the target below is the noun-verb state nA3sg_S, not pvA3sg_S, so the
  // pvA3sg_S -> pvCop_ST transition registered further down looks unreachable from this
  // method - confirm whether pvA3sg_S was intended.
  pvPresent_S
      .add(pvA1sg_ST, "+yIm", firstSingularOk)
      .add(pvA2sg_ST, "sIn", secondSingularOk)
      .addEmpty(nA3sg_S)
      .add(pvA1pl_ST, "+yIz", firstPluralOk)
      .add(pvA2pl_ST, "sInIz")
      .add(pvA3pl_ST, "lAr", new Conditions.PreviousGroupContains(pLoc_ST));

  // Past tense.
  pvPast_S
      .add(pvA1sg_ST, "m", firstSingularOk)
      .add(pvA2sg_ST, "n", secondSingularOk)
      .add(pvA1pl_ST, "k", firstPluralOk)
      .add(pvA2pl_ST, "InIz")
      .add(pvA3pl_ST, "lAr")
      .addEmpty(pvA3sg_ST);

  // Narrative; it may additionally be followed by the conditional
  // (narr+cond is allowed but not past+cond).
  pvNarr_S
      .add(pvA1sg_ST, "Im", firstSingularOk)
      .add(pvA2sg_ST, "sIn", secondSingularOk)
      .add(pvA1pl_ST, "Iz", firstPluralOk)
      .add(pvA2pl_ST, "sInIz")
      .add(pvA3pl_ST, "lAr")
      .addEmpty(pvA3sg_ST)
      .add(pvCond_S, "sA");

  // Conditional.
  pvCond_S
      .add(pvA1sg_ST, "m", firstSingularOk)
      .add(pvA2sg_ST, "n", secondSingularOk)
      .add(pvA1pl_ST, "k", firstPluralOk)
      .add(pvA2pl_ST, "nIz", secondPluralOk)
      .addEmpty(pvA3sg_ST)
      .add(pvA3pl_ST, "lAr");

  // Copula "dIr" after person agreement (like elma-yım-dır), rejected when the current group
  // already contains past, conditional or the copula-before-A3pl state (like "elma-ydı-m-dır").
  Condition copulaAllowed =
      new CurrentGroupContainsAny(pvPast_S, pvCond_S, pvCopBeforeA3pl_S).not();
  pvA1sg_ST.add(pvCop_ST, "dIr", copulaAllowed);
  pvA1pl_ST.add(pvCop_ST, "dIr", copulaAllowed);
  pvA3sg_S.add(pvCop_ST, ">dIr", copulaAllowed);
  pvA3pl_ST.add(pvCop_ST, "dIr", copulaAllowed);

  // The copula can also precede A3pl.
  pvPresent_S.add(pvCopBeforeA3pl_S, ">dIr");
  pvCopBeforeA3pl_S.add(pvA3pl_ST, "lAr");
}
use of zemberek.morphology._morphotactics.Condition in project zemberek-nlp by ahmetaa.
the class InterpretingAnalyzer method advance.
/**
 * Expands {@code path} by one transition.
 *
 * <p>Every outgoing transition of the path's current state is tried. A transition produces a new
 * path when its generated surface is a prefix of the remaining tail and its condition lets the
 * path pass. Rejections are recorded in {@code debugData} when it is not null.
 *
 * @param path the search path to extend
 * @param debugData collector for rejected transitions, or null when not debugging
 * @return the newly generated paths; empty when no transition matches
 */
private List<SearchPath> advance(SearchPath path, AnalysisDebugData debugData) {
  final boolean debugging = debugData != null;
  List<SearchPath> expanded = new ArrayList<>(2);

  for (MorphemeTransition outgoing : path.currentState.getOutgoing()) {
    SuffixTransition candidate = (SuffixTransition) outgoing;

    // Nothing is left to consume, so a transition that produces a surface cannot match.
    if (path.tail.isEmpty() && candidate.hasSurfaceForm()) {
      if (debugging) {
        debugData.rejectedTransitions.put(
            path, new RejectedTransition(candidate, "Empty surface expected."));
      }
      continue;
    }

    // The generated surface has to be a prefix of what remains of the input.
    String generated = SurfaceTransition.generate(candidate, path.phoneticAttributes);
    if (!path.tail.startsWith(generated)) {
      if (debugging) {
        debugData.rejectedTransitions.put(
            path, new RejectedTransition(candidate, "Surface Mismatch:" + generated));
      }
      continue;
    }

    // Debug only: find out which condition blocks the path and record it.
    if (debugging && candidate.getCondition() != null) {
      Condition guard = candidate.getCondition();
      Condition blocker = guard instanceof CombinedCondition
          ? ((CombinedCondition) guard).getFailingCondition(path)
          : (guard.accept(path) ? null : guard);
      if (blocker != null) {
        debugData.rejectedTransitions.put(
            path, new RejectedTransition(candidate, "Condition → " + blocker.toString()));
      }
    }

    // The actual condition check.
    if (!candidate.canPass(path)) {
      continue;
    }

    if (candidate.hasSurfaceForm()) {
      SurfaceTransition matched = new SurfaceTransition(generated, candidate);

      // When the surface consumes the whole tail there is no need to calculate phonetic
      // attributes; a copy of the existing ones is used.
      AttributeSet<PhoneticAttribute> nextAttributes;
      if (path.tail.equals(generated)) {
        nextAttributes = path.phoneticAttributes.copy();
      } else {
        nextAttributes = AttributesHelper.getMorphemicAttributes(generated, path.phoneticAttributes);
      }

      // Required for suffixes like `cik` and `ciğ`: an extra attribute is added depending on the
      // last template token. For "cik", ExpectsConsonant is added so only a consonant starting
      // suffix can follow. For "ciğ", ExpectsVowel is added and the path cannot terminate.
      nextAttributes.remove(PhoneticAttribute.CannotTerminate);
      SuffixTemplateToken finalToken = candidate.getLastTemplateToken();
      if (finalToken.type == TemplateTokenType.LAST_VOICED) {
        nextAttributes.add(PhoneticAttribute.ExpectsConsonant);
      } else if (finalToken.type == TemplateTokenType.LAST_NOT_VOICED) {
        nextAttributes.add(PhoneticAttribute.ExpectsVowel);
        nextAttributes.add(PhoneticAttribute.CannotTerminate);
      }

      expanded.add(path.getCopy(matched, nextAttributes));
    } else {
      // Epsilon transition: keep the existing attributes.
      expanded.add(path.getCopy(new SurfaceTransition("", candidate), path.phoneticAttributes));
    }
  }
  return expanded;
}
use of zemberek.morphology._morphotactics.Condition in project zemberek-nlp by ahmetaa.
the class TurkishMorphotactics method connectAdjectiveStates.
/**
 * Connects the states of the adjective part of the morphotactics graph: zero derivation to
 * noun and verb, the adjective derivations, and the suffixes that follow an adjective derived
 * from a verb.
 */
private void connectAdjectiveStates() {
  // Zero morpheme derivation. Words like "yeşil-i" require an Adj to Noun conversion, and since
  // noun suffixes are not derivational a "Zero" morpheme is used for it. The HAS_TAIL condition
  // is there because Adj->Zero->Noun+A3sg+Pnon+Nom is not allowed.
  adj_ST.addEmpty(adjZeroDeriv_S, Conditions.HAS_TAIL);
  adjZeroDeriv_S
      .addEmpty(noun_S)
      .addEmpty(nVerb_S);

  // Ly: leads to the adverb root.
  adj_ST.add(aLy_S, ">cA");
  aLy_S.addEmpty(advRoot_ST);

  // AsIf: leads back to the adjective.
  adj_ST.add(
      aAsIf_S, ">cA", new Conditions.ContainsMorpheme(asIf, ly, agt, with, justLike).not());
  aAsIf_S.addEmpty(adj_ST);

  // Agt: leads to the noun root.
  adj_ST.add(
      aAgt_S, ">cI", new Conditions.ContainsMorpheme(asIf, ly, agt, with, justLike).not());
  aAgt_S.addEmpty(noun_S);

  // JustLike (two templates), Become and Acquire.
  adj_ST
      .add(
          justLike_S,
          "+msI",
          new NoSurfaceAfterDerivation().and(new ContainsMorpheme(justLike).not()))
      .add(
          justLike_S,
          "ImsI",
          notHave(PhoneticAttribute.LastLetterVowel)
              .and(new NoSurfaceAfterDerivation())
              .and(new ContainsMorpheme(justLike).not()))
      .add(become_S, "lAş", new NoSurfaceAfterDerivation())
      .add(acquire_S, "lAn", new NoSurfaceAfterDerivation());

  // Possessive suffixes after an adjective derived from a verb, guarded by the previous
  // morpheme being futPart or pastPart.
  Condition afterParticiple = new Conditions.PreviousMorphemeIsAny(futPart, pastPart);
  adjAfterVerb_S
      .addEmpty(aPnon_ST, afterParticiple)
      .add(aP1sg_ST, "Im", afterParticiple)
      .add(aP2sg_ST, "In", afterParticiple)
      .add(aP3sg_ST, "I", afterParticiple)
      .add(aP1pl_ST, "ImIz", afterParticiple)
      .add(aP2pl_ST, "InIz", afterParticiple)
      .add(aP3pl_ST, "lArI", afterParticiple);

  // Ness: one transition per template, "lI~k" and "lI!ğ".
  adj_ST
      .add(ness_S, "lI~k")
      .add(ness_S, "lI!ğ");
  adjAfterVerb_ST
      .add(ness_S, "lI~k", new Conditions.PreviousMorphemeIs(aorPart))
      .add(ness_S, "lI!ğ", new Conditions.PreviousMorphemeIs(aorPart));
}
Aggregations