use of zemberek.morphology.lexicon.NullSuffixForm in project zemberek-nlp by ahmetaa.
the class DynamicSuffixProviderTest method testRegister.
/**
 * Exercises form registration on a small hand-built suffix graph: checks how many forms are
 * counted after each registration, that re-registering is a no-op, and that null forms
 * generated from the same template with the same constraints compare equal.
 */
@Test
public void testRegister() {
DynamicSuffixProvider provider = new TestSuffixProvider();
Suffix sf1 = new Suffix("sf1");
Suffix sf2 = new Suffix("sf2");
Suffix sf3 = new Suffix("sf3");
Suffix sf4 = new Suffix("sf4");
// NOTE(review): sf5 reuses the name "sf4", and sf7..sf9 reuse "sf6" - confirm this is intentional.
Suffix sf5 = new Suffix("sf4");
Suffix sf6 = new Suffix("sf6");
Suffix sf7 = new Suffix("sf6");
Suffix sf8 = new Suffix("sf6");
Suffix sf9 = new Suffix("sf6");
// Concrete forms.
SuffixForm frm1 = provider.getForm("fs1", sf1, "abc");
SuffixForm frm2_1 = provider.getForm("fs2-1", sf2, "ali");
SuffixForm frm2_2 = provider.getForm("fs2-2", sf2, "kaan");
SuffixForm frm4 = provider.getForm("fs4", sf4, "akin");
SuffixForm frm5 = provider.getForm("frm5", sf6, "dadada");
SuffixForm frm6 = provider.getForm("frm6", sf9, "aguagu");
// Templates; null forms are generated from these during registration.
SuffixFormTemplate tmp1 = provider.getTemplate("tmp1", sf3, TerminationType.TRANSFER);
SuffixFormTemplate tmp2 = provider.getTemplate("tmp2", sf5, TerminationType.TRANSFER);
SuffixFormTemplate tmp3 = provider.getTemplate("tmp3", sf7, TerminationType.TRANSFER);
SuffixFormTemplate tmp4 = provider.getTemplate("tmp4", sf8, TerminationType.TRANSFER);
//
// /--frm5.............
// / \ \
// -->frm4- -.........->frm2_2
// / \ \ /
// tmp2-->frm1--->tmp1------
// \ \ / \
// \ .....C......-> frm2_1 --- Direct link ... indirect link.
// \........ /........../
// \
// \---- tmp3----tmp4--- frm6
// \............./......./
//
// Wire the graph shown above: "connections" are direct links, "indirectConnections" are indirect links.
tmp2.connections.add(frm4, frm1, frm5, tmp3);
tmp2.indirectConnections.add(tmp1, frm2_2, frm2_1, tmp4, frm6);
frm1.connections.add(tmp1);
frm1.indirectConnections.add(frm2_2);
frm4.connections.add(tmp1);
frm4.indirectConnections.add(frm2_1);
frm5.connections.add(tmp1);
frm5.indirectConnections.add(frm2_2);
tmp1.connections.add(frm2_1, frm2_2);
tmp3.connections.add(tmp4);
tmp3.indirectConnections.add(frm6);
tmp4.connections.add(frm6);
// Register tmp2. Registering a template should not affect the form count.
provider.registerForm(tmp2);
Assert.assertEquals(0, provider.getFormCount());
// After registering frm1, there should be 2 forms registered internally: frm1 and a null form from tmp1.
provider.registerForm(frm1);
Assert.assertEquals(2, provider.getFormCount());
// After registering frm4, there should be 4 forms registered internally: frm4 and another null form
// from tmp1 with different connections (frm4's indirect link is frm2_1, frm1's is frm2_2).
provider.registerForm(frm4);
Assert.assertEquals(4, provider.getFormCount());
// If we attempt to re-register, it should not affect the graph.
provider.registerForm(frm4);
Assert.assertEquals(4, provider.getFormCount());
// We register frm5. It should not add a new null form from tmp1: frm5 has the same connections as
// frm1 (direct tmp1, indirect frm2_2), so the null form generated when frm1 was registered is reused.
provider.registerForm(frm5);
Assert.assertEquals(5, provider.getFormCount());
provider.registerForms(frm2_1, frm2_2);
Assert.assertEquals(7, provider.getFormCount());
// Now we generate a null form from tmp2 and register it. We apply a constraint so frm1 is out of its connections.
SuffixData constraint = new SuffixData(tmp2.connections).remove(frm1).add(tmp2.indirectConnections);
NullSuffixForm null2_1 = provider.generateNullFormFromTemplate(tmp2, constraint);
Assert.assertFalse(null2_1.connections.contains(frm1));
Assert.assertTrue(null2_1.connections.contains(frm4));
Assert.assertTrue(null2_1.connections.contains(frm5));
provider.registerForm(null2_1.copy());
// Null forms for tmp3 and tmp4 will also be registered internally (7 + 3 = 10).
Assert.assertEquals(10, provider.getFormCount());
// We try to do the same again. It should not affect the graph.
SuffixData constraint2 = new SuffixData(tmp2.connections).remove(frm1).add(tmp2.indirectConnections);
NullSuffixForm null2_2 = provider.generateNullFormFromTemplate(tmp2, constraint2);
Assert.assertEquals(null2_1, null2_2);
provider.registerForm(null2_2.copy());
Assert.assertEquals(10, provider.getFormCount());
// An unconstrained null form from tmp2 (frm1 stays connected) is a different form and adds one more entry.
SuffixData constraint3 = new SuffixData(tmp2.allConnections());
NullSuffixForm null2_3 = provider.generateNullFormFromTemplate(tmp2, constraint3);
provider.registerForm(null2_3.copy());
// NOTE(review): assertNotSame checks reference identity only; assertNotEquals may be the intent - confirm.
Assert.assertNotSame(null2_3, null2_2);
Assert.assertEquals(11, provider.getFormCount());
}
use of zemberek.morphology.lexicon.NullSuffixForm in project zemberek-nlp by ahmetaa.
the class DynamicSuffixProvider method generateNullFormFromTemplate.
/**
 * Produces the null form that stands in for {@code templateForm} under the given constraints.
 *
 * <p>The template's direct and indirect connections are each copied and narrowed with
 * {@code retain(constraints)}. If an equal null form is already present in
 * {@code nullFormsUnprocessed}, that cached instance is returned; otherwise the new form gets
 * a fresh index and id, is cached, and is returned.
 *
 * @param templateForm template whose connections seed the null form
 * @param constraints connection set passed to {@code retain} for both connection groups
 * @return the cached null form if one matches, otherwise the newly created and cached one
 */
protected NullSuffixForm generateNullFormFromTemplate(SuffixFormTemplate templateForm, SuffixData constraints) {
  // The candidate starts with a placeholder index (-1) and an empty id; both are assigned only
  // if the candidate turns out to be new.
  // NOTE(review): the cache lookup presumably ignores index and id - confirm against
  // NullSuffixForm.equals/hashCode.
  NullSuffixForm candidate = new NullSuffixForm(-1, "", templateForm);
  candidate.connections = new SuffixData(templateForm.connections).retain(constraints);
  candidate.indirectConnections = new SuffixData(templateForm.indirectConnections).retain(constraints);

  if (!nullFormsUnprocessed.containsKey(candidate)) {
    candidate.index = getNewIndex();
    candidate.id = idMaker.get(templateForm.id);
    nullFormsUnprocessed.put(candidate, candidate);
    return candidate;
  }
  return nullFormsUnprocessed.get(candidate);
}
use of zemberek.morphology.lexicon.NullSuffixForm in project zemberek-nlp by ahmetaa.
the class DynamicSuffixProvider method registerForm.
/**
 * Registers a suffix form with this provider.
 *
 * <p>A {@code SuffixFormTemplate} is only added to the name lookup. For any other form that is
 * not yet in {@code suffixForms}, each template among its direct connections is replaced by a
 * copy of the null form generated from that template (constrained by the form's full connection
 * set), the indirect connections are cleared, the form is stored in both lookups, and finally
 * the generated null forms are registered recursively.
 *
 * @param formSet the form to register; its connection data is modified in place
 */
protected void registerForm(SuffixForm formSet) {
  // Templates never enter suffixForms; they are only made addressable by id.
  if (formSet instanceof SuffixFormTemplate) {
    formLookupByName.put(formSet.getId(), formSet);
    return;
  }
  // Already registered: nothing to do.
  if (suffixForms.containsKey(formSet)) {
    return;
  }

  // Taken before any connection is rewritten; used as the constraint for every generated null form.
  SuffixData constraintSource = formSet.allConnections();
  List<SuffixForm> replacedTemplates = new ArrayList<>();
  List<SuffixForm> generatedNullForms = new ArrayList<>();
  for (SuffixForm target : formSet.connections) {
    if (!(target instanceof SuffixFormTemplate)) {
      continue;
    }
    SuffixFormTemplate template = (SuffixFormTemplate) target;
    NullSuffixForm generated = generateNullFormFromTemplate(template, new SuffixData(constraintSource));
    // A copy is linked and registered, not the instance returned by the generator.
    generatedNullForms.add(generated.copy());
    replacedTemplates.add(target);
  }

  // Swap the template connections for their null forms.
  formSet.connections.remove(replacedTemplates);
  // Indirect connection data is not needed once the null forms exist.
  formSet.indirectConnections.clear();
  formSet.connections.add(generatedNullForms);

  // NOTE(review): this re-indexes forms that already have an index and leaves -1 untouched;
  // "== -1" may have been intended - confirm before changing.
  if (formSet.index != -1) {
    formSet.index = getNewIndex();
  }

  suffixForms.put(formSet, formSet);
  formLookupByName.put(formSet.getId(), formSet);

  // The form itself is stored first, then the null forms it now points to are registered.
  generatedNullForms.forEach(this::registerForm);
}
use of zemberek.morphology.lexicon.NullSuffixForm in project zemberek-nlp by ahmetaa.
the class TurkishSuffixes method getRootSet.
/**
 * Selects the suffix form that a dictionary item's root connects to.
 *
 * <p>With an empty {@code suffixConstraint}, one of the predefined default forms is returned,
 * chosen by the item's primary (and for nouns and pronouns, secondary) part of speech. With a
 * non-empty constraint, the matching template is chosen, a null form is generated from it under
 * the constraint, and a copy of that null form is registered and returned.
 *
 * @param item dictionary item whose part-of-speech data drives the selection
 * @param suffixConstraint constraint applied when generating the null form; empty means
 *     "use the default form"
 * @return the default root form, or a registered constrained null form
 */
@Override
public SuffixForm getRootSet(DictionaryItem item, SuffixData suffixConstraint) {
  if (suffixConstraint.isEmpty()) {
    switch (item.primaryPos) {
      case Noun:
        // Compound attributes take precedence over the secondary part of speech.
        if (item.hasAttribute(RootAttribute.CompoundP3sg)) {
          return Noun_Comp_P3sg;
        }
        if (item.hasAttribute(RootAttribute.CompoundP3sgRoot)) {
          return Noun_Comp_P3sg_Root;
        }
        switch (item.secondaryPos) {
          case ProperNoun:
            return ProperNoun_Default;
          case Time:
            return Noun_Time_Default;
          default:
            return Noun_Default;
        }
      case Adjective:
        return Adj_Default;
      case Verb:
        return Verb_Default;
      case Adverb:
        return Adv_Default;
      case Numeral:
        return Numeral_Default;
      case Interjection:
        return Interj_Default;
      case Question:
        return Ques_Default;
      case Conjunction:
        return Conj_Default;
      case PostPositive:
        return Postp_Default;
      case Punctuation:
        return Punc_Default;
      case Determiner:
        return Det_Default;
      case Duplicator:
        return Dup_Default;
      case Pronoun:
        switch (item.secondaryPos) {
          case DemonstrativePron:
            return DemonsPron_Default;
          case QuantitivePron:
            return QuantPron_Default;
          case QuestionPron:
            return QuesPron_Default;
          case ReflexivePron:
            return ReflexPron_Default;
          default:
            return PersPron_Default;
        }
      default:
        // Any other part of speech is treated as a noun.
        return Noun_Default;
    }
  } else {
    SuffixFormTemplate template;
    switch (item.primaryPos) {
      case Noun:
        template = Noun_TEMPLATE;
        break;
      case Adjective:
        template = Adj_TEMPLATE;
        break;
      case Verb:
        template = Verb_TEMPLATE;
        break;
      case PostPositive:
        template = Postp_Template;
        break;
      case Pronoun:
        if (item.secondaryPos == SecondaryPos.DemonstrativePron) {
          template = DemonsPron_TEMPLATE;
        } else if (item.secondaryPos == SecondaryPos.QuantitivePron) {
          template = QuantPron_TEMPLATE;
        } else if (item.secondaryPos == SecondaryPos.QuestionPron) {
          template = QuesPron_TEMPLATE;
        } else {
          // Select the template like the sibling branches do. Returning it directly would skip
          // the constraint, the null-form generation and the registration below.
          template = PersPron_TEMPLATE;
        }
        break;
      default:
        // Any other part of speech is treated as a noun.
        template = Noun_TEMPLATE;
    }
    // A copy is registered and returned, not the instance handed back by the generator.
    NullSuffixForm copy = generateNullFormFromTemplate(template, suffixConstraint).copy();
    registerForm(copy);
    return copy;
  }
}
use of zemberek.morphology.lexicon.NullSuffixForm in project zemberek-nlp by ahmetaa.
the class ZemberekNlpScripts method generateSuffixNames.
/**
 * Utility script kept as an ignored test: collects every form of {@code TurkishSuffixes} that
 * is not a {@code NullSuffixForm}, orders them by {@code getId()}, and writes their ids to the
 * file "suffix-list".
 *
 * @throws IOException if the output file cannot be written
 */
@Test
@Ignore("Not a Test.")
public void generateSuffixNames() throws IOException {
  TurkishSuffixes suffixes = new TurkishSuffixes();

  // Keep only forms that are not generated null forms.
  List<SuffixForm> namedForms = new ArrayList<>();
  for (SuffixForm candidate : suffixes.getAllForms()) {
    if (!(candidate instanceof NullSuffixForm)) {
      namedForms.add(candidate);
    }
  }

  List<String> ids = namedForms.stream()
      .sorted(Comparator.comparing(SuffixForm::getId))
      .map(form -> form.id)
      .collect(Collectors.toList());

  Files.write(Paths.get("suffix-list"), ids);
}
Aggregations