Search in sources :

Example 1 with SuffixForm

use of zemberek.morphology.lexicon.SuffixForm in project zemberek-nlp by ahmetaa.

the class DynamicSuffixProviderTest method testRegister.

/**
 * Builds a small graph of suffix forms and templates, wires their direct and indirect
 * connections, and then registers them one at a time, checking
 * {@code provider.getFormCount()} after every registration step.
 */
@Test
public void testRegister() {
    DynamicSuffixProvider provider = new TestSuffixProvider();
    // NOTE(review): sf5 reuses the name "sf4" and sf7..sf9 reuse the name "sf6".
    // Presumably intentional (distinct Suffix instances sharing a name) - confirm.
    Suffix sf1 = new Suffix("sf1");
    Suffix sf2 = new Suffix("sf2");
    Suffix sf3 = new Suffix("sf3");
    Suffix sf4 = new Suffix("sf4");
    Suffix sf5 = new Suffix("sf4");
    Suffix sf6 = new Suffix("sf6");
    Suffix sf7 = new Suffix("sf6");
    Suffix sf8 = new Suffix("sf6");
    Suffix sf9 = new Suffix("sf6");
    // Concrete forms obtained from the provider.
    SuffixForm frm1 = provider.getForm("fs1", sf1, "abc");
    SuffixForm frm2_1 = provider.getForm("fs2-1", sf2, "ali");
    SuffixForm frm2_2 = provider.getForm("fs2-2", sf2, "kaan");
    SuffixForm frm4 = provider.getForm("fs4", sf4, "akin");
    SuffixForm frm5 = provider.getForm("frm5", sf6, "dadada");
    SuffixForm frm6 = provider.getForm("frm6", sf9, "aguagu");
    // Templates, all created with TerminationType.TRANSFER.
    SuffixFormTemplate tmp1 = provider.getTemplate("tmp1", sf3, TerminationType.TRANSFER);
    SuffixFormTemplate tmp2 = provider.getTemplate("tmp2", sf5, TerminationType.TRANSFER);
    SuffixFormTemplate tmp3 = provider.getTemplate("tmp3", sf7, TerminationType.TRANSFER);
    SuffixFormTemplate tmp4 = provider.getTemplate("tmp4", sf8, TerminationType.TRANSFER);
    // Sketch of the graph built below.
    // NOTE(review): the diagram's column alignment appears to have been lost; treat the
    // connections.add(...) / indirectConnections.add(...) calls below as authoritative.
    //
    // /--frm5.............
    // /         \         \
    // -->frm4- -.........->frm2_2
    // /          \ \          /
    // tmp2-->frm1--->tmp1------
    // \      \      /        \
    // \      .....C......-> frm2_1        --- Direct link  ... indirect link.
    // \........ /........../
    // \
    // \---- tmp3----tmp4--- frm6
    // \............./......./
    //
    tmp2.connections.add(frm4, frm1, frm5, tmp3);
    tmp2.indirectConnections.add(tmp1, frm2_2, frm2_1, tmp4, frm6);
    frm1.connections.add(tmp1);
    frm1.indirectConnections.add(frm2_2);
    frm4.connections.add(tmp1);
    frm4.indirectConnections.add(frm2_1);
    frm5.connections.add(tmp1);
    frm5.indirectConnections.add(frm2_2);
    tmp1.connections.add(frm2_1, frm2_2);
    tmp3.connections.add(tmp4);
    tmp3.indirectConnections.add(frm6);
    tmp4.connections.add(frm6);
    // Register tmp2 (a template). It should not affect the graph.
    provider.registerForm(tmp2);
    Assert.assertEquals(0, provider.getFormCount());
    // After registering frm1, there should be 2 forms registered internally: frm1 and a null form from tmp1.
    provider.registerForm(frm1);
    Assert.assertEquals(2, provider.getFormCount());
    // After registering frm4, there should be 4 forms registered internally: frm4 and another
    // null form from tmp1 with different connections.
    provider.registerForm(frm4);
    Assert.assertEquals(4, provider.getFormCount());
    // Re-registering the same form should not affect the graph.
    provider.registerForm(frm4);
    Assert.assertEquals(4, provider.getFormCount());
    // Register frm5. It should not generate a new null morpheme from tmp1 because one was
    // already generated when frm4 was registered.
    provider.registerForm(frm5);
    Assert.assertEquals(5, provider.getFormCount());
    provider.registerForms(frm2_1, frm2_2);
    Assert.assertEquals(7, provider.getFormCount());
    // Now generate a null morpheme from tmp2 and register it. The constraint removes frm1
    // from the connections.
    SuffixData constraint = new SuffixData(tmp2.connections).remove(frm1).add(tmp2.indirectConnections);
    NullSuffixForm null2_1 = provider.generateNullFormFromTemplate(tmp2, constraint);
    Assert.assertFalse(null2_1.connections.contains(frm1));
    Assert.assertTrue(null2_1.connections.contains(frm4));
    Assert.assertTrue(null2_1.connections.contains(frm5));
    provider.registerForm(null2_1.copy());
    // Null morphemes for tmp3 and tmp4 will also be registered internally.
    Assert.assertEquals(10, provider.getFormCount());
    // Doing the same again with an identical constraint should not affect the graph.
    SuffixData constraint2 = new SuffixData(tmp2.connections).remove(frm1).add(tmp2.indirectConnections);
    NullSuffixForm null2_2 = provider.generateNullFormFromTemplate(tmp2, constraint2);
    Assert.assertEquals(null2_1, null2_2);
    provider.registerForm(null2_2.copy());
    Assert.assertEquals(10, provider.getFormCount());
    // A constraint built from all of tmp2's connections (frm1 not removed) is expected to
    // add exactly one more registered form.
    SuffixData constraint3 = new SuffixData(tmp2.allConnections());
    NullSuffixForm null2_3 = provider.generateNullFormFromTemplate(tmp2, constraint3);
    provider.registerForm(null2_3.copy());
    // NOTE(review): assertNotSame only checks reference identity, which holds for any two
    // distinct objects; an equals-based inequality check may have been intended - confirm.
    Assert.assertNotSame(null2_3, null2_2);
    Assert.assertEquals(11, provider.getFormCount());
}
Also used : Suffix(zemberek.morphology.lexicon.Suffix) SuffixForm(zemberek.morphology.lexicon.SuffixForm) NullSuffixForm(zemberek.morphology.lexicon.NullSuffixForm) SuffixFormTemplate(zemberek.morphology.lexicon.SuffixFormTemplate) Test(org.junit.Test)

Example 2 with SuffixForm

use of zemberek.morphology.lexicon.SuffixForm in project zemberek-nlp by ahmetaa.

the class DynamicSuffixProviderTest method testCausativeMock.

/**
 * Wires a miniature verb / causative / positive / future graph in which the two causative
 * forms can reach each other indirectly, registers every form and template, and checks the
 * resulting form count. The paths from both causative forms are dumped afterwards.
 */
@Test
public void testCausativeMock() {
    DynamicSuffixProvider provider = new TestSuffixProvider();

    // Suffixes.
    Suffix verbSuffix = new Suffix("verb");
    Suffix causativeSuffix = new Suffix("causative");
    Suffix positiveSuffix = new Suffix("positive");
    Suffix futureSuffix = new Suffix("future");

    // Concrete forms.
    SuffixForm causT = provider.getForm("causative-t", causativeSuffix, "t");
    SuffixForm causTir = provider.getForm("causative-tir", causativeSuffix, "tir");
    SuffixForm futureAcak = provider.getForm("future-acak", futureSuffix, "acak");

    // Templates.
    SuffixFormTemplate verbTemplate = provider.getTemplate("verb_temp", verbSuffix);
    SuffixFormTemplate verbToVerb = provider.getTemplate("verb2verb", verbSuffix);
    SuffixFormTemplate positiveTemplate = provider.getTemplate("positive_temp", positiveSuffix);

    // Verb root template: direct links to the two other templates, indirect links to all forms.
    verbTemplate.connections.add(positiveTemplate, verbToVerb);
    verbTemplate.indirectConnections.add(causT, causTir, futureAcak);

    // Verb-to-verb template leads directly to both causative forms.
    verbToVerb.connections.add(causT, causTir);

    // Each causative form links back to the templates and indirectly to the other causative.
    causT.connections.add(positiveTemplate, verbToVerb);
    causT.indirectConnections.add(futureAcak, causTir);
    causTir.connections.add(positiveTemplate, verbToVerb);
    causTir.indirectConnections.add(futureAcak, causT);

    // Positive template leads to the future form.
    positiveTemplate.connections.add(futureAcak);

    provider.registerForms(verbTemplate, verbToVerb, causTir, causT, positiveTemplate, futureAcak);
    Assert.assertEquals(6, provider.getFormCount());

    provider.dumpPath(causT, 2);
    provider.dumpPath(causTir, 2);
}
Also used : Suffix(zemberek.morphology.lexicon.Suffix) SuffixForm(zemberek.morphology.lexicon.SuffixForm) NullSuffixForm(zemberek.morphology.lexicon.NullSuffixForm) SuffixFormTemplate(zemberek.morphology.lexicon.SuffixFormTemplate) Test(org.junit.Test)

Example 3 with SuffixForm

use of zemberek.morphology.lexicon.SuffixForm in project zemberek-nlp by ahmetaa.

the class SuffixDataTest method equalityTest.

/**
 * Checks {@code SuffixForm.equals}: forms built from identical constructor arguments are
 * equal in both directions, and the result changes with the constructor arguments and with
 * the contents of {@code connections}.
 */
@Test
public void equalityTest() {
    Suffix sharedSuffix = new Suffix("SUFFIX");

    // Two forms built from identical arguments are equal, symmetrically.
    SuffixForm first = new SuffixForm(1, "sf1", sharedSuffix, "lAr", TerminationType.TERMINAL);
    SuffixForm twin = new SuffixForm(1, "sf1", sharedSuffix, "lAr", TerminationType.TERMINAL);
    Assert.assertTrue(first.equals(twin));
    Assert.assertTrue(twin.equals(first));

    // A form built from different arguments is not equal.
    SuffixForm other = new SuffixForm(3, "sf3", sharedSuffix, "k", TerminationType.TERMINAL);
    Assert.assertFalse(first.equals(other));

    // Connections take part in equality: adding to one side breaks it, adding to both restores it.
    first.connections.add(other);
    Assert.assertFalse(first.equals(twin));
    twin.connections.add(other);
    Assert.assertTrue(first.equals(twin));
    first.connections.remove(other);

    // A form with a different id, name and termination type is not equal to the twin.
    SuffixForm nonTerminal = new SuffixForm(4, "sf4", sharedSuffix, "lAr", TerminationType.NON_TERMINAL);
    Assert.assertFalse(nonTerminal.equals(twin));
}
Also used : Suffix(zemberek.morphology.lexicon.Suffix) SuffixForm(zemberek.morphology.lexicon.SuffixForm) Test(org.junit.Test)

Example 4 with SuffixForm

use of zemberek.morphology.lexicon.SuffixForm in project zemberek-nlp by ahmetaa.

the class SuffixFormSetTest method equalityTest.

/**
 * Checks membership in a {@code SuffixData} built from two forms: both added forms must be
 * reported as contained, and a third form that was never added must not be.
 */
@Test
public void equalityTest() {
    Suffix suffix = new Suffix("SUFFIX");
    SuffixForm sf1 = new SuffixForm(1, "sf1", suffix, "lAr", TerminationType.TERMINAL);
    SuffixForm sf2 = new SuffixForm(2, "sf2", suffix, "lAr", TerminationType.TERMINAL);
    SuffixForm sf3 = new SuffixForm(3, "sf3", suffix, "k", TerminationType.TERMINAL);
    // Give each form a distinct index; sf3 gets one far away from the other two.
    sf1.index = 0;
    sf2.index = 1;
    sf3.index = 100;
    SuffixData data1 = new SuffixData(sf1, sf2);
    Assert.assertTrue(data1.contains(sf1));
    Assert.assertTrue(data1.contains(sf2));
    // sf3 was prepared above but never added, so it must not be reported as a member.
    Assert.assertFalse(data1.contains(sf3));
}
Also used : Suffix(zemberek.morphology.lexicon.Suffix) SuffixForm(zemberek.morphology.lexicon.SuffixForm) Test(org.junit.Test)

Example 5 with SuffixForm

use of zemberek.morphology.lexicon.SuffixForm in project zemberek-nlp by ahmetaa.

the class DynamicLexiconGraph method generateNodeConnections.

/**
 * Links a {@code SuffixSurfaceNode} to every surface node that is allowed to follow it.
 *
 * <p>A SuffixSurfaceNode is one surface realization of a SuffixForm (for the form A1pl_lAr a
 * surface node is "lar"). The morphotactics are defined on SuffixForms, so the successors of
 * a surface node are found through the connections of its form: since P1sg_Im can follow
 * A1pl_lAr, the surface node "lar" is connected to the matching surface node of P1sg_Im
 * (for example "ım").
 *
 * <p>If the successor surface node does not exist yet it is created, and its own connections
 * are then generated recursively. Successor nodes that already exist are linked but not
 * expanded again.
 *
 * @param surfaceNode node whose connections to successive nodes will be generated.
 */
private void generateNodeConnections(SuffixSurfaceNode surfaceNode) {
    // Walk every form that may directly follow this node's form.
    for (SuffixForm nextForm : surfaceNode.suffixForm.connections) {
        // Candidate surface nodes of the following form, derived from this node's state.
        List<SuffixSurfaceNode> candidates = suffixSurfaceNodeGenerator.generate(
                surfaceNode.attributes,
                surfaceNode.expectations,
                surfaceNode.exclusiveSuffixData,
                nextForm);
        for (SuffixSurfaceNode candidate : candidates) {
            // When this node has expectations, skip candidates that do not satisfy them.
            if (!surfaceNode.expectations.isEmpty() && !expectationsMatches(surfaceNode, candidate)) {
                continue;
            }
            // Must be checked before the node is added: only brand-new nodes get expanded.
            boolean newlyCreated = !nodeExists(nextForm, candidate);
            SuffixSurfaceNode linked = addOrReturnExisting(nextForm, candidate);
            surfaceNode.addSuccessorNode(linked);
            if (newlyCreated) {
                generateNodeConnections(linked);
            }
        }
    }
}
Also used : SuffixForm(zemberek.morphology.lexicon.SuffixForm)

Aggregations

SuffixForm (zemberek.morphology.lexicon.SuffixForm)14 Test (org.junit.Test)9 Suffix (zemberek.morphology.lexicon.Suffix)8 NullSuffixForm (zemberek.morphology.lexicon.NullSuffixForm)7 SuffixFormTemplate (zemberek.morphology.lexicon.SuffixFormTemplate)5 ArrayList (java.util.ArrayList)2 Splitter (com.google.common.base.Splitter)1 Stopwatch (com.google.common.base.Stopwatch)1 HashMultimap (com.google.common.collect.HashMultimap)1 LinkedHashMultimap (com.google.common.collect.LinkedHashMultimap)1 Lists (com.google.common.collect.Lists)1 Multimap (com.google.common.collect.Multimap)1 IOException (java.io.IOException)1 PrintWriter (java.io.PrintWriter)1 StandardCharsets (java.nio.charset.StandardCharsets)1 Files (java.nio.file.Files)1 Path (java.nio.file.Path)1 Paths (java.nio.file.Paths)1 Collator (java.text.Collator)1 BitSet (java.util.BitSet)1