Search in sources :

Example 6 with StemTransition

use of zemberek.morphology.morphotactics.StemTransition in project zemberek-nlp by ahmetaa.

the class StemTransitionsBase method handleSpecialRoots.

/**
 * Creates the stem transitions for dictionary items that are handled individually by their id.
 *
 * <p>For every supported id the transition with the unchanged root surface comes first in the
 * returned list. Every group except {@code imek_Verb} appends one more transition whose surface
 * form is altered.
 *
 * @param item dictionary item whose id selects the handling.
 * @return list with the generated transitions, unmodified stem first.
 * @throws IllegalArgumentException if the item id is not one of the ids listed below.
 * @throws IllegalStateException if an item of the last-vowel-dropping group has a primary POS
 *     other than Noun, Adjective or PostPositive.
 */
private List<StemTransition> handleSpecialRoots(DictionaryItem item) {
    String itemId = item.getId();
    AttributeSet<PhoneticAttribute> baseAttributes = calculateAttributes(item.pronunciation);
    // Resolved before the id is inspected, even though some groups use their own root state.
    MorphemeState defaultRootState = morphotactics.getRootState(item, baseAttributes);
    StemTransition plain;
    StemTransition altered;
    String alteredSurface;
    switch (itemId) {
        case "içeri_Noun":
        case "içeri_Adj":
        case "dışarı_Adj":
        case "dışarı_Noun":
        case "dışarı_Postp":
        case "yukarı_Noun":
        case "ileri_Noun":
        case "yukarı_Adj":
        case "şura_Noun":
        case "bura_Noun":
        case "ora_Noun":
            plain = new StemTransition(item.root, item, baseAttributes, defaultRootState);
            MorphemeState droppedVowelRoot;
            switch (item.primaryPos) {
                case Noun:
                    droppedVowelRoot = morphotactics.nounLastVowelDropRoot_S;
                    break;
                case Adjective:
                // TODO: check postpositive case. Maybe it is not required.
                case PostPositive:
                    // Postpositives share the adjective root state.
                    droppedVowelRoot = morphotactics.adjLastVowelDropRoot_S;
                    break;
                default:
                    throw new IllegalStateException("No root morpheme state found for " + item);
            }
            // Second stem: the root without its last character.
            alteredSurface = item.root.substring(0, item.root.length() - 1);
            altered = new StemTransition(
                alteredSurface, item, calculateAttributes(alteredSurface), droppedVowelRoot);
            altered.getPhoneticAttributes().add(PhoneticAttribute.ExpectsConsonant);
            altered.getPhoneticAttributes().add(PhoneticAttribute.CannotTerminate);
            return Lists.newArrayList(plain, altered);

        case "ben_Pron_Pers":
        case "sen_Pron_Pers":
            plain = new StemTransition(item.root, item, baseAttributes, defaultRootState);
            alteredSurface = item.lemma.equals("ben") ? "ban" : "san";
            altered = new StemTransition(
                alteredSurface, item, calculateAttributes(alteredSurface), morphotactics.pronPers_Mod_S);
            plain.getPhoneticAttributes().add(PhoneticAttribute.UnModifiedPronoun);
            altered.getPhoneticAttributes().add(PhoneticAttribute.ModifiedPronoun);
            return Lists.newArrayList(plain, altered);

        case "demek_Verb":
        case "yemek_Verb":
            // Both stems of these verbs start from the same dedicated root state.
            plain = new StemTransition(item.root, item, baseAttributes, morphotactics.vDeYeRoot_S);
            alteredSurface = item.lemma.equals("demek") ? "di" : "yi";
            altered = new StemTransition(
                alteredSurface, item, calculateAttributes(alteredSurface), morphotactics.vDeYeRoot_S);
            return Lists.newArrayList(plain, altered);

        case "imek_Verb":
            // Only the unmodified stem is produced for this item.
            plain = new StemTransition(item.root, item, baseAttributes, morphotactics.imekRoot_S);
            return Lists.newArrayList(plain);

        case "birbiri_Pron_Quant":
        case "çoğu_Pron_Quant":
        case "öbürü_Pron_Quant":
        case "birçoğu_Pron_Quant":
            plain = new StemTransition(item.root, item, baseAttributes, morphotactics.pronQuant_S);
            if (item.lemma.equals("birbiri")) {
                alteredSurface = "birbir";
            } else if (item.lemma.equals("çoğu")) {
                alteredSurface = "çok";
            } else if (item.lemma.equals("öbürü")) {
                alteredSurface = "öbür";
            } else {
                alteredSurface = "birçok";
            }
            altered = new StemTransition(
                alteredSurface, item, calculateAttributes(alteredSurface), morphotactics.pronQuantModified_S);
            plain.getPhoneticAttributes().add(PhoneticAttribute.UnModifiedPronoun);
            altered.getPhoneticAttributes().add(PhoneticAttribute.ModifiedPronoun);
            return Lists.newArrayList(plain, altered);

        default:
            throw new IllegalArgumentException("Lexicon Item with special stem change cannot be handled:" + item);
    }
}
Also used : StemTransition(zemberek.morphology.morphotactics.StemTransition) PhoneticAttribute(zemberek.core.turkish.PhoneticAttribute) MorphemeState(zemberek.morphology.morphotactics.MorphemeState)

Example 7 with StemTransition

use of zemberek.morphology.morphotactics.StemTransition in project zemberek-nlp by ahmetaa.

the class RuleBasedAnalyzer method analyze.

/**
 * Runs the analysis for a single input string.
 *
 * <p>Stem candidates matching a prefix of the input seed the initial search paths; each path
 * returned by {@code search} is converted to a {@link SingleAnalysis}. When {@code debugMode}
 * is on, a fresh {@code AnalysisDebugData} is created and filled with the input, the stem
 * candidates and the produced analyses.
 *
 * @param input the string to analyze.
 * @return one analysis per path returned by the search, in the same order.
 */
public List<SingleAnalysis> analyze(String input) {
    if (debugMode) {
        // Reset debug state before any lookup happens.
        debugData = new AnalysisDebugData();
    }

    // Stem candidates for the input.
    List<StemTransition> stemCandidates = stemTransitions.getPrefixMatches(input, asciiTolerant);
    if (debugMode) {
        debugData.input = input;
        debugData.candidateStemTransitions.addAll(stemCandidates);
    }

    // One initial path per candidate; the tail is what remains after the stem surface.
    List<SearchPath> initialPaths = new ArrayList<>();
    for (StemTransition stem : stemCandidates) {
        initialPaths.add(SearchPath.initialPath(stem, input.substring(stem.surface.length())));
    }

    // Walk the graph starting from the initial paths.
    List<SearchPath> acceptedPaths = search(initialPaths);

    // Convert every returned path to an analysis.
    List<SingleAnalysis> analyses = new ArrayList<>(acceptedPaths.size());
    for (SearchPath accepted : acceptedPaths) {
        SingleAnalysis analysis = SingleAnalysis.fromSearchPath(accepted);
        analyses.add(analysis);
        if (debugMode) {
            debugData.results.add(analysis);
        }
    }
    return analyses;
}
Also used : StemTransition(zemberek.morphology.morphotactics.StemTransition) ArrayList(java.util.ArrayList)

Example 8 with StemTransition

use of zemberek.morphology.morphotactics.StemTransition in project zemberek-nlp by ahmetaa.

the class StemTransitionsBase method generate.

/**
 * Produces the StemTransition objects for a dictionary item.
 *
 * <p>Items whose id is in {@code specialRoots} are delegated to {@code handleSpecialRoots};
 * items for which {@code hasModifierAttribute} holds are delegated to
 * {@code generateModifiedRootNodes}. Every other item yields a single transition built from
 * its root and the phonetic attributes calculated from its pronunciation.
 *
 * @param item DictionaryItem
 * @return one or more StemTransition objects.
 */
public List<StemTransition> generate(DictionaryItem item) {
    if (specialRoots.contains(item.id)) {
        return handleSpecialRoots(item);
    }
    if (hasModifierAttribute(item)) {
        return generateModifiedRootNodes(item);
    }
    // Plain item: a single transition is enough.
    AttributeSet<PhoneticAttribute> attrs = calculateAttributes(item.pronunciation);
    return Lists.newArrayList(
        new StemTransition(item.root, item, attrs, morphotactics.getRootState(item, attrs)));
}
Also used : StemTransition(zemberek.morphology.morphotactics.StemTransition) PhoneticAttribute(zemberek.core.turkish.PhoneticAttribute)

Example 9 with StemTransition

use of zemberek.morphology.morphotactics.StemTransition in project zemberek-nlp by ahmetaa.

the class AnalysisDebugData method detailedInfo.

/**
 * Renders the collected debug data as a list of report lines.
 *
 * <p>The report lists the input, the stem candidates, every recorded path with its status
 * (failed, accepted or neither) and rejected transitions, then the result paths and the
 * analyses.
 *
 * @return the report, one entry per line.
 */
List<String> detailedInfo() {
    List<String> report = new ArrayList<>();
    report.add("----------------------");
    report.add("Debug data for input = " + input);
    if (candidateStemTransitions.isEmpty()) {
        report.add("No Stem Candidates. Analysis Failed.");
    }

    report.add("Stem Candidate Transitions: ");
    for (StemTransition candidate : candidateStemTransitions) {
        report.add("  " + candidate.debugForm());
    }

    report.add("All paths:");
    for (SearchPath path : paths) {
        // Status line: failed paths take precedence over finished ones.
        String status;
        if (failedPaths.containsKey(path)) {
            status = format("  %s Fail → %s", path, failedPaths.get(path));
        } else if (finishedPaths.contains(path)) {
            status = format("  %s Accepted", path);
        } else {
            status = format("  %s", path);
        }
        report.add(status);

        if (rejectedTransitions.containsKey(path)) {
            report.add("    Failed Transitions:");
            for (RejectedTransition rejected : rejectedTransitions.get(path)) {
                report.add("    " + rejected);
            }
        }
    }

    report.add("Paths    [" + input + "] (Surface + Morpheme State):");
    for (SearchPath resultPath : resultPaths) {
        report.add("  " + resultPath.toString());
    }

    report.add("Analyses [" + input + "] (Surface + Morpheme):");
    for (SingleAnalysis analysis : results) {
        report.add("  " + AnalysisFormatters.surfaceSequenceFormatter().format(analysis));
    }
    return report;
}
Also used : StemTransition(zemberek.morphology.morphotactics.StemTransition) ArrayList(java.util.ArrayList)

Example 10 with StemTransition

use of zemberek.morphology.morphotactics.StemTransition in project zemberek-nlp by ahmetaa.

the class SearchPath method toString.

/**
 * Returns a one-line rendering of this path: the id of the stem's dictionary item, the
 * remaining tail prefixed with {@code -}, and the string forms of all surface transitions
 * joined by {@code " + "}.
 *
 * @return text of the form {@code [(itemId)(-tail) t1 + t2 + ...]}.
 */
@Override
public String toString() {
    StemTransition st = getStemTransition();
    String morphemeStr = transitions.stream()
        .map(SurfaceTransition::toString)
        .collect(Collectors.joining(" + "));
    return "[(" + st.item.id + ")(-" + tail + ") " + morphemeStr + "]";
}
Also used : StemTransition(zemberek.morphology.morphotactics.StemTransition)

Aggregations

StemTransition (zemberek.morphology.morphotactics.StemTransition): 12, ArrayList (java.util.ArrayList): 4, PhoneticAttribute (zemberek.core.turkish.PhoneticAttribute): 3, List (java.util.List): 2, Set (java.util.Set): 2, Collectors (java.util.stream.Collectors): 2, Assert (org.junit.Assert): 2, Test (org.junit.Test): 2, TestUtil (zemberek.core.io.TestUtil): 2, DictionaryItem (zemberek.morphology.lexicon.DictionaryItem): 2, RootLexicon (zemberek.morphology.lexicon.RootLexicon): 2, TurkishDictionaryLoader (zemberek.morphology.lexicon.tr.TurkishDictionaryLoader): 2, Morpheme (zemberek.morphology.morphotactics.Morpheme): 2, MorphemeState (zemberek.morphology.morphotactics.MorphemeState): 2, TurkishMorphotactics (zemberek.morphology.morphotactics.TurkishMorphotactics): 2, RootAttribute (zemberek.core.turkish.RootAttribute): 1, TurkicLetter (zemberek.core.turkish.TurkicLetter): 1, AnalysisDebugData (zemberek.morphology.analysis.AnalysisDebugData): 1, SearchPath (zemberek.morphology.analysis.SearchPath): 1, SingleAnalysis (zemberek.morphology.analysis.SingleAnalysis): 1