Search in sources :

Example 1 with SearchPath

Use of zemberek.morphology.analysis.SearchPath in project zemberek-nlp by ahmetaa.

From the class WordGenerator, method advance.

/**
 * Extends a generation path by a single step.
 *
 * <p>Every outgoing transition of the path's current state is examined in order. A transition
 * is skipped when:
 * <ul>
 *   <li>the path has no morphemes left but the transition carries a surface form,</li>
 *   <li>{@code gPath.matches(...)} rejects the transition, or</li>
 *   <li>{@code canPass(...)} rejects the path for that transition.</li>
 * </ul>
 * Each transition that survives these checks contributes exactly one new path to the result.
 * When {@code debugMode} is on, rejections are recorded in {@code debugData.rejectedTransitions}.
 *
 * @param gPath the generation path to extend
 * @return the paths produced by following the accepted transitions; empty if none was accepted
 */
private List<GenerationPath> advance(GenerationPath gPath) {
    final SearchPath current = gPath.path;
    List<GenerationPath> expanded = new ArrayList<>(2);

    for (MorphemeTransition outgoing : current.getCurrentState().getOutgoing()) {
        SuffixTransition suffix = (SuffixTransition) outgoing;

        // Nothing left to generate: a transition that would emit a surface cannot be taken.
        if (gPath.morphemes.isEmpty() && suffix.hasSurfaceForm()) {
            if (debugMode) {
                RejectedTransition rejected = new RejectedTransition(suffix, "Empty surface expected.");
                debugData.rejectedTransitions.put(current, rejected);
            }
            continue;
        }

        // The original notes that transitions with an empty surface get through this check.
        if (!gPath.matches(suffix)) {
            if (debugMode) {
                RejectedTransition rejected =
                        new RejectedTransition(suffix, "Morpheme mismatch." + suffix.to.morpheme);
                debugData.rejectedTransitions.put(current, rejected);
            }
            continue;
        }

        // Debug only: find out which condition (if any) fails so it can be reported.
        // The actual accept/reject decision is taken by canPass() below.
        if (debugMode) {
            Condition cond = suffix.getCondition();
            if (cond != null) {
                Condition failing = null;
                if (cond instanceof CombinedCondition) {
                    failing = ((CombinedCondition) cond).getFailingCondition(current);
                } else if (!cond.accept(current)) {
                    failing = cond;
                }
                if (failing != null) {
                    RejectedTransition rejected =
                            new RejectedTransition(suffix, "Condition → " + failing.toString());
                    debugData.rejectedTransitions.put(current, rejected);
                }
            }
        }

        if (!suffix.canPass(current)) {
            continue;
        }

        if (!suffix.hasSurfaceForm()) {
            // Epsilon transition: empty surface, phonetic attributes carried over unchanged.
            SurfaceTransition epsilon = new SurfaceTransition("", suffix);
            SearchPath next = current.getCopyForGeneration(epsilon, current.getPhoneticAttributes());
            expanded.add(gPath.copy(next));
            continue;
        }

        // Transition with a surface: generate it from the path's phonetic attributes, then
        // derive the attribute set for the extended path from the generated surface.
        String generated = SurfaceTransition.generateSurface(suffix, current.getPhoneticAttributes());
        SurfaceTransition withSurface = new SurfaceTransition(generated, suffix);
        AttributeSet<PhoneticAttribute> attrs =
                AttributesHelper.getMorphemicAttributes(generated, current.getPhoneticAttributes());

        // CannotTerminate is cleared first and only re-added for LAST_NOT_VOICED below.
        // According to the original note this handling exists for suffixes such as `cik` / `ciğ`:
        // the extra Expects* attribute restricts whether a consonant- or vowel-initial suffix
        // may follow.
        attrs.remove(PhoneticAttribute.CannotTerminate);
        SuffixTemplateToken tailToken = suffix.getLastTemplateToken();
        if (tailToken.getType() == TemplateTokenType.LAST_VOICED) {
            attrs.add(PhoneticAttribute.ExpectsConsonant);
        } else if (tailToken.getType() == TemplateTokenType.LAST_NOT_VOICED) {
            attrs.add(PhoneticAttribute.ExpectsVowel);
            attrs.add(PhoneticAttribute.CannotTerminate);
        }

        SearchPath next = current.getCopyForGeneration(withSurface, attrs);
        expanded.add(gPath.copy(next));
    }
    return expanded;
}
Also used : RejectedTransition(zemberek.morphology.analysis.AnalysisDebugData.RejectedTransition) CombinedCondition(zemberek.morphology.morphotactics.CombinedCondition) Condition(zemberek.morphology.morphotactics.Condition) SuffixTransition(zemberek.morphology.morphotactics.SuffixTransition) ArrayList(java.util.ArrayList) SurfaceTransition(zemberek.morphology.analysis.SurfaceTransition) MorphemeTransition(zemberek.morphology.morphotactics.MorphemeTransition) PhoneticAttribute(zemberek.core.turkish.PhoneticAttribute) SearchPath(zemberek.morphology.analysis.SearchPath) SuffixTemplateToken(zemberek.morphology.analysis.SurfaceTransition.SuffixTemplateToken)

Example 2 with SearchPath

Use of zemberek.morphology.analysis.SearchPath in project zemberek-nlp by ahmetaa.

From the class WordGenerator, method generate.

/**
 * Runs generation for the given stem candidates and target morphemes.
 *
 * <p>One initial {@code GenerationPath} is created per candidate, the paths are handed to
 * {@code search(...)}, and every path it returns is converted to a {@code Result}. When
 * {@code debugMode} is on, a fresh {@code AnalysisDebugData} is created and filled with the
 * input, the candidates and the resulting analyses.
 *
 * @param input value stored as {@code debugData.input} in debug mode; not otherwise used here
 * @param candidates stem transitions to start the search from
 * @param morphemes morphemes the generated word should contain
 * @return one result per path returned by the search, in the same order
 */
private List<Result> generate(String input, List<StemTransition> candidates, List<Morpheme> morphemes) {
    if (debugMode) {
        debugData = new AnalysisDebugData();
        debugData.input = input;
        debugData.candidateStemTransitions.addAll(candidates);
    }

    // Seed the search with one path per stem candidate.
    List<GenerationPath> seeds = new ArrayList<>();
    for (StemTransition stem : candidates) {
        // The tail is " " rather than empty: per the original note, some morphotactics
        // conditions inspect the tail's size during the graph walk, and generation wants
        // those conditions to pass.
        SearchPath start = SearchPath.initialPath(stem, " ");

        List<Morpheme> remaining;
        if (morphemes.isEmpty()) {
            remaining = new ArrayList<>(0);
        } else if (morphemes.get(0).equals(start.getCurrentState().morpheme)) {
            // The first requested morpheme is already covered by the path's starting state,
            // so it is skipped. Note: subList returns a view of the caller's list, not a copy.
            remaining = morphemes.subList(1, morphemes.size());
        } else {
            remaining = new ArrayList<>(morphemes);
        }
        seeds.add(new GenerationPath(start, remaining));
    }

    // Walk the graph, then turn each successful path into a result.
    List<GenerationPath> successful = search(seeds);
    List<Result> generated = new ArrayList<>(successful.size());
    for (GenerationPath finished : successful) {
        SingleAnalysis analysis = SingleAnalysis.fromSearchPath(finished.path);
        Result item = new Result(analysis.surfaceForm(), analysis);
        generated.add(item);
        if (debugMode) {
            debugData.results.add(analysis);
        }
    }
    return generated;
}
Also used : SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) AnalysisDebugData(zemberek.morphology.analysis.AnalysisDebugData) StemTransition(zemberek.morphology.morphotactics.StemTransition) Morpheme(zemberek.morphology.morphotactics.Morpheme) ArrayList(java.util.ArrayList) SearchPath(zemberek.morphology.analysis.SearchPath)

Aggregations

ArrayList (java.util.ArrayList)2 SearchPath (zemberek.morphology.analysis.SearchPath)2 PhoneticAttribute (zemberek.core.turkish.PhoneticAttribute)1 AnalysisDebugData (zemberek.morphology.analysis.AnalysisDebugData)1 RejectedTransition (zemberek.morphology.analysis.AnalysisDebugData.RejectedTransition)1 SingleAnalysis (zemberek.morphology.analysis.SingleAnalysis)1 SurfaceTransition (zemberek.morphology.analysis.SurfaceTransition)1 SuffixTemplateToken (zemberek.morphology.analysis.SurfaceTransition.SuffixTemplateToken)1 CombinedCondition (zemberek.morphology.morphotactics.CombinedCondition)1 Condition (zemberek.morphology.morphotactics.Condition)1 Morpheme (zemberek.morphology.morphotactics.Morpheme)1 MorphemeTransition (zemberek.morphology.morphotactics.MorphemeTransition)1 StemTransition (zemberek.morphology.morphotactics.StemTransition)1 SuffixTransition (zemberek.morphology.morphotactics.SuffixTransition)1