Search in sources :

Example 1 with AnalysisDebugData

use of zemberek.morphology._analyzer.InterpretingAnalyzer.AnalysisDebugData in project zemberek-nlp by ahmetaa.

The method advance of the class InterpretingAnalyzer.

/**
 * Extends {@code path} by one transition.
 * <p>
 * Each outgoing transition of the path's current state is examined in turn. A transition
 * produces a new path only when its generated surface is a prefix of the path's remaining
 * tail and {@code canPass} accepts the path. When {@code debugData} is non-null, rejected
 * transitions are recorded in it together with a textual reason.
 *
 * @param path the search path to extend
 * @param debugData receiver for rejected transitions; {@code null} disables recording
 * @return the newly created paths; empty when no transition could be followed
 */
private List<SearchPath> advance(SearchPath path, AnalysisDebugData debugData) {
    final boolean debugging = debugData != null;
    List<SearchPath> expanded = new ArrayList<>(2);

    for (MorphemeTransition outgoing : path.currentState.getOutgoing()) {
        SuffixTransition suffix = (SuffixTransition) outgoing;

        // Nothing is left to consume, so a transition that emits a surface cannot match.
        if (path.tail.isEmpty() && suffix.hasSurfaceForm()) {
            if (debugging) {
                debugData.rejectedTransitions.put(path, new RejectedTransition(suffix, "Empty surface expected."));
            }
            continue;
        }

        // The generated surface must be a prefix of what remains of the input.
        String generated = SurfaceTransition.generate(suffix, path.phoneticAttributes);
        if (!path.tail.startsWith(generated)) {
            if (debugging) {
                debugData.rejectedTransitions.put(path, new RejectedTransition(suffix, "Surface Mismatch:" + generated));
            }
            continue;
        }

        // Debug only: work out which condition (if any) rejects this path and record it.
        // The actual pass/fail decision is still taken by canPass() below.
        if (debugging) {
            Condition condition = suffix.getCondition();
            if (condition != null) {
                Condition failing;
                if (condition instanceof CombinedCondition) {
                    failing = ((CombinedCondition) condition).getFailingCondition(path);
                } else if (condition.accept(path)) {
                    failing = null;
                } else {
                    failing = condition;
                }
                if (failing != null) {
                    debugData.rejectedTransitions.put(path, new RejectedTransition(suffix, "Condition → " + failing.toString()));
                }
            }
        }

        // Transition conditions decide whether the path may pass.
        if (!suffix.canPass(path)) {
            continue;
        }

        // Epsilon transition: no surface is consumed and the existing attributes are reused.
        if (!suffix.hasSurfaceForm()) {
            SurfaceTransition epsilon = new SurfaceTransition("", suffix);
            expanded.add(path.getCopy(epsilon, path.phoneticAttributes));
            continue;
        }

        SurfaceTransition matched = new SurfaceTransition(generated, suffix);

        // When the surface consumes the whole tail the current attributes are simply copied;
        // otherwise they are derived from the generated surface.
        AttributeSet<PhoneticAttribute> attributes;
        if (path.tail.equals(generated)) {
            attributes = path.phoneticAttributes.copy();
        } else {
            attributes = AttributesHelper.getMorphemicAttributes(generated, path.phoneticAttributes);
        }

        // Needed for suffixes such as `cik` and `ciğ`: depending on the last template token,
        // an extra attribute constrains whether a consonant- or vowel-initial suffix may follow.
        // CannotTerminate is cleared first and re-added only in the LAST_NOT_VOICED case.
        attributes.remove(PhoneticAttribute.CannotTerminate);
        SuffixTemplateToken lastToken = suffix.getLastTemplateToken();
        if (lastToken.type == TemplateTokenType.LAST_VOICED) {
            attributes.add(PhoneticAttribute.ExpectsConsonant);
        } else if (lastToken.type == TemplateTokenType.LAST_NOT_VOICED) {
            attributes.add(PhoneticAttribute.ExpectsVowel);
            attributes.add(PhoneticAttribute.CannotTerminate);
        }

        expanded.add(path.getCopy(matched, attributes));
    }
    return expanded;
}
Also used: Condition (zemberek.morphology._morphotactics.Condition), CombinedCondition (zemberek.morphology._morphotactics.CombinedCondition), SuffixTransition (zemberek.morphology._morphotactics.SuffixTransition), ArrayList (java.util.ArrayList), MorphemeTransition (zemberek.morphology._morphotactics.MorphemeTransition), PhoneticAttribute (zemberek.core.turkish.PhoneticAttribute), SuffixTemplateToken (zemberek.morphology._analyzer.SurfaceTransition.SuffixTemplateToken)

Example 2 with AnalysisDebugData

use of zemberek.morphology._analyzer.InterpretingAnalyzer.AnalysisDebugData in project zemberek-nlp by ahmetaa.

The method analyze of the class InterpretingAnalyzer.

/**
 * Analyzes {@code input} by matching stem candidates against its prefixes, searching the
 * graph from those stems, and converting every successful search path into an analysis.
 * <p>
 * When {@code debugData} is non-null, the input, the candidate stem transitions and the
 * resulting analyses are stored in it, and it is also handed to the search.
 *
 * @param input the text to analyze; must not be {@code null} (its length is read immediately)
 * @param debugData receiver for debug information; {@code null} disables collection
 * @return one analysis per successful search path, in the order the search returned them
 */
public List<_SingleAnalysis> analyze(String input, AnalysisDebugData debugData) {
    // Every non-empty prefix of the input is tried as a possible stem.
    List<StemTransition> stems = new ArrayList<>(3);
    for (int end = 1; end <= input.length(); end++) {
        stems.addAll(getMatchingStemTransitions(input.substring(0, end)));
    }

    if (debugData != null) {
        debugData.input = input;
        debugData.candidateStemTransitions.addAll(stems);
    }

    // Each stem candidate starts one search path: the stem surface length splits the input
    // into the consumed head and the tail that remains to be matched.
    List<SearchPath> initialPaths = new ArrayList<>();
    for (StemTransition stem : stems) {
        int split = stem.surface.length();
        initialPaths.add(SearchPath.initialPath(stem, input.substring(0, split), input.substring(split)));
    }

    List<SearchPath> successful = search(initialPaths, debugData);

    // Turn every successful path into an analysis, mirroring it into the debug data if present.
    List<_SingleAnalysis> analyses = new ArrayList<>(successful.size());
    for (SearchPath found : successful) {
        _SingleAnalysis analysis = _SingleAnalysis.fromSearchPath(found);
        analyses.add(analysis);
        if (debugData != null) {
            debugData.results.add(analysis);
        }
    }
    return analyses;
}
Also used : StemTransition(zemberek.morphology._morphotactics.StemTransition) ArrayList(java.util.ArrayList)

Example 3 with AnalysisDebugData

use of zemberek.morphology._analyzer.InterpretingAnalyzer.AnalysisDebugData in project zemberek-nlp by ahmetaa.

The method printDebug of the class AnalyzerTestBase.

/**
 * Runs {@code analyzer} on {@code input} with a fresh {@link AnalysisDebugData} collector
 * and then calls {@code dumpToConsole()} on it. The analysis result itself is discarded.
 *
 * @param analyzer the analyzer to run
 * @param input the text passed to {@code analyze}
 */
private static void printDebug(InterpretingAnalyzer analyzer, String input) {
    AnalysisDebugData collected = new AnalysisDebugData();
    analyzer.analyze(input, collected);
    collected.dumpToConsole();
}
Also used : AnalysisDebugData(zemberek.morphology._analyzer.InterpretingAnalyzer.AnalysisDebugData)

Aggregations

ArrayList (java.util.ArrayList): 2; PhoneticAttribute (zemberek.core.turkish.PhoneticAttribute): 1; AnalysisDebugData (zemberek.morphology._analyzer.InterpretingAnalyzer.AnalysisDebugData): 1; SuffixTemplateToken (zemberek.morphology._analyzer.SurfaceTransition.SuffixTemplateToken): 1; CombinedCondition (zemberek.morphology._morphotactics.CombinedCondition): 1; Condition (zemberek.morphology._morphotactics.Condition): 1; MorphemeTransition (zemberek.morphology._morphotactics.MorphemeTransition): 1; StemTransition (zemberek.morphology._morphotactics.StemTransition): 1; SuffixTransition (zemberek.morphology._morphotactics.SuffixTransition): 1