Example use of zemberek.morphology._analyzer.SurfaceTransition.SuffixTemplateToken in the project zemberek-nlp by ahmetaa.
Taken from the class InterpretingAnalyzer, method advance.
/**
 * Expands a search path by one step: every outgoing transition of the path's current state is
 * tried, and each transition that is allowed and matches the remaining input yields a new path.
 *
 * <p>A transition is rejected when
 * <ul>
 *   <li>the path's tail is empty but the transition has a surface form,</li>
 *   <li>the surface generated for the transition is not a prefix of the path's tail, or</li>
 *   <li>{@code canPass(path)} returns false for the transition.</li>
 * </ul>
 *
 * @param path the search path to expand; it is only read and copied here
 * @param debugData optional sink for rejection reasons; may be {@code null}, in which case
 *     nothing is recorded
 * @return a newly created list with the generated paths; empty if no transition was usable
 */
private List<SearchPath> advance(SearchPath path, AnalysisDebugData debugData) {
  final boolean debugging = debugData != null;
  List<SearchPath> expanded = new ArrayList<>(2);

  for (MorphemeTransition outgoing : path.currentState.getOutgoing()) {
    SuffixTransition candidate = (SuffixTransition) outgoing;

    // Nothing is left to consume, so a transition that produces a surface cannot match.
    if (path.tail.isEmpty() && candidate.hasSurfaceForm()) {
      if (debugging) {
        RejectedTransition rejection = new RejectedTransition(candidate, "Empty surface expected.");
        debugData.rejectedTransitions.put(path, rejection);
      }
      continue;
    }

    // The generated surface must be a prefix of what remains of the input.
    String surface = SurfaceTransition.generate(candidate, path.phoneticAttributes);
    if (!path.tail.startsWith(surface)) {
      if (debugging) {
        RejectedTransition rejection =
            new RejectedTransition(candidate, "Surface Mismatch:" + surface);
        debugData.rejectedTransitions.put(path, rejection);
      }
      continue;
    }

    // Debug only: find out which condition fails so that it can be reported.
    // The actual accept/reject decision is taken by canPass() right below.
    if (debugging && candidate.getCondition() != null) {
      Condition condition = candidate.getCondition();
      Condition failed = null;
      if (condition instanceof CombinedCondition) {
        failed = ((CombinedCondition) condition).getFailingCondition(path);
      } else if (!condition.accept(path)) {
        failed = condition;
      }
      if (failed != null) {
        RejectedTransition rejection =
            new RejectedTransition(candidate, "Condition → " + failed.toString());
        debugData.rejectedTransitions.put(path, rejection);
      }
    }

    // Transition conditions decide whether the path may pass.
    if (!candidate.canPass(path)) {
      continue;
    }

    if (candidate.hasSurfaceForm()) {
      SurfaceTransition surfaceTransition = new SurfaceTransition(surface, candidate);

      // When the surface consumes the whole tail the existing attributes are copied as they
      // are; otherwise they are derived from the generated surface.
      AttributeSet<PhoneticAttribute> attributes;
      if (path.tail.equals(surface)) {
        attributes = path.phoneticAttributes.copy();
      } else {
        attributes = AttributesHelper.getMorphemicAttributes(surface, path.phoneticAttributes);
      }

      // Needed for suffixes like `cik` and `ciğ`: depending on the type of the template's last
      // token an extra attribute is attached that restricts which suffix may follow
      // (consonant-starting vs. vowel-starting). CannotTerminate is cleared first and only
      // re-added in the LAST_NOT_VOICED case.
      attributes.remove(PhoneticAttribute.CannotTerminate);
      SuffixTemplateToken finalToken = candidate.getLastTemplateToken();
      if (finalToken.type == TemplateTokenType.LAST_VOICED) {
        attributes.add(PhoneticAttribute.ExpectsConsonant);
      } else if (finalToken.type == TemplateTokenType.LAST_NOT_VOICED) {
        attributes.add(PhoneticAttribute.ExpectsVowel);
        attributes.add(PhoneticAttribute.CannotTerminate);
      }

      expanded.add(path.getCopy(surfaceTransition, attributes));
    } else {
      // Epsilon transition: empty surface, the path keeps its current phonetic attributes.
      expanded.add(path.getCopy(new SurfaceTransition("", candidate), path.phoneticAttributes));
    }
  }

  return expanded;
}
Aggregations