Use of zemberek.morphology.analysis.SearchPath in the zemberek-nlp project by ahmetaa.
Class WordGenerator, method advance.
/**
 * Expands one generation path by a single step: every outgoing transition of the path's
 * current state that is allowed yields one new path.
 *
 * <p>A transition is skipped when (a) no morphemes are left on {@code gPath} but the transition
 * has a surface form, (b) {@code gPath.matches} rejects it, or (c) its {@code canPass} check
 * fails for the current search path. In debug mode each rejection is recorded in
 * {@code debugData.rejectedTransitions}.
 *
 * @param gPath the generation path to extend
 * @return the newly created paths, one per accepted transition; empty if none is accepted
 */
private List<GenerationPath> advance(GenerationPath gPath) {
  SearchPath current = gPath.path;
  List<GenerationPath> expanded = new ArrayList<>(2);

  for (MorphemeTransition outgoing : current.getCurrentState().getOutgoing()) {
    SuffixTransition suffix = (SuffixTransition) outgoing;

    // Nothing left to consume: a transition that produces a surface cannot be followed.
    if (gPath.morphemes.isEmpty() && suffix.hasSurfaceForm()) {
      if (debugMode) {
        debugData.rejectedTransitions.put(
            current, new RejectedTransition(suffix, "Empty surface expected."));
      }
      continue;
    }

    // Per the original author's note, transitions with an empty surface get through this check.
    if (!gPath.matches(suffix)) {
      if (debugMode) {
        debugData.rejectedTransitions.put(
            current, new RejectedTransition(suffix, "Morpheme mismatch." + suffix.to.morpheme));
      }
      continue;
    }

    // Debug bookkeeping only: note which condition blocks the transition. The actual
    // accept/reject decision is made by canPass() right after.
    if (debugMode) {
      Condition condition = suffix.getCondition();
      if (condition != null) {
        Condition failing = condition instanceof CombinedCondition
            ? ((CombinedCondition) condition).getFailingCondition(current)
            : (condition.accept(current) ? null : condition);
        if (failing != null) {
          debugData.rejectedTransitions.put(
              current, new RejectedTransition(suffix, "Condition → " + failing.toString()));
        }
      }
    }

    if (!suffix.canPass(current)) {
      continue;
    }

    SurfaceTransition step;
    AttributeSet<PhoneticAttribute> stepAttributes;
    if (suffix.hasSurfaceForm()) {
      String surface = SurfaceTransition.generateSurface(suffix, current.getPhoneticAttributes());
      step = new SurfaceTransition(surface, suffix);
      stepAttributes =
          AttributesHelper.getMorphemicAttributes(surface, current.getPhoneticAttributes());

      // Needed for suffixes like `cik` / `ciğ` (original author's note): the type of the last
      // template token decides which kind of suffix may follow. CannotTerminate is cleared
      // first and set again only for the LAST_NOT_VOICED case.
      stepAttributes.remove(PhoneticAttribute.CannotTerminate);
      TemplateTokenType lastType = suffix.getLastTemplateToken().getType();
      if (lastType == TemplateTokenType.LAST_VOICED) {
        stepAttributes.add(PhoneticAttribute.ExpectsConsonant);
      } else if (lastType == TemplateTokenType.LAST_NOT_VOICED) {
        stepAttributes.add(PhoneticAttribute.ExpectsVowel);
        stepAttributes.add(PhoneticAttribute.CannotTerminate);
      }
    } else {
      // Epsilon transition: empty surface, phonetic attributes of the current path are reused.
      step = new SurfaceTransition("", suffix);
      stepAttributes = current.getPhoneticAttributes();
    }

    expanded.add(gPath.copy(current.getCopyForGeneration(step, stepAttributes)));
  }
  return expanded;
}
Use of zemberek.morphology.analysis.SearchPath in the zemberek-nlp project by ahmetaa.
Class WordGenerator, method generate.
/**
 * Generates results for {@code input} by building one initial search path per candidate stem
 * transition, running {@code search} over them, and converting every resulting path into a
 * {@code Result}.
 *
 * <p>In debug mode, {@code debugData} is replaced with a fresh instance holding the input, the
 * candidate stem transitions and, at the end, every produced analysis.
 *
 * @param input the input string; only stored in the debug data here
 * @param candidates stem transitions to start generation from
 * @param morphemes morphemes the generated paths are expected to follow; if the first one
 *     equals the morpheme of a candidate's initial state, it is skipped for that candidate
 * @return one result per successful path, pairing the analysis' surface form with the analysis
 */
private List<Result> generate(String input, List<StemTransition> candidates, List<Morpheme> morphemes) {
  if (debugMode) {
    debugData = new AnalysisDebugData();
    debugData.input = input;
    debugData.candidateStemTransitions.addAll(candidates);
  }

  // Build the starting path for each candidate stem.
  List<GenerationPath> initialPaths = new ArrayList<>();
  for (StemTransition stem : candidates) {
    // The tail is set to " " because, per the original author's note, some morphotactics
    // conditions inspect the tail's size during the graph walk; for generation those
    // conditions should always pass.
    SearchPath start = SearchPath.initialPath(stem, " ");

    List<Morpheme> remaining;
    if (morphemes.isEmpty()) {
      remaining = new ArrayList<>(0);
    } else if (morphemes.get(0).equals(start.getCurrentState().morpheme)) {
      // The first morpheme is already covered by the path's initial state: drop it.
      remaining = morphemes.subList(1, morphemes.size());
    } else {
      remaining = new ArrayList<>(morphemes);
    }
    initialPaths.add(new GenerationPath(start, remaining));
  }

  // Walk the graph.
  List<GenerationPath> successful = search(initialPaths);

  // Turn each successful path into a result.
  List<Result> results = new ArrayList<>(successful.size());
  for (GenerationPath generated : successful) {
    SingleAnalysis analysis = SingleAnalysis.fromSearchPath(generated.path);
    results.add(new Result(analysis.surfaceForm(), analysis));
    if (debugMode) {
      debugData.results.add(analysis);
    }
  }
  return results;
}
Aggregations