Example use of zemberek.morphology.morphotactics.Morpheme in the zemberek-nlp project by ahmetaa.
Source: class WordGenerator, method generate.
/**
 * Generates surface forms for the given stem candidates and morpheme sequence.
 *
 * <p>One initial {@code GenerationPath} is created per candidate stem transition, the paths are
 * handed to {@code search}, and every path returned by the search is converted to a
 * {@code SingleAnalysis} and wrapped in a {@code Result}.
 *
 * <p>When {@code debugMode} is on, a fresh {@code AnalysisDebugData} is stored in
 * {@code debugData} and filled with the input, the candidates and each resulting analysis.
 *
 * @param input the input string; only recorded in the debug data here
 * @param candidates stem transitions to start the search from
 * @param morphemes morphemes the generated word should contain, in order
 * @return one {@code Result} per path returned by {@code search}
 */
private List<Result> generate(String input, List<StemTransition> candidates, List<Morpheme> morphemes) {
  if (debugMode) {
    debugData = new AnalysisDebugData();
    debugData.input = input;
    debugData.candidateStemTransitions.addAll(candidates);
  }

  // Build one starting path for every candidate stem.
  List<GenerationPath> initialPaths = new ArrayList<>();
  for (StemTransition stem : candidates) {
    // The tail is a single space: some morphotactics conditions inspect the tail length while
    // walking the graph, and during generation those conditions should always pass.
    SearchPath start = SearchPath.initialPath(stem, " ");

    List<Morpheme> remaining;
    if (morphemes.isEmpty()) {
      remaining = new ArrayList<>(0);
    } else if (morphemes.get(0).equals(start.getCurrentState().morpheme)) {
      // The first requested morpheme is already the morpheme of the path's initial state,
      // so it is not searched for again.
      remaining = morphemes.subList(1, morphemes.size());
    } else {
      remaining = new ArrayList<>(morphemes);
    }
    initialPaths.add(new GenerationPath(start, remaining));
  }

  // Walk the graph.
  List<GenerationPath> successful = search(initialPaths);

  // Turn every successful path into a result.
  List<Result> results = new ArrayList<>(successful.size());
  for (GenerationPath found : successful) {
    SingleAnalysis analysis = SingleAnalysis.fromSearchPath(found.path);
    results.add(new Result(analysis.surfaceForm(), analysis));
    if (debugMode) {
      debugData.results.add(analysis);
    }
  }
  return results;
}
Example use of zemberek.morphology.morphotactics.Morpheme in the zemberek-nlp project by ahmetaa.
Source: class SingleAnalysis, method fromSearchPath.
/**
 * Builds a {@code SingleAnalysis} from a search path.
 *
 * <p>Each transition of the path contributes one {@code MorphemeData}, except transitions whose
 * morpheme is {@code TurkishMorphotactics.nom} or {@code TurkishMorphotactics.pnon}; those are
 * left out because they create visual noise and carry little information. Entries with an empty
 * surface are shared through {@code emptyMorphemeCache}.
 *
 * @param searchPath the path to convert
 * @return the analysis holding the dictionary item, the collected morphemes and the start
 *     indexes of the inflectional groups
 */
public static SingleAnalysis fromSearchPath(SearchPath searchPath) {
  List<MorphemeData> collected = new ArrayList<>(searchPath.transitions.size());
  int derivatives = 0;

  for (SurfaceTransition step : searchPath.getTransitions()) {
    if (step.isDerivative()) {
      derivatives++;
    }

    Morpheme current = step.getMorpheme();
    boolean noise = current == TurkishMorphotactics.nom || current == TurkishMorphotactics.pnon;
    if (noise) {
      continue;
    }

    MorphemeData data;
    if (step.surface.isEmpty()) {
      // Empty surfaces reuse a cached instance; create and store one on a cache miss.
      data = emptyMorphemeCache.get(current);
      if (data == null) {
        data = new MorphemeData(current, "");
        emptyMorphemeCache.put(current, data);
      }
    } else {
      data = new MorphemeData(current, step.surface);
    }
    collected.add(data);
  }

  // There is assumed to be always at least one inflectional group. Its boundary is index 0,
  // which is already the default value of a new int array element.
  int[] groupBoundaries = new int[derivatives + 1];
  int nextGroup = 1;
  for (int i = 0; i < collected.size(); i++) {
    if (collected.get(i).morpheme.derivational) {
      groupBoundaries[nextGroup++] = i;
    }
  }

  // If the dictionary item is `Dummy`, use the item it references instead. `Dummy` items are
  // usually generated for some compound words: for `zeytinyağı` a DictionaryItem with root
  // "zeytinyağ" is generated, and here it is switched back to the original.
  DictionaryItem root = searchPath.getDictionaryItem();
  boolean dummy = root.hasAttribute(RootAttribute.Dummy);
  return new SingleAnalysis(dummy ? root.getReferenceItem() : root, collected, groupBoundaries);
}
Example use of zemberek.morphology.morphotactics.Morpheme in the zemberek-nlp project by ahmetaa.
Source: class WordGenerator, method generate.
/**
 * Generates words from a stem and a sequence of morpheme ids.
 *
 * <p>Each id is resolved with {@code TurkishMorphotactics.getMorpheme}, keeping the given order.
 * Candidate stem transitions are then looked up with
 * {@code stemTransitions.getPrefixMatches(stem, false)} and both are passed to the
 * candidate-based {@code generate} overload.
 *
 * @param stem the stem to generate from
 * @param morphemeIds ids of the morphemes the generated word should contain, in order
 * @return the results produced by the candidate-based {@code generate} overload
 */
public List<Result> generate(String stem, List<String> morphemeIds) {
  List<Morpheme> resolved = new ArrayList<>(morphemeIds.size());
  for (String id : morphemeIds) {
    resolved.add(TurkishMorphotactics.getMorpheme(id));
  }
  return generate(stem, stemTransitions.getPrefixMatches(stem, false), resolved);
}
Aggregations