use of zemberek.morphology.morphotactics.StemTransition in project zemberek-nlp by ahmetaa.
the class StemTransitionsBase method handleSpecialRoots.
/**
 * Builds the stem transitions for dictionary items that need hand-written stem handling.
 *
 * <p>The item id selects one of the groups below. Every group produces an unmodified
 * transition using {@code item.root}; all groups except {@code imek_Verb} also produce a
 * second transition with an altered surface form.
 *
 * @param item dictionary item whose id is one of the ids listed in the switch.
 * @return a list holding the unmodified transition, followed by the altered one when the
 *     group defines it.
 * @throws IllegalStateException if a last-vowel-drop item has a primary POS other than Noun,
 *     Adjective or PostPositive.
 * @throws IllegalArgumentException if the item id is not handled here.
 */
private List<StemTransition> handleSpecialRoots(DictionaryItem item) {
  String itemId = item.getId();
  // Both values are computed up front for every item, before the id is inspected.
  AttributeSet<PhoneticAttribute> baseAttrs = calculateAttributes(item.pronunciation);
  MorphemeState defaultRootState = morphotactics.getRootState(item, baseAttrs);

  switch (itemId) {
    case "içeri_Noun":
    case "içeri_Adj":
    case "dışarı_Adj":
    case "dışarı_Noun":
    case "dışarı_Postp":
    case "yukarı_Noun":
    case "ileri_Noun":
    case "yukarı_Adj":
    case "şura_Noun":
    case "bura_Noun":
    case "ora_Noun": {
      StemTransition plain = new StemTransition(item.root, item, baseAttrs, defaultRootState);

      MorphemeState droppedVowelState;
      switch (item.primaryPos) {
        case Noun:
          droppedVowelState = morphotactics.nounLastVowelDropRoot_S;
          break;
        case Adjective:
          droppedVowelState = morphotactics.adjLastVowelDropRoot_S;
          break;
        // TODO: check postpositive case. Maybe it is not required.
        case PostPositive:
          droppedVowelState = morphotactics.adjLastVowelDropRoot_S;
          break;
        default:
          throw new IllegalStateException("No root morpheme state found for " + item);
      }

      // The altered surface is the root without its final character.
      String shortened = item.root.substring(0, item.root.length() - 1);
      StemTransition altered =
          new StemTransition(shortened, item, calculateAttributes(shortened), droppedVowelState);
      AttributeSet<PhoneticAttribute> alteredAttrs = altered.getPhoneticAttributes();
      alteredAttrs.add(PhoneticAttribute.ExpectsConsonant);
      alteredAttrs = altered.getPhoneticAttributes();
      alteredAttrs.add(PhoneticAttribute.CannotTerminate);
      return Lists.newArrayList(plain, altered);
    }

    case "ben_Pron_Pers":
    case "sen_Pron_Pers": {
      StemTransition plain = new StemTransition(item.root, item, baseAttrs, defaultRootState);
      String alteredSurface = item.lemma.equals("ben") ? "ban" : "san";
      StemTransition altered = new StemTransition(
          alteredSurface, item, calculateAttributes(alteredSurface), morphotactics.pronPers_Mod_S);
      plain.getPhoneticAttributes().add(PhoneticAttribute.UnModifiedPronoun);
      altered.getPhoneticAttributes().add(PhoneticAttribute.ModifiedPronoun);
      return Lists.newArrayList(plain, altered);
    }

    case "demek_Verb":
    case "yemek_Verb": {
      StemTransition plain =
          new StemTransition(item.root, item, baseAttrs, morphotactics.vDeYeRoot_S);
      String alteredSurface = item.lemma.equals("demek") ? "di" : "yi";
      StemTransition altered = new StemTransition(
          alteredSurface, item, calculateAttributes(alteredSurface), morphotactics.vDeYeRoot_S);
      return Lists.newArrayList(plain, altered);
    }

    case "imek_Verb": {
      // Only the unmodified transition is produced for this item.
      StemTransition plain =
          new StemTransition(item.root, item, baseAttrs, morphotactics.imekRoot_S);
      return Lists.newArrayList(plain);
    }

    case "birbiri_Pron_Quant":
    case "çoğu_Pron_Quant":
    case "öbürü_Pron_Quant":
    case "birçoğu_Pron_Quant": {
      StemTransition plain =
          new StemTransition(item.root, item, baseAttrs, morphotactics.pronQuant_S);

      String alteredSurface;
      switch (item.lemma) {
        case "birbiri":
          alteredSurface = "birbir";
          break;
        case "çoğu":
          alteredSurface = "çok";
          break;
        case "öbürü":
          alteredSurface = "öbür";
          break;
        default:
          alteredSurface = "birçok";
          break;
      }
      StemTransition altered = new StemTransition(
          alteredSurface,
          item,
          calculateAttributes(alteredSurface),
          morphotactics.pronQuantModified_S);
      plain.getPhoneticAttributes().add(PhoneticAttribute.UnModifiedPronoun);
      altered.getPhoneticAttributes().add(PhoneticAttribute.ModifiedPronoun);
      return Lists.newArrayList(plain, altered);
    }

    default:
      throw new IllegalArgumentException(
          "Lexicon Item with special stem change cannot be handled:" + item);
  }
}
use of zemberek.morphology.morphotactics.StemTransition in project zemberek-nlp by ahmetaa.
the class RuleBasedAnalyzer method analyze.
/**
 * Analyzes the input by collecting stem candidates that match a prefix of it, turning each
 * candidate into an initial search path, searching from those paths, and converting every
 * resulting path into a {@code SingleAnalysis}.
 *
 * <p>When {@code debugMode} is set, a fresh {@code AnalysisDebugData} is created and filled
 * with the input, the candidate stem transitions and the resulting analyses.
 *
 * @param input the text to analyze.
 * @return one analysis per path returned by the search, in the same order.
 */
public List<SingleAnalysis> analyze(String input) {
  if (debugMode) {
    debugData = new AnalysisDebugData();
  }

  // Stem candidates matching a prefix of the input.
  List<StemTransition> stemCandidates = stemTransitions.getPrefixMatches(input, asciiTolerant);
  if (debugMode) {
    debugData.input = input;
    debugData.candidateStemTransitions.addAll(stemCandidates);
  }

  // One starting path per candidate; the tail is what remains after the stem surface.
  List<SearchPath> startingPaths = new ArrayList<>();
  for (StemTransition stem : stemCandidates) {
    startingPaths.add(SearchPath.initialPath(stem, input.substring(stem.surface.length())));
  }

  // Walk the graph.
  List<SearchPath> acceptedPaths = search(startingPaths);

  // Convert each accepted path into an analysis.
  List<SingleAnalysis> analyses = new ArrayList<>(acceptedPaths.size());
  for (SearchPath accepted : acceptedPaths) {
    SingleAnalysis converted = SingleAnalysis.fromSearchPath(accepted);
    analyses.add(converted);
    if (debugMode) {
      debugData.results.add(converted);
    }
  }
  return analyses;
}
use of zemberek.morphology.morphotactics.StemTransition in project zemberek-nlp by ahmetaa.
the class StemTransitionsBase method generate.
/**
 * Generates the StemTransition objects for a dictionary item.
 *
 * <p>Items whose id is contained in {@code specialRoots} are handled by
 * {@code handleSpecialRoots}; items for which {@code hasModifierAttribute} is true are handled
 * by {@code generateModifiedRootNodes}. Any other item yields a single transition built from
 * its root and the phonetic attributes calculated from its pronunciation.
 *
 * @param item DictionaryItem
 * @return one or more StemTransition objects.
 */
public List<StemTransition> generate(DictionaryItem item) {
  if (specialRoots.contains(item.id)) {
    return handleSpecialRoots(item);
  }
  if (hasModifierAttribute(item)) {
    return generateModifiedRootNodes(item);
  }
  // Plain item: a single transition on the unmodified root.
  AttributeSet<PhoneticAttribute> attrs = calculateAttributes(item.pronunciation);
  MorphemeState rootState = morphotactics.getRootState(item, attrs);
  StemTransition single = new StemTransition(item.root, item, attrs, rootState);
  return Lists.newArrayList(single);
}
use of zemberek.morphology.morphotactics.StemTransition in project zemberek-nlp by ahmetaa.
the class AnalysisDebugData method detailedInfo.
/**
 * Renders the collected debug data as a list of text lines: the input, the stem candidate
 * transitions, every path with its status and rejected transitions, the result paths and the
 * formatted analyses.
 *
 * @return the report lines, in output order.
 */
List<String> detailedInfo() {
  List<String> report = new ArrayList<>();
  report.add("----------------------");
  report.add("Debug data for input = " + input);

  if (candidateStemTransitions.size() == 0) {
    report.add("No Stem Candidates. Analysis Failed.");
  }

  report.add("Stem Candidate Transitions: ");
  for (StemTransition candidate : candidateStemTransitions) {
    report.add(" " + candidate.debugForm());
  }

  report.add("All paths:");
  for (SearchPath path : paths) {
    // A path is reported as failed, accepted, or with no status at all.
    String statusLine;
    if (failedPaths.containsKey(path)) {
      statusLine = format(" %s Fail → %s", path, failedPaths.get(path));
    } else if (finishedPaths.contains(path)) {
      statusLine = format(" %s Accepted", path);
    } else {
      statusLine = format(" %s", path);
    }
    report.add(statusLine);

    if (!rejectedTransitions.containsKey(path)) {
      continue;
    }
    report.add(" Failed Transitions:");
    for (RejectedTransition rejected : rejectedTransitions.get(path)) {
      report.add(" " + rejected);
    }
  }

  report.add("Paths [" + input + "] (Surface + Morpheme State):");
  for (SearchPath resultPath : resultPaths) {
    report.add(" " + resultPath.toString());
  }

  report.add("Analyses [" + input + "] (Surface + Morpheme):");
  for (SingleAnalysis analysis : results) {
    report.add(" " + AnalysisFormatters.surfaceSequenceFormatter().format(analysis));
  }
  return report;
}
use of zemberek.morphology.morphotactics.StemTransition in project zemberek-nlp by ahmetaa.
the class SearchPath method toString.
/**
 * Returns a bracketed text form of this path: the id of the stem transition's dictionary item,
 * the remaining tail, and the string forms of the transitions joined with " + ".
 */
public String toString() {
  StemTransition stem = getStemTransition();
  String joinedTransitions = transitions
      .stream()
      .map(SurfaceTransition::toString)
      .collect(Collectors.joining(" + "));
  StringBuilder text = new StringBuilder();
  text.append("[(").append(stem.item.id).append(")(-").append(tail).append(") ");
  text.append(joinedTransitions).append("]");
  return text.toString();
}
Aggregations