Use of zemberek.morphology.analysis.SingleAnalysis in the project zemberek-nlp by ahmetaa.
From the class PerceptronAmbiguityResolver, method disambiguate.
/**
 * Builds the sentence-level analysis by pairing every word's candidate analyses with the
 * single analysis the decoder picked for that position.
 *
 * @param sentence the sentence text, passed through unchanged to the result
 * @param allAnalyses the candidate analyses, one entry per word
 * @return a {@code SentenceAnalysis} holding one {@code SentenceWordAnalysis} per entry of
 *     {@code allAnalyses}, in the same order
 */
@Override
public SentenceAnalysis disambiguate(String sentence, List<WordAnalysis> allAnalyses) {
  DecodeResult decoded = decoder.bestPath(allAnalyses);
  List<SentenceWordAnalysis> paired = new ArrayList<>();
  // bestParse is read by position: entry k belongs to the k-th word.
  int position = 0;
  for (WordAnalysis candidates : allAnalyses) {
    SingleAnalysis chosen = decoded.bestParse.get(position);
    paired.add(new SentenceWordAnalysis(chosen, candidates));
    position++;
  }
  return new SentenceAnalysis(sentence, paired);
}
Use of zemberek.morphology.analysis.SingleAnalysis in the project zemberek-nlp by ahmetaa.
From the class ClassificationConsole, method replaceWordsWithLemma.
/**
 * Rewrites a sentence so that every word is replaced by a lemma.
 *
 * <p>The sentence is analyzed and disambiguated; for each word the last lemma of its best
 * analysis is used. A word whose best analysis is unknown is kept as its original input.
 *
 * @param sentence the sentence to rewrite
 * @return the resulting tokens joined with single spaces
 */
private String replaceWordsWithLemma(String sentence) {
  List<String> tokens = new ArrayList<>();
  for (SentenceWordAnalysis entry : morphology.analyzeAndDisambiguate(sentence)) {
    SingleAnalysis bestAnalysis = entry.getBestAnalysis();
    if (bestAnalysis.isUnknown()) {
      // No usable analysis: fall back to the word exactly as it was given.
      tokens.add(entry.getWordAnalysis().getInput());
    } else {
      List<String> lemmas = bestAnalysis.getLemmas();
      tokens.add(lemmas.get(lemmas.size() - 1));
    }
  }
  return String.join(" ", tokens);
}
Use of zemberek.morphology.analysis.SingleAnalysis in the project zemberek-nlp by ahmetaa.
From the class TurkishSpellChecker, method getUnrankedSuggestions.
/**
 * Collects spelling suggestions for a word without ranking them.
 *
 * <p>Apostrophes ({@code '} and {@code ’}) are removed and the word is normalized before the
 * decoder is asked for candidates. Every candidate is analyzed, and each analysis that is not
 * unknown and passes {@code analysisPredicate} (when one is set) is formatted to the case
 * guessed from the original word. Mixed-case and lower-case guesses are formatted with the
 * default case.
 *
 * @param word the word to find suggestions for
 * @return the formatted suggestions without duplicates, in the order they were first produced
 */
private List<String> getUnrankedSuggestions(String word) {
  String withoutApostrophes = word.replaceAll("['’]", "");
  String normalized = TurkishAlphabet.INSTANCE.normalize(withoutApostrophes);
  List<String> candidates = decoder.getSuggestions(normalized, charMatcher);

  WordAnalysisSurfaceFormatter.CaseType caseType = formatter.guessCase(word);
  boolean useDefaultCase = caseType == WordAnalysisSurfaceFormatter.CaseType.MIXED_CASE
      || caseType == WordAnalysisSurfaceFormatter.CaseType.LOWER_CASE;
  if (useDefaultCase) {
    caseType = WordAnalysisSurfaceFormatter.CaseType.DEFAULT_CASE;
  }

  // LinkedHashSet drops duplicate surface forms while keeping first-seen order.
  Set<String> unique = new LinkedHashSet<>(candidates.size());
  for (String candidate : candidates) {
    for (SingleAnalysis analysis : morphology.analyze(candidate)) {
      boolean rejected = analysis.isUnknown()
          || (analysisPredicate != null && !analysisPredicate.test(analysis));
      if (!rejected) {
        unique.add(formatter.formatToCase(analysis, caseType, getApostrophe(word)));
      }
    }
  }
  return new ArrayList<>(unique);
}
Use of zemberek.morphology.analysis.SingleAnalysis in the project zemberek-nlp by ahmetaa.
From the class TurkishSpellChecker, method check.
/**
 * Checks whether the input is accepted as correctly written.
 *
 * <p>The input is analyzed and its case is guessed. Analyses that are unknown or that fail
 * {@code analysisPredicate} (when one is set) are ignored. The input is accepted as soon as one
 * remaining analysis can be formatted to the guessed case and the formatted text equals the
 * input exactly.
 *
 * @param input the word to check
 * @return {@code true} if some analysis reproduces the input exactly, {@code false} otherwise
 */
public boolean check(String input) {
  WordAnalysis candidates = morphology.analyze(input);
  WordAnalysisSurfaceFormatter.CaseType guessedCase = formatter.guessCase(input);
  for (SingleAnalysis candidate : candidates) {
    boolean rejected = candidate.isUnknown()
        || (analysisPredicate != null && !analysisPredicate.test(candidate));
    if (rejected) {
      continue;
    }
    String apostrophe = getApostrophe(input);
    if (formatter.canBeFormatted(candidate, guessedCase)
        && input.equals(formatter.formatToCase(candidate, guessedCase, apostrophe))) {
      return true;
    }
  }
  return false;
}
Use of zemberek.morphology.analysis.SingleAnalysis in the project zemberek-nlp by ahmetaa.
From the class AnalyzerController, method home.
/**
 * Handles {@code GET /analyze}: analyzes the text of the {@code word} request parameter and
 * returns the result as an HTML fragment.
 *
 * <p>The fragment lists the input, then for every word its input form followed by the long
 * format of each of its analyses, and finally the long format of the analyses chosen by
 * disambiguation.
 *
 * <p>The request parameter is untrusted, so every piece of dynamic text is HTML-escaped before
 * it is written into the response; the original concatenated it unescaped, which allowed
 * markup injection (reflected XSS).
 *
 * @param sentence value of the {@code word} request parameter; empty when it is absent
 * @return the HTML fragment describing the analyses
 */
@GetMapping("/analyze")
@ResponseBody
String home(@RequestParam(name = "word", required = false, defaultValue = "") String sentence) {
  List<WordAnalysis> analysisList = morphology.analyzeSentence(sentence);
  StringBuilder sb = new StringBuilder("Input: ").append(escapeHtml(sentence));
  for (WordAnalysis wa : analysisList) {
    appendDiv(sb, wa.getInput());
    for (SingleAnalysis sa : wa) {
      appendDiv(sb, sa.formatLong());
    }
  }
  sb.append("Disambiguation result:");
  SentenceAnalysis disambiguated = morphology.disambiguate(sentence, analysisList);
  for (SingleAnalysis sa : disambiguated.bestAnalysis()) {
    appendDiv(sb, sa.formatLong());
  }
  return sb.toString();
}

/** Appends {@code text}, HTML-escaped, wrapped in a {@code <div>} element. */
private static void appendDiv(StringBuilder sb, String text) {
  sb.append("<div>").append(escapeHtml(text)).append("</div>");
}

/**
 * Escapes the characters that are significant in HTML text and attribute values.
 * A {@code null} argument is rendered as {@code "null"}, as string concatenation would.
 */
private static String escapeHtml(String text) {
  String value = String.valueOf(text);
  StringBuilder escaped = new StringBuilder(value.length() + 16);
  for (int i = 0; i < value.length(); i++) {
    char c = value.charAt(i);
    switch (c) {
      case '&':
        escaped.append("&amp;");
        break;
      case '<':
        escaped.append("&lt;");
        break;
      case '>':
        escaped.append("&gt;");
        break;
      case '"':
        escaped.append("&quot;");
        break;
      case '\'':
        escaped.append("&#39;");
        break;
      default:
        escaped.append(c);
    }
  }
  return escaped.toString();
}
Aggregations