Usage of zemberek.morphology.analysis.WordAnalysis in the project zemberek-nlp by ahmetaa.
Class TurkishSpellChecker, method getUnrankedSuggestions.
/**
 * Builds the list of spelling suggestions for {@code word} without ranking them.
 *
 * <p>Apostrophes are removed from the word and the result is normalized before it is handed to
 * the decoder. Every candidate the decoder returns is analyzed; analyses that are unknown or
 * rejected by {@code analysisPredicate} (when one is set) are dropped, and the remaining ones are
 * formatted to the case guessed from the original word. Mixed-case and lower-case guesses are
 * formatted with the default case instead.
 *
 * @param word the word to produce suggestions for
 * @return the formatted suggestions, duplicates removed, in first-seen order
 */
private List<String> getUnrankedSuggestions(String word) {
  String withoutApostrophes = word.replaceAll("['’]", "");
  String normalized = TurkishAlphabet.INSTANCE.normalize(withoutApostrophes);
  List<String> candidates = decoder.getSuggestions(normalized, charMatcher);

  WordAnalysisSurfaceFormatter.CaseType guessedCase = formatter.guessCase(word);
  boolean useDefaultCase =
      guessedCase == WordAnalysisSurfaceFormatter.CaseType.MIXED_CASE
          || guessedCase == WordAnalysisSurfaceFormatter.CaseType.LOWER_CASE;
  WordAnalysisSurfaceFormatter.CaseType targetCase =
      useDefaultCase ? WordAnalysisSurfaceFormatter.CaseType.DEFAULT_CASE : guessedCase;

  // LinkedHashSet removes duplicate surface forms while keeping the decoder's order.
  Set<String> suggestions = new LinkedHashSet<>(candidates.size());
  for (String candidate : candidates) {
    for (SingleAnalysis candidateAnalysis : morphology.analyze(candidate)) {
      boolean accepted =
          !candidateAnalysis.isUnknown()
              && (analysisPredicate == null || analysisPredicate.test(candidateAnalysis));
      if (accepted) {
        suggestions.add(
            formatter.formatToCase(candidateAnalysis, targetCase, getApostrophe(word)));
      }
    }
  }
  return new ArrayList<>(suggestions);
}
Usage of zemberek.morphology.analysis.WordAnalysis in the project zemberek-nlp by ahmetaa.
Class TurkishSpellChecker, method check.
/**
 * Checks whether {@code input} is spelled correctly.
 *
 * <p>The input is analyzed, and each analysis that is not unknown and passes
 * {@code analysisPredicate} (when one is set) is formatted to the case guessed from the input.
 * The input is accepted as soon as one formatted form is exactly equal to it.
 *
 * @param input the word to check
 * @return {@code true} if some accepted analysis formats back to exactly {@code input};
 *     {@code false} otherwise
 */
public boolean check(String input) {
  WordAnalysis wordAnalysis = morphology.analyze(input);
  WordAnalysisSurfaceFormatter.CaseType guessedCase = formatter.guessCase(input);
  for (SingleAnalysis candidate : wordAnalysis) {
    boolean rejected =
        candidate.isUnknown()
            || (analysisPredicate != null && !analysisPredicate.test(candidate));
    if (rejected) {
      continue;
    }
    String apostrophe = getApostrophe(input);
    if (!formatter.canBeFormatted(candidate, guessedCase)) {
      continue;
    }
    // Accept only an exact match between the input and the regenerated surface form.
    if (input.equals(formatter.formatToCase(candidate, guessedCase, apostrophe))) {
      return true;
    }
  }
  return false;
}
Usage of zemberek.morphology.analysis.WordAnalysis in the project zemberek-nlp by ahmetaa.
Class AnalyzerController, method home.
/**
 * Handles {@code GET /analyze}: analyzes the text given in the {@code word} request parameter
 * and returns an HTML fragment listing every analysis of every word, followed by the best
 * analysis chosen by disambiguation.
 *
 * <p>The request parameter and all text derived from it are HTML-escaped before being written
 * into the response, so markup in the request cannot be injected into the page.
 *
 * @param sentence the text to analyze; an empty string when the parameter is absent
 * @return the HTML fragment sent as the response body
 */
@GetMapping("/analyze")
@ResponseBody
String home(@RequestParam(name = "word", required = false, defaultValue = "") String sentence) {
  List<WordAnalysis> analysisList = morphology.analyzeSentence(sentence);
  StringBuilder sb = new StringBuilder("Input: ").append(escapeHtml(sentence));
  for (WordAnalysis wa : analysisList) {
    sb.append("<div>").append(escapeHtml(wa.getInput())).append("</div>");
    for (SingleAnalysis sa : wa) {
      sb.append("<div>").append(escapeHtml(sa.formatLong())).append("</div>");
    }
  }
  sb.append("Disambiguation result:");
  SentenceAnalysis disambiguated = morphology.disambiguate(sentence, analysisList);
  for (SingleAnalysis sa : disambiguated.bestAnalysis()) {
    sb.append("<div>").append(escapeHtml(sa.formatLong())).append("</div>");
  }
  return sb.toString();
}

/** Escapes the characters that are significant in HTML text and attribute values. */
private static String escapeHtml(String text) {
  // String.valueOf keeps the "null" rendering that string concatenation would produce.
  String value = String.valueOf(text);
  StringBuilder escaped = new StringBuilder(value.length() + 16);
  for (int i = 0; i < value.length(); i++) {
    char c = value.charAt(i);
    switch (c) {
      case '&':
        escaped.append("&amp;");
        break;
      case '<':
        escaped.append("&lt;");
        break;
      case '>':
        escaped.append("&gt;");
        break;
      case '"':
        escaped.append("&quot;");
        break;
      case '\'':
        escaped.append("&#39;");
        break;
      default:
        escaped.append(c);
    }
  }
  return escaped.toString();
}
Usage of zemberek.morphology.analysis.WordAnalysis in the project zemberek-nlp by ahmetaa.
Class AnalyzeWords, method main.
/**
 * Example entry point: analyzes the word "kalemi" with a default-configured
 * {@code TurkishMorphology} and logs each analysis in three formats.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
  final String word = "kalemi";
  TurkishMorphology analyzer = TurkishMorphology.createWithDefaults();
  Log.info("Word = " + word);
  WordAnalysis analyses = analyzer.analyze(word);
  analyses.forEach(analysis -> {
    Log.info("Lexical and Surface : " + analysis.formatLong());
    Log.info("Only Lexical : " + analysis.formatLexical());
    Log.info("Oflazer style : " + AnalysisFormatters.OFLAZER_STYLE.format(analysis));
    Log.info();
  });
}
Usage of zemberek.morphology.analysis.WordAnalysis in the project zemberek-nlp by ahmetaa.
Class AmbiguousExampleFinder, method extractSentences.
/**
 * Writes example sentences, and their morphological analyses, for the first (at most) 100 words
 * listed in {@code data/ambiguity/zemberek-ambigious-words.txt}.
 *
 * <p>For each word, the sentences returned by {@code finder} are written to
 * {@code data/ambiguity/sentences.txt}. Each sentence is then analyzed and disambiguated;
 * sentences for which {@code containsUnkown} reports true are skipped, and for the rest every
 * word's analyses are written to {@code data/ambiguity/sentences.morph.txt}. When a word has more
 * than one analysis, the one selected as best is marked with a trailing {@code *}.
 *
 * @param morphology the analyzer used to analyze and disambiguate the sentences
 * @param finder the source of example sentences for each word
 * @throws Exception if reading the word list or opening the output files fails, or if a callee
 *     throws
 */
private static void extractSentences(TurkishMorphology morphology, AmbiguousExampleFinder finder) throws Exception {
  List<String> allWords = Files.readAllLines(
      Paths.get("data/ambiguity/zemberek-ambigious-words.txt"), StandardCharsets.UTF_8);
  // Clamp the upper bound: subList(0, 100) throws IndexOutOfBoundsException when the file
  // has fewer than 100 lines.
  List<String> ambiguousWords = allWords.subList(0, Math.min(100, allWords.size()));
  Path out = Paths.get("data/ambiguity/sentences.txt");
  Path morph = Paths.get("data/ambiguity/sentences.morph.txt");
  try (PrintWriter pw = new PrintWriter(out.toFile(), "utf-8");
      PrintWriter pwMorph = new PrintWriter(morph.toFile(), "utf-8")) {
    for (String word : ambiguousWords) {
      Log.info(word);
      List<String> sentences = finder.getSentences(word, 3, 5, 10);
      pw.println(word);
      sentences.forEach(pw::println);
      pw.println();
      for (String sentence : sentences) {
        SentenceAnalysis analysis = morphology.analyzeAndDisambiguate(sentence);
        if (containsUnkown(analysis)) {
          continue;
        }
        pwMorph.format("S:%s%n", sentence);
        for (SentenceWordAnalysis sw : analysis) {
          WordAnalysis wa = sw.getWordAnalysis();
          pwMorph.println(wa.getInput());
          SingleAnalysis best = sw.getBestAnalysis();
          for (SingleAnalysis singleAnalysis : wa) {
            if (wa.analysisCount() == 1) {
              // A lone analysis needs no "best" marker.
              pwMorph.println(singleAnalysis.formatLong());
            } else {
              String marker = singleAnalysis.equals(best) ? "*" : "";
              pwMorph.format("%s%s%n", singleAnalysis.formatLong(), marker);
            }
          }
        }
        pwMorph.println();
      }
    }
  }
}
Aggregations