Search in sources :

Example 31 with WordAnalysis

use of zemberek.morphology.analysis.WordAnalysis in project zemberek-nlp by ahmetaa.

the class TurkishSpellChecker method getUnrankedSuggestions.

/**
 * Collects spelling suggestions for {@code word} without applying any ranking.
 *
 * <p>Apostrophes ({@code '} and {@code ’}) are removed from the word and the result is
 * normalized before it is handed to the decoder. Every candidate returned by the decoder is
 * morphologically analyzed; analyses that are unknown, or that are rejected by
 * {@code analysisPredicate} (when one is set), are ignored. Each remaining analysis is
 * formatted to the case guessed from the original word, except that a mixed-case or
 * lower-case guess is replaced by the default case.
 *
 * @param word the word to generate suggestions for
 * @return the distinct formatted suggestions, in the order they were first produced
 */
private List<String> getUnrankedSuggestions(String word) {
    String withoutApostrophes = word.replaceAll("['’]", "");
    String normalized = TurkishAlphabet.INSTANCE.normalize(withoutApostrophes);
    List<String> candidates = decoder.getSuggestions(normalized, charMatcher);

    WordAnalysisSurfaceFormatter.CaseType guessedCase = formatter.guessCase(word);
    boolean useDefaultCase =
            guessedCase == WordAnalysisSurfaceFormatter.CaseType.MIXED_CASE
                    || guessedCase == WordAnalysisSurfaceFormatter.CaseType.LOWER_CASE;
    WordAnalysisSurfaceFormatter.CaseType targetCase =
            useDefaultCase ? WordAnalysisSurfaceFormatter.CaseType.DEFAULT_CASE : guessedCase;

    // A LinkedHashSet removes duplicate surface forms while keeping first-seen order.
    Set<String> unique = new LinkedHashSet<>(candidates.size());
    for (String candidate : candidates) {
        for (SingleAnalysis analysis : morphology.analyze(candidate)) {
            boolean rejected = analysis.isUnknown()
                    || (analysisPredicate != null && !analysisPredicate.test(analysis));
            if (!rejected) {
                unique.add(formatter.formatToCase(analysis, targetCase, getApostrophe(word)));
            }
        }
    }
    return new ArrayList<>(unique);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) ArrayList(java.util.ArrayList) WordAnalysisSurfaceFormatter(zemberek.morphology.analysis.WordAnalysisSurfaceFormatter)

Example 32 with WordAnalysis

use of zemberek.morphology.analysis.WordAnalysis in project zemberek-nlp by ahmetaa.

the class TurkishSpellChecker method check.

/**
 * Checks whether {@code input} is accepted as a correctly written word.
 *
 * <p>The input is analyzed, and each analysis that is not unknown and is not rejected by
 * {@code analysisPredicate} (when one is set) is formatted back to the case guessed from the
 * input. The word is accepted as soon as one such formatted form equals the input exactly.
 *
 * @param input the word to check
 * @return {@code true} if at least one usable analysis reproduces {@code input} exactly;
 *     {@code false} otherwise
 */
public boolean check(String input) {
    WordAnalysis wordAnalysis = morphology.analyze(input);
    WordAnalysisSurfaceFormatter.CaseType guessedCase = formatter.guessCase(input);
    for (SingleAnalysis candidate : wordAnalysis) {
        boolean usable = !candidate.isUnknown()
                && (analysisPredicate == null || analysisPredicate.test(candidate));
        if (!usable) {
            continue;
        }
        String apostrophe = getApostrophe(input);
        // Only analyses that can be rendered in the guessed case are compared to the input.
        if (formatter.canBeFormatted(candidate, guessedCase)
                && input.equals(formatter.formatToCase(candidate, guessedCase, apostrophe))) {
            return true;
        }
    }
    return false;
}
Also used : SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) WordAnalysisSurfaceFormatter(zemberek.morphology.analysis.WordAnalysisSurfaceFormatter)

Example 33 with WordAnalysis

use of zemberek.morphology.analysis.WordAnalysis in project zemberek-nlp by ahmetaa.

the class AnalyzerController method home.

/**
 * Handles {@code GET /analyze}: analyzes the text given in the {@code word} request parameter
 * and returns an HTML fragment listing every analysis of every token, followed by the best
 * analysis chosen by disambiguation.
 *
 * <p>All text interpolated into the response (the raw input, token inputs and formatted
 * analyses) is HTML-escaped, because it originates from the request and would otherwise allow
 * markup or script injection into the returned page.
 *
 * @param sentence the text to analyze; an empty string when the parameter is absent
 * @return the HTML fragment described above
 */
@GetMapping("/analyze")
@ResponseBody
String home(@RequestParam(name = "word", required = false, defaultValue = "") String sentence) {
    List<WordAnalysis> analysisList = morphology.analyzeSentence(sentence);
    StringBuilder sb = new StringBuilder("Input: ").append(escapeHtml(sentence));
    for (WordAnalysis wa : analysisList) {
        appendDiv(sb, wa.getInput());
        for (SingleAnalysis sa : wa) {
            appendDiv(sb, sa.formatLong());
        }
    }
    sb.append("Disambiguation result:");
    SentenceAnalysis disambiguated = morphology.disambiguate(sentence, analysisList);
    for (SingleAnalysis sa : disambiguated.bestAnalysis()) {
        appendDiv(sb, sa.formatLong());
    }
    return sb.toString();
}

/** Appends {@code text}, HTML-escaped, wrapped in a {@code <div>} element. */
private static void appendDiv(StringBuilder sb, String text) {
    sb.append("<div>").append(escapeHtml(text)).append("</div>");
}

/**
 * Escapes the characters that are significant in HTML text and attribute values.
 * A {@code null} argument is rendered as {@code "null"}, matching string concatenation.
 */
private static String escapeHtml(String text) {
    String value = String.valueOf(text);
    StringBuilder escaped = new StringBuilder(value.length() + 16);
    for (int i = 0; i < value.length(); i++) {
        char c = value.charAt(i);
        switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            case '\'':
                escaped.append("&#39;");
                break;
            default:
                escaped.append(c);
        }
    }
    return escaped.toString();
}
Also used : SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) SentenceAnalysis(zemberek.morphology.analysis.SentenceAnalysis)

Example 34 with WordAnalysis

use of zemberek.morphology.analysis.WordAnalysis in project zemberek-nlp by ahmetaa.

the class AnalyzeWords method main.

/**
 * Demo entry point: analyzes the hard-coded word "kalemi" with a default
 * {@link TurkishMorphology} instance and logs each analysis in three formats
 * (long, lexical-only and Oflazer style), followed by an empty log call.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    final String word = "kalemi";
    TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
    Log.info("Word = " + word);

    for (SingleAnalysis analysis : morphology.analyze(word)) {
        String longForm = analysis.formatLong();
        Log.info("Lexical and Surface : " + longForm);
        String lexicalForm = analysis.formatLexical();
        Log.info("Only Lexical        : " + lexicalForm);
        String oflazerForm = AnalysisFormatters.OFLAZER_STYLE.format(analysis);
        Log.info("Oflazer style       : " + oflazerForm);
        Log.info();
    }
}
Also used : SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) TurkishMorphology(zemberek.morphology.TurkishMorphology)

Example 35 with WordAnalysis

use of zemberek.morphology.analysis.WordAnalysis in project zemberek-nlp by ahmetaa.

the class AmbiguousExampleFinder method extractSentences.

/**
 * Writes example sentences for ambiguous words, together with their morphological analyses.
 *
 * <p>Reads at most the first 100 lines of {@code data/ambiguity/zemberek-ambigious-words.txt}
 * as words. For each word, the sentences returned by {@code finder} are written to
 * {@code data/ambiguity/sentences.txt}. Every sentence is then analyzed and disambiguated;
 * sentences for which {@code containsUnkown} returns true are skipped, and the rest are
 * written to {@code data/ambiguity/sentences.morph.txt} with one line per analysis. When a
 * token has more than one analysis, the one selected by disambiguation is marked with a
 * trailing {@code *}.
 *
 * @param morphology analyzer used to analyze and disambiguate each sentence
 * @param finder source of example sentences for a word
 * @throws Exception if reading the word list or writing an output file fails, or if a
 *     collaborator throws
 */
private static void extractSentences(TurkishMorphology morphology, AmbiguousExampleFinder finder) throws Exception {
    List<String> allWords = Files.readAllLines(
            Paths.get("data/ambiguity/zemberek-ambigious-words.txt"), StandardCharsets.UTF_8);
    // Clamp the upper bound: subList(0, 100) throws IndexOutOfBoundsException when the
    // file holds fewer than 100 lines.
    List<String> ambiguousWords = allWords.subList(0, Math.min(100, allWords.size()));
    Path out = Paths.get("data/ambiguity/sentences.txt");
    Path morph = Paths.get("data/ambiguity/sentences.morph.txt");
    try (PrintWriter pw = new PrintWriter(out.toFile(), "utf-8");
        PrintWriter pwMorph = new PrintWriter(morph.toFile(), "utf-8")) {
        for (String word : ambiguousWords) {
            Log.info(word);
            // NOTE(review): the meaning of the numeric arguments (3, 5, 10) is defined by
            // AmbiguousExampleFinder.getSentences — confirm there.
            List<String> sentences = finder.getSentences(word, 3, 5, 10);
            pw.println(word);
            sentences.forEach(pw::println);
            pw.println();
            for (String sentence : sentences) {
                SentenceAnalysis analysis = morphology.analyzeAndDisambiguate(sentence);
                if (containsUnkown(analysis)) {
                    continue;
                }
                pwMorph.format("S:%s%n", sentence);
                for (SentenceWordAnalysis sw : analysis) {
                    WordAnalysis wa = sw.getWordAnalysis();
                    pwMorph.println(wa.getInput());
                    SingleAnalysis best = sw.getBestAnalysis();
                    for (SingleAnalysis singleAnalysis : wa) {
                        boolean isBest = singleAnalysis.equals(best);
                        if (wa.analysisCount() == 1) {
                            // An unambiguous token needs no "best" marker.
                            pwMorph.println(singleAnalysis.formatLong());
                        } else {
                            pwMorph.format("%s%s%n", singleAnalysis.formatLong(), isBest ? "*" : "");
                        }
                    }
                }
                pwMorph.println();
            }
        }
    }
}
Also used : Path(java.nio.file.Path) SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) SentenceWordAnalysis(zemberek.morphology.analysis.SentenceWordAnalysis) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) SentenceAnalysis(zemberek.morphology.analysis.SentenceAnalysis) PrintWriter(java.io.PrintWriter)

Aggregations

WordAnalysis (zemberek.morphology.analysis.WordAnalysis) 96, Test (org.junit.Test) 42, SingleAnalysis (zemberek.morphology.analysis.SingleAnalysis) 36, TurkishMorphology (zemberek.morphology.TurkishMorphology) 22, ArrayList (java.util.ArrayList) 21, SentenceAnalysis (zemberek.morphology.analysis.SentenceAnalysis) 19, LinkedHashSet (java.util.LinkedHashSet) 13, Ignore (org.junit.Ignore) 13, Histogram (zemberek.core.collections.Histogram) 12, Path (java.nio.file.Path) 11, PrintWriter (java.io.PrintWriter) 10, SentenceWordAnalysis (zemberek.morphology.analysis.SentenceWordAnalysis) 10, IOException (java.io.IOException) 6, HashSet (java.util.HashSet) 6, List (java.util.List) 6, WordAnalyzer (zemberek.morphology.analysis.WordAnalyzer) 6, SimpleGenerator (zemberek.morphology.generator.SimpleGenerator) 6, DictionaryItem (zemberek.morphology.lexicon.DictionaryItem) 6, DynamicLexiconGraph (zemberek.morphology.lexicon.graph.DynamicLexiconGraph) 6, Log (zemberek.core.logging.Log) 5