Search in sources :

Example 21 with SingleAnalysis

use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.

the class PerceptronAmbiguityResolver method disambiguate.

/**
 * Pairs every word analysis of the sentence with the parse picked by the decoder.
 *
 * @param sentence    the sentence text, passed through to the returned {@link SentenceAnalysis}
 * @param allAnalyses the per-word analyses, in sentence order
 * @return a {@link SentenceAnalysis} holding one {@link SentenceWordAnalysis} per entry of
 *         {@code allAnalyses}, each combining the entry with the parse found at the same
 *         index of the decoder's best path
 */
@Override
public SentenceAnalysis disambiguate(String sentence, List<WordAnalysis> allAnalyses) {
    DecodeResult decoded = decoder.bestPath(allAnalyses);
    List<SentenceWordAnalysis> paired = new ArrayList<>();
    // The best path is read by position, so keep a running index next to the iteration.
    int position = 0;
    for (WordAnalysis candidates : allAnalyses) {
        SingleAnalysis chosen = decoded.bestParse.get(position);
        paired.add(new SentenceWordAnalysis(chosen, candidates));
        position++;
    }
    return new SentenceAnalysis(sentence, paired);
}
Also used : SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) SentenceWordAnalysis(zemberek.morphology.analysis.SentenceWordAnalysis) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) ArrayList(java.util.ArrayList) SentenceAnalysis(zemberek.morphology.analysis.SentenceAnalysis)

Example 22 with SingleAnalysis

use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.

the class ClassificationConsole method replaceWordsWithLemma.

/**
 * Rewrites a sentence so that each analyzed word is replaced by the last lemma of its best
 * analysis. Words whose best analysis is unknown are kept as their original input.
 *
 * @param sentence the sentence to analyze and disambiguate
 * @return the replacement tokens joined with single spaces
 */
private String replaceWordsWithLemma(String sentence) {
    SentenceAnalysis disambiguated = morphology.analyzeAndDisambiguate(sentence);
    List<String> tokens = new ArrayList<>();
    for (SentenceWordAnalysis wordResult : disambiguated) {
        SingleAnalysis chosen = wordResult.getBestAnalysis();
        String replacement;
        if (chosen.isUnknown()) {
            // No usable analysis: fall back to the word exactly as it was given.
            replacement = wordResult.getWordAnalysis().getInput();
        } else {
            List<String> lemmaChain = chosen.getLemmas();
            replacement = lemmaChain.get(lemmaChain.size() - 1);
        }
        tokens.add(replacement);
    }
    return String.join(" ", tokens);
}
Also used : SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) ArrayList(java.util.ArrayList) SentenceAnalysis(zemberek.morphology.analysis.SentenceAnalysis) SentenceWordAnalysis(zemberek.morphology.analysis.SentenceWordAnalysis)

Example 23 with SingleAnalysis

use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.

the class TurkishSpellChecker method getUnrankedSuggestions.

/**
 * Collects spelling suggestions for a word without ranking them.
 *
 * <p>Apostrophes (straight and typographic) are removed from the word before it is
 * normalized and handed to the decoder. Every decoder candidate is analyzed; analyses that
 * are unknown, or that fail {@code analysisPredicate} when one is set, are skipped. The
 * remaining analyses are formatted to the guessed case of the original word, where mixed
 * and lower case are both treated as the default case.
 *
 * @param word the word to produce suggestions for
 * @return the distinct formatted suggestions, in the order they were first produced
 */
private List<String> getUnrankedSuggestions(String word) {
    String withoutApostrophes = word.replaceAll("['’]", "");
    String normalized = TurkishAlphabet.INSTANCE.normalize(withoutApostrophes);
    List<String> candidates = decoder.getSuggestions(normalized, charMatcher);

    WordAnalysisSurfaceFormatter.CaseType guessed = formatter.guessCase(word);
    boolean useDefaultCase = guessed == WordAnalysisSurfaceFormatter.CaseType.MIXED_CASE
            || guessed == WordAnalysisSurfaceFormatter.CaseType.LOWER_CASE;
    WordAnalysisSurfaceFormatter.CaseType targetCase =
            useDefaultCase ? WordAnalysisSurfaceFormatter.CaseType.DEFAULT_CASE : guessed;

    // Insertion-ordered set: drops duplicates while keeping first-seen order.
    Set<String> unique = new LinkedHashSet<>(candidates.size());
    for (String candidate : candidates) {
        for (SingleAnalysis parse : morphology.analyze(candidate)) {
            boolean rejected = parse.isUnknown()
                    || (analysisPredicate != null && !analysisPredicate.test(parse));
            if (!rejected) {
                unique.add(formatter.formatToCase(parse, targetCase, getApostrophe(word)));
            }
        }
    }
    return new ArrayList<>(unique);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) ArrayList(java.util.ArrayList) WordAnalysisSurfaceFormatter(zemberek.morphology.analysis.WordAnalysisSurfaceFormatter)

Example 24 with SingleAnalysis

use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.

the class TurkishSpellChecker method check.

/**
 * Tells whether the input is accepted as correctly written.
 *
 * <p>The input is analyzed and its case is guessed. An analysis counts as a match when it is
 * not unknown, passes {@code analysisPredicate} (if one is set), can be formatted to the
 * guessed case, and its formatted form equals the input exactly.
 *
 * @param input the word to check
 * @return {@code true} if at least one analysis reproduces the input, {@code false} otherwise
 */
public boolean check(String input) {
    WordAnalysis parses = morphology.analyze(input);
    WordAnalysisSurfaceFormatter.CaseType inputCase = formatter.guessCase(input);
    for (SingleAnalysis parse : parses) {
        boolean rejected = parse.isUnknown()
                || (analysisPredicate != null && !analysisPredicate.test(parse));
        if (rejected) {
            continue;
        }
        String apostrophe = getApostrophe(input);
        // Only format when the formatter says it can; then compare the result to the input.
        if (formatter.canBeFormatted(parse, inputCase)
                && input.equals(formatter.formatToCase(parse, inputCase, apostrophe))) {
            return true;
        }
    }
    return false;
}
Also used : SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) WordAnalysisSurfaceFormatter(zemberek.morphology.analysis.WordAnalysisSurfaceFormatter)

Example 25 with SingleAnalysis

use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.

the class AnalyzerController method home.

/**
 * Handles {@code GET /analyze}: analyzes the sentence given in the {@code word} request
 * parameter and returns an HTML fragment listing, for each word, its input and the long form
 * of every analysis, followed by the long form of each best analysis after disambiguation.
 *
 * <p>All text placed into the response is HTML-escaped. The sentence comes straight from the
 * request, so writing it (or anything derived from it) unescaped would let a caller inject
 * markup or script into the page.
 *
 * @param sentence the text to analyze; an empty string when the parameter is absent
 * @return the HTML fragment sent as the response body
 */
@GetMapping("/analyze")
@ResponseBody
String home(@RequestParam(name = "word", required = false, defaultValue = "") String sentence) {
    List<WordAnalysis> analysisList = morphology.analyzeSentence(sentence);
    StringBuilder sb = new StringBuilder("Input: ").append(escapeHtml(sentence));
    for (WordAnalysis wa : analysisList) {
        appendDiv(sb, wa.getInput());
        for (SingleAnalysis sa : wa) {
            appendDiv(sb, sa.formatLong());
        }
    }
    sb.append("Disambiguation result:");
    SentenceAnalysis disambiguated = morphology.disambiguate(sentence, analysisList);
    for (SingleAnalysis sa : disambiguated.bestAnalysis()) {
        appendDiv(sb, sa.formatLong());
    }
    return sb.toString();
}

/** Appends {@code text}, HTML-escaped, wrapped in a {@code <div>} element. */
private static void appendDiv(StringBuilder sb, String text) {
    sb.append("<div>").append(escapeHtml(text)).append("</div>");
}

/** Replaces the HTML-significant characters {@code & < > " '} with character references. */
private static String escapeHtml(String text) {
    if (text == null) {
        // Mirrors what string concatenation would have produced for a null value.
        return "null";
    }
    StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            case '\'':
                escaped.append("&#39;");
                break;
            default:
                escaped.append(c);
        }
    }
    return escaped.toString();
}
Also used : SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) SentenceAnalysis(zemberek.morphology.analysis.SentenceAnalysis)

Aggregations

SingleAnalysis (zemberek.morphology.analysis.SingleAnalysis): 55, WordAnalysis (zemberek.morphology.analysis.WordAnalysis): 38, ArrayList (java.util.ArrayList): 25, SentenceAnalysis (zemberek.morphology.analysis.SentenceAnalysis): 23, TurkishMorphology (zemberek.morphology.TurkishMorphology): 21, SentenceWordAnalysis (zemberek.morphology.analysis.SentenceWordAnalysis): 18, Test (org.junit.Test): 15, LinkedHashSet (java.util.LinkedHashSet): 13, PrintWriter (java.io.PrintWriter): 10, Path (java.nio.file.Path): 10, Histogram (zemberek.core.collections.Histogram): 10, Token (zemberek.tokenization.Token): 7, IOException (java.io.IOException): 6, Ignore (org.junit.Ignore): 6, Log (zemberek.core.logging.Log): 6, HashSet (java.util.HashSet): 5, List (java.util.List): 5, Collectors (java.util.stream.Collectors): 5, Paths (java.nio.file.Paths): 4, Files (java.nio.file.Files): 3