Search in sources :

Example 16 with SentenceWordAnalysis

use of zemberek.morphology.analysis.SentenceWordAnalysis in project zemberek-nlp by ahmetaa.

the class QuestionClassifier method splitWords.

/**
 * Rewrites a labelled sentence so that every word after the label is replaced by a lemma.
 *
 * <p>The input is split on single spaces and the first token is taken to be the label.
 * The remaining tokens are re-joined and passed to {@code morphology} for analysis and
 * disambiguation. For each analysed word, the last lemma of its best analysis is emitted;
 * a word whose best analysis is unknown is emitted as its original input.
 *
 * @param sentence label followed by the sentence text, separated by single spaces
 * @return the label and the per-word results joined with single spaces, or the empty
 *     string (without the label) when nothing follows the label
 */
private String splitWords(String sentence) {
    List<String> parts = Splitter.on(" ").splitToList(sentence);
    // The first token is assumed to be the label; it is kept out of the analysed text.
    String label = parts.get(0);
    String text = String.join(" ", parts.subList(1, parts.size()));
    if (text.isEmpty()) {
        return text;
    }
    List<String> out = new ArrayList<>();
    // The label always leads the result.
    out.add(label);
    for (SentenceWordAnalysis wordResult : morphology.analyzeAndDisambiguate(text)) {
        SingleAnalysis best = wordResult.getBestAnalysis();
        if (best.isUnknown()) {
            // No usable analysis: keep the word exactly as it appeared in the input.
            out.add(wordResult.getWordAnalysis().getInput());
        } else {
            List<String> lemmas = best.getLemmas();
            out.add(lemmas.get(lemmas.size() - 1));
        }
    }
    return String.join(" ", out);
}
Also used : SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) ArrayList(java.util.ArrayList) SentenceAnalysis(zemberek.morphology.analysis.SentenceAnalysis) SentenceWordAnalysis(zemberek.morphology.analysis.SentenceWordAnalysis)

Example 17 with SentenceWordAnalysis

use of zemberek.morphology.analysis.SentenceWordAnalysis in project zemberek-nlp by ahmetaa.

the class Scripts method saveUnambiguous.

/**
 * Writes the analyses of every sentence that contains no unknown word to {@code out}.
 *
 * <p>Sentences whose best-analysis list contains an unknown analysis are skipped. For each
 * remaining sentence the output is:
 * <ul>
 *   <li>a line {@code S:<sentence>};</li>
 *   <li>for each word, a line with the word input followed by one line per analysis in
 *       {@code formatLong()} form. When the word has more than one analysis, the one equal
 *       to the best analysis is suffixed with {@code *};</li>
 *   <li>an empty line terminating the sentence.</li>
 * </ul>
 *
 * @param sentences analysed sentences to filter and write
 * @param out destination file, written as UTF-8
 * @throws IOException if the file cannot be opened or if any write to it fails
 */
public static void saveUnambiguous(List<SentenceAnalysis> sentences, Path out) throws IOException {
    try (PrintWriter pwMorph = new PrintWriter(out.toFile(), "utf-8")) {
        for (SentenceAnalysis analysis : sentences) {
            if (analysis.bestAnalysis().stream().anyMatch(SingleAnalysis::isUnknown)) {
                continue;
            }
            pwMorph.format("S:%s%n", analysis.getSentence());
            for (SentenceWordAnalysis sw : analysis) {
                WordAnalysis wa = sw.getWordAnalysis();
                pwMorph.println(wa.getInput());
                SingleAnalysis best = sw.getBestAnalysis();
                // Same for every analysis of this word, so evaluate it once.
                boolean singleAnalysisOnly = wa.analysisCount() == 1;
                for (SingleAnalysis singleAnalysis : wa) {
                    if (singleAnalysisOnly) {
                        // A lone analysis needs no marker.
                        pwMorph.println(singleAnalysis.formatLong());
                    } else {
                        String marker = singleAnalysis.equals(best) ? "*" : "";
                        pwMorph.format("%s%s%n", singleAnalysis.formatLong(), marker);
                    }
                }
            }
            pwMorph.println();
        }
        // PrintWriter swallows I/O failures; checkError() flushes and reports them so a
        // truncated file is not mistaken for a successful save.
        if (pwMorph.checkError()) {
            throw new IOException("Failed to write analyses to " + out);
        }
    }
}
Also used : SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) SentenceWordAnalysis(zemberek.morphology.analysis.SentenceWordAnalysis) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) SentenceAnalysis(zemberek.morphology.analysis.SentenceAnalysis) PrintWriter(java.io.PrintWriter)

Example 18 with SentenceWordAnalysis

use of zemberek.morphology.analysis.SentenceWordAnalysis in project zemberek-nlp by ahmetaa.

the class ClassificationExampleBase method splitWords.

/**
 * Rewrites a labelled sentence so that each word is split into a lemma and a remainder.
 *
 * <p>The input is split on single spaces and the first token is taken to be the label.
 * The remaining tokens are re-joined and passed to {@code morphology} for analysis and
 * disambiguation. For each analysed word:
 * <ul>
 *   <li>if the best analysis is unknown, the original word input is emitted;</li>
 *   <li>otherwise the first lemma is emitted and, when that lemma is shorter than the
 *       word input, a second token {@code "_" + rest} follows, where {@code rest} is the
 *       input with its first {@code lemma.length()} characters removed.</li>
 * </ul>
 *
 * @param sentence label followed by the sentence text, separated by single spaces
 * @return the label and the generated tokens joined with single spaces, or the empty
 *     string (without the label) when nothing follows the label
 */
protected String splitWords(String sentence) {
    List<String> parts = Splitter.on(" ").splitToList(sentence);
    // The first token is assumed to be the label; it is kept out of the analysed text.
    String label = parts.get(0);
    String text = String.join(" ", parts.subList(1, parts.size()));
    if (text.isEmpty()) {
        return text;
    }
    List<String> out = new ArrayList<>();
    // The label always leads the result.
    out.add(label);
    for (SentenceWordAnalysis wordResult : morphology.analyzeAndDisambiguate(text)) {
        SingleAnalysis best = wordResult.getBestAnalysis();
        String surface = wordResult.getWordAnalysis().getInput();
        if (best.isUnknown()) {
            out.add(surface);
            continue;
        }
        String lemma = best.getLemmas().get(0);
        out.add(lemma);
        if (lemma.length() < surface.length()) {
            // The remainder is cut by length only; the lemma is not checked to be a prefix.
            out.add("_" + surface.substring(lemma.length()));
        }
    }
    return String.join(" ", out);
}
Also used : SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) ArrayList(java.util.ArrayList) SentenceAnalysis(zemberek.morphology.analysis.SentenceAnalysis) SentenceWordAnalysis(zemberek.morphology.analysis.SentenceWordAnalysis)

Example 19 with SentenceWordAnalysis

use of zemberek.morphology.analysis.SentenceWordAnalysis in project zemberek-nlp by ahmetaa.

the class FindPOS method main.

/**
 * Demo entry point: analyses one fixed Turkish sentence and logs each word together with
 * the primary part of speech of its best analysis.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final String sentence = "Keşke yarın hava güzel olsa.";
    TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
    Log.info("Sentence  = " + sentence);
    SentenceAnalysis disambiguated = morphology.analyzeAndDisambiguate(sentence);
    for (SentenceWordAnalysis wordResult : disambiguated) {
        // The POS is read from the best analysis chosen for this word.
        PrimaryPos pos = wordResult.getBestAnalysis().getPos();
        String word = wordResult.getWordAnalysis().getInput();
        Log.info("%s : %s ", word, pos);
    }
}
Also used : PrimaryPos(zemberek.core.turkish.PrimaryPos) SentenceAnalysis(zemberek.morphology.analysis.SentenceAnalysis) TurkishMorphology(zemberek.morphology.TurkishMorphology) SentenceWordAnalysis(zemberek.morphology.analysis.SentenceWordAnalysis)

Aggregations

SentenceAnalysis (zemberek.morphology.analysis.SentenceAnalysis)19 SentenceWordAnalysis (zemberek.morphology.analysis.SentenceWordAnalysis)19 SingleAnalysis (zemberek.morphology.analysis.SingleAnalysis)18 ArrayList (java.util.ArrayList)12 WordAnalysis (zemberek.morphology.analysis.WordAnalysis)8 TurkishMorphology (zemberek.morphology.TurkishMorphology)7 PrintWriter (java.io.PrintWriter)4 Histogram (zemberek.core.collections.Histogram)4 Path (java.nio.file.Path)3 Token (zemberek.tokenization.Token)3 Lists (com.google.common.collect.Lists)2 IOException (java.io.IOException)2 Paths (java.nio.file.Paths)2 Collections (java.util.Collections)2 LinkedHashSet (java.util.LinkedHashSet)2 List (java.util.List)2 Scanner (java.util.Scanner)2 Collectors (java.util.stream.Collectors)2 Log (zemberek.core.logging.Log)2 Files (java.nio.file.Files)1