Search in sources :

Example 36 with SingleAnalysis

use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.

the class MorphologyConsole method run.

/**
 * Runs an interactive console loop: reads lines from standard input, analyzes and
 * disambiguates each one, and prints every analysis of every word.
 *
 * <p>The morphology instance is built from the default root lexicon. The unidentified
 * token analyzer is disabled when {@code disableUnknownAnalysis} is set, and informal
 * analysis is enabled when {@code enableInformalWordAnalysis} is set.
 *
 * <p>The loop ends when the user types {@code quit} or when standard input reaches
 * end-of-stream (for example Ctrl+D, or piped input running out). Blank lines are
 * rejected with a message and do not end the loop.
 */
@Override
public void run() {
    Builder b = TurkishMorphology.builder().setLexicon(RootLexicon.getDefault());
    if (disableUnknownAnalysis) {
        b.disableUnidentifiedTokenAnalyzer();
    }
    if (enableInformalWordAnalysis) {
        b.useInformalAnalysis();
    }
    TurkishMorphology morphology = b.build();
    System.out.println("Enter word or sentence. Type `quit` or `Ctrl+C` to exit.:");
    // Deliberately not closed: closing the Scanner would also close System.in.
    Scanner sc = new Scanner(System.in);
    // hasNextLine() guards against NoSuchElementException when input ends without `quit`.
    while (sc.hasNextLine()) {
        String input = sc.nextLine();
        if (input.equals("quit")) {
            break;
        }
        if (input.trim().isEmpty()) {
            System.out.println("Empty line cannot be processed.");
            continue;
        }
        SentenceAnalysis analysis = morphology.analyzeAndDisambiguate(input);
        System.out.format("%nS:%s%n", input);
        for (SentenceWordAnalysis sw : analysis) {
            WordAnalysis wa = sw.getWordAnalysis();
            System.out.println(wa.getInput());
            SingleAnalysis best = sw.getBestAnalysis();
            // The star marker is only useful when a word has more than one analysis.
            boolean single = wa.analysisCount() == 1;
            for (SingleAnalysis singleAnalysis : wa) {
                if (single) {
                    System.out.println(singleAnalysis.formatLong());
                } else {
                    // Mark the analysis chosen by disambiguation with a trailing '*'.
                    boolean isBest = singleAnalysis.equals(best);
                    System.out.format("%s%s%n", singleAnalysis.formatLong(), isBest ? "*" : "");
                }
            }
        }
        System.out.println();
    }
}
Also used : Scanner(java.util.Scanner) SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) SentenceWordAnalysis(zemberek.morphology.analysis.SentenceWordAnalysis) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) Builder(zemberek.morphology.TurkishMorphology.Builder) SentenceAnalysis(zemberek.morphology.analysis.SentenceAnalysis) TurkishMorphology(zemberek.morphology.TurkishMorphology)

Example 37 with SingleAnalysis

use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.

the class PreprocessTurkishCorpus method replaceWordsWithLemma.

/**
 * Rebuilds a sentence with each word replaced by the first lemma of its best analysis.
 * Words whose best analysis is unknown are kept as their original input.
 *
 * @param sentence the sentence to analyze and disambiguate
 * @return the replacement words joined with single spaces
 */
private String replaceWordsWithLemma(String sentence) {
    List<String> words = new ArrayList<>();
    for (SentenceWordAnalysis wordResult : morphology.analyzeAndDisambiguate(sentence)) {
        SingleAnalysis chosen = wordResult.getBestAnalysis();
        // Fall back to the surface input when no analysis is available for the word.
        String replacement = chosen.isUnknown()
                ? wordResult.getWordAnalysis().getInput()
                : chosen.getLemmas().get(0);
        words.add(replacement);
    }
    return String.join(" ", words);
}
Also used : SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) ArrayList(java.util.ArrayList) SentenceAnalysis(zemberek.morphology.analysis.SentenceAnalysis) SentenceWordAnalysis(zemberek.morphology.analysis.SentenceWordAnalysis)

Example 38 with SingleAnalysis

use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.

the class QuestionClassifier method splitWords.

/**
 * Rewrites a labelled line, replacing each word after the label with the last lemma of
 * its best analysis. The first space-separated token is treated as the label and is
 * copied through unchanged; words whose best analysis is unknown keep their input form.
 *
 * @param sentence a line whose first space-separated token is the label
 * @return the label followed by the replacement words, joined with single spaces; an
 *     empty string when nothing follows the label
 */
private String splitWords(String sentence) {
    List<String> parts = Splitter.on(" ").splitToList(sentence);
    // The first token is assumed to be the label; it is excluded from analysis.
    String label = parts.get(0);
    String body = String.join(" ", parts.subList(1, parts.size()));
    if (body.isEmpty()) {
        // NOTE(review): the label is dropped here as well - confirm callers expect that.
        return body;
    }
    List<String> out = new ArrayList<>();
    // The label always comes first in the output.
    out.add(label);
    for (SentenceWordAnalysis wordResult : morphology.analyzeAndDisambiguate(body)) {
        SingleAnalysis chosen = wordResult.getBestAnalysis();
        if (!chosen.isUnknown()) {
            List<String> lemmas = chosen.getLemmas();
            out.add(lemmas.get(lemmas.size() - 1));
        } else {
            out.add(wordResult.getWordAnalysis().getInput());
        }
    }
    return String.join(" ", out);
}
Also used : SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) ArrayList(java.util.ArrayList) SentenceAnalysis(zemberek.morphology.analysis.SentenceAnalysis) SentenceWordAnalysis(zemberek.morphology.analysis.SentenceWordAnalysis)

Example 39 with SingleAnalysis

use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.

the class AnalyzeAndConvertInformalWords method main.

/**
 * Demo entry point: analyzes an informal phrase with informal analysis enabled, prints
 * each word's surface form with its best analysis, then prints the result of running
 * each analysis through an {@code InformalAnalysisConverter}.
 *
 * @param args unused
 */
public static void main(String[] args) {
    TurkishMorphology analyzer = TurkishMorphology.builder()
            .setLexicon(RootLexicon.getDefault())
            .useInformalAnalysis()
            .build();
    List<SingleAnalysis> bestPerWord =
            analyzer.analyzeAndDisambiguate("okuycam diyo").bestAnalysis();
    bestPerWord.forEach(item -> System.out.println(item.surfaceForm() + "-" + item));

    System.out.println("Converting formal surface form:");
    InformalAnalysisConverter formalizer =
            new InformalAnalysisConverter(analyzer.getWordGenerator());
    bestPerWord.forEach(item ->
            System.out.println(formalizer.convert(item.surfaceForm(), item)));
}
Also used : SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) InformalAnalysisConverter(zemberek.morphology.analysis.InformalAnalysisConverter) TurkishMorphology(zemberek.morphology.TurkishMorphology)

Example 40 with SingleAnalysis

use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.

the class DisambiguateSentences method main.

/**
 * Demo entry point: logs every analysis of every word in a fixed sentence, then
 * disambiguates the sentence and logs the best analysis chosen for each word.
 *
 * @param args unused
 */
public static void main(String[] args) {
    TurkishMorphology analyzer = TurkishMorphology.createWithDefaults();
    String sentence = "Bol baharatlı bir yemek yaptıralım.";
    Log.info("Sentence  = " + sentence);

    // First pass: all candidate analyses per word, before ambiguity resolution.
    List<WordAnalysis> perWord = analyzer.analyzeSentence(sentence);
    Log.info("Sentence word analysis result:");
    for (WordAnalysis word : perWord) {
        Log.info("Word = " + word.getInput());
        word.forEach(candidate -> Log.info(candidate.formatLong()));
    }

    // Second pass: one best analysis per word.
    SentenceAnalysis resolved = analyzer.disambiguate(sentence, perWord);
    Log.info("\nAfter ambiguity resolution : ");
    resolved.bestAnalysis().forEach(Log::info);
}
Also used : SingleAnalysis(zemberek.morphology.analysis.SingleAnalysis) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) Log(zemberek.core.logging.Log) SentenceAnalysis(zemberek.morphology.analysis.SentenceAnalysis) TurkishMorphology(zemberek.morphology.TurkishMorphology)

Aggregations

SingleAnalysis (zemberek.morphology.analysis.SingleAnalysis)55 WordAnalysis (zemberek.morphology.analysis.WordAnalysis)38 ArrayList (java.util.ArrayList)25 SentenceAnalysis (zemberek.morphology.analysis.SentenceAnalysis)23 TurkishMorphology (zemberek.morphology.TurkishMorphology)21 SentenceWordAnalysis (zemberek.morphology.analysis.SentenceWordAnalysis)18 Test (org.junit.Test)15 LinkedHashSet (java.util.LinkedHashSet)13 PrintWriter (java.io.PrintWriter)10 Path (java.nio.file.Path)10 Histogram (zemberek.core.collections.Histogram)10 Token (zemberek.tokenization.Token)7 IOException (java.io.IOException)6 Ignore (org.junit.Ignore)6 Log (zemberek.core.logging.Log)6 HashSet (java.util.HashSet)5 List (java.util.List)5 Collectors (java.util.stream.Collectors)5 Paths (java.nio.file.Paths)4 Files (java.nio.file.Files)3