Use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.
Class MorphologyConsole, method run.
/**
 * Interactive console loop: reads lines from standard input, runs
 * {@code analyzeAndDisambiguate} on each one and prints every analysis of every word.
 *
 * <p>The morphology instance is built from the default root lexicon; the
 * {@code disableUnknownAnalysis} and {@code enableInformalWordAnalysis} flags toggle the
 * corresponding builder options. When a word has more than one analysis, the one equal to
 * {@code getBestAnalysis()} is marked with a trailing {@code *}. Blank lines are rejected
 * with a message. The loop ends when the line {@code quit} is entered or when standard
 * input reaches end-of-stream.
 */
@Override
public void run() {
  Builder b = TurkishMorphology.builder().setLexicon(RootLexicon.getDefault());
  if (disableUnknownAnalysis) {
    b.disableUnidentifiedTokenAnalyzer();
  }
  if (enableInformalWordAnalysis) {
    b.useInformalAnalysis();
  }
  TurkishMorphology morphology = b.build();

  System.out.println("Enter word or sentence. Type `quit` or `Ctrl+C` to exit.:");
  // Deliberately not closed: closing the Scanner would also close System.in.
  Scanner sc = new Scanner(System.in);

  // hasNextLine() guards against end-of-input (Ctrl+D, piped or redirected stdin);
  // an unguarded nextLine() throws NoSuchElementException in that situation.
  while (sc.hasNextLine()) {
    String input = sc.nextLine();
    if (input.equals("quit")) {
      break;
    }
    if (input.trim().isEmpty()) {
      System.out.println("Empty line cannot be processed.");
      continue;
    }

    SentenceAnalysis analysis = morphology.analyzeAndDisambiguate(input);
    System.out.format("%nS:%s%n", input);
    for (SentenceWordAnalysis sw : analysis) {
      WordAnalysis wa = sw.getWordAnalysis();
      System.out.println(wa.getInput());
      SingleAnalysis best = sw.getBestAnalysis();
      for (SingleAnalysis singleAnalysis : wa) {
        if (wa.analysisCount() == 1) {
          // A lone analysis needs no "best" marker.
          System.out.println(singleAnalysis.formatLong());
        } else {
          boolean isBest = singleAnalysis.equals(best);
          System.out.format("%s%s%n", singleAnalysis.formatLong(), isBest ? "*" : "");
        }
      }
    }
    System.out.println();
  }
}
Use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.
Class PreprocessTurkishCorpus, method replaceWordsWithLemma.
/**
 * Rewrites a sentence so that each analyzed word is replaced by the first lemma of its
 * best (disambiguated) analysis.
 *
 * <p>Words whose best analysis is unknown are emitted as their original input. The
 * resulting tokens are joined with single spaces.
 *
 * @param sentence the sentence to analyze
 * @return the space-joined sequence of lemmas / original inputs
 */
private String replaceWordsWithLemma(String sentence) {
  List<String> tokens = new ArrayList<>();
  for (SentenceWordAnalysis word : morphology.analyzeAndDisambiguate(sentence)) {
    SingleAnalysis bestAnalysis = word.getBestAnalysis();
    String token = bestAnalysis.isUnknown()
        ? word.getWordAnalysis().getInput()
        : bestAnalysis.getLemmas().get(0);
    tokens.add(token);
  }
  return String.join(" ", tokens);
}
Use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.
Class QuestionClassifier, method splitWords.
/**
 * Rewrites a labeled line: the first space-separated token is kept as the label, and each
 * analyzed word of the remainder is replaced by the last lemma of its best analysis.
 *
 * <p>Words whose best analysis is unknown are emitted as their original input. If nothing
 * follows the label, an empty string is returned (the label is not included).
 *
 * @param sentence a line of the form {@code "<label> <text...>"}
 * @return the label followed by the space-joined lemmas / original inputs, or an empty
 *     string when the text part is empty
 */
private String splitWords(String sentence) {
  List<String> parts = Splitter.on(" ").splitToList(sentence);
  // The first token is assumed to be the label; it is excluded from morphological analysis.
  String label = parts.get(0);
  String text = String.join(" ", parts.subList(1, parts.size()));
  if (text.isEmpty()) {
    return text;
  }

  List<String> out = new ArrayList<>();
  // The label always leads the output.
  out.add(label);
  for (SentenceWordAnalysis word : morphology.analyzeAndDisambiguate(text)) {
    SingleAnalysis bestAnalysis = word.getBestAnalysis();
    if (bestAnalysis.isUnknown()) {
      out.add(word.getWordAnalysis().getInput());
    } else {
      List<String> lemmas = bestAnalysis.getLemmas();
      out.add(lemmas.get(lemmas.size() - 1));
    }
  }
  return String.join(" ", out);
}
Use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.
Class AnalyzeAndConvertInformalWords, method main.
/**
 * Demo: analyzes the phrase "okuycam diyo" with informal analysis enabled, prints each
 * best analysis prefixed by its surface form, then prints the result of passing each
 * analysis through {@code InformalAnalysisConverter.convert}.
 */
public static void main(String[] args) {
  TurkishMorphology morphology = TurkishMorphology.builder()
      .setLexicon(RootLexicon.getDefault())
      .useInformalAnalysis()
      .build();

  SentenceAnalysis sentenceAnalysis = morphology.analyzeAndDisambiguate("okuycam diyo");
  List<SingleAnalysis> analyses = sentenceAnalysis.bestAnalysis();

  // First pass: surface form and analysis, as produced.
  analyses.forEach(item -> System.out.println(item.surfaceForm() + "-" + item));

  System.out.println("Converting formal surface form:");

  // Second pass: each analysis run through the converter.
  InformalAnalysisConverter converter =
      new InformalAnalysisConverter(morphology.getWordGenerator());
  for (SingleAnalysis item : analyses) {
    String surface = item.surfaceForm();
    System.out.println(converter.convert(surface, item));
  }
}
Use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.
Class DisambiguateSentences, method main.
/**
 * Demo: analyzes a fixed sentence word by word, logs every analysis of every word, then
 * disambiguates the sentence and logs the best analysis chosen for each word.
 */
public static void main(String[] args) {
  String sentence = "Bol baharatlı bir yemek yaptıralım.";
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();

  Log.info("Sentence = " + sentence);
  List<WordAnalysis> wordAnalyses = morphology.analyzeSentence(sentence);

  // Log all candidate analyses before disambiguation.
  Log.info("Sentence word analysis result:");
  for (WordAnalysis word : wordAnalyses) {
    Log.info("Word = " + word.getInput());
    word.forEach(candidate -> Log.info(candidate.formatLong()));
  }

  // Disambiguate using the analyses computed above and log the selected ones.
  SentenceAnalysis disambiguated = morphology.disambiguate(sentence, wordAnalyses);
  Log.info("\nAfter ambiguity resolution : ");
  disambiguated.bestAnalysis().forEach(Log::info);
}
Aggregations