Use of zemberek.morphology.analysis.SentenceAnalysis in project zemberek-nlp by ahmetaa.
Class PerceptronAmbiguityResolverTrainer, method train:
/**
 * Trains an ambiguity resolver by making {@code iterationCount} passes over the training set.
 *
 * <p>Each pass shuffles the training set and decodes every non-empty sentence with the current
 * {@code weights}. When the decoded path differs from the sentence's reference analysis, the
 * feature counts of both are handed to {@code updateModel}. At the end of the pass every feature
 * in {@code averagedWeights} is refreshed through {@code updateAveragedWeights}, its entry in
 * {@code counts} is set to the running example count, and a resolver built from
 * {@code averagedWeights} is evaluated on the development set.
 *
 * @param trainingSet sentences used for the updates; shuffled in place on every pass
 * @param devSet sentences passed to {@code test} after every pass
 * @param iterationCount number of passes over {@code trainingSet}
 * @return a resolver built from {@code averagedWeights} and a fresh {@code FeatureExtractor}
 * @throws IllegalStateException if a decoded path and the reference analysis differ in size
 */
public PerceptronAmbiguityResolver train(DataSet trainingSet, DataSet devSet, int iterationCount) {
    FeatureExtractor extractor = new FeatureExtractor(false);
    Decoder decoder = new Decoder(weights, extractor);

    // Running count of non-empty sentences seen so far; it is never reset between passes.
    int numExamples = 0;

    for (int iteration = 0; iteration < iterationCount; iteration++) {
        Log.info("Iteration:" + iteration);
        trainingSet.shuffle();

        for (SentenceAnalysis sentence : trainingSet.sentences) {
            if (sentence.size() == 0) {
                continue;
            }
            numExamples++;

            DecodeResult decoded = decoder.bestPath(sentence.ambiguousAnalysis());
            boolean alreadyCorrect = sentence.bestAnalysis().equals(decoded.bestParse);
            if (!alreadyCorrect) {
                if (sentence.bestAnalysis().size() != decoded.bestParse.size()) {
                    String message =
                            "Best parse result must have same amount of tokens with Correct parse."
                                    + " \nCorrect = " + sentence.bestAnalysis()
                                    + " \nBest = " + decoded.bestParse;
                    throw new IllegalStateException(message);
                }
                IntValueMap<String> goldCounts =
                        extractor.extractFeatureCounts(sentence.bestAnalysis());
                IntValueMap<String> predictedCounts =
                        extractor.extractFeatureCounts(decoded.bestParse);
                updateModel(goldCounts, predictedCounts, numExamples);
            }
        }

        // End-of-pass bookkeeping for every feature currently held in the averaged weights.
        for (String feature : averagedWeights) {
            updateAveragedWeights(feature, numExamples);
            counts.put(feature, numExamples);
        }

        Log.info("Testing development set.");
        PerceptronAmbiguityResolver devResolver =
                new PerceptronAmbiguityResolver(averagedWeights, extractor);
        test(devSet, devResolver);
    }

    return new PerceptronAmbiguityResolver(averagedWeights, new FeatureExtractor(false));
}
Use of zemberek.morphology.analysis.SentenceAnalysis in project zemberek-nlp by ahmetaa.
Class PerceptronAmbiguityResolver, method disambiguate:
/**
 * Picks one analysis per word using the decoder and packages the result as a sentence analysis.
 *
 * <p>The i-th entry of the decoder's best parse is paired with the i-th element of
 * {@code allAnalyses}.
 *
 * @param sentence the sentence text, passed through to the returned {@code SentenceAnalysis}
 * @param allAnalyses the candidate analyses, one {@code WordAnalysis} per word
 * @return a {@code SentenceAnalysis} holding one {@code SentenceWordAnalysis} per input element
 */
@Override
public SentenceAnalysis disambiguate(String sentence, List<WordAnalysis> allAnalyses) {
    DecodeResult decoded = decoder.bestPath(allAnalyses);
    List<SentenceWordAnalysis> resolved = new ArrayList<>();
    int position = 0;
    for (WordAnalysis candidates : allAnalyses) {
        SingleAnalysis chosen = decoded.bestParse.get(position);
        resolved.add(new SentenceWordAnalysis(chosen, candidates));
        position++;
    }
    return new SentenceAnalysis(sentence, resolved);
}
Use of zemberek.morphology.analysis.SentenceAnalysis in project zemberek-nlp by ahmetaa.
Class MorphologyServiceImpl, method analyzeSentence:
/**
 * Analyzes and disambiguates the request's input sentence and streams back a single response.
 *
 * <p>The sentence is logged after analysis, then exactly one message is sent via
 * {@code onNext} followed by {@code onCompleted}.
 *
 * @param request carries the input sentence and the "contain all analyses" flag that is
 *     forwarded to {@code toSentenceAnalysis}
 * @param responseObserver receives the converted analysis and the completion signal
 */
@Override
public void analyzeSentence(SentenceAnalysisRequest request, StreamObserver<SentenceAnalysisProto> responseObserver) {
    String sentence = request.getInput();
    SentenceAnalysis analysis = morphology.analyzeAndDisambiguate(sentence);
    Log.info("Sentence = %s", sentence);

    SentenceAnalysisProto response =
            toSentenceAnalysis(analysis, request.getContainAllAnalyses());
    responseObserver.onNext(response);
    responseObserver.onCompleted();
}
Use of zemberek.morphology.analysis.SentenceAnalysis in project zemberek-nlp by ahmetaa.
Class ClassificationConsole, method replaceWordsWithLemma:
/**
 * Rewrites a sentence so that each analyzed word is replaced by the last lemma of its best
 * analysis; words whose best analysis is unknown are kept as their original input.
 *
 * @param sentence the sentence to analyze and disambiguate
 * @return the replacement tokens joined with single spaces
 */
private String replaceWordsWithLemma(String sentence) {
    SentenceAnalysis analysis = morphology.analyzeAndDisambiguate(sentence);
    List<String> tokens = new ArrayList<>();
    for (SentenceWordAnalysis word : analysis) {
        SingleAnalysis best = word.getBestAnalysis();
        if (!best.isUnknown()) {
            // The last entry of the lemma list is the one used as the replacement.
            List<String> lemmas = best.getLemmas();
            int lastIndex = lemmas.size() - 1;
            tokens.add(lemmas.get(lastIndex));
        } else {
            tokens.add(word.getWordAnalysis().getInput());
        }
    }
    return String.join(" ", tokens);
}
Use of zemberek.morphology.analysis.SentenceAnalysis in project zemberek-nlp by ahmetaa.
Class AnalyzerController, method home:
/**
 * Handles {@code GET /analyze}: analyzes the text in the {@code word} request parameter and
 * returns an HTML fragment listing every analysis of every word, followed by the
 * disambiguated analysis sequence.
 *
 * <p>The request parameter is untrusted and is echoed into the response, so every dynamic
 * piece of text (the input itself, each word's input and each formatted analysis) is
 * HTML-escaped before being written. Only the {@code <div>} wrappers are emitted as markup.
 *
 * @param sentence the text to analyze; an empty string when the parameter is absent
 * @return the HTML fragment described above
 */
@GetMapping("/analyze")
@ResponseBody
String home(@RequestParam(name = "word", required = false, defaultValue = "") String sentence) {
    List<WordAnalysis> analysisList = morphology.analyzeSentence(sentence);

    StringBuilder sb = new StringBuilder("Input: ").append(escapeHtml(sentence));
    for (WordAnalysis wa : analysisList) {
        appendDiv(sb, wa.getInput());
        for (SingleAnalysis sa : wa) {
            appendDiv(sb, sa.formatLong());
        }
    }

    sb.append("Disambiguation result:");
    SentenceAnalysis disambiguated = morphology.disambiguate(sentence, analysisList);
    for (SingleAnalysis sa : disambiguated.bestAnalysis()) {
        appendDiv(sb, sa.formatLong());
    }
    return sb.toString();
}

/** Appends {@code text}, HTML-escaped, wrapped in a {@code <div>} element. */
private static void appendDiv(StringBuilder out, String text) {
    out.append("<div>").append(escapeHtml(text)).append("</div>");
}

/** Replaces the HTML-significant characters {@code & < > " '} with character references. */
private static String escapeHtml(String text) {
    // String.valueOf keeps the "null" rendering that plain string concatenation would produce.
    String value = String.valueOf(text);
    StringBuilder escaped = new StringBuilder(value.length() + 16);
    for (int i = 0; i < value.length(); i++) {
        char c = value.charAt(i);
        switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            case '\'':
                escaped.append("&#39;");
                break;
            default:
                escaped.append(c);
        }
    }
    return escaped.toString();
}
Aggregations