use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.
the class AddNewDictionaryItem method printResults.
/**
 * Logs every analysis contained in {@code results}, one per line, numbered from 1.
 *
 * <p>When {@code results.analysisCount()} is zero, "No Analysis." is logged first.
 * Analyses whose dictionary item carries {@code RootAttribute.Runtime} get an
 * extra suffix appended to their long format.
 *
 * @param results the word analysis whose individual analyses are printed
 */
private void printResults(WordAnalysis results) {
  if (results.analysisCount() == 0) {
    Log.info("No Analysis.");
  }
  int order = 0;
  for (SingleAnalysis analysis : results) {
    order++;
    String description = analysis.formatLong();
    // Items flagged Runtime are marked so they stand out in the log output.
    boolean runtimeItem =
        analysis.getDictionaryItem().attributes.contains(RootAttribute.Runtime);
    String suffix = runtimeItem ? " (Generated by UnidentifiedTokenParser)" : "";
    Log.info(order + " - " + description + suffix);
  }
}
use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.
the class StemmingAndLemmatization method main.
/**
 * Demo entry point: analyzes a fixed word with a default {@code TurkishMorphology}
 * and logs, for each analysis, its long format together with its stems and lemmas.
 *
 * @param args command line arguments; not read by this method
 */
public static void main(String[] args) {
  TurkishMorphology analyzer = TurkishMorphology.createWithDefaults();

  final String input = "kutucuğumuz";
  Log.info("Word = " + input);
  Log.info("Results: ");

  WordAnalysis analyses = analyzer.analyze(input);
  for (SingleAnalysis analysis : analyses) {
    Log.info(analysis.formatLong());
    Log.info("\tStems = " + analysis.getStems());
    Log.info("\tLemmas = " + analysis.getLemmas());
  }
}
use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.
the class TurkishMorphology method analyzeWordsWithApostrophe.
/**
 * Analyzes a word that contains an apostrophe, e.g. a proper noun followed by a suffix.
 *
 * <p>The word is split at its first apostrophe. The text before it is normalized with
 * {@code TurkishAlphabet.INSTANCE} and used as the expected stem. The word is then
 * analyzed with every apostrophe removed, and only analyses that satisfy both of the
 * following are kept:
 * <ul>
 *   <li>the dictionary item's primary POS is {@code PrimaryPos.Noun};</li>
 *   <li>the analysis contains the {@code p3sg} morpheme, or its stem equals the
 *       expected stem.</li>
 * </ul>
 *
 * @param word the surface form to analyze
 * @return the matching analyses; an empty list when the word has no apostrophe, when the
 *     first apostrophe is the first or the last character, or when the apostrophe-free
 *     form has no analysis
 */
public List<SingleAnalysis> analyzeWordsWithApostrophe(String word) {
  int apostropheAt = word.indexOf('\'');
  boolean splitsWord = apostropheAt > 0 && apostropheAt != word.length() - 1;
  if (!splitsWord) {
    return Collections.emptyList();
  }

  String beforeApostrophe = word.substring(0, apostropheAt);
  String afterApostrophe = word.substring(apostropheAt + 1);
  StemAndEnding parts = new StemAndEnding(beforeApostrophe, afterApostrophe);
  String stem = TurkishAlphabet.INSTANCE.normalize(parts.stem);

  List<SingleAnalysis> noQuotesParses = analyzer.analyze(word.replace("'", ""));
  if (noQuotesParses.isEmpty()) {
    return Collections.emptyList();
  }

  // words like "Hastanesi'ne". Should we accept Hastanesi or Hastane?
  return noQuotesParses.stream()
      .filter(candidate -> {
        if (candidate.getDictionaryItem().primaryPos != PrimaryPos.Noun) {
          return false;
        }
        if (candidate.containsMorpheme(TurkishMorphotactics.p3sg)) {
          return true;
        }
        return candidate.getStem().equals(stem);
      })
      .collect(Collectors.toList());
}
use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.
the class AmbiguityResolutionTests method issue157ShouldNotThrowNPE.
/**
 * Regression test for issue 157: analyzing and disambiguating the sentence below must
 * yield one entry per token produced by the default tokenizer, and formatting the best
 * analysis of each entry must complete without throwing.
 */
@Test
public void issue157ShouldNotThrowNPE() {
  String input = "Yıldız Kızlar Dünya Şampiyonası FIVB'nin düzenlediği ve 18 "
      + "yaşının altındaki voleybolcuların katılabildiği bir şampiyonadır.";
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  SentenceAnalysis disambiguated = morphology.analyzeAndDisambiguate(input);

  int expectedTokenCount = TurkishTokenizer.DEFAULT.tokenize(input).size();
  Assert.assertEquals(expectedTokenCount, disambiguated.size());

  for (SentenceWordAnalysis entry : disambiguated) {
    String surface = entry.getWordAnalysis().getInput();
    SingleAnalysis best = entry.getBestAnalysis();
    // Dereferencing the best analysis here is what the test guards against failing.
    System.out.println(surface + " = " + best.formatLong());
  }
}
use of zemberek.morphology.analysis.SingleAnalysis in project zemberek-nlp by ahmetaa.
the class PerceptronAmbiguityResolverTrainer method test.
/**
 * Evaluates {@code resolver} against the sentences of {@code set} and logs the result.
 *
 * <p>For every sentence the decoder's best path over the ambiguous analysis is compared,
 * position by position, with the sentence's own best analysis. The elapsed time in
 * milliseconds, the number of compared words, the number of matches and the resulting
 * accuracy are written to the log.
 *
 * @param set the data set whose {@code sentences} are evaluated
 * @param resolver the resolver whose decoder produces the best path for each sentence
 */
public static void test(DataSet set, PerceptronAmbiguityResolver resolver) {
  int matched = 0;
  int wordCount = 0;
  Stopwatch timer = Stopwatch.createStarted();

  for (SentenceAnalysis sentence : set.sentences) {
    DecodeResult decoded = resolver.getDecoder().bestPath(sentence.ambiguousAnalysis());
    List<SingleAnalysis> expected = sentence.bestAnalysis();
    int position = 0;
    for (SingleAnalysis actual : decoded.bestParse) {
      SingleAnalysis reference = expected.get(position);
      if (reference.equals(actual)) {
        matched += 1;
      }
      wordCount += 1;
      position += 1;
    }
  }

  Log.info("Elapsed: " + timer.elapsed(TimeUnit.MILLISECONDS));
  String accuracy = String.format(Locale.ENGLISH, " Accuracy:%f", matched * 1.0 / wordCount);
  Log.info("Word count:" + wordCount + " hit=" + matched + accuracy);
}
Aggregations