Use of zemberek.morphology.analysis.WordAnalysis in project zemberek-nlp by ahmetaa.
Class TurkishSentenceAnalyzer, method analyze.
/**
 * Analyzes each space-separated token of a sentence.
 *
 * <p>The sentence is run through {@code preProcess}, the result is split on the space
 * character (tokens are trimmed and empty tokens are dropped), and every token is passed to
 * {@code turkishMorphology.analyze}.
 *
 * @param sentence the sentence to analyze
 * @return a {@link SentenceAnalysis} to which each token and its analyses were added, in
 *     token order
 */
public SentenceAnalysis analyze(String sentence) {
    SentenceAnalysis result = new SentenceAnalysis();

    Splitter tokenSplitter = Splitter.on(" ").omitEmptyStrings().trimResults();
    Iterable<String> tokens = tokenSplitter.split(preProcess(sentence));

    for (String token : tokens) {
        result.addParse(token, turkishMorphology.analyze(token));
    }
    return result;
}
Use of zemberek.morphology.analysis.WordAnalysis in project zemberek-nlp by ahmetaa.
Class TurkishSentenceAnalyzer, method bestParse.
/**
 * Returns one analysis per token of a sentence.
 *
 * <p>The sentence is analyzed with {@code analyze}, the result is handed to
 * {@code disambiguate}, and the first analysis of every entry is collected.
 * Presumably {@code disambiguate} leaves the preferred analysis at index 0 - confirm.
 *
 * @param sentence sentence to parse
 * @return the first analysis of each entry, in entry order
 */
public List<WordAnalysis> bestParse(String sentence) {
    SentenceAnalysis analysis = analyze(sentence);
    disambiguate(analysis);

    List<WordAnalysis> selected = Lists.newArrayListWithCapacity(analysis.size());
    for (SentenceAnalysis.Entry token : analysis) {
        // NOTE(review): assumes every entry carries at least one parse - confirm.
        WordAnalysis first = token.parses.get(0);
        selected.add(first);
    }
    return selected;
}
Use of zemberek.morphology.analysis.WordAnalysis in project zemberek-nlp by ahmetaa.
Class UnidentifiedTokenAnalyzer, method parseNumeral.
/**
 * Builds numeral analyses for a token.
 *
 * <p>The token is split into a stem and an ending by {@code getFromNumeral}. A lemma is looked
 * up for the stem through {@code numeralEndingMachine}; when the stem ends with a dot, the
 * dot is stripped before the lookup and the result is mapped through {@code ordinalMap}.
 * For every {@code Digit} whose pattern is found in the stem, lemma + ending is analyzed and
 * each analysis whose primary POS is {@code Numeral} gets a new dictionary item built from
 * the stem, flagged with {@code RootAttribute.Runtime}.
 *
 * @param s the token to analyze
 * @return the rewritten numeral analyses; empty when nothing matched
 */
public List<WordAnalysis> parseNumeral(String s) {
    StemAndEnding parts = getFromNumeral(s);
    String stem = parts.stem;
    String ending = parts.ending;

    String lemma;
    if (stem.endsWith(".")) {
        // Trailing dot: look up the bare number, then map the lemma through ordinalMap.
        String withoutDot = stem.substring(0, stem.length() - 1);
        lemma = ordinalMap.get(numeralEndingMachine.find(withoutDot));
    } else {
        lemma = numeralEndingMachine.find(stem);
    }
    // NOTE(review): lemma may be null if either lookup misses, which would fail at the
    // equals() call below - confirm against ordinalMap / numeralEndingMachine.

    List<WordAnalysis> numeralAnalyses = Lists.newArrayListWithCapacity(1);
    for (TurkishDictionaryLoader.Digit digitType : TurkishDictionaryLoader.Digit.values()) {
        if (!digitType.pattern.matcher(stem).find()) {
            continue;
        }

        // "dört" is rewritten as "dörd" when the ending starts with a vowel.
        boolean useDord = ending.length() > 0
            && lemma.equals("dört")
            && TurkishAlphabet.INSTANCE.isVowel(ending.charAt(0));
        String surface = useDord ? "dörd" + ending : lemma + ending;

        List<WordAnalysis> candidates = turkishParser.getWordAnalyzer().analyze(surface);
        for (WordAnalysis candidate : candidates) {
            if (candidate.dictionaryItem.primaryPos == PrimaryPos.Numeral) {
                // Replace the dictionary item with one built from the original stem.
                candidate.dictionaryItem = new DictionaryItem(
                    stem, stem, s + lemma, PrimaryPos.Numeral, digitType.secondaryPos);
                candidate.dictionaryItem.attributes.add(RootAttribute.Runtime);
                candidate.root = stem;
                numeralAnalyses.add(candidate);
            }
        }
    }
    return numeralAnalyses;
}
Use of zemberek.morphology.analysis.WordAnalysis in project zemberek-nlp by ahmetaa.
Class AddNewDictionaryItem, method printResults.
/**
 * Prints the analyses to standard output as a numbered list starting at 1.
 *
 * <p>Each line is the number, " - ", and the analysis in its long format. Analyses whose
 * dictionary item carries {@code RootAttribute.Runtime} get an extra suffix.
 *
 * @param results analyses to print
 */
private void printResults(List<WordAnalysis> results) {
    int lineNo = 0;
    for (WordAnalysis analysis : results) {
        lineNo++;
        String formatted = analysis.formatLong();
        boolean runtimeItem = analysis.dictionaryItem.attributes.contains(RootAttribute.Runtime);
        String suffix = runtimeItem ? " (Generated by UnidentifiedTokenParser)" : "";
        System.out.println(lineNo + " - " + formatted + suffix);
    }
}
Use of zemberek.morphology.analysis.WordAnalysis in project zemberek-nlp by ahmetaa.
Class UnidentifiedTokenAnalyzerTest, method shouldParseSmallCaseProperNounsWithSingleQuote.
/**
 * Checks that proper nouns followed by an apostrophe and a suffix are analyzed the same way
 * whether or not the first letter is capitalized, and that a bare lower-case word that is not
 * in the dictionary ("karaman") yields no analysis.
 */
@Test
public void shouldParseSmallCaseProperNounsWithSingleQuote() throws IOException {
    HashSet<String> istanbulExpected = Sets.newHashSet(
        "[(İstanbul:istanbul) (Noun,Prop;A3sg+P2sg:un+Nom)]",
        "[(İstanbul:istanbul) (Noun,Prop;A3sg+Pnon+Gen:un)]");
    TurkishMorphology morphology =
        TurkishMorphology.builder().addTextDictionaryResources("dev-lexicon.txt").build();
    UnidentifiedTokenAnalyzer analyzer = new UnidentifiedTokenAnalyzer(morphology);

    List<WordAnalysis> analyses;

    // Capitalized and lower-case input must both give the same two analyses.
    for (String input : new String[] {"İstanbul'un", "istanbul'un"}) {
        analyses = analyzer.analyze(input);
        Assert.assertEquals(2, analyses.size());
        for (WordAnalysis analysis : analyses) {
            Assert.assertTrue(istanbulExpected.contains(analysis.formatLong()));
        }
    }

    for (String input : new String[] {"Ankara'ya", "ankara'ya"}) {
        analyses = analyzer.analyze(input);
        Assert.assertEquals(1, analyses.size());
        Assert.assertEquals(
            "[(Ankara:ankara) (Noun,Prop;A3sg+Pnon+Dat:ya)]", analyses.get(0).formatLong());
    }

    // Karaman does not exist in dictionary
    analyses = analyzer.analyze("Karaman");
    Assert.assertEquals(1, analyses.size());
    Assert.assertEquals(
        "[(Karaman:karaman) (Noun,Prop;A3sg+Pnon+Nom)]", analyses.get(0).formatLong());

    analyses = analyzer.analyze("karaman'a");
    Assert.assertEquals(1, analyses.size());
    Assert.assertEquals(
        "[(Karaman:karaman) (Noun,Prop;A3sg+Pnon+Dat:a)]", analyses.get(0).formatLong());

    // Lower-case, no apostrophe: no analysis is expected.
    analyses = analyzer.analyze("karaman");
    Assert.assertEquals(0, analyses.size());
}
Aggregations