Use of zemberek.core.turkish.PrimaryPos in the project zemberek-nlp by ahmetaa.
From the class TurkishSuffixes, method defineSuccessorSuffixes.
/**
 * Creates the pair of successor suffix containers for a dictionary item.
 *
 * <p>Two fresh {@code SuffixData} instances are allocated. When the item's primary POS is
 * {@code Verb}, both are passed to {@code getForVerb}; for any other POS they are returned
 * exactly as constructed.
 *
 * @param item dictionary item whose {@code primaryPos} decides the handling
 * @return a two-element array holding the "original" container first and the "modified" one second
 */
@Override
public SuffixData[] defineSuccessorSuffixes(DictionaryItem item) {
    SuffixData originalData = new SuffixData();
    SuffixData modifiedData = new SuffixData();

    // Verb is the only POS handled here; every other value leaves both containers untouched.
    // A switch (rather than an equality test) is kept so a null POS fails the same way as before.
    switch (item.primaryPos) {
        case Verb:
            getForVerb(item, originalData, modifiedData);
            break;
    }

    return new SuffixData[] {originalData, modifiedData};
}
Use of zemberek.core.turkish.PrimaryPos in the project zemberek-nlp by ahmetaa.
From the class TurkishMorphology, method analyzeWordsWithApostrophe.
/**
 * Analyzes a word that contains an apostrophe, e.g. a noun followed by a quoted suffix.
 *
 * <p>The word is split at its first apostrophe to obtain the stem part, which is then normalized.
 * All apostrophes are removed from the word and the result is analyzed. Only analyses whose
 * dictionary item is a noun are kept, and of those only the ones that either contain the
 * {@code p3sg} morpheme or whose stem equals the normalized stem part.
 *
 * @param word input word, expected to contain an apostrophe
 * @return the matching analyses; an empty list when the apostrophe is missing, is the first or
 *     last character, or when the apostrophe-free form has no analysis
 */
public List<SingleAnalysis> analyzeWordsWithApostrophe(String word) {
    int apostrophePos = word.indexOf('\'');
    // There must be at least one character on each side of the apostrophe.
    boolean hasBothSides = apostrophePos > 0 && apostrophePos != word.length() - 1;
    if (!hasBothSides) {
        return Collections.emptyList();
    }

    StemAndEnding parts =
        new StemAndEnding(word.substring(0, apostrophePos), word.substring(apostrophePos + 1));
    String normalizedStem = TurkishAlphabet.INSTANCE.normalize(parts.stem);

    List<SingleAnalysis> candidates = analyzer.analyze(word.replace("'", ""));
    if (candidates.isEmpty()) {
        return Collections.emptyList();
    }

    // words like "Hastanesi'ne". Should we accept Hastanesi or Hastane?
    return candidates.stream()
        .filter(analysis -> analysis.getDictionaryItem().primaryPos == PrimaryPos.Noun)
        .filter(analysis ->
            analysis.containsMorpheme(TurkishMorphotactics.p3sg)
                || analysis.getStem().equals(normalizedStem))
        .collect(Collectors.toList());
}
Use of zemberek.core.turkish.PrimaryPos in the project zemberek-nlp by ahmetaa.
From the class TurkishStopWords, method generateFromDictionary.
/**
 * Builds a stop-word set from the lexicon of a default {@code TurkishMorphology} instance.
 *
 * <p>The lemma of every lexicon item whose primary POS is one of Adverb, Conjunction,
 * Determiner, Interjection, PostPositive, Numeral, Pronoun or Question is collected. Duplicate
 * lemmas are dropped, the rest are sorted with {@code Turkish.STRING_COMPARATOR_ASC}, and the
 * sorted order is retained by storing them in a {@code LinkedHashSet}.
 *
 * @return a {@code TurkishStopWords} wrapping the sorted, de-duplicated lemmas
 * @throws IOException declared by the signature; which call raises it is not visible in this
 *     method
 */
static TurkishStopWords generateFromDictionary() throws IOException {
    Set<PrimaryPos> stopWordPos = Sets.newHashSet(
        PrimaryPos.Adverb,
        PrimaryPos.Conjunction,
        PrimaryPos.Determiner,
        PrimaryPos.Interjection,
        PrimaryPos.PostPositive,
        PrimaryPos.Numeral,
        PrimaryPos.Pronoun,
        PrimaryPos.Question);

    TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
    RootLexicon lexicon = morphology.getLexicon();

    // A plain hash set first: several dictionary items may share the same lemma.
    Set<String> uniqueLemmas = new HashSet<>();
    for (DictionaryItem entry : lexicon) {
        if (!stopWordPos.contains(entry.primaryPos)) {
            continue;
        }
        uniqueLemmas.add(entry.lemma);
    }

    List<String> sortedLemmas = new ArrayList<>(uniqueLemmas);
    sortedLemmas.sort(Turkish.STRING_COMPARATOR_ASC);

    // LinkedHashSet keeps insertion order, so the sorted order survives.
    return new TurkishStopWords(new LinkedHashSet<>(sortedLemmas));
}
Use of zemberek.core.turkish.PrimaryPos in the project zemberek-nlp by ahmetaa.
From the class FindPOS, method main.
/**
 * Demo entry point: analyzes and disambiguates a fixed Turkish sentence, then logs each input
 * word together with the primary POS of its best analysis.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    String sentence = "Keşke yarın hava güzel olsa.";
    TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
    Log.info("Sentence = " + sentence);

    SentenceAnalysis disambiguated = morphology.analyzeAndDisambiguate(sentence);
    for (SentenceWordAnalysis wordResult : disambiguated) {
        String input = wordResult.getWordAnalysis().getInput();
        Log.info("%s : %s ", input, wordResult.getBestAnalysis().getPos());
    }
}
Aggregations