use of zemberek.morphology.analysis.SentenceAnalysis in project zemberek-nlp by ahmetaa.
the class Z3ModelA method getAmbiguousSequence.
/**
 * Converts an analyzed sentence into an array of {@link Ambiguous} words.
 *
 * <p>The returned array holds two leading {@code startWord} markers, then one
 * {@link Ambiguous} per sentence entry (in iteration order), then a single
 * trailing {@code endWord} marker. For every parse of an entry, the lemma is
 * looked up in the root model vocabulary and each inflectional group
 * (formatted without surface form) is looked up in the IG model vocabulary.
 *
 * @param sentence analyzed sentence whose entries carry the candidate parses
 * @return array of length {@code sentence.size() + 3}
 */
public Ambiguous[] getAmbiguousSequence(SentenceAnalysis sentence) {
    // Two start markers + one slot per word + one end marker.
    Ambiguous[] awords = new Ambiguous[sentence.size() + 3];
    awords[0] = startWord;
    awords[1] = startWord;
    int i = 2;
    for (SentenceAnalysis.Entry entry : sentence) {
        int parseCount = entry.parses.size();
        int[] roots = new int[parseCount];
        int[][] igs = new int[parseCount][];
        int j = 0;
        for (WordAnalysis parse : entry.parses) {
            String rootPart = parse.dictionaryItem.lemma;
            roots[j] = rootLm.getVocabulary().indexOf(rootPart);
            int igCount = parse.inflectionalGroups.size();
            igs[j] = new int[igCount];
            // The bound must be checked against k (the IG index), not j (the parse index);
            // testing j either skipped the loop entirely or ran k past the end of igs[j].
            for (int k = 0; k < igCount; k++) {
                igs[j][k] = igLm.getVocabulary().indexOf(parse.inflectionalGroups.get(k).formatNoSurface());
            }
            j++;
        }
        awords[i] = new Ambiguous(roots, igs);
        i++;
    }
    awords[i] = endWord;
    return awords;
}
use of zemberek.morphology.analysis.SentenceAnalysis in project zemberek-nlp by ahmetaa.
the class TurkishSentenceAnalyzer method analyze.
/**
 * Morphologically analyzes every word of a sentence.
 *
 * <p>The sentence is first passed through {@code preProcess}, then split on
 * single spaces (empty pieces dropped, pieces trimmed). Each resulting word is
 * analyzed and the word together with its parses is appended to the result.
 *
 * @param sentence raw input sentence
 * @return analysis holding one entry per word, in sentence order
 */
public SentenceAnalysis analyze(String sentence) {
    String normalized = preProcess(sentence);
    Iterable<String> words = Splitter.on(" ").omitEmptyStrings().trimResults().split(normalized);
    SentenceAnalysis result = new SentenceAnalysis();
    for (String word : words) {
        result.addParse(word, turkishMorphology.analyze(word));
    }
    return result;
}
use of zemberek.morphology.analysis.SentenceAnalysis in project zemberek-nlp by ahmetaa.
the class TurkishSentenceAnalyzer method bestParse.
/**
 * Analyzes and disambiguates a sentence, then collects the first parse of
 * every word.
 *
 * <p>NOTE(review): this relies on {@code disambiguate} leaving the preferred
 * parse at index 0 of each entry, and on every entry having at least one
 * parse - confirm against those methods.
 *
 * @param sentence sentence to analyze
 * @return one parse per word, in sentence order
 */
public List<WordAnalysis> bestParse(String sentence) {
    SentenceAnalysis analysis = analyze(sentence);
    disambiguate(analysis);
    List<WordAnalysis> selected = Lists.newArrayListWithCapacity(analysis.size());
    for (SentenceAnalysis.Entry wordEntry : analysis) {
        WordAnalysis first = wordEntry.parses.get(0);
        selected.add(first);
    }
    return selected;
}
use of zemberek.morphology.analysis.SentenceAnalysis in project zemberek-nlp by ahmetaa.
the class DisambiguateSentences method analyzeAndDisambiguate.
/**
 * Prints a sentence, then its analysis before and after disambiguation.
 *
 * <p>The same analysis object is written twice via {@code writeParseResult}:
 * once as returned by the analyzer and once after
 * {@code sentenceAnalyzer.disambiguate} has been applied to it.
 *
 * @param sentence sentence to analyze and print
 */
void analyzeAndDisambiguate(String sentence) {
    System.out.println("Sentence = " + sentence);
    SentenceAnalysis analysis = sentenceAnalyzer.analyze(sentence);

    // Raw analysis, exactly as produced by the analyzer.
    System.out.println("Before disambiguation.");
    writeParseResult(analysis);

    // Same object again, after disambiguation has been applied to it.
    System.out.println("\nAfter disambiguation.");
    sentenceAnalyzer.disambiguate(analysis);
    writeParseResult(analysis);
}
use of zemberek.morphology.analysis.SentenceAnalysis in project zemberek-nlp by ahmetaa.
the class AutomaticLabelingExperiment method processContent.
/**
 * Reduces raw content to a lower-cased, space-separated string of words.
 *
 * <p>The content is tokenized and tokens typed as percent numeral, number,
 * punctuation, roman numeral, time, unknown word or unknown are dropped. When
 * {@code useRoots} is set, the remaining text is analyzed and disambiguated and
 * each word is replaced by the last lemma of its first parse; words whose first
 * parse is unknown are kept as-is and words with no lemmas are dropped. Finally
 * apostrophes are removed and the text is lower-cased with the Turkish locale.
 *
 * @param analyzer sentence analyzer used only when {@code useRoots} is true
 * @param content  raw text to process
 * @param useRoots whether to replace words with lemmas
 * @return processed text
 */
public String processContent(TurkishSentenceAnalyzer analyzer, String content, boolean useRoots) {
    List<Token> tokens = lexer.tokenize(content);
    List<String> kept = new ArrayList<>(tokens.size());
    for (Token token : tokens) {
        final int type = token.getType();
        boolean ignored = type == TurkishLexer.PercentNumeral
                || type == TurkishLexer.Number
                || type == TurkishLexer.Punctuation
                || type == TurkishLexer.RomanNumeral
                || type == TurkishLexer.Time
                || type == TurkishLexer.UnknownWord
                || type == TurkishLexer.Unknown;
        if (!ignored) {
            kept.add(token.getText());
        }
    }

    String text = String.join(" ", kept);
    if (useRoots) {
        SentenceAnalysis sentenceAnalysis = analyzer.analyze(text);
        analyzer.disambiguate(sentenceAnalysis);
        List<String> words = new ArrayList<>();
        for (SentenceAnalysis.Entry entry : sentenceAnalysis) {
            WordAnalysis top = entry.parses.get(0);
            if (top.isUnknown()) {
                // No usable analysis: keep the surface form.
                words.add(entry.input);
            } else {
                List<String> lemmas = top.getLemmas();
                if (!lemmas.isEmpty()) {
                    words.add(lemmas.get(lemmas.size() - 1));
                }
            }
        }
        text = String.join(" ", words);
    }
    return text.replaceAll("[']", "").toLowerCase(Turkish.LOCALE);
}
Aggregations