Use of zemberek.morphology.analysis.WordAnalysis in the project zemberek-nlp by ahmetaa.
Class WordSimilarityConsole, method run.
/**
 * Runs an interactive console loop that prints the nearest neighbours of each word typed on
 * standard input.
 *
 * <p>For every input line found in the lookup, two lines are printed: first the neighbour words
 * returned by {@code nearestK(input, 30)} joined with spaces, then the subset of those words
 * selected by the morphological-analysis filter below. Lines not present in the lookup are
 * reported through {@code Log.info} and skipped. The loop stops when the user types
 * {@code exit} or {@code quit}, or when standard input is exhausted.
 *
 * @param vectorFile path handed to {@code WordVectorLookup.loadFromBinaryFast} as the vector file
 * @param vocabFile path handed to {@code WordVectorLookup.loadFromBinaryFast} as the vocabulary file
 * @throws IOException declared by the signature; presumably raised while loading the lookup files
 */
void run(Path vectorFile, Path vocabFile) throws IOException {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  System.out.println("Loading from " + vectorFile);
  WordVectorLookup lookup = WordVectorLookup.loadFromBinaryFast(vectorFile, vocabFile);
  WordVectorLookup.DistanceMatcher distanceMatcher = new WordVectorLookup.DistanceMatcher(lookup);
  System.out.println("Enter word:");
  // Intentionally not closed: closing the Scanner would also close System.in.
  Scanner sc = new Scanner(System.in);
  // hasNextLine() makes end of input (Ctrl-D, piped or redirected stdin) a normal exit instead of
  // a NoSuchElementException thrown by nextLine().
  while (sc.hasNextLine()) {
    String input = sc.nextLine();
    if (input.equals("exit") || input.equals("quit")) {
      break;
    }
    if (!lookup.containsWord(input)) {
      Log.info(input + " cannot be found.");
      continue;
    }
    List<WordDistances.Distance> distances = distanceMatcher.nearestK(input, 30);
    List<String> dist = distances.stream().map(d -> d.word).collect(Collectors.toList());
    System.out.println(String.join(" ", dist));
    List<String> noParse = new ArrayList<>();
    for (String s : dist) {
      WordAnalysis an = morphology.analyze(s);
      // NOTE(review): the list is named noParse, yet words for which isCorrect() is true are
      // added to it. This may be an inverted condition; it is kept unchanged here because the
      // semantics of WordAnalysis are not visible from this method. Confirm the intent.
      if (an.isCorrect()
          || (an.analysisCount() == 1
              && an.getAnalysisResults().get(0).getDictionaryItem().primaryPos
                  == PrimaryPos.Unknown)) {
        noParse.add(s);
      }
    }
    System.out.println(String.join(" ", noParse));
  }
}
Aggregations