use of zemberek.morphology.analysis.tr.TurkishMorphology in project zemberek-nlp by ahmetaa.
the class Serializer method serializeDeserializeTest.
/**
 * Round-trips the default lexicon through a serialized {@code Dictionary} and logs the
 * size and timing of each phase.
 *
 * <p>Steps: convert every item of the default {@code RootLexicon} with
 * {@code convertToProto}, write the resulting bytes to {@code lexicon.bin} in the working
 * directory, read the file back, parse it, and rebuild a {@code RootLexicon} with
 * {@code convertToDictionaryItem}.
 *
 * @throws IOException if writing or reading {@code lexicon.bin} fails
 */
private static void serializeDeserializeTest() throws IOException {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  RootLexicon lexicon = morphology.getLexicon();

  // Convert each lexicon item and collect the results into a single Dictionary.
  Dictionary.Builder builder = Dictionary.newBuilder();
  for (DictionaryItem item : lexicon.getAllItems()) {
    builder.addItems(convertToProto(item));
  }
  Dictionary dictionary = builder.build();
  System.out.println("Total size of serialized dictionary: " + dictionary.getSerializedSize());

  File f = new File("lexicon.bin");
  // try-with-resources closes the stream even when write() throws.
  try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(f))) {
    bos.write(dictionary.toByteArray());
  }

  // Phase 1: raw file read.
  long start = System.currentTimeMillis();
  byte[] serialized = Files.readAllBytes(f.toPath());
  long end = System.currentTimeMillis();
  Log.info("Dictionary loaded in %d ms.", (end - start));

  // Phase 2: parse the bytes back into a Dictionary.
  start = System.currentTimeMillis();
  Dictionary readDictionary = Dictionary.parseFrom(serialized);
  end = System.currentTimeMillis();
  Log.info("Dictionary deserialized in %d ms.", (end - start));
  System.out.println("Total size of read dictionary: " + readDictionary.getSerializedSize());

  // Phase 3: rebuild a RootLexicon from the parsed items.
  start = System.currentTimeMillis();
  RootLexicon loadedLexicon = new RootLexicon();
  for (LexiconProto.DictionaryItem item : readDictionary.getItemsList()) {
    loadedLexicon.add(convertToDictionaryItem(item));
  }
  end = System.currentTimeMillis();
  Log.info("RootLexicon generated in %d ms.", (end - start));
}
use of zemberek.morphology.analysis.tr.TurkishMorphology in project zemberek-nlp by ahmetaa.
the class Serializer method createDefaultDictionary.
/**
 * Builds a {@code TurkishMorphology} with the default dictionaries added and hands its
 * lexicon, together with {@code path}, to {@code save}.
 *
 * @param path forwarded unchanged to {@code save}; presumably the output location (confirm
 *     against {@code save})
 * @throws IOException declared by this method; presumably propagated from building the
 *     morphology or from {@code save}
 */
public static void createDefaultDictionary(Path path) throws IOException {
  final TurkishMorphology defaults = TurkishMorphology.builder()
      .addDefaultDictionaries()
      .build();
  save(defaults.getLexicon(), path);
}
use of zemberek.morphology.analysis.tr.TurkishMorphology in project zemberek-nlp by ahmetaa.
the class WordAnalysisFormatterTest method formatKnownProperNounsNoQuote.
/**
 * Runs {@code check} on a morphology built from the single dictionary line
 * {@code "Blah [A:NoQuote]"}.
 *
 * <p>The expected strings are capitalized and contain no apostrophe between root and suffix.
 * NOTE(review): {@code check} is defined elsewhere; presumably it formats each input's
 * analyses and compares them with the expected value at the same index.
 */
@Test
public void formatKnownProperNounsNoQuote() throws IOException {
  String[] words = {"blaha", "Blahta"};
  String[] formatted = {"Blaha", "Blahta"};
  TurkishMorphology noQuoteMorphology = TurkishMorphology.builder()
      .addDictionaryLines("Blah [A:NoQuote]")
      .build();
  check(noQuoteMorphology, words, formatted);
}
use of zemberek.morphology.analysis.tr.TurkishMorphology in project zemberek-nlp by ahmetaa.
the class WordAnalysisFormatterTest method formatKnownProperNouns.
/**
 * Runs {@code check} on a morphology built from three dictionary lines: {@code "Ankara"},
 * {@code "Iphone [Pr:ayfon]"} and {@code "Google [Pr:gugıl]"}.
 *
 * <p>Every suffixed expected form carries an apostrophe between the capitalized root and
 * the suffix; bare roots are expected unchanged.
 * NOTE(review): {@code check} is defined elsewhere; presumably it pairs inputs and expected
 * values by index.
 */
@Test
public void formatKnownProperNouns() throws IOException {
  String[] words = {
      "ankarada", "ıphonumun", "googledan", "Iphone", "Google", "Googlesa"
  };
  String[] formatted = {
      "Ankara'da", "Iphone'umun", "Google'dan", "Iphone", "Google", "Google'sa"
  };
  TurkishMorphology properNounMorphology = TurkishMorphology.builder()
      .addDictionaryLines("Ankara", "Iphone [Pr:ayfon]", "Google [Pr:gugıl]")
      .build();
  check(properNounMorphology, words, formatted);
}
use of zemberek.morphology.analysis.tr.TurkishMorphology in project zemberek-nlp by ahmetaa.
the class WordAnalysisFormatterTest method formatNonProperNoun.
/**
 * Asserts that formatting any analysis of a non-proper-noun word, with {@code "'"} passed
 * as the second argument to {@code format}, yields the input word itself.
 *
 * <p>The morphology is built from the dictionary lines {@code elma}, {@code kitap},
 * {@code demek} and {@code evet}.
 */
@Test
public void formatNonProperNoun() throws IOException {
  TurkishMorphology morphology = TurkishMorphology.builder()
      .addDictionaryLines("elma", "kitap", "demek", "evet")
      .build();
  WordAnalysisFormatter formatter = new WordAnalysisFormatter();

  // NOTE(review): "kitalarımdan" may be a typo for "kitaplarımdan". If analyze() returns
  // no results for a word, the inner loop asserts nothing for it; confirm this is intended.
  String[] words = {
      "elmamadaki", "elma", "kitalarımdan", "kitabımızsa", "diyebileceğimiz", "dedi", "evet"
  };
  for (String word : words) {
    for (WordAnalysis analysis : morphology.analyze(word)) {
      Assert.assertEquals(word, formatter.format(analysis, "'"));
    }
  }
}
Aggregations