use of zemberek.morphology.lexicon.proto.LexiconProto.Dictionary in project zemberek-nlp by ahmetaa.
the class Serializer method getDictionaryItems.
/**
 * Parses a serialized {@code Dictionary} message and builds a {@link RootLexicon} from it.
 *
 * <p>Items are added in a first pass; links between items are applied in a second pass,
 * once every item is available for lookup by id.
 *
 * @param bytes serialized {@code Dictionary} protobuf message
 * @return lexicon holding the converted items, with reference items linked
 * @throws IOException declared for parsing failures of {@code bytes}
 */
private static RootLexicon getDictionaryItems(byte[] bytes) throws IOException {
  final long startedAt = System.currentTimeMillis();
  final Dictionary parsed = Dictionary.parseFrom(bytes);
  final RootLexicon lexicon = new RootLexicon();

  // Pass 1: add every item, remembering (item id -> referenced item id) for items that
  // point at another item. The target may not be in the lexicon yet, so linking is deferred.
  final Map<String, String> pendingReferences = new HashMap<>();
  for (LexiconProto.DictionaryItem protoItem : parsed.getItemsList()) {
    final DictionaryItem converted = convertToDictionaryItem(protoItem);
    lexicon.add(converted);
    final String referencedId = protoItem.getReference();
    if (referencedId == null || referencedId.isEmpty()) {
      continue;
    }
    pendingReferences.put(converted.id, referencedId);
  }

  // Pass 2: resolve both ends of each remembered link through the lexicon and connect them.
  for (Map.Entry<String, String> link : pendingReferences.entrySet()) {
    final DictionaryItem source = lexicon.getItemById(link.getKey());
    final DictionaryItem target = lexicon.getItemById(link.getValue());
    source.setReferenceItem(target);
  }

  final long elapsed = System.currentTimeMillis() - startedAt;
  Log.info("Root lexicon created in %d ms.", elapsed);
  return lexicon;
}
use of zemberek.morphology.lexicon.proto.LexiconProto.Dictionary in project zemberek-nlp by ahmetaa.
the class Serializer method serializeDeserializeTest.
/**
 * Round-trips the lexicon of a default {@link TurkishMorphology} through the protobuf
 * {@code Dictionary} format and logs how long each step takes.
 *
 * <p>Steps: convert all items to proto and build the message, write it to
 * {@code lexicon.bin} in the working directory, read the bytes back, parse them, and
 * rebuild a {@link RootLexicon} from the parsed items.
 *
 * @throws IOException if writing or reading {@code lexicon.bin} fails, or parsing fails
 */
private static void serializeDeserializeTest() throws IOException {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  RootLexicon lexicon = morphology.getLexicon();
  Dictionary.Builder builder = Dictionary.newBuilder();
  for (DictionaryItem item : lexicon.getAllItems()) {
    builder.addItems(convertToProto(item));
  }
  Dictionary dictionary = builder.build();
  System.out.println("Total size of serialized dictionary: " + dictionary.getSerializedSize());

  File f = new File("lexicon.bin");
  // try-with-resources guarantees the stream is flushed and closed even if write() throws;
  // the previous manual close() leaked the file handle on failure.
  try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(f))) {
    bos.write(dictionary.toByteArray());
  }

  long start = System.currentTimeMillis();
  byte[] serialized = Files.readAllBytes(f.toPath());
  long end = System.currentTimeMillis();
  Log.info("Dictionary loaded in %d ms.", (end - start));

  start = System.currentTimeMillis();
  Dictionary readDictionary = Dictionary.parseFrom(serialized);
  end = System.currentTimeMillis();
  Log.info("Dictionary deserialized in %d ms.", (end - start));
  System.out.println("Total size of read dictionary: " + readDictionary.getSerializedSize());

  start = System.currentTimeMillis();
  RootLexicon loadedLexicon = new RootLexicon();
  for (LexiconProto.DictionaryItem item : readDictionary.getItemsList()) {
    loadedLexicon.add(convertToDictionaryItem(item));
  }
  end = System.currentTimeMillis();
  Log.info("RootLexicon generated in %d ms.", (end - start));
}
use of zemberek.morphology.lexicon.proto.LexiconProto.Dictionary in project zemberek-nlp by ahmetaa.
the class Serializer method save.
/**
 * Serializes every item of {@code lexicon} into a protobuf {@code Dictionary} message and
 * writes the resulting bytes to {@code outPath}.
 *
 * <p>The file is created if it does not exist and truncated if it does, so the output
 * always contains exactly one serialized message.
 *
 * @param lexicon lexicon whose items are serialized
 * @param outPath destination file
 * @throws IOException if the file cannot be written
 */
public static void save(RootLexicon lexicon, Path outPath) throws IOException {
  Dictionary.Builder builder = Dictionary.newBuilder();
  for (DictionaryItem item : lexicon.getAllItems()) {
    builder.addItems(convertToProto(item));
  }
  Dictionary dictionary = builder.build();
  // An explicit option list replaces the Files.write defaults. With CREATE alone an
  // existing file is NOT truncated, so overwriting a longer file would leave stale bytes
  // after the new message and corrupt it. TRUNCATE_EXISTING prevents that.
  Files.write(
      outPath,
      dictionary.toByteArray(),
      StandardOpenOption.CREATE,
      StandardOpenOption.WRITE,
      StandardOpenOption.TRUNCATE_EXISTING);
}
use of zemberek.morphology.lexicon.proto.LexiconProto.Dictionary in project zemberek-nlp by ahmetaa.
the class DictionarySerializer method getDictionaryItems.
/**
 * Deserializes a protobuf {@code Dictionary} from {@code bytes} into a {@link RootLexicon}.
 *
 * <p>Some items refer to other items by id. Those links are collected while the items are
 * added and applied afterwards, when all items can be looked up in the lexicon.
 *
 * @param bytes serialized {@code Dictionary} protobuf message
 * @return the populated lexicon
 * @throws IOException declared for parsing failures of {@code bytes}
 */
private static RootLexicon getDictionaryItems(byte[] bytes) throws IOException {
  long t0 = System.currentTimeMillis();
  RootLexicon result = new RootLexicon();

  // Maps the id of an item to the id of the item it refers to.
  Map<String, String> idToReferencedId = new HashMap<>();
  for (LexiconProto.DictionaryItem proto : Dictionary.parseFrom(bytes).getItemsList()) {
    DictionaryItem dictionaryItem = convertToDictionaryItem(proto);
    result.add(dictionaryItem);
    String reference = proto.getReference();
    boolean hasReference = reference != null && !reference.isEmpty();
    if (hasReference) {
      idToReferencedId.put(dictionaryItem.id, reference);
    }
  }

  // All items are in the lexicon now, so the deferred links can be resolved.
  for (Map.Entry<String, String> entry : idToReferencedId.entrySet()) {
    DictionaryItem referenced = result.getItemById(entry.getValue());
    result.getItemById(entry.getKey()).setReferenceItem(referenced);
  }

  long t1 = System.currentTimeMillis();
  Log.info("Root lexicon created in %d ms.", (t1 - t0));
  return result;
}
use of zemberek.morphology.lexicon.proto.LexiconProto.Dictionary in project zemberek-nlp by ahmetaa.
the class DictionarySerializer method serializeDeserializeTest.
/**
 * Round-trips the lexicon of a default {@link TurkishMorphology} through the protobuf
 * {@code Dictionary} format using a temporary file, logging how long each step takes.
 *
 * <p>Steps: convert all items to proto and build the message, write it to a temporary
 * file, read the bytes back, parse them, and rebuild a {@link RootLexicon} from the
 * parsed items. The temporary file is removed before the method returns.
 *
 * @throws IOException if the temporary file cannot be created, written, read or deleted,
 *     or if parsing fails
 */
private static void serializeDeserializeTest() throws IOException {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  RootLexicon lexicon = morphology.getLexicon();
  Dictionary.Builder builder = Dictionary.newBuilder();
  for (DictionaryItem item : lexicon.getAllItems()) {
    builder.addItems(convertToProto(item));
  }
  Dictionary dictionary = builder.build();
  System.out.println("Total size of serialized dictionary: " + dictionary.getSerializedSize());

  Path f = Files.createTempFile("lexicon", ".bin");
  byte[] serialized;
  try {
    // try-with-resources closes the stream even if write() throws; the previous manual
    // close() leaked the file handle on failure.
    try (BufferedOutputStream bos =
        new BufferedOutputStream(new FileOutputStream(f.toFile()))) {
      bos.write(dictionary.toByteArray());
    }
    long start = System.currentTimeMillis();
    serialized = Files.readAllBytes(f);
    long end = System.currentTimeMillis();
    Log.info("Dictionary loaded in %d ms.", (end - start));
  } finally {
    // The file is only needed for the write/read round trip; do not leave it behind
    // in the temp directory.
    Files.deleteIfExists(f);
  }

  long start = System.currentTimeMillis();
  Dictionary readDictionary = Dictionary.parseFrom(serialized);
  long end = System.currentTimeMillis();
  Log.info("Dictionary deserialized in %d ms.", (end - start));
  System.out.println("Total size of read dictionary: " + readDictionary.getSerializedSize());

  start = System.currentTimeMillis();
  RootLexicon loadedLexicon = new RootLexicon();
  for (LexiconProto.DictionaryItem item : readDictionary.getItemsList()) {
    loadedLexicon.add(convertToDictionaryItem(item));
  }
  end = System.currentTimeMillis();
  Log.info("RootLexicon generated in %d ms.", (end - start));
}
Aggregations