Search in sources :

Example 1 with Dictionary

use of zemberek.morphology.lexicon.proto.LexiconProto.Dictionary in project zemberek-nlp by ahmetaa.

the class Serializer method getDictionaryItems.

/**
 * Rebuilds a {@link RootLexicon} from a serialized protobuf {@code Dictionary} message.
 *
 * <p>Works in two passes: every proto item is first converted and added to the lexicon,
 * then items that carry a non-empty reference id are linked to the item they refer to.
 *
 * @param bytes serialized {@code Dictionary} message
 * @return the lexicon containing all converted items, with references applied
 * @throws IOException propagated from parsing {@code bytes}
 */
private static RootLexicon getDictionaryItems(byte[] bytes) throws IOException {
    final long startMillis = System.currentTimeMillis();
    final Dictionary parsed = Dictionary.parseFrom(bytes);
    final RootLexicon lexicon = new RootLexicon();

    // Some items refer to other items. The target may not have been added yet while we
    // are still iterating, so remember (item id -> referenced id) and link afterwards.
    final Map<String, String> pendingReferences = new HashMap<>();
    for (LexiconProto.DictionaryItem protoItem : parsed.getItemsList()) {
        final DictionaryItem converted = convertToDictionaryItem(protoItem);
        lexicon.add(converted);

        final String referencedId = protoItem.getReference();
        if (referencedId == null || referencedId.isEmpty()) {
            continue;
        }
        pendingReferences.put(converted.id, referencedId);
    }

    // Second pass: every item is now in the lexicon, so the links can be resolved.
    for (Map.Entry<String, String> link : pendingReferences.entrySet()) {
        final DictionaryItem source = lexicon.getItemById(link.getKey());
        final DictionaryItem target = lexicon.getItemById(link.getValue());
        source.setReferenceItem(target);
    }

    final long elapsedMillis = System.currentTimeMillis() - startMillis;
    Log.info("Root lexicon created in %d ms.", elapsedMillis);
    return lexicon;
}
Also used : LexiconProto(zemberek.morphology.lexicon.proto.LexiconProto) Dictionary(zemberek.morphology.lexicon.proto.LexiconProto.Dictionary) HashMap(java.util.HashMap)

Example 2 with Dictionary

use of zemberek.morphology.lexicon.proto.LexiconProto.Dictionary in project zemberek-nlp by ahmetaa.

the class Serializer method serializeDeserializeTest.

/**
 * Manual round-trip check for lexicon serialization.
 *
 * <p>Converts the default morphology's lexicon to a protobuf {@code Dictionary}, writes it
 * to {@code lexicon.bin} in the working directory, reads it back, parses it and rebuilds a
 * {@link RootLexicon}, logging how long each stage takes.
 *
 * @throws IOException if writing or reading {@code lexicon.bin} fails, or if parsing fails
 */
private static void serializeDeserializeTest() throws IOException {
    TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
    RootLexicon lexicon = morphology.getLexicon();
    Dictionary.Builder builder = Dictionary.newBuilder();
    for (DictionaryItem item : lexicon.getAllItems()) {
        builder.addItems(convertToProto(item));
    }
    Dictionary dictionary = builder.build();
    System.out.println("Total size of serialized dictionary: " + dictionary.getSerializedSize());
    File f = new File("lexicon.bin");
    // try-with-resources guarantees the stream is flushed and closed even when write()
    // throws; the previous manual close() leaked the file handle on failure.
    try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(f))) {
        bos.write(dictionary.toByteArray());
    }
    long start = System.currentTimeMillis();
    // Read back the very file that was just written instead of re-spelling its name.
    byte[] serialized = Files.readAllBytes(f.toPath());
    long end = System.currentTimeMillis();
    Log.info("Dictionary loaded in %d ms.", (end - start));
    start = System.currentTimeMillis();
    Dictionary readDictionary = Dictionary.parseFrom(serialized);
    end = System.currentTimeMillis();
    Log.info("Dictionary deserialized in %d ms.", (end - start));
    System.out.println("Total size of read dictionary: " + readDictionary.getSerializedSize());
    start = System.currentTimeMillis();
    RootLexicon loadedLexicon = new RootLexicon();
    for (LexiconProto.DictionaryItem item : readDictionary.getItemsList()) {
        loadedLexicon.add(convertToDictionaryItem(item));
    }
    end = System.currentTimeMillis();
    Log.info("RootLexicon generated in %d ms.", (end - start));
}
Also used : LexiconProto(zemberek.morphology.lexicon.proto.LexiconProto) Dictionary(zemberek.morphology.lexicon.proto.LexiconProto.Dictionary) FileOutputStream(java.io.FileOutputStream) TurkishMorphology(zemberek.morphology.analysis.tr.TurkishMorphology) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream)

Example 3 with Dictionary

use of zemberek.morphology.lexicon.proto.LexiconProto.Dictionary in project zemberek-nlp by ahmetaa.

the class Serializer method save.

/**
 * Serializes a lexicon as a protobuf {@code Dictionary} message and writes it to a file.
 *
 * <p>The target file is created when missing and fully replaced when it already exists.
 *
 * @param lexicon lexicon whose items are converted and written
 * @param outPath destination file
 * @throws IOException if the file cannot be written
 */
public static void save(RootLexicon lexicon, Path outPath) throws IOException {
    Dictionary.Builder builder = Dictionary.newBuilder();
    for (DictionaryItem item : lexicon.getAllItems()) {
        builder.addItems(convertToProto(item));
    }
    Dictionary dictionary = builder.build();
    // Passing CREATE alone disables the default TRUNCATE_EXISTING behaviour of Files.write,
    // so overwriting a larger existing file would leave stale trailing bytes and produce a
    // corrupt message. Request truncation explicitly.
    Files.write(
        outPath,
        dictionary.toByteArray(),
        StandardOpenOption.CREATE,
        StandardOpenOption.TRUNCATE_EXISTING,
        StandardOpenOption.WRITE);
}
Also used : Dictionary(zemberek.morphology.lexicon.proto.LexiconProto.Dictionary)

Example 4 with Dictionary

use of zemberek.morphology.lexicon.proto.LexiconProto.Dictionary in project zemberek-nlp by ahmetaa.

the class DictionarySerializer method getDictionaryItems.

/**
 * Deserializes a protobuf {@code Dictionary} and turns it into a {@link RootLexicon}.
 *
 * <p>Items are added first; references between items are applied in a follow-up pass,
 * once every item can be looked up by id.
 *
 * @param bytes serialized {@code Dictionary} message
 * @return lexicon populated from the message, with item references set
 * @throws IOException propagated from parsing {@code bytes}
 */
private static RootLexicon getDictionaryItems(byte[] bytes) throws IOException {
    final long begin = System.currentTimeMillis();
    final Dictionary dictionary = Dictionary.parseFrom(bytes);
    final RootLexicon result = new RootLexicon();

    // Maps the id of an item to the id of the item it references. Linking is deferred
    // because a referenced item may appear later in the list than the item pointing at it.
    final Map<String, String> referenceIdsByItemId = new HashMap<>();
    for (LexiconProto.DictionaryItem proto : dictionary.getItemsList()) {
        final DictionaryItem dictionaryItem = convertToDictionaryItem(proto);
        result.add(dictionaryItem);

        final String reference = proto.getReference();
        final boolean hasReference = reference != null && !reference.isEmpty();
        if (hasReference) {
            referenceIdsByItemId.put(dictionaryItem.id, reference);
        }
    }

    // All items are present now; resolve both ends of each link and connect them.
    for (Map.Entry<String, String> entry : referenceIdsByItemId.entrySet()) {
        final DictionaryItem referrer = result.getItemById(entry.getKey());
        final DictionaryItem referenced = result.getItemById(entry.getValue());
        referrer.setReferenceItem(referenced);
    }

    final long finish = System.currentTimeMillis();
    Log.info("Root lexicon created in %d ms.", (finish - begin));
    return result;
}
Also used : LexiconProto(zemberek.morphology.lexicon.proto.LexiconProto) Dictionary(zemberek.morphology.lexicon.proto.LexiconProto.Dictionary) HashMap(java.util.HashMap)

Example 5 with Dictionary

use of zemberek.morphology.lexicon.proto.LexiconProto.Dictionary in project zemberek-nlp by ahmetaa.

the class DictionarySerializer method serializeDeserializeTest.

/**
 * Manual round-trip check for lexicon serialization.
 *
 * <p>Converts the default morphology's lexicon to a protobuf {@code Dictionary}, writes it
 * to a temporary file, reads it back, parses it and rebuilds a {@link RootLexicon},
 * logging how long each stage takes. The temporary file is removed before returning.
 *
 * @throws IOException if the temporary file cannot be created, written, read or deleted,
 *     or if parsing fails
 */
private static void serializeDeserializeTest() throws IOException {
    TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
    RootLexicon lexicon = morphology.getLexicon();
    Dictionary.Builder builder = Dictionary.newBuilder();
    for (DictionaryItem item : lexicon.getAllItems()) {
        builder.addItems(convertToProto(item));
    }
    Dictionary dictionary = builder.build();
    System.out.println("Total size of serialized dictionary: " + dictionary.getSerializedSize());
    Path f = Files.createTempFile("lexicon", ".bin");
    byte[] serialized;
    long start;
    long end;
    try {
        // try-with-resources guarantees the stream is flushed and closed even when
        // write() throws; the previous manual close() leaked the handle on failure.
        try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(f.toFile()))) {
            bos.write(dictionary.toByteArray());
        }
        start = System.currentTimeMillis();
        serialized = Files.readAllBytes(f);
        end = System.currentTimeMillis();
    } finally {
        // The bytes are in memory from here on; do not leave the temp file behind.
        Files.deleteIfExists(f);
    }
    Log.info("Dictionary loaded in %d ms.", (end - start));
    start = System.currentTimeMillis();
    Dictionary readDictionary = Dictionary.parseFrom(serialized);
    end = System.currentTimeMillis();
    Log.info("Dictionary deserialized in %d ms.", (end - start));
    System.out.println("Total size of read dictionary: " + readDictionary.getSerializedSize());
    start = System.currentTimeMillis();
    RootLexicon loadedLexicon = new RootLexicon();
    for (LexiconProto.DictionaryItem item : readDictionary.getItemsList()) {
        loadedLexicon.add(convertToDictionaryItem(item));
    }
    end = System.currentTimeMillis();
    Log.info("RootLexicon generated in %d ms.", (end - start));
}
Also used : Path(java.nio.file.Path) LexiconProto(zemberek.morphology.lexicon.proto.LexiconProto) Dictionary(zemberek.morphology.lexicon.proto.LexiconProto.Dictionary) FileOutputStream(java.io.FileOutputStream) TurkishMorphology(zemberek.morphology.TurkishMorphology) BufferedOutputStream(java.io.BufferedOutputStream)

Aggregations

Dictionary (zemberek.morphology.lexicon.proto.LexiconProto.Dictionary)6 LexiconProto (zemberek.morphology.lexicon.proto.LexiconProto)4 BufferedOutputStream (java.io.BufferedOutputStream)2 FileOutputStream (java.io.FileOutputStream)2 HashMap (java.util.HashMap)2 File (java.io.File)1 Path (java.nio.file.Path)1 TurkishMorphology (zemberek.morphology.TurkishMorphology)1 TurkishMorphology (zemberek.morphology.analysis.tr.TurkishMorphology)1