Search in sources:

Example 6 with Dictionary

use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata.

In the class BigStoreTest, method testFull:

/**
 * Round-trips a dictionary holding one million hex-string entries through plain
 * (de)serialization and through a {@link BigStore} whose chunk size is set to one megabyte.
 *
 * <p>Verifies that each entry resolves to its insertion index after deserialization, that the
 * data loaded from the store concatenates to exactly the serialized bytes, and that the
 * dictionary read back from the store resolves every entry when wrapped in a UTF-8
 * {@link EncodedDictionary}.
 *
 * @throws JSONException declared by the original test signature
 * @throws IOException if serialization or reading the stored bytes fails
 */
@Test
public void testFull() throws JSONException, IOException {
    // Single source of truth for the number of entries written and verified below.
    final int entryCount = 1000000;
    // Pin the charset: the lookup at the end goes through Encoding.UTF8, so the bytes that are
    // inserted must not depend on the platform default charset.
    final java.nio.charset.Charset charset = java.nio.charset.StandardCharsets.UTF_8;

    BigStore<DictionaryId, Dictionary> store = new BigStore<>(new XodusStoreFactory(), Validators.newValidator(), env, StoreMappings.DICTIONARIES.storeInfo(), (e) -> {
    }, (e) -> {
    }, MAPPER);
    store.setChunkByteSize(Ints.checkedCast(DataSize.megabytes(1).toBytes()));
    Dictionary nDict = new MapDictionary(Dataset.PLACEHOLDER, "dict");
    for (int v = 0; v < entryCount; v++) {
        nDict.add(Integer.toHexString(v).getBytes(charset));
    }
    // check if manual serialization deserialization works
    // Serialize with the mapper that is also handed to the store and used for reading (as
    // testEmpty does), so the byte comparison below compares like with like.
    byte[] bytes = MAPPER.writeValueAsBytes(nDict);
    Dictionary simpleCopy = MAPPER.readValue(bytes, Dictionary.class);
    for (int v = 0; v < entryCount; v++) {
        assertThat(simpleCopy.getId(Integer.toHexString(v).getBytes(charset))).isEqualTo(v);
    }
    // check if store works
    store.add(nDict.getId(), nDict);
    // check if the bytes in the store are the same as bytes
    SequenceInputStream storedBytes = new SequenceInputStream(Iterators.asEnumeration(store.getMetaStore().get(nDict.getId()).loadData(store.getDataStore()).map(ByteArrayInputStream::new).iterator()));
    assertThat(storedBytes).hasSameContentAs(new ByteArrayInputStream(bytes));
    EncodedDictionary copy = new EncodedDictionary(store.get(nDict.getId()), StringTypeEncoded.Encoding.UTF8);
    for (int v = 0; v < entryCount; v++) {
        assertThat(copy.getId(Integer.toHexString(v))).isEqualTo(v);
    }
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) EncodedDictionary(com.bakdata.conquery.models.dictionary.EncodedDictionary) XodusStoreFactory(com.bakdata.conquery.models.config.XodusStoreFactory) SequenceInputStream(java.io.SequenceInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) DictionaryId(com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId) EncodedDictionary(com.bakdata.conquery.models.dictionary.EncodedDictionary) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) Test(org.junit.jupiter.api.Test)

Example 7 with Dictionary

use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata.

In the class Preprocessed, method encodePrimaryDictionary:

/**
 * Applies UTF-8 encoding to the primary column and returns the dictionary underlying the type
 * that a {@link MapTypeGuesser} guesses for that column.
 *
 * @return the underlying dictionary of the guessed type for the primary column
 */
private Dictionary encodePrimaryDictionary() {
    log.debug("Encode primary Dictionary");

    // The encoding has to be applied before the guesser looks at the column.
    primaryColumn.applyEncoding(StringTypeEncoded.Encoding.UTF8);

    final MapTypeGuesser guesser = new MapTypeGuesser(primaryColumn);
    final Dictionary result = guesser.createGuess().getType().getUnderlyingDictionary();

    log.trace("\tPrimaryColumn -> {}", result);
    return result;
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) MapTypeGuesser(com.bakdata.conquery.models.preproc.parser.specific.string.MapTypeGuesser)

Example 8 with Dictionary

use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata.

In the class Preprocessed, method collectDictionaries:

/**
 * Gathers the underlying dictionaries of all string columns.
 *
 * <p>Columns that are not a {@link StringStore}, and string columns whose underlying dictionary
 * is {@code null}, are left out of the result.
 *
 * @param columnStores the stores to inspect, keyed by the name used as key in the result
 * @return a new {@link HashMap} from key to the non-null underlying dictionary of that store
 */
private static Map<String, Dictionary> collectDictionaries(Map<String, ColumnStore> columnStores) {
    final Map<String, Dictionary> byColumn = new HashMap<>();
    for (Map.Entry<String, ColumnStore> column : columnStores.entrySet()) {
        final ColumnStore store = column.getValue();
        if (store instanceof StringStore) {
            final Dictionary underlying = ((StringStore) store).getUnderlyingDictionary();
            // A string store is not required to carry a dictionary; only keep those that do.
            if (underlying != null) {
                byColumn.put(column.getKey(), underlying);
            }
        }
    }
    return byColumn;
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) HashMap(java.util.HashMap) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) StringStore(com.bakdata.conquery.models.events.stores.root.StringStore) Int2IntMap(it.unimi.dsi.fastutil.ints.Int2IntMap) HashMap(java.util.HashMap) Int2IntAVLTreeMap(it.unimi.dsi.fastutil.ints.Int2IntAVLTreeMap) Map(java.util.Map) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap)

Example 9 with Dictionary

use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata.

In the class Preprocessed, method write:

/**
 * Assembles header, dictionaries and data of this preprocessing result and passes them, together
 * with {@code file}, to {@code writePreprocessed}.
 *
 * <p>The steps run in a fixed order: entity spans are calculated into two fresh maps, summary
 * statistics of the lengths are logged, the column stores are combined from the start map, the
 * primary dictionary is encoded and the remaining dictionaries are collected from the combined
 * stores.
 *
 * @param file the target handed on to {@code writePreprocessed}
 * @throws IOException declared for failures while writing
 */
public void write(File file) throws IOException {
    final Int2IntMap starts = new Int2IntAVLTreeMap();
    final Int2IntMap lengths = new Int2IntAVLTreeMap();
    // Fills both maps in place.
    calculateEntitySpans(starts, lengths);

    log.info("Statistics = {}", lengths.values().intStream().summaryStatistics());

    final Map<String, ColumnStore> stores = combineStores(starts);
    final Dictionary primary = encodePrimaryDictionary();
    final Map<String, Dictionary> columnDictionaries = collectDictionaries(stores);

    log.debug("Writing Headers");

    // Computed before the header is built, exactly as the values are needed by its constructor.
    final int validityHash = descriptor.calculateValidityHash(job.getCsvDirectory(), job.getTag());
    final PreprocessedHeader preprocessedHeader = new PreprocessedHeader(descriptor.getName(), descriptor.getTable(), rows, columns, validityHash);
    final PreprocessedDictionaries preprocessedDictionaries = new PreprocessedDictionaries(primary, columnDictionaries);
    final PreprocessedData preprocessedData = new PreprocessedData(starts, lengths, stores);

    writePreprocessed(file, preprocessedHeader, preprocessedDictionaries, preprocessedData);
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) IntSummaryStatistics(java.util.IntSummaryStatistics) Int2IntMap(it.unimi.dsi.fastutil.ints.Int2IntMap) Int2IntAVLTreeMap(it.unimi.dsi.fastutil.ints.Int2IntAVLTreeMap)

Example 10 with Dictionary

use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata.

In the class BigStoreTest, method testEmpty:

/**
 * Round-trips an empty dictionary through plain (de)serialization and through a
 * {@link BigStore} whose chunk size is set to one megabyte.
 *
 * <p>Verifies that the deserialized copy is empty, that the data loaded from the store
 * concatenates to exactly the serialized bytes, and that the dictionary read back from the
 * store is empty as well.
 *
 * @throws JSONException declared by the test signature
 * @throws IOException if serialization or reading the stored bytes fails
 */
@Test
public void testEmpty() throws JSONException, IOException {
    final int chunkSize = Ints.checkedCast(DataSize.megabytes(1).toBytes());
    final BigStore<DictionaryId, Dictionary> bigStore = new BigStore<>(new XodusStoreFactory(), Validators.newValidator(), env, StoreMappings.DICTIONARIES.storeInfo(), e -> {
    }, e -> {
    }, MAPPER);
    bigStore.setChunkByteSize(chunkSize);

    final Dictionary emptyDict = new MapDictionary(Dataset.PLACEHOLDER, "dict");

    // Plain serialization round trip, without the store involved.
    final byte[] serialized = MAPPER.writeValueAsBytes(emptyDict);
    final Dictionary deserialized = MAPPER.readValue(serialized, Dictionary.class);
    assertThat(deserialized).isEmpty();

    // Round trip through the store.
    bigStore.add(emptyDict.getId(), emptyDict);

    // The data held by the store must concatenate to the very same bytes.
    final SequenceInputStream storedBytes = new SequenceInputStream(Iterators.asEnumeration(bigStore.getMetaStore().get(emptyDict.getId()).loadData(bigStore.getDataStore()).map(ByteArrayInputStream::new).iterator()));
    assertThat(storedBytes).hasSameContentAs(new ByteArrayInputStream(serialized));

    final Dictionary fromStore = bigStore.get(emptyDict.getId());
    assertThat(fromStore).isEmpty();
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) EncodedDictionary(com.bakdata.conquery.models.dictionary.EncodedDictionary) XodusStoreFactory(com.bakdata.conquery.models.config.XodusStoreFactory) SequenceInputStream(java.io.SequenceInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) DictionaryId(com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) Test(org.junit.jupiter.api.Test)

Aggregations

Dictionary (com.bakdata.conquery.models.dictionary.Dictionary): 13; MapDictionary (com.bakdata.conquery.models.dictionary.MapDictionary): 5; DictionaryId (com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId): 4; EncodedDictionary (com.bakdata.conquery.models.dictionary.EncodedDictionary): 3; XodusStoreFactory (com.bakdata.conquery.models.config.XodusStoreFactory): 2; DictionaryMapping (com.bakdata.conquery.models.dictionary.DictionaryMapping): 2; ColumnStore (com.bakdata.conquery.models.events.stores.root.ColumnStore): 2; Int2IntAVLTreeMap (it.unimi.dsi.fastutil.ints.Int2IntAVLTreeMap): 2; Int2IntMap (it.unimi.dsi.fastutil.ints.Int2IntMap): 2; ByteArrayInputStream (java.io.ByteArrayInputStream): 2; SequenceInputStream (java.io.SequenceInputStream): 2; CPSTypeIdResolver (com.bakdata.conquery.io.cps.CPSTypeIdResolver): 1; MetaStorage (com.bakdata.conquery.io.storage.MetaStorage): 1; NamespaceStorage (com.bakdata.conquery.io.storage.NamespaceStorage): 1; AuthorizationHelper (com.bakdata.conquery.models.auth.AuthorizationHelper): 1; Group (com.bakdata.conquery.models.auth.entities.Group): 1; Role (com.bakdata.conquery.models.auth.entities.Role): 1; User (com.bakdata.conquery.models.auth.entities.User): 1; Ability (com.bakdata.conquery.models.auth.permissions.Ability): 1; ConqueryPermission (com.bakdata.conquery.models.auth.permissions.ConqueryPermission): 1