Search in sources :

Example 1 with MapDictionary

use of com.bakdata.conquery.models.dictionary.MapDictionary in project conquery by bakdata.

the class Column method createSharedDictionaryReplacement.

/**
 * Registers the id-replacement for this column's shared dictionary while an {@link Import} is loaded.
 * <p>
 * {@link com.bakdata.conquery.models.preproc.Preprocessed} files are not bound to a
 * {@link com.bakdata.conquery.models.worker.Namespace}, whereas Imports are. The files therefore carry
 * dummy-{@link NsIdRef}s, which are resolved to real objects with valid ids through the mapping filled here.
 * <p>
 * The shared dictionary is looked up in the storage and created there if it is missing. The lookup and the
 * creation happen while holding the lock acquired for the shared dictionary's id.
 *
 * @param dicts                 Maps column names of the Import to the dictionaries of the Import
 * @param storage               The {@link NamespaceStorage} backing the dictionaries
 * @param out                   Receives the generated replacement, which is needed while deserializing the next part
 *                              of the {@link com.bakdata.conquery.models.preproc.Preprocessed}-file
 * @param sharedDictionaryLocks Locks used to synchronize the creation of shared dictionaries
 */
public void createSharedDictionaryReplacement(Map<String, Dictionary> dicts, NamespaceStorage storage, Map<DictionaryId, Dictionary> out, IdMutex<DictionaryId> sharedDictionaryLocks) {
    Preconditions.checkArgument(type.equals(MajorTypeId.STRING), "Not a STRING Column.");
    Preconditions.checkArgument(sharedDictionary != null, "Can only be used for Shared Dictionary based Columns");

    final DictionaryId targetId = new DictionaryId(table.getDataset().getId(), getSharedDictionary());

    // The replacement points either at a freshly created, empty dictionary or at the already stored one
    // (without updated entries). Entries are filled in later, see ImportJob#applyDictionaryMappings.
    final Dictionary replacement;
    try (IdMutex.Locked ignored = sharedDictionaryLocks.acquire(targetId)) {
        final Dictionary stored = storage.getDictionary(targetId);
        if (stored != null) {
            replacement = stored;
        }
        else {
            // Not present yet: create it and make it known to the storage.
            replacement = new MapDictionary(table.getDataset(), getSharedDictionary());
            storage.updateDictionary(replacement);
        }
    }

    // Key under which the placeholder dictionary of this column is referenced.
    final DictionaryId placeholderId = new DictionaryId(Dataset.PLACEHOLDER.getId(), dicts.get(getName()).getName());
    out.put(placeholderId, replacement);
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) DictionaryId(com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId) IdMutex(com.bakdata.conquery.models.identifiable.IdMutex) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary)

Example 2 with MapDictionary

use of com.bakdata.conquery.models.dictionary.MapDictionary in project conquery by bakdata.

the class MapTypeGuesser method createGuess.

/**
 * Creates a {@link Guess} for storing the strings in a {@link StringTypeEncoded} store backed by a
 * {@link MapDictionary}.
 * <p>
 * Only the memory estimates are computed up front. The dictionary itself is built and attached when
 * {@code getType()} is called on the returned guess.
 */
@Override
public Guess createGuess() {
    final IntegerStore indices = p.decideIndexType();
    final StringTypeDictionary dictionaryStore = new StringTypeDictionary(indices, null);

    // Total number of bytes over all decoded values, used for the dictionary's memory estimate.
    long decodedBytes = 0;
    for (byte[] decoded : p.getDecoded()) {
        decodedBytes += decoded.length;
    }
    final long dictionaryEstimate = MapDictionary.estimateMemoryConsumption(p.getStrings().size(), decodedBytes);

    final StringTypeEncoded encoded = new StringTypeEncoded(dictionaryStore, p.getEncoding());

    return new Guess(encoded, indices.estimateMemoryConsumptionBytes(), dictionaryEstimate) {

        @Override
        public StringStore getType() {
            // Fill a new placeholder-dataset dictionary with all decoded values before handing out the store.
            final MapDictionary dictionary = new MapDictionary(Dataset.PLACEHOLDER, UUID.randomUUID().toString());
            p.getDecoded().forEach(dictionary::add);
            dictionaryStore.setDictionary(dictionary);
            return super.getType();
        }
    };
}
Also used : IntegerStore(com.bakdata.conquery.models.events.stores.root.IntegerStore) StringTypeDictionary(com.bakdata.conquery.models.events.stores.specific.string.StringTypeDictionary) StringTypeEncoded(com.bakdata.conquery.models.events.stores.specific.string.StringTypeEncoded) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary)

Example 3 with MapDictionary

use of com.bakdata.conquery.models.dictionary.MapDictionary in project conquery by bakdata.

the class NamespaceStorage method getPrimaryDictionaryRaw.

/**
 * Returns the stored primary dictionary. If none is stored yet, a new {@link MapDictionary} named
 * {@link ConqueryConstants#PRIMARY_DICTIONARY} is created for this storage's dataset, stored and returned.
 *
 * @return the existing or newly created primary dictionary, never {@code null}
 */
@NonNull
public Dictionary getPrimaryDictionaryRaw() {
    final Dictionary existing = primaryDictionary.get();
    if (existing != null) {
        return existing;
    }

    log.trace("No prior PrimaryDictionary, creating one");
    final MapDictionary created = new MapDictionary(getDataset(), ConqueryConstants.PRIMARY_DICTIONARY);
    primaryDictionary.update(created);
    return created;
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) EncodedDictionary(com.bakdata.conquery.models.dictionary.EncodedDictionary) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) NonNull(lombok.NonNull)

Example 4 with MapDictionary

use of com.bakdata.conquery.models.dictionary.MapDictionary in project conquery by bakdata.

the class BigStoreTest method testFull.

/**
 * Fills a {@link MapDictionary} with the hex representations of a large range of integers and checks that
 * <ul>
 *     <li>serializing and deserializing it by hand keeps every entry's id,</li>
 *     <li>the bytes held by a {@link BigStore} (chunk byte size of one megabyte) equal the manually serialized bytes,</li>
 *     <li>the dictionary read back from the store, wrapped as UTF-8 {@link EncodedDictionary}, keeps every entry's id.</li>
 * </ul>
 */
@Test
public void testFull() throws JSONException, IOException {
    // Number of entries added to the dictionary and verified after each round trip.
    final int entryCount = 1000000;
    BigStore<DictionaryId, Dictionary> store = new BigStore<>(new XodusStoreFactory(), Validators.newValidator(), env, StoreMappings.DICTIONARIES.storeInfo(), (e) -> {
    }, (e) -> {
    }, MAPPER);
    store.setChunkByteSize(Ints.checkedCast(DataSize.megabytes(1).toBytes()));
    Dictionary nDict = new MapDictionary(Dataset.PLACEHOLDER, "dict");
    for (int v = 0; v < entryCount; v++) {
        // Encode explicitly as UTF-8 instead of the platform default charset: the entries are read back
        // below through an EncodedDictionary that is declared as UTF8.
        nDict.add(Integer.toHexString(v).getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }
    // check if manual serialization deserialization works
    byte[] bytes = Jackson.BINARY_MAPPER.writeValueAsBytes(nDict);
    Dictionary simpleCopy = MAPPER.readValue(bytes, Dictionary.class);
    for (int v = 0; v < entryCount; v++) {
        // Ids are expected to equal the insertion index.
        assertThat(simpleCopy.getId(Integer.toHexString(v).getBytes(java.nio.charset.StandardCharsets.UTF_8))).isEqualTo(v);
    }
    // check if store works
    store.add(nDict.getId(), nDict);
    // check if the bytes in the store are the same as bytes
    assertThat(new SequenceInputStream(Iterators.asEnumeration(store.getMetaStore().get(nDict.getId()).loadData(store.getDataStore()).map(ByteArrayInputStream::new).iterator()))).hasSameContentAs(new ByteArrayInputStream(bytes));
    EncodedDictionary copy = new EncodedDictionary(store.get(nDict.getId()), StringTypeEncoded.Encoding.UTF8);
    for (int v = 0; v < entryCount; v++) {
        assertThat(copy.getId(Integer.toHexString(v))).isEqualTo(v);
    }
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) EncodedDictionary(com.bakdata.conquery.models.dictionary.EncodedDictionary) XodusStoreFactory(com.bakdata.conquery.models.config.XodusStoreFactory) SequenceInputStream(java.io.SequenceInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) DictionaryId(com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId) EncodedDictionary(com.bakdata.conquery.models.dictionary.EncodedDictionary) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) Test(org.junit.jupiter.api.Test)

Example 5 with MapDictionary

use of com.bakdata.conquery.models.dictionary.MapDictionary in project conquery by bakdata.

the class BigStoreTest method testEmpty.

/**
 * Checks that an empty {@link MapDictionary} stays empty after manual (de)serialization and after being
 * written to and read from a {@link BigStore}, and that the stored bytes equal the manually serialized ones.
 */
@Test
public void testEmpty() throws JSONException, IOException {
    BigStore<DictionaryId, Dictionary> store = new BigStore<>(
            new XodusStoreFactory(),
            Validators.newValidator(),
            env,
            StoreMappings.DICTIONARIES.storeInfo(),
            (e) -> {
            },
            (e) -> {
            },
            MAPPER
    );
    store.setChunkByteSize(Ints.checkedCast(DataSize.megabytes(1).toBytes()));

    Dictionary emptyDict = new MapDictionary(Dataset.PLACEHOLDER, "dict");

    // Manual round trip through the mapper.
    byte[] serialized = MAPPER.writeValueAsBytes(emptyDict);
    Dictionary deserialized = MAPPER.readValue(serialized, Dictionary.class);
    assertThat(deserialized).isEmpty();

    // Round trip through the store.
    store.add(emptyDict.getId(), emptyDict);

    // The concatenated data loaded for the entry must match the manually serialized bytes.
    SequenceInputStream storedBytes = new SequenceInputStream(
            Iterators.asEnumeration(
                    store.getMetaStore()
                         .get(emptyDict.getId())
                         .loadData(store.getDataStore())
                         .map(ByteArrayInputStream::new)
                         .iterator()
            )
    );
    ByteArrayInputStream expectedBytes = new ByteArrayInputStream(serialized);
    assertThat(storedBytes).hasSameContentAs(expectedBytes);

    Dictionary fromStore = store.get(emptyDict.getId());
    assertThat(fromStore).isEmpty();
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) EncodedDictionary(com.bakdata.conquery.models.dictionary.EncodedDictionary) XodusStoreFactory(com.bakdata.conquery.models.config.XodusStoreFactory) SequenceInputStream(java.io.SequenceInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) DictionaryId(com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) Test(org.junit.jupiter.api.Test)

Aggregations

MapDictionary (com.bakdata.conquery.models.dictionary.MapDictionary)5 Dictionary (com.bakdata.conquery.models.dictionary.Dictionary)4 EncodedDictionary (com.bakdata.conquery.models.dictionary.EncodedDictionary)3 DictionaryId (com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId)3 XodusStoreFactory (com.bakdata.conquery.models.config.XodusStoreFactory)2 ByteArrayInputStream (java.io.ByteArrayInputStream)2 SequenceInputStream (java.io.SequenceInputStream)2 Test (org.junit.jupiter.api.Test)2 IntegerStore (com.bakdata.conquery.models.events.stores.root.IntegerStore)1 StringTypeDictionary (com.bakdata.conquery.models.events.stores.specific.string.StringTypeDictionary)1 StringTypeEncoded (com.bakdata.conquery.models.events.stores.specific.string.StringTypeEncoded)1 IdMutex (com.bakdata.conquery.models.identifiable.IdMutex)1 NonNull (lombok.NonNull)1