Search in sources :

Example 1 with DictionaryId

use of com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId in project conquery by bakdata.

the class Column method createSharedDictionaryReplacement.

/**
 * Registers the replacement for this column's placeholder dictionary when the column is backed by a
 * shared dictionary.
 * <p>
 * {@link com.bakdata.conquery.models.preproc.Preprocessed} files are not bound to a
 * {@link com.bakdata.conquery.models.worker.Namespace}, whereas an {@link Import} is. The files therefore
 * carry dummy-{@link NsIdRef}s, which are resolved to real objects with valid ids through the mapping
 * filled in here.
 * <p>
 * The shared dictionary is looked up in the storage while holding the lock for its id; if the storage does
 * not know it yet, an empty {@link MapDictionary} is created and stored. The entries themselves are not
 * updated here, see ImportJob#applyDictionaryMappings.
 *
 * @param dicts                 Dictionaries of the Import, keyed by column name
 * @param storage               The {@link NamespaceStorage} that backs the dictionaries
 * @param out                   Receives the generated replacement (placeholder id to shared dictionary), which is needed
 *                              while deserializing the next part of the
 *                              {@link com.bakdata.conquery.models.preproc.Preprocessed}-file
 * @param sharedDictionaryLocks Locks, per dictionary id, guarding the lookup/creation of shared dictionaries
 */
public void createSharedDictionaryReplacement(Map<String, Dictionary> dicts, NamespaceStorage storage, Map<DictionaryId, Dictionary> out, IdMutex<DictionaryId> sharedDictionaryLocks) {
    Preconditions.checkArgument(type.equals(MajorTypeId.STRING), "Not a STRING Column.");
    Preconditions.checkArgument(sharedDictionary != null, "Can only be used for Shared Dictionary based Columns");

    final DictionaryId targetId = new DictionaryId(table.getDataset().getId(), getSharedDictionary());

    // Either the dictionary already known to the storage (entries untouched) or a freshly stored empty one.
    Dictionary resolved;
    try (IdMutex.Locked ignored = sharedDictionaryLocks.acquire(targetId)) {
        final Dictionary existing = storage.getDictionary(targetId);
        if (existing != null) {
            resolved = existing;
        }
        else {
            // Not present yet: create it while still holding the lock for this id.
            resolved = new MapDictionary(table.getDataset(), getSharedDictionary());
            storage.updateDictionary(resolved);
        }
    }

    // The placeholder id is built from the name of the dictionary the Import carries for this column.
    final DictionaryId placeholderId = new DictionaryId(Dataset.PLACEHOLDER.getId(), dicts.get(getName()).getName());
    out.put(placeholderId, resolved);
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) DictionaryId(com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId) IdMutex(com.bakdata.conquery.models.identifiable.IdMutex) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary)

Example 2 with DictionaryId

use of com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId in project conquery by bakdata.

the class BigStoreTest method testFull.

/**
 * Round-trips a dictionary holding one million hex-string entries, first directly through the mapper and
 * then through a {@link BigStore} whose chunk size is set to one megabyte.
 * <p>
 * Verifies that (1) every entry keeps its id after plain (de)serialization, (2) the bytes held by the store
 * equal the directly serialized bytes, and (3) the dictionary read back from the store still resolves every
 * entry to its id.
 */
@Test
public void testFull() throws JSONException, IOException {
    BigStore<DictionaryId, Dictionary> store = new BigStore<>(new XodusStoreFactory(), Validators.newValidator(), env, StoreMappings.DICTIONARIES.storeInfo(), (e) -> {
    }, (e) -> {
    }, MAPPER);
    store.setChunkByteSize(Ints.checkedCast(DataSize.megabytes(1).toBytes()));

    // Single source of truth for the number of entries written and verified below.
    final int entryCount = 1000000;

    Dictionary nDict = new MapDictionary(Dataset.PLACEHOLDER, "dict");
    for (int v = 0; v < entryCount; v++) {
        // Explicit charset: the entries are read back as UTF-8 below, so do not rely on the platform default.
        nDict.add(Integer.toHexString(v).getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }

    // check if manual serialization deserialization works
    // Serialized with the same mapper the store was constructed with, as the stored bytes are compared to these.
    byte[] bytes = MAPPER.writeValueAsBytes(nDict);
    Dictionary simpleCopy = MAPPER.readValue(bytes, Dictionary.class);
    for (int v = 0; v < entryCount; v++) {
        assertThat(simpleCopy.getId(Integer.toHexString(v).getBytes(java.nio.charset.StandardCharsets.UTF_8))).isEqualTo(v);
    }

    // check if store works
    store.add(nDict.getId(), nDict);

    // check if the bytes in the store are the same as bytes
    assertThat(new SequenceInputStream(Iterators.asEnumeration(store.getMetaStore().get(nDict.getId()).loadData(store.getDataStore()).map(ByteArrayInputStream::new).iterator()))).hasSameContentAs(new ByteArrayInputStream(bytes));

    EncodedDictionary copy = new EncodedDictionary(store.get(nDict.getId()), StringTypeEncoded.Encoding.UTF8);
    for (int v = 0; v < entryCount; v++) {
        assertThat(copy.getId(Integer.toHexString(v))).isEqualTo(v);
    }
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) EncodedDictionary(com.bakdata.conquery.models.dictionary.EncodedDictionary) XodusStoreFactory(com.bakdata.conquery.models.config.XodusStoreFactory) SequenceInputStream(java.io.SequenceInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) DictionaryId(com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId) EncodedDictionary(com.bakdata.conquery.models.dictionary.EncodedDictionary) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) Test(org.junit.jupiter.api.Test)

Example 3 with DictionaryId

use of com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId in project conquery by bakdata.

the class BigStoreTest method testEmpty.

/**
 * Round-trips an empty dictionary, first directly through {@code MAPPER} and then through a
 * {@link BigStore} whose chunk size is set to one megabyte.
 * <p>
 * Verifies that the deserialized copy is empty, that the bytes held by the store equal the directly
 * serialized bytes, and that the dictionary read back from the store is empty as well.
 */
@Test
public void testEmpty() throws JSONException, IOException {
    final BigStore<DictionaryId, Dictionary> bigStore = new BigStore<>(
            new XodusStoreFactory(),
            Validators.newValidator(),
            env,
            StoreMappings.DICTIONARIES.storeInfo(),
            e -> { },
            e -> { },
            MAPPER
    );
    bigStore.setChunkByteSize(Ints.checkedCast(DataSize.megabytes(1).toBytes()));

    final Dictionary emptyDict = new MapDictionary(Dataset.PLACEHOLDER, "dict");

    // Plain serialization round trip, without the store involved.
    final byte[] serialized = MAPPER.writeValueAsBytes(emptyDict);
    final Dictionary roundTripped = MAPPER.readValue(serialized, Dictionary.class);
    assertThat(roundTripped).isEmpty();

    // Now push the dictionary through the store.
    bigStore.add(emptyDict.getId(), emptyDict);

    // The chunks loaded from the data store, concatenated, must equal the directly serialized bytes.
    final SequenceInputStream storedBytes = new SequenceInputStream(Iterators.asEnumeration(bigStore.getMetaStore().get(emptyDict.getId()).loadData(bigStore.getDataStore()).map(ByteArrayInputStream::new).iterator()));
    assertThat(storedBytes).hasSameContentAs(new ByteArrayInputStream(serialized));

    final Dictionary fromStore = bigStore.get(emptyDict.getId());
    assertThat(fromStore).isEmpty();
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) EncodedDictionary(com.bakdata.conquery.models.dictionary.EncodedDictionary) XodusStoreFactory(com.bakdata.conquery.models.config.XodusStoreFactory) SequenceInputStream(java.io.SequenceInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) DictionaryId(com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) Test(org.junit.jupiter.api.Test)

Example 4 with DictionaryId

use of com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId in project conquery by bakdata.

the class Column method createSingleColumnDictionaryReplacement.

/**
 * Registers the replacement for this column's placeholder dictionary when the column has its own
 * (non-shared) dictionary.
 * <p>
 * The dictionary the Import carries for this column is registered in {@code out} under its placeholder id
 * and is then re-bound to this column's dataset and renamed to the default dictionary name computed from
 * the import name.
 * <p>
 * Counterpart for shared dictionaries: {@link Column#createSharedDictionaryReplacement(Map, NamespaceStorage, Map, IdMutex)}
 *
 * @param dicts      Dictionaries of the Import, keyed by column name
 * @param importName Name of the Import, used to compute the dictionary's final name
 * @param out        Receives the generated replacement (placeholder id to dictionary)
 */
public void createSingleColumnDictionaryReplacement(Map<String, Dictionary> dicts, String importName, Map<DictionaryId, Dictionary> out) {
    Preconditions.checkArgument(type.equals(MajorTypeId.STRING), "Not a STRING Column.");
    Preconditions.checkArgument(sharedDictionary == null, "Cannot be used for Shared Dictionary based Columns.");

    final Dictionary columnDict = dicts.get(getName());
    final String finalName = computeDefaultDictionaryName(importName);

    // The key must be built from the name the dictionary carries *before* it is renamed below.
    final DictionaryId placeholderId = new DictionaryId(Dataset.PLACEHOLDER.getId(), columnDict.getName());
    out.put(placeholderId, columnDict);

    // Re-bind the very same instance to its real dataset and final name.
    columnDict.setDataset(table.getDataset());
    columnDict.setName(finalName);
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary) DictionaryId(com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId) ToString(lombok.ToString)

Aggregations

Dictionary (com.bakdata.conquery.models.dictionary.Dictionary)4 MapDictionary (com.bakdata.conquery.models.dictionary.MapDictionary)4 DictionaryId (com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId)4 XodusStoreFactory (com.bakdata.conquery.models.config.XodusStoreFactory)2 EncodedDictionary (com.bakdata.conquery.models.dictionary.EncodedDictionary)2 ByteArrayInputStream (java.io.ByteArrayInputStream)2 SequenceInputStream (java.io.SequenceInputStream)2 Test (org.junit.jupiter.api.Test)2 IdMutex (com.bakdata.conquery.models.identifiable.IdMutex)1 ToString (lombok.ToString)1