use of com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId in project conquery by bakdata.
the class Column method createSharedDictionaryReplacement.
/**
 * Creates an id-replacement mapping for shared dictionaries for an {@link Import}.
 * Because Imports are bound to a {@link com.bakdata.conquery.models.worker.Namespace} but the {@link com.bakdata.conquery.models.preproc.Preprocessed} files are not,
 * they contain dummy-{@link NsIdRef}s. These references are mapped to actual objects with valid ids through the
 * mapping generated here.
 * <p>
 * For shared dictionaries, this method ensures that the shared dictionary exists in the storage, creating it if
 * necessary.
 *
 * @param dicts                 The mapping of column names in the Import to dictionaries in the Import
 * @param storage               The {@link NamespaceStorage} that backs the dictionaries
 * @param out                   The collection for the generated replacements, which are needed during deserialization of the next
 *                              part of the {@link com.bakdata.conquery.models.preproc.Preprocessed}-file
 * @param sharedDictionaryLocks A collection of locks used for the synchronized creation of shared dictionaries.
 */
public void createSharedDictionaryReplacement(Map<String, Dictionary> dicts, NamespaceStorage storage, Map<DictionaryId, Dictionary> out, IdMutex<DictionaryId> sharedDictionaryLocks) {
	Preconditions.checkArgument(type.equals(MajorTypeId.STRING), "Not a STRING Column.");
	Preconditions.checkArgument(sharedDictionary != null, "Can only be used for Shared Dictionary based Columns");

	// If the column is based on a shared dictionary, we reference a new empty dictionary or the existing one,
	// but without updated entries. The entries are updated later on, see ImportJob#applyDictionaryMappings.
	Dictionary sharedDict = null;
	final DictionaryId sharedDictId = new DictionaryId(table.getDataset().getId(), getSharedDictionary());

	try (IdMutex.Locked lock = sharedDictionaryLocks.acquire(sharedDictId)) {
		sharedDict = storage.getDictionary(sharedDictId);
		// Create the dictionary if it is not yet present
		if (sharedDict == null) {
			sharedDict = new MapDictionary(table.getDataset(), getSharedDictionary());
			storage.updateDictionary(sharedDict);
		}
	}

	out.put(new DictionaryId(Dataset.PLACEHOLDER.getId(), dicts.get(getName()).getName()), sharedDict);
}
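A hypothetical call site may make the replacement mechanism more concrete. This is a minimal sketch, assuming a resolved STRING column `column` with a shared dictionary, and `dicts`, `storage`, and `sharedDictionaryLocks` as described in the Javadoc; the key under which the replacement is registered is exactly the placeholder id the method puts into `out`:

// Hypothetical usage sketch; `column`, `dicts`, `storage` and `sharedDictionaryLocks`
// are assumed to exist as described in the Javadoc above.
Map<DictionaryId, Dictionary> replacements = new HashMap<>();
column.createSharedDictionaryReplacement(dicts, storage, replacements, sharedDictionaryLocks);

// The Preprocessed file references its dictionaries under the placeholder dataset.
// During deserialization of the remaining file, each placeholder id resolves to the
// shared dictionary that is now guaranteed to exist in the NamespaceStorage:
DictionaryId placeholderId = new DictionaryId(Dataset.PLACEHOLDER.getId(), dicts.get(column.getName()).getName());
Dictionary resolved = replacements.get(placeholderId); // the shared dictionary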
use of com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId in project conquery by bakdata.
the class BigStoreTest method testFull.
@Test
public void testFull() throws JSONException, IOException {
	BigStore<DictionaryId, Dictionary> store =
			new BigStore<>(new XodusStoreFactory(), Validators.newValidator(), env, StoreMappings.DICTIONARIES.storeInfo(), (e) -> {}, (e) -> {}, MAPPER);
	store.setChunkByteSize(Ints.checkedCast(DataSize.megabytes(1).toBytes()));

	Dictionary nDict = new MapDictionary(Dataset.PLACEHOLDER, "dict");
	for (int v = 0; v < 1000000; v++) {
		nDict.add(Integer.toHexString(v).getBytes());
	}

	// check if manual serialization/deserialization works
	byte[] bytes = Jackson.BINARY_MAPPER.writeValueAsBytes(nDict);
	Dictionary simpleCopy = MAPPER.readValue(bytes, Dictionary.class);
	for (int v = 0; v < 1000000; v++) {
		assertThat(simpleCopy.getId(Integer.toHexString(v).getBytes())).isEqualTo(v);
	}

	// check if the store works
	store.add(nDict.getId(), nDict);

	// check if the bytes in the store are the same as `bytes`
	assertThat(new SequenceInputStream(Iterators.asEnumeration(
			store.getMetaStore().get(nDict.getId()).loadData(store.getDataStore()).map(ByteArrayInputStream::new).iterator()
	)))
			.hasSameContentAs(new ByteArrayInputStream(bytes));

	EncodedDictionary copy = new EncodedDictionary(store.get(nDict.getId()), StringTypeEncoded.Encoding.UTF8);
	for (int v = 0; v < 1000000; v++) {
		assertThat(copy.getId(Integer.toHexString(v))).isEqualTo(v);
	}
}
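The test configures a 1 MB chunk size, so the million hex strings above serialize to a value that BigStore splits across several chunks in the data store, which the SequenceInputStream assertion then re-concatenates. A minimal sketch of that chunking arithmetic, assuming Dropwizard's decimal DataSize (1 MB = 1,000,000 bytes) and reusing only the `bytes` array from the test:

// Minimal sketch of the chunk arithmetic; no BigStore internals are assumed beyond
// the fixed chunk size configured via setChunkByteSize above.
int chunkByteSize = 1_000_000;                                        // DataSize.megabytes(1).toBytes()
int chunkCount = (bytes.length + chunkByteSize - 1) / chunkByteSize;  // ceiling division
// The meta store entry for nDict.getId() references `chunkCount` chunks in the
// data store; streamed in order, they must equal `bytes` exactly.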
use of com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId in project conquery by bakdata.
the class BigStoreTest method testEmpty.
@Test
public void testEmpty() throws JSONException, IOException {
	BigStore<DictionaryId, Dictionary> store =
			new BigStore<>(new XodusStoreFactory(), Validators.newValidator(), env, StoreMappings.DICTIONARIES.storeInfo(), (e) -> {}, (e) -> {}, MAPPER);
	store.setChunkByteSize(Ints.checkedCast(DataSize.megabytes(1).toBytes()));

	Dictionary nDict = new MapDictionary(Dataset.PLACEHOLDER, "dict");

	// check if manual serialization/deserialization works
	byte[] bytes = MAPPER.writeValueAsBytes(nDict);
	Dictionary simpleCopy = MAPPER.readValue(bytes, Dictionary.class);
	assertThat(simpleCopy).isEmpty();

	// check if the store works
	store.add(nDict.getId(), nDict);

	// check if the bytes in the store are the same as `bytes`
	assertThat(new SequenceInputStream(Iterators.asEnumeration(
			store.getMetaStore().get(nDict.getId()).loadData(store.getDataStore()).map(ByteArrayInputStream::new).iterator()
	)))
			.hasSameContentAs(new ByteArrayInputStream(bytes));

	Dictionary copy = store.get(nDict.getId());
	assertThat(copy).isEmpty();
}
use of com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId in project conquery by bakdata.
the class Column method createSingleColumnDictionaryReplacement.
/**
 * See {@link Column#createSharedDictionaryReplacement(Map, NamespaceStorage, Map, IdMutex)}
 */
public void createSingleColumnDictionaryReplacement(Map<String, Dictionary> dicts, String importName, Map<DictionaryId, Dictionary> out) {
	Preconditions.checkArgument(type.equals(MajorTypeId.STRING), "Not a STRING Column.");
	Preconditions.checkArgument(sharedDictionary == null, "Cannot be used for Shared Dictionary based Columns.");

	final Dictionary dict = dicts.get(getName());
	final String name = computeDefaultDictionaryName(importName);

	out.put(new DictionaryId(Dataset.PLACEHOLDER.getId(), dict.getName()), dict);

	dict.setDataset(table.getDataset());
	dict.setName(name);
}
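As with the shared variant, a hypothetical call site illustrates the renaming that this method performs: the replacement is keyed by the dictionary's original name under the placeholder dataset, while the dictionary object itself is moved into the real dataset under the import-specific default name. A sketch, assuming `column`, `dicts`, and `importName` as above:

// Hypothetical usage sketch; capture the name before the call, since the method
// renames the dictionary after registering the replacement.
final Dictionary dict = dicts.get(column.getName());
final String originalName = dict.getName(); // name under which the replacement is keyed

Map<DictionaryId, Dictionary> replacements = new HashMap<>();
column.createSingleColumnDictionaryReplacement(dicts, importName, replacements);

// The key carries the placeholder dataset and the pre-rename name, while the
// dictionary object now belongs to the real dataset under the default name:
Dictionary resolved = replacements.get(new DictionaryId(Dataset.PLACEHOLDER.getId(), originalName));
assert resolved == dict; // same object, renamed and re-homed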