Use of com.bakdata.conquery.models.dictionary.MapDictionary in project conquery by bakdata.
The class Column, method createSharedDictionaryReplacement.
/**
 * Creates an id-replacement mapping for shared dictionaries for an {@link Import}.
 * Because Imports are bound to a {@link com.bakdata.conquery.models.worker.Namespace} but the {@link com.bakdata.conquery.models.preproc.Preprocessed} files are not,
 * the files contain dummy-{@link NsIdRef}s. These references are mapped to actual objects with valid ids through the
 * generated mapping.
 * <p>
 * For shared dictionaries, this method ensures that the shared dictionary exists in the storage, creating it if
 * necessary.
 *
 * @param dicts                 The mapping of column names in the Import to dictionaries in the Import.
 * @param storage               The {@link NamespaceStorage} that backs the dictionaries.
 * @param out                   The collection for the generated replacements, which are needed during the deserialization of the next
 *                              part of the {@link com.bakdata.conquery.models.preproc.Preprocessed}-file.
 * @param sharedDictionaryLocks A collection of locks used for the synchronized creation of shared dictionaries.
 */
public void createSharedDictionaryReplacement(Map<String, Dictionary> dicts, NamespaceStorage storage, Map<DictionaryId, Dictionary> out, IdMutex<DictionaryId> sharedDictionaryLocks) {
    Preconditions.checkArgument(type.equals(MajorTypeId.STRING), "Not a STRING Column.");
    Preconditions.checkArgument(sharedDictionary != null, "Can only be used for Shared Dictionary based Columns");

    // If the column is based on a shared dictionary, we reference a new empty dictionary or the existing one,
    // but without updated entries. The entries are updated later on, see ImportJob#applyDictionaryMappings.
    Dictionary sharedDict = null;
    final DictionaryId sharedDictId = new DictionaryId(table.getDataset().getId(), getSharedDictionary());

    try (IdMutex.Locked lock = sharedDictionaryLocks.acquire(sharedDictId)) {
        sharedDict = storage.getDictionary(sharedDictId);

        // Create the dictionary if it is not yet present
        if (sharedDict == null) {
            sharedDict = new MapDictionary(table.getDataset(), getSharedDictionary());
            storage.updateDictionary(sharedDict);
        }
    }

    out.put(new DictionaryId(Dataset.PLACEHOLDER.getId(), dicts.get(getName()).getName()), sharedDict);
}
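A hedged caller sketch of the method above. The Table#getColumns accessor and the surrounding variable names (importDicts, storage, locks, replacements) are assumptions, not part of the snippet; the preconditions reject anything that is not a STRING column backed by a shared dictionary.

// Collect id-replacements for every shared-dictionary column of a table.
Map<DictionaryId, Dictionary> replacements = new HashMap<>();

for (Column column : table.getColumns()) {
    // Columns without a shared dictionary would trip the precondition, so skip them.
    if (column.getSharedDictionary() == null) {
        continue;
    }
    column.createSharedDictionaryReplacement(importDicts, storage, replacements, locks);
}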
Use of com.bakdata.conquery.models.dictionary.MapDictionary in project conquery by bakdata.
The class MapTypeGuesser, method createGuess.
@Override
public Guess createGuess() {
    IntegerStore indexType = p.decideIndexType();
    StringTypeDictionary type = new StringTypeDictionary(indexType, null);

    // Estimate the heap cost of a MapDictionary over all decoded values without building it.
    long mapSize = MapDictionary.estimateMemoryConsumption(p.getStrings().size(), p.getDecoded().stream().mapToLong(s -> s.length).sum());
    StringTypeEncoded result = new StringTypeEncoded(type, p.getEncoding());

    return new Guess(result, indexType.estimateMemoryConsumptionBytes(), mapSize) {
        @Override
        public StringStore getType() {
            // The actual dictionary is only materialized if this guess is selected.
            MapDictionary map = new MapDictionary(Dataset.PLACEHOLDER, UUID.randomUUID().toString());

            for (byte[] v : p.getDecoded()) {
                map.add(v);
            }

            type.setDictionary(map);
            return super.getType();
        }
    };
}
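The size estimate can be probed in isolation. A minimal sketch, assuming the two-argument form used above (entry count, total decoded bytes); the figures are arbitrary example values:

// Estimate the heap cost of a MapDictionary holding 1_000 entries of 8 bytes each.
int entryCount = 1_000;
long totalBytes = 8L * entryCount;
long estimatedBytes = MapDictionary.estimateMemoryConsumption(entryCount, totalBytes);
System.out.println("Estimated MapDictionary size: " + estimatedBytes + " bytes");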
Use of com.bakdata.conquery.models.dictionary.MapDictionary in project conquery by bakdata.
The class NamespaceStorage, method getPrimaryDictionaryRaw.
@NonNull
public Dictionary getPrimaryDictionaryRaw() {
    final Dictionary dictionary = primaryDictionary.get();

    if (dictionary == null) {
        log.trace("No prior PrimaryDictionary, creating one");
        final MapDictionary newPrimary = new MapDictionary(getDataset(), ConqueryConstants.PRIMARY_DICTIONARY);

        primaryDictionary.update(newPrimary);
        return newPrimary;
    }

    return dictionary;
}
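The primary dictionary is created lazily on first access. A minimal caller sketch, assuming a NamespaceStorage instance named storage is in scope:

// First call creates and persists an empty primary dictionary;
// subsequent calls return the stored instance.
Dictionary primary = storage.getPrimaryDictionaryRaw();
primary.add("entity-1".getBytes()); // ids are assigned in insertion order, see the tests below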
Use of com.bakdata.conquery.models.dictionary.MapDictionary in project conquery by bakdata.
The class BigStoreTest, method testFull.
@Test
public void testFull() throws JSONException, IOException {
    BigStore<DictionaryId, Dictionary> store =
            new BigStore<>(new XodusStoreFactory(), Validators.newValidator(), env, StoreMappings.DICTIONARIES.storeInfo(), (e) -> {}, (e) -> {}, MAPPER);
    store.setChunkByteSize(Ints.checkedCast(DataSize.megabytes(1).toBytes()));

    Dictionary nDict = new MapDictionary(Dataset.PLACEHOLDER, "dict");
    for (int v = 0; v < 1000000; v++) {
        nDict.add(Integer.toHexString(v).getBytes());
    }

    // check if manual serialization/deserialization works
    byte[] bytes = Jackson.BINARY_MAPPER.writeValueAsBytes(nDict);
    Dictionary simpleCopy = MAPPER.readValue(bytes, Dictionary.class);
    for (int v = 0; v < 1000000; v++) {
        assertThat(simpleCopy.getId(Integer.toHexString(v).getBytes())).isEqualTo(v);
    }

    // check if store works
    store.add(nDict.getId(), nDict);

    // check that the chunked bytes in the store match the serialized bytes
    assertThat(new SequenceInputStream(Iterators.asEnumeration(
            store.getMetaStore().get(nDict.getId()).loadData(store.getDataStore()).map(ByteArrayInputStream::new).iterator()
    ))).hasSameContentAs(new ByteArrayInputStream(bytes));

    EncodedDictionary copy = new EncodedDictionary(store.get(nDict.getId()), StringTypeEncoded.Encoding.UTF8);
    for (int v = 0; v < 1000000; v++) {
        assertThat(copy.getId(Integer.toHexString(v))).isEqualTo(v);
    }
}
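For quick experimentation, the serialization round trip can be reduced to a minimal sketch independent of BigStore. The mapper name and AssertJ usage mirror the test above; the two-entry dictionary is an assumption:

Dictionary dict = new MapDictionary(Dataset.PLACEHOLDER, "example");
dict.add("a".getBytes());
dict.add("b".getBytes());

byte[] bytes = MAPPER.writeValueAsBytes(dict);
Dictionary copy = MAPPER.readValue(bytes, Dictionary.class);

// Ids are assigned in insertion order, starting at 0 (as the loop above shows).
assertThat(copy.getId("a".getBytes())).isEqualTo(0);
assertThat(copy.getId("b".getBytes())).isEqualTo(1);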
Use of com.bakdata.conquery.models.dictionary.MapDictionary in project conquery by bakdata.
The class BigStoreTest, method testEmpty.
@Test
public void testEmpty() throws JSONException, IOException {
    BigStore<DictionaryId, Dictionary> store =
            new BigStore<>(new XodusStoreFactory(), Validators.newValidator(), env, StoreMappings.DICTIONARIES.storeInfo(), (e) -> {}, (e) -> {}, MAPPER);
    store.setChunkByteSize(Ints.checkedCast(DataSize.megabytes(1).toBytes()));

    Dictionary nDict = new MapDictionary(Dataset.PLACEHOLDER, "dict");

    // check if manual serialization/deserialization works
    byte[] bytes = MAPPER.writeValueAsBytes(nDict);
    Dictionary simpleCopy = MAPPER.readValue(bytes, Dictionary.class);
    assertThat(simpleCopy).isEmpty();

    // check if store works
    store.add(nDict.getId(), nDict);

    // check that the chunked bytes in the store match the serialized bytes
    assertThat(new SequenceInputStream(Iterators.asEnumeration(
            store.getMetaStore().get(nDict.getId()).loadData(store.getDataStore()).map(ByteArrayInputStream::new).iterator()
    ))).hasSameContentAs(new ByteArrayInputStream(bytes));

    Dictionary copy = store.get(nDict.getId());
    assertThat(copy).isEmpty();
}
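Both tests pin the chunk size to 1 MiB; the SequenceInputStream assertion reassembles a value from its chunks, which suggests large values are split across multiple store entries. A smaller chunk size can be configured the same way; the 64 KiB figure here is an arbitrary assumption:

// Assumed alternative configuration; DataSize is from io.dropwizard.util
// and Ints from Guava, as in the tests above.
store.setChunkByteSize(Ints.checkedCast(DataSize.kilobytes(64).toBytes()));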