Use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata.
In class BigStoreTest, method testFull:
@Test
public void testFull() throws JSONException, IOException {
	BigStore<DictionaryId, Dictionary> store =
			new BigStore<>(new XodusStoreFactory(), Validators.newValidator(), env, StoreMappings.DICTIONARIES.storeInfo(), (e) -> {}, (e) -> {}, MAPPER);
	store.setChunkByteSize(Ints.checkedCast(DataSize.megabytes(1).toBytes()));

	Dictionary nDict = new MapDictionary(Dataset.PLACEHOLDER, "dict");
	for (int v = 0; v < 1000000; v++) {
		nDict.add(Integer.toHexString(v).getBytes());
	}

	// check if manual serialization deserialization works
	byte[] bytes = Jackson.BINARY_MAPPER.writeValueAsBytes(nDict);
	Dictionary simpleCopy = MAPPER.readValue(bytes, Dictionary.class);
	for (int v = 0; v < 1000000; v++) {
		assertThat(simpleCopy.getId(Integer.toHexString(v).getBytes())).isEqualTo(v);
	}

	// check if store works
	store.add(nDict.getId(), nDict);

	// check if the bytes in the store are the same as bytes
	assertThat(new SequenceInputStream(Iterators.asEnumeration(
			store.getMetaStore().get(nDict.getId()).loadData(store.getDataStore()).map(ByteArrayInputStream::new).iterator()
	))).hasSameContentAs(new ByteArrayInputStream(bytes));

	EncodedDictionary copy = new EncodedDictionary(store.get(nDict.getId()), StringTypeEncoded.Encoding.UTF8);
	for (int v = 0; v < 1000000; v++) {
		assertThat(copy.getId(Integer.toHexString(v))).isEqualTo(v);
	}
}
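The last assertion above rebuilds the stored value by chaining the individual chunk streams with a SequenceInputStream. The sketch below is a minimal, JDK-only illustration of that chunk-and-reassemble idea (fixed-size byte chunks, joined back together lazily); it is not BigStore's actual chunking code, and all names in it are invented for the illustration.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class ChunkingSketch {

	// Split a byte array into fixed-size chunks, mirroring the idea behind setChunkByteSize(...).
	static List<byte[]> split(byte[] data, int chunkSize) {
		List<byte[]> chunks = new ArrayList<>();
		for (int offset = 0; offset < data.length; offset += chunkSize) {
			chunks.add(Arrays.copyOfRange(data, offset, Math.min(data.length, offset + chunkSize)));
		}
		return chunks;
	}

	public static void main(String[] args) throws IOException {
		byte[] original = new byte[3_000_000];            // stands in for the serialized dictionary bytes
		List<byte[]> chunks = split(original, 1 << 20);   // 1 MiB chunks, as configured in the test

		// Reassemble lazily by chaining the chunk streams, just like the assertion above.
		List<InputStream> parts = new ArrayList<>();
		for (byte[] chunk : chunks) {
			parts.add(new ByteArrayInputStream(chunk));
		}
		InputStream reassembled = new SequenceInputStream(Collections.enumeration(parts));

		System.out.println("chunks=" + chunks.size() + " bytes=" + reassembled.readAllBytes().length);
	}
}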
Use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata.
In class Preprocessed, method encodePrimaryDictionary:
private Dictionary encodePrimaryDictionary() {
	log.debug("Encode primary Dictionary");

	primaryColumn.applyEncoding(StringTypeEncoded.Encoding.UTF8);

	final Dictionary primaryDictionary = new MapTypeGuesser(primaryColumn).createGuess().getType().getUnderlyingDictionary();
	log.trace("\tPrimaryColumn -> {}", primaryDictionary);

	return primaryDictionary;
}
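For orientation, the sketch below is a minimal, library-free illustration of what a dictionary provides here: every distinct value gets a contiguous int id that can be looked up again later. The class and its names are made up for the illustration; conquery's Dictionary/MapDictionary work on byte[] values and are tied to a Dataset, as the tests above show.

import java.util.LinkedHashMap;
import java.util.Map;

// Hypothetical class, not part of conquery.
class DictionaryEncodingSketch {

	private final Map<String, Integer> ids = new LinkedHashMap<>();

	// Assign the next free id to a value, or return the id it already has.
	int add(String value) {
		return ids.computeIfAbsent(value, v -> ids.size());
	}

	// Look up a previously assigned id; -1 signals "unknown value".
	int getId(String value) {
		return ids.getOrDefault(value, -1);
	}

	public static void main(String[] args) {
		DictionaryEncodingSketch dict = new DictionaryEncodingSketch();
		System.out.println(dict.add("entity-a"));   // 0
		System.out.println(dict.add("entity-b"));   // 1
		System.out.println(dict.getId("entity-a")); // 0
		System.out.println(dict.getId("unknown"));  // -1
	}
}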
Use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata.
In class Preprocessed, method collectDictionaries:
private static Map<String, Dictionary> collectDictionaries(Map<String, ColumnStore> columnStores) {
	final Map<String, Dictionary> collect = new HashMap<>();

	for (Map.Entry<String, ColumnStore> entry : columnStores.entrySet()) {
		if (!(entry.getValue() instanceof StringStore)) {
			continue;
		}

		final Dictionary dictionary = ((StringStore) entry.getValue()).getUnderlyingDictionary();

		if (dictionary == null) {
			continue;
		}

		collect.put(entry.getKey(), dictionary);
	}

	return collect;
}
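Functionally this is a filter-and-collect over the column stores. Assuming the conquery types used above (ColumnStore, StringStore, Dictionary) are on the classpath and getUnderlyingDictionary() is a plain getter, a stream-based equivalent could look like the sketch below; the method name is invented for the illustration.

// requires java.util.stream.Collectors in addition to the imports of the surrounding class
private static Map<String, Dictionary> collectDictionariesStreamed(Map<String, ColumnStore> columnStores) {
	return columnStores.entrySet().stream()
			// keep only columns backed by a string store ...
			.filter(entry -> entry.getValue() instanceof StringStore)
			// ... that actually carry a dictionary
			.filter(entry -> ((StringStore) entry.getValue()).getUnderlyingDictionary() != null)
			.collect(Collectors.toMap(Map.Entry::getKey, entry -> ((StringStore) entry.getValue()).getUnderlyingDictionary()));
}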
Use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata.
In class Preprocessed, method write:
public void write(File file) throws IOException {
	Int2IntMap entityStart = new Int2IntAVLTreeMap();
	Int2IntMap entityLength = new Int2IntAVLTreeMap();

	calculateEntitySpans(entityStart, entityLength);

	final IntSummaryStatistics statistics = entityLength.values().intStream().summaryStatistics();
	log.info("Statistics = {}", statistics);

	Map<String, ColumnStore> columnStores = combineStores(entityStart);

	Dictionary primaryDictionary = encodePrimaryDictionary();
	Map<String, Dictionary> dicts = collectDictionaries(columnStores);

	log.debug("Writing Headers");

	int hash = descriptor.calculateValidityHash(job.getCsvDirectory(), job.getTag());

	PreprocessedHeader header = new PreprocessedHeader(descriptor.getName(), descriptor.getTable(), rows, columns, hash);

	final PreprocessedDictionaries dictionaries = new PreprocessedDictionaries(primaryDictionary, dicts);
	final PreprocessedData data = new PreprocessedData(entityStart, entityLength, columnStores);

	writePreprocessed(file, header, dictionaries, data);
}
Use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata.
In class BigStoreTest, method testEmpty:
@Test
public void testEmpty() throws JSONException, IOException {
	BigStore<DictionaryId, Dictionary> store =
			new BigStore<>(new XodusStoreFactory(), Validators.newValidator(), env, StoreMappings.DICTIONARIES.storeInfo(), (e) -> {}, (e) -> {}, MAPPER);
	store.setChunkByteSize(Ints.checkedCast(DataSize.megabytes(1).toBytes()));

	Dictionary nDict = new MapDictionary(Dataset.PLACEHOLDER, "dict");

	// check if manual serialization deserialization works
	byte[] bytes = MAPPER.writeValueAsBytes(nDict);
	Dictionary simpleCopy = MAPPER.readValue(bytes, Dictionary.class);
	assertThat(simpleCopy).isEmpty();

	// check if store works
	store.add(nDict.getId(), nDict);

	// check if the bytes in the store are the same as bytes
	assertThat(new SequenceInputStream(Iterators.asEnumeration(
			store.getMetaStore().get(nDict.getId()).loadData(store.getDataStore()).map(ByteArrayInputStream::new).iterator()
	))).hasSameContentAs(new ByteArrayInputStream(bytes));

	Dictionary copy = store.get(nDict.getId());
	assertThat(copy).isEmpty();
}