use of com.bakdata.conquery.models.events.stores.root.ColumnStore in project conquery by bakdata.
the class PPColumn method findBestType.
public ColumnStore findBestType() {
    log.info("Compute best Subtype for Column[{}] with {}", getName(), getParser());

    ColumnStore decision = parser.findBestType();

    // this only creates the headers
    log.debug("\t{}: {} -> {}", getName(), getParser(), decision);

    return decision;
}
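findBestType() itself only delegates: the column's Parser has seen every value during preprocessing (compare parser.addLine and decideType in the DateRangeParser test further down) and picks the most compact ColumnStore for that value range. As a rough, self-contained illustration of such a decision (hypothetical names, not the conquery Parser API), an integer-like column could be narrowed to the smallest primitive array that covers the observed range:

// Hypothetical sketch, not the conquery Parser API: pick the narrowest
// primitive array that can hold all observed values of a column.
final class NarrowestTypeSketch {

    static Object findBestType(int[] values) {
        int min = Integer.MAX_VALUE;
        int max = Integer.MIN_VALUE;
        for (int v : values) {
            min = Math.min(min, v);
            max = Math.max(max, v);
        }

        // Smallest width first: byte, then short, otherwise keep int.
        if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
            byte[] store = new byte[values.length];
            for (int i = 0; i < values.length; i++) {
                store[i] = (byte) values[i];
            }
            return store;
        }
        if (min >= Short.MIN_VALUE && max <= Short.MAX_VALUE) {
            short[] store = new short[values.length];
            for (int i = 0; i < values.length; i++) {
                store[i] = (short) values[i];
            }
            return store;
        }
        return values.clone();
    }
}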
use of com.bakdata.conquery.models.events.stores.root.ColumnStore in project conquery by bakdata.
the class Preprocessed method write.
public void write(File file) throws IOException {
    Int2IntMap entityStart = new Int2IntAVLTreeMap();
    Int2IntMap entityLength = new Int2IntAVLTreeMap();

    calculateEntitySpans(entityStart, entityLength);

    final IntSummaryStatistics statistics = entityLength.values().intStream().summaryStatistics();
    log.info("Statistics = {}", statistics);

    Map<String, ColumnStore> columnStores = combineStores(entityStart);

    Dictionary primaryDictionary = encodePrimaryDictionary();

    Map<String, Dictionary> dicts = collectDictionaries(columnStores);

    log.debug("Writing Headers");

    int hash = descriptor.calculateValidityHash(job.getCsvDirectory(), job.getTag());

    PreprocessedHeader header = new PreprocessedHeader(descriptor.getName(), descriptor.getTable(), rows, columns, hash);

    final PreprocessedDictionaries dictionaries = new PreprocessedDictionaries(primaryDictionary, dicts);

    final PreprocessedData data = new PreprocessedData(entityStart, entityLength, columnStores);

    writePreprocessed(file, header, dictionaries, data);
}
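calculateEntitySpans, combineStores and encodePrimaryDictionary are not part of this snippet. Assuming the rows of each entity end up contiguous in the output, a minimal sketch of what a span calculation like calculateEntitySpans has to produce could look like this (hypothetical helper, not the conquery implementation):

import it.unimi.dsi.fastutil.ints.Int2IntAVLTreeMap;
import it.unimi.dsi.fastutil.ints.Int2IntMap;

// Hypothetical sketch of a span calculation: derive per-entity start offsets and
// row counts, assuming the rows of each entity are stored contiguously.
final class EntitySpansSketch {

    static void calculateEntitySpans(int[] rowEntityIds, Int2IntMap entityStart, Int2IntMap entityLength) {
        for (int row = 0; row < rowEntityIds.length; row++) {
            int entity = rowEntityIds[row];

            if (!entityStart.containsKey(entity)) {
                entityStart.put(entity, row); // first row belonging to this entity
            }

            int seenSoFar = entityLength.containsKey(entity) ? entityLength.get(entity) : 0;
            entityLength.put(entity, seenSoFar + 1); // one more row for this entity
        }
    }

    public static void main(String[] args) {
        Int2IntMap starts = new Int2IntAVLTreeMap();
        Int2IntMap lengths = new Int2IntAVLTreeMap();

        // entity 7 occupies rows 0-2, entity 9 rows 3-4, entity 12 row 5
        calculateEntitySpans(new int[]{7, 7, 7, 9, 9, 12}, starts, lengths);

        System.out.println(starts);  // starts:  7 -> 0, 9 -> 3, 12 -> 5
        System.out.println(lengths); // lengths: 7 -> 3, 9 -> 2, 12 -> 1
    }
}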
use of com.bakdata.conquery.models.events.stores.root.ColumnStore in project conquery by bakdata.
the class SerializationTests method bucketCompoundDateRange.
@Test
public void bucketCompoundDateRange() throws JSONException, IOException {

    Dataset dataset = new Dataset();
    dataset.setName("datasetName");

    Table table = new Table();

    Column startCol = new Column();
    startCol.setName("startCol");
    startCol.setType(MajorTypeId.DATE);
    startCol.setTable(table);

    Column endCol = new Column();
    endCol.setLabel("endLabel");
    endCol.setName("endCol");
    endCol.setType(MajorTypeId.DATE);
    endCol.setTable(table);

    Column compoundCol = new Column();
    compoundCol.setName("compoundCol");
    compoundCol.setType(MajorTypeId.DATE_RANGE);
    compoundCol.setTable(table);

    table.setColumns(new Column[]{startCol, endCol, compoundCol});
    table.setDataset(dataset);
    table.setName("tableName");

    Import imp = new Import(table);
    imp.setName("importTest");
    // 0b1000 is binary 8: the 4th bit is set, which makes the BitSet length 4.
    DateRangeTypeCompound compoundStore = new DateRangeTypeCompound(startCol.getName(), endCol.getName(), new BitSetStore(BitSet.valueOf(new byte[]{0b1000}), new BitSet(), 4));
    ColumnStore startStore = new IntegerDateStore(new ShortArrayStore(new short[]{1, 2, 3, 4}, Short.MIN_VALUE));
    ColumnStore endStore = new IntegerDateStore(new ShortArrayStore(new short[]{5, 6, 7, 8}, Short.MIN_VALUE));

    Bucket bucket = new Bucket(0, 1, 4, new ColumnStore[]{startStore, endStore, compoundStore}, Collections.emptySet(), new int[0], new int[0], imp);

    compoundStore.setParent(bucket);

    CentralRegistry registry = new CentralRegistry();
    registry.register(dataset);
    registry.register(startCol);
    registry.register(endCol);
    registry.register(compoundCol);
    registry.register(table);
    registry.register(imp);
    registry.register(bucket);

    final Validator validator = Validators.newValidator();

    SerializationTestUtil
            .forType(Bucket.class)
            .registry(registry)
            .injectables(new Injectable() {
                @Override
                public MutableInjectableValues inject(MutableInjectableValues values) {
                    return values.add(Validator.class, validator);
                }
            })
            .test(bucket);
}
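A detail worth noting in this test: DateRangeTypeCompound is constructed with the names of the start and end columns rather than with the stores themselves, and setParent(bucket) is only called once the Bucket exists, which suggests the compound store resolves its underlying date stores through its parent bucket. A serialization round-trip of the Bucket therefore has to restore that parent link as well.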
use of com.bakdata.conquery.models.events.stores.root.ColumnStore in project conquery by bakdata.
the class DateRangeParserTest method onlyClosed.
@Test
public void onlyClosed() {
    final DateRangeParser parser = new DateRangeParser(new ConqueryConfig());

    List.of(CDateRange.of(10, 11), CDateRange.exactly(10)).forEach(parser::addLine);

    final ColumnStore actual = parser.decideType();

    assertThat(actual).isInstanceOf(DateRangeTypeDateRange.class);
    assertThat(((IntegerDateStore) ((DateRangeTypeDateRange) actual).getMinStore()).getStore())
            .isInstanceOfAny(ByteArrayStore.class, RebasingStore.class);
}
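The assertion accepts either ByteArrayStore or RebasingStore for the store behind getMinStore(): the values 10 and 11 fit into a byte directly, while a rebasing store (going by the name; the conquery implementation is not shown here) would reach the same footprint for larger dates by storing small offsets from a common base. A minimal sketch of that idea, with hypothetical names:

import java.util.Arrays;

// Hypothetical sketch of a "rebasing" store: values are kept as small offsets
// from a common base, so a byte array can hold dates that are numerically large.
record RebasedByteStoreSketch(int base, byte[] offsets) {

    static RebasedByteStoreSketch of(int[] values) {
        int base = Arrays.stream(values).min().orElse(0);

        byte[] offsets = new byte[values.length];
        for (int i = 0; i < values.length; i++) {
            int delta = values[i] - base;
            if (delta > Byte.MAX_VALUE) {
                throw new IllegalArgumentException("Range too wide for a byte-backed store");
            }
            offsets[i] = (byte) delta;
        }
        return new RebasedByteStoreSketch(base, offsets);
    }

    int get(int event) {
        return base + offsets[event]; // reconstruct the original value
    }
}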
use of com.bakdata.conquery.models.events.stores.root.ColumnStore in project conquery by bakdata.
the class ImportJob method selectBucket.
/**
 * - remap Entity-Ids to global
 * - calculate per-Entity regions of the Bucket (start/end)
 * - split stores
 */
private Bucket selectBucket(Map<Integer, Integer> localStarts, Map<Integer, Integer> localLengths, ColumnStore[] stores, DictionaryMapping primaryMapping, Import imp, int bucketId, List<Integer> localEntities) {
    final int root = bucketSize * bucketId;

    IntList selectionStart = new IntArrayList();
    IntList selectionLength = new IntArrayList();
    IntSet entities = new IntOpenHashSet();

    // First entity of Bucket starts at 0, the following are appended.
    int[] entityStarts = new int[bucketSize];
    int[] entityEnds = new int[bucketSize];

    Arrays.fill(entityEnds, -1);
    Arrays.fill(entityStarts, -1);

    int currentStart = 0;

    for (int position = 0; position < bucketSize; position++) {
        int globalId = root + position;
        int localId = primaryMapping.target2Source(globalId);

        if (localId == -1) {
            continue;
        }

        if (!localStarts.containsKey(localId)) {
            continue;
        }

        entities.add(globalId);

        final int length = localLengths.get(localId);

        selectionStart.add(localStarts.get(localId));
        selectionLength.add(length);

        entityStarts[position] = currentStart;
        entityEnds[position] = currentStart + length;

        currentStart += length;
    }

    // copy only the parts of the bucket we need
    final ColumnStore[] bucketStores =
            Arrays.stream(stores)
                  .map(store -> store.select(selectionStart.toIntArray(), selectionLength.toIntArray()))
                  .toArray(ColumnStore[]::new);

    return new Bucket(bucketId, root, selectionLength.intStream().sum(), bucketStores, entities, entityStarts, entityEnds, imp);
}
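The positional bookkeeping above is easiest to see with a small worked example (plain Java, conquery types replaced by arrays and a Map, all numbers made up): entityStarts/entityEnds stay -1 for positions whose entity is missing from the import, and for every present entity they are just a running sum of the selected lengths.

import java.util.Arrays;
import java.util.Map;

// Hypothetical worked example of the span bookkeeping in selectBucket.
// With bucketSize = 3 and bucketId = 2, the bucket covers global entity ids 6, 7, 8.
final class BucketSpansSketch {

    public static void main(String[] args) {
        int bucketSize = 3;
        int bucketId = 2;
        int root = bucketSize * bucketId; // 6

        // row counts of the entities present in this import, keyed by global id (entity 7 is absent)
        Map<Integer, Integer> lengths = Map.of(6, 4, 8, 2);

        int[] entityStarts = new int[bucketSize];
        int[] entityEnds = new int[bucketSize];
        Arrays.fill(entityStarts, -1);
        Arrays.fill(entityEnds, -1);

        int currentStart = 0;
        for (int position = 0; position < bucketSize; position++) {
            Integer length = lengths.get(root + position);
            if (length == null) {
                continue; // entity not contained in this import, markers stay -1
            }
            entityStarts[position] = currentStart;
            entityEnds[position] = currentStart + length;
            currentStart += length;
        }

        System.out.println(Arrays.toString(entityStarts)); // [0, -1, 4]
        System.out.println(Arrays.toString(entityEnds));   // [4, -1, 6]
    }
}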