Use of com.bakdata.conquery.models.query.entity.Entity in project conquery by bakdata:
the class ImportJob, method selectBucket.
/**
 * Builds the Bucket for the given id:
 * - remaps local Entity-Ids to their global ids via the primary mapping
 * - derives per-entity regions (start/end offsets) inside the new Bucket
 * - slices the supplied column stores down to only the required ranges
 */
private Bucket selectBucket(Map<Integer, Integer> localStarts, Map<Integer, Integer> localLengths, ColumnStore[] stores, DictionaryMapping primaryMapping, Import imp, int bucketId, List<Integer> localEntities) {
	final int root = bucketSize * bucketId;

	final IntList includedStarts = new IntArrayList();
	final IntList includedLengths = new IntArrayList();
	final IntSet globalEntities = new IntOpenHashSet();

	// Per-position event ranges inside the new Bucket; -1 marks positions without an entity.
	final int[] starts = new int[bucketSize];
	final int[] ends = new int[bucketSize];
	Arrays.fill(starts, -1);
	Arrays.fill(ends, -1);

	// First entity of the Bucket starts at 0, the following are appended back-to-back.
	int writeOffset = 0;

	for (int position = 0; position < bucketSize; position++) {
		final int globalId = root + position;
		final int localId = primaryMapping.target2Source(globalId);

		// Skip positions whose entity is unknown to the mapping or absent from this import.
		if (localId == -1 || !localStarts.containsKey(localId)) {
			continue;
		}

		globalEntities.add(globalId);

		final int length = localLengths.get(localId);

		includedStarts.add(localStarts.get(localId));
		includedLengths.add(length);

		starts[position] = writeOffset;
		ends[position] = writeOffset + length;
		writeOffset += length;
	}

	// Copy only the parts of the stores that belong to the selected entities.
	final ColumnStore[] bucketStores =
			Arrays.stream(stores)
				  .map(store -> store.select(includedStarts.toIntArray(), includedLengths.toIntArray()))
				  .toArray(ColumnStore[]::new);

	return new Bucket(bucketId, root, includedLengths.intStream().sum(), bucketStores, globalEntities, starts, ends, imp);
}
Use of com.bakdata.conquery.models.query.entity.Entity in project conquery by bakdata:
the class BucketManager, method create.
/**
 * Creates a BucketManager for the given worker, loading all Buckets and CBlocks
 * already present in the worker's storage into the manager's lookup structures.
 */
public static BucketManager create(Worker worker, WorkerStorage storage, int entityBucketSize) {
	final Int2ObjectMap<Entity> entities = new Int2ObjectAVLTreeMap<>();
	final Map<Connector, Int2ObjectMap<Map<Bucket, CBlock>>> connectorToCBlocks = new HashMap<>();
	final Map<Table, Int2ObjectMap<List<Bucket>>> tableToBuckets = new HashMap<>();

	final IntArraySet expectedBuckets = worker.getInfo().getIncludedBuckets();
	log.trace("Trying to load these buckets that map to: {}", expectedBuckets);

	for (Bucket bucket : storage.getAllBuckets()) {
		// Unexpected buckets are only warned about but still registered.
		if (!expectedBuckets.contains(bucket.getBucket())) {
			log.warn("Found Bucket[{}] in Storage that does not belong to this Worker according to the Worker information.", bucket.getId());
		}

		registerBucket(bucket, entities, tableToBuckets);
	}

	for (CBlock cBlock : storage.getAllCBlocks()) {
		registerCBlock(cBlock, connectorToCBlocks);
	}

	return new BucketManager(worker.getJobManager(), storage, worker, entities, connectorToCBlocks, tableToBuckets, entityBucketSize);
}
Aggregations