use of com.bakdata.conquery.models.events.stores.root.ColumnStore in project conquery by bakdata.
the class AbstractSelectFilter method addImport.
@Override
public void addImport(Import imp) {
    if (values == null) {
        values = new HashSet<>();
    }

    final ColumnStore store = getColumn().getTypeFor(imp);

    values.addAll(Sets.newHashSet(((StringStore) store).iterator()));
}
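addImport lazily initializes the filter's value set and then bulk-copies every string the import's StringStore can iterate. A minimal standalone sketch of the same initialize-then-drain pattern, with a plain Iterable standing in for StringStore (the SelectValuesCollector class and addValues method are illustrative names, not the conquery API):
import java.util.HashSet;
import java.util.List;
import java.util.Set;

class SelectValuesCollector {
    private Set<String> values;

    // Lazily create the backing set, then copy every value the source can iterate.
    void addValues(Iterable<String> storeValues) {
        if (values == null) {
            values = new HashSet<>();
        }
        for (String v : storeValues) {
            values.add(v);
        }
    }

    public static void main(String[] args) {
        SelectValuesCollector collector = new SelectValuesCollector();
        collector.addValues(List.of("a", "b", "b"));
        System.out.println(collector.values); // prints the two distinct values, e.g. [a, b]
    }
}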
use of com.bakdata.conquery.models.events.stores.root.ColumnStore in project conquery by bakdata.
the class Preprocessed method combineStores.
/**
* Combine raw by-Entity data into column stores, appropriately formatted.
*/
@SuppressWarnings("rawtypes")
private Map<String, ColumnStore> combineStores(Int2IntMap entityStart) {
    Map<String, ColumnStore> columnStores = Arrays.stream(columns)
                                                  .parallel()
                                                  .collect(Collectors.toMap(PPColumn::getName, PPColumn::findBestType));

    // This object can be huge!
    Int2ObjectMap<IntList> entityEvents = new Int2ObjectOpenHashMap<>(entityStart.size());

    for (int pos = 0, size = rowEntities.size(); pos < size; pos++) {
        int entity = rowEntities.getInt(pos);
        entityEvents.computeIfAbsent(entity, (ignored) -> new IntArrayList()).add(pos);
    }

    for (int colIdx = 0; colIdx < columns.length; colIdx++) {
        final PPColumn ppColumn = columns[colIdx];
        final ColumnValues columnValues = values[colIdx];

        // No need to preprocess the column any further if it contains no values; it is likely backed by a compound ColumnStore.
        if (columnValues == null) {
            continue;
        }

        final ColumnStore store = columnStores.get(ppColumn.getName());

        entityStart.int2IntEntrySet().forEach(entry -> {
            final int entity = entry.getIntKey();
            int outIndex = entry.getIntValue();

            final IntList events = entityEvents.getOrDefault(entity, IntLists.emptyList());

            for (int inIndex : events) {
                if (columnValues.isNull(inIndex)) {
                    store.setNull(outIndex);
                }
                else {
                    final Object raw = columnValues.get(inIndex);
                    ppColumn.getParser().setValue(store, outIndex, raw);
                }
                outIndex++;
            }
        });
    }
    return columnStores;
}
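combineStores works in two passes: it first groups every raw row index by entity, then copies each entity's rows into a contiguous region of the output store, starting at that entity's precomputed offset. A minimal sketch of that layout step using plain Java collections (the raw/out arrays and the offset map below are made up for illustration):
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class EntityLayoutSketch {
    public static void main(String[] args) {
        int[] rowEntities = {7, 3, 7, 3, 7};            // entity id per raw row
        String[] rawValues = {"a", "b", "c", "d", "e"}; // raw column values, same order

        // Pass 1: group raw row indices by entity.
        Map<Integer, List<Integer>> entityEvents = new HashMap<>();
        for (int pos = 0; pos < rowEntities.length; pos++) {
            entityEvents.computeIfAbsent(rowEntities[pos], ignored -> new ArrayList<>()).add(pos);
        }

        // Precomputed start offset of each entity's contiguous region in the output.
        Map<Integer, Integer> entityStart = Map.of(3, 0, 7, 2);

        // Pass 2: copy each entity's rows into its region, preserving raw-row order.
        String[] out = new String[rawValues.length];
        entityStart.forEach((entity, start) -> {
            int outIndex = start;
            for (int inIndex : entityEvents.getOrDefault(entity, List.of())) {
                out[outIndex++] = rawValues[inIndex];
            }
        });

        System.out.println(String.join(",", out)); // b,d,a,c,e
    }
}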
use of com.bakdata.conquery.models.events.stores.root.ColumnStore in project conquery by bakdata.
the class ImportJob method sendBuckets.
/**
* select, then send buckets.
*/
private Map<WorkerId, Set<BucketId>> sendBuckets(Map<Integer, Integer> starts, Map<Integer, Integer> lengths, DictionaryMapping primaryMapping, Import imp, Map<Integer, List<Integer>> buckets2LocalEntities, ColumnStore[] storesSorted)
        throws JsonProcessingException {

    Map<WorkerId, Set<BucketId>> newWorkerAssignments = new HashMap<>();
    final ProgressReporter subJob = getProgressReporter().subJob(buckets2LocalEntities.size());

    for (Map.Entry<Integer, List<Integer>> bucket2entities : buckets2LocalEntities.entrySet()) {
        WorkerInformation responsibleWorker =
                Objects.requireNonNull(namespace.getResponsibleWorkerForBucket(bucket2entities.getKey()),
                                       () -> "No responsible worker for Bucket#" + bucket2entities.getKey());

        awaitFreeJobQueue(responsibleWorker);

        final Bucket bucket = selectBucket(starts, lengths, storesSorted, primaryMapping, imp, bucket2entities.getKey(), bucket2entities.getValue());

        newWorkerAssignments.computeIfAbsent(responsibleWorker.getId(), (ignored) -> new HashSet<>()).add(bucket.getId());

        log.trace("Sending Bucket[{}] to {}", bucket.getId(), responsibleWorker.getId());
        responsibleWorker.send(ImportBucket.forBucket(bucket));

        subJob.report(1);
    }
    subJob.done();

    return newWorkerAssignments;
}
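sendBuckets records each worker-to-bucket assignment with computeIfAbsent while it streams the buckets out, so the caller can register all assignments in a single pass afterwards. A reduced sketch of that bookkeeping, with plain strings and ints standing in for WorkerId and BucketId (findResponsibleWorker is a stand-in lookup, not the conquery call):
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

class BucketAssignmentSketch {
    // Stand-in for namespace.getResponsibleWorkerForBucket: even buckets on one worker, odd on the other.
    static String findResponsibleWorker(int bucketId) {
        return bucketId % 2 == 0 ? "worker-0" : "worker-1";
    }

    public static void main(String[] args) {
        Map<String, Set<Integer>> workerAssignments = new HashMap<>();

        for (int bucketId : List.of(0, 1, 2, 3, 4)) {
            String worker = findResponsibleWorker(bucketId);
            // Record the assignment before "sending" the bucket, mirroring the method above.
            workerAssignments.computeIfAbsent(worker, ignored -> new HashSet<>()).add(bucketId);
        }

        // e.g. {worker-0=[0, 2, 4], worker-1=[1, 3]} (map order not guaranteed)
        System.out.println(workerAssignments);
    }
}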
use of com.bakdata.conquery.models.events.stores.root.ColumnStore in project conquery by bakdata.
the class ImportJob method execute.
@Override
public void execute() throws JSONException, InterruptedException, IOException {
    getProgressReporter().setMax(NUMBER_OF_STEPS);

    log.trace("Updating primary dictionary");

    // Update primary dictionary: load new data, and create mapping.
    final DictionaryMapping primaryMapping = importPrimaryDictionary(dictionaries.getPrimaryDictionary());
    getProgressReporter().report(1);

    // Distribute the new IDs among workers
    distributeWorkerResponsibilities(primaryMapping);
    getProgressReporter().report(1);

    log.info("Importing Dictionaries");
    Map<String, DictionaryMapping> sharedDictionaryMappings = importDictionaries(namespace, dictionaries.getDictionaries(), table.getColumns(), header.getName());

    log.info("Remapping Dictionaries {}", sharedDictionaryMappings.values());
    applyDictionaryMappings(sharedDictionaryMappings, container.getStores());

    Import imp = createImport(header, container.getStores(), table.getColumns(), container.size());
    namespace.getStorage().updateImport(imp);

    Map<Integer, List<Integer>> buckets2LocalEntities = groupEntitiesByBucket(container.entities(), primaryMapping, bucketSize);

    final ColumnStore[] storesSorted = Arrays.stream(table.getColumns())
                                             .map(Column::getName)
                                             .map(container.getStores()::get)
                                             .map(Objects::requireNonNull)
                                             .toArray(ColumnStore[]::new);

    log.info("Start sending {} Buckets", buckets2LocalEntities.size());

    // we use this to track assignment to workers.
    final Map<WorkerId, Set<BucketId>> workerAssignments = sendBuckets(container.getStarts(), container.getLengths(), primaryMapping, imp, buckets2LocalEntities, storesSorted);

    workerAssignments.forEach(namespace::addBucketsToWorker);

    getProgressReporter().done();
}
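Before the buckets are sent, execute groups the mapped entity ids into buckets via groupEntitiesByBucket. A small illustrative sketch of one plausible grouping step, assuming (purely for the sketch, not as a statement of conquery's exact rule) that an entity's bucket is its id divided by the bucket size:
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

class BucketGroupingSketch {
    public static void main(String[] args) {
        int bucketSize = 100;
        List<Integer> entityIds = List.of(3, 97, 105, 250, 260);

        // Assumed rule for the sketch: bucket = entityId / bucketSize.
        Map<Integer, List<Integer>> buckets2Entities = new TreeMap<>();
        for (int entity : entityIds) {
            buckets2Entities.computeIfAbsent(entity / bucketSize, ignored -> new ArrayList<>()).add(entity);
        }

        System.out.println(buckets2Entities); // {0=[3, 97], 1=[105], 2=[250, 260]}
    }
}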
use of com.bakdata.conquery.models.events.stores.root.ColumnStore in project conquery by bakdata.
the class Preprocessed method collectDictionaries.
private static Map<String, Dictionary> collectDictionaries(Map<String, ColumnStore> columnStores) {
    final Map<String, Dictionary> collect = new HashMap<>();

    for (Map.Entry<String, ColumnStore> entry : columnStores.entrySet()) {
        if (!(entry.getValue() instanceof StringStore)) {
            continue;
        }

        final Dictionary dictionary = ((StringStore) entry.getValue()).getUnderlyingDictionary();

        if (dictionary == null) {
            continue;
        }

        collect.put(entry.getKey(), dictionary);
    }

    return collect;
}
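collectDictionaries walks a heterogeneous name-to-ColumnStore map and keeps only the dictionaries backing StringStore values. The same filter-by-subtype walk can be sketched over any map; the map contents below are made up, with String standing in for the interesting subtype:
import java.util.LinkedHashMap;
import java.util.Map;

class SubtypeCollectSketch {
    public static void main(String[] args) {
        // Heterogeneous map, standing in for the name -> ColumnStore map.
        Map<String, Object> stores = Map.of("age", 42, "name", "string-backed", "date", 20210101);

        // Keep only entries whose value is of the interesting subtype (String here, StringStore above).
        Map<String, String> collected = new LinkedHashMap<>();
        for (Map.Entry<String, Object> entry : stores.entrySet()) {
            if (!(entry.getValue() instanceof String)) {
                continue;
            }
            collected.put(entry.getKey(), (String) entry.getValue());
        }

        System.out.println(collected); // {name=string-backed}
    }
}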