Search in sources :

Example 1 with ProgressReporter

use of com.bakdata.conquery.util.progressreporter.ProgressReporter in project conquery by bakdata.

In the class ImportJob, method sendBuckets.

/**
 * Builds one {@link Bucket} per entry of {@code buckets2LocalEntities} and sends it to the
 * worker responsible for that bucket number.
 *
 * <p>For each entry the responsible worker is looked up on the namespace,
 * {@code awaitFreeJobQueue} is called for that worker, the bucket is created through
 * {@code selectBucket} and then sent as an {@code ImportBucket} message. One progress step
 * is reported per bucket and the sub-job is marked done after the last one.
 *
 * @param buckets2LocalEntities bucket number mapped to the entities that go into that bucket
 * @return for every worker that received at least one bucket, the ids of the buckets sent to it
 * @throws NullPointerException    if the namespace has no responsible worker for a bucket number
 * @throws JsonProcessingException presumably raised while building or sending a bucket; the
 *                                 declaring call is not visible here
 */
private Map<WorkerId, Set<BucketId>> sendBuckets(Map<Integer, Integer> starts, Map<Integer, Integer> lengths, DictionaryMapping primaryMapping, Import imp, Map<Integer, List<Integer>> buckets2LocalEntities, ColumnStore[] storesSorted) throws JsonProcessingException {
    final ProgressReporter progress = getProgressReporter().subJob(buckets2LocalEntities.size());
    final Map<WorkerId, Set<BucketId>> assignments = new HashMap<>();

    for (Map.Entry<Integer, List<Integer>> entry : buckets2LocalEntities.entrySet()) {
        final Integer bucketNumber = entry.getKey();
        final List<Integer> entities = entry.getValue();

        // Fail early when no worker has been assigned to this bucket number.
        final WorkerInformation worker = Objects.requireNonNull(
                namespace.getResponsibleWorkerForBucket(bucketNumber),
                () -> "No responsible worker for Bucket#" + bucketNumber
        );

        // Must happen before the bucket is built, as in the original call order.
        awaitFreeJobQueue(worker);

        final Bucket created = selectBucket(starts, lengths, storesSorted, primaryMapping, imp, bucketNumber, entities);

        // Record the assignment before the bucket leaves this node.
        final WorkerId workerId = worker.getId();
        final BucketId bucketId = created.getId();
        assignments.computeIfAbsent(workerId, id -> new HashSet<>()).add(bucketId);

        log.trace("Sending Bucket[{}] to {}", created.getId(), worker.getId());
        worker.send(ImportBucket.forBucket(created));

        progress.report(1);
    }

    progress.done();
    return assignments;
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) java.util(java.util) ConqueryConfig(com.bakdata.conquery.models.config.ConqueryConfig) Getter(lombok.Getter) PreprocessedHeader(com.bakdata.conquery.models.preproc.PreprocessedHeader) RequiredArgsConstructor(lombok.RequiredArgsConstructor) PreprocessedData(com.bakdata.conquery.models.preproc.PreprocessedData) com.bakdata.conquery.models.identifiable.ids.specific(com.bakdata.conquery.models.identifiable.ids.specific) NamespaceStorage(com.bakdata.conquery.io.storage.NamespaceStorage) IdMutex(com.bakdata.conquery.models.identifiable.IdMutex) com.bakdata.conquery.models.datasets(com.bakdata.conquery.models.datasets) IntegerStore(com.bakdata.conquery.models.events.stores.root.IntegerStore) WorkerInformation(com.bakdata.conquery.models.worker.WorkerInformation) BadRequestException(javax.ws.rs.BadRequestException) JSONException(com.bakdata.conquery.models.exceptions.JSONException) DictionaryMapping(com.bakdata.conquery.models.dictionary.DictionaryMapping) IntegerParser(com.bakdata.conquery.models.preproc.parser.specific.IntegerParser) PreprocessedReader(com.bakdata.conquery.models.preproc.PreprocessedReader) ResourceUtil(com.bakdata.conquery.util.ResourceUtil) Bucket(com.bakdata.conquery.models.events.Bucket) MajorTypeId(com.bakdata.conquery.models.events.MajorTypeId) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) StringStore(com.bakdata.conquery.models.events.stores.root.StringStore) com.bakdata.conquery.models.messages.namespaces.specific(com.bakdata.conquery.models.messages.namespaces.specific) Collectors(java.util.stream.Collectors) Entity(com.bakdata.conquery.models.query.entity.Entity) ProgressReporter(com.bakdata.conquery.util.progressreporter.ProgressReporter) Slf4j(lombok.extern.slf4j.Slf4j) IntList(it.unimi.dsi.fastutil.ints.IntList) 
Response(javax.ws.rs.core.Response) IntOpenHashSet(it.unimi.dsi.fastutil.ints.IntOpenHashSet) WebApplicationException(javax.ws.rs.WebApplicationException) IntSet(it.unimi.dsi.fastutil.ints.IntSet) PreprocessedDictionaries(com.bakdata.conquery.models.preproc.PreprocessedDictionaries) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) Namespace(com.bakdata.conquery.models.worker.Namespace) InputStream(java.io.InputStream) WorkerInformation(com.bakdata.conquery.models.worker.WorkerInformation) IntOpenHashSet(it.unimi.dsi.fastutil.ints.IntOpenHashSet) IntSet(it.unimi.dsi.fastutil.ints.IntSet) ProgressReporter(com.bakdata.conquery.util.progressreporter.ProgressReporter) Bucket(com.bakdata.conquery.models.events.Bucket) IntList(it.unimi.dsi.fastutil.ints.IntList) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) IntOpenHashSet(it.unimi.dsi.fastutil.ints.IntOpenHashSet)

Example 2 with ProgressReporter

use of com.bakdata.conquery.util.progressreporter.ProgressReporter in project conquery by bakdata.

In the class ImportJob, method applyDictionaryMappings.

/**
 * Apply new positions into incoming shared dictionaries.
 *
 * <p>For every entry of {@code mappings}, the store registered under the same column name in
 * {@code values} is remapped in place: a new {@link IntegerStore} is chosen to fit the value
 * range of {@code mapping.target()}, the mapping is applied into it, and it is installed as
 * the index store of the column's {@link StringStore}. One progress step is reported per
 * column and the sub-job is marked done once all columns are processed.
 *
 * @param mappings column name mapped to the dictionary mapping to apply to that column
 * @param values   column name mapped to its store; every key of {@code mappings} is expected to
 *                 resolve to a {@link StringStore} here (a missing entry fails with a
 *                 {@link NullPointerException}, a different store type with a {@link ClassCastException})
 */
private void applyDictionaryMappings(Map<String, DictionaryMapping> mappings, Map<String, ColumnStore> values) {
    final ProgressReporter subJob = getProgressReporter().subJob(mappings.size());
    for (Map.Entry<String, DictionaryMapping> entry : mappings.entrySet()) {
        final String columnName = entry.getKey();
        final DictionaryMapping mapping = entry.getValue();
        final StringStore stringStore = (StringStore) values.get(columnName);
        log.debug("Remapping Column[{}] = {} with {}", columnName, stringStore, mapping);
        // we need to find a new Type for the index-Column as it's going to be remapped and might change in size
        final IntegerParser indexParser = new IntegerParser(config);
        // NOTE(review): for an empty target, getMin()/getMax() yield Integer.MAX_VALUE/Integer.MIN_VALUE
        // - confirm that the parser tolerates that range.
        final IntSummaryStatistics statistics = mapping.target().intStream().summaryStatistics();
        indexParser.setLines(stringStore.getLines());
        indexParser.setMinValue(statistics.getMin());
        indexParser.setMaxValue(statistics.getMax());
        final IntegerStore newType = indexParser.findBestType();
        log.trace("Decided for {}", newType);
        mapping.applyToStore(stringStore, newType);
        stringStore.setIndexStore(newType);
        subJob.report(1);
    }
    // Close the sub-job, consistent with sendBuckets; previously it was never marked as done.
    subJob.done();
}
Also used : IntegerStore(com.bakdata.conquery.models.events.stores.root.IntegerStore) ProgressReporter(com.bakdata.conquery.util.progressreporter.ProgressReporter) StringStore(com.bakdata.conquery.models.events.stores.root.StringStore) DictionaryMapping(com.bakdata.conquery.models.dictionary.DictionaryMapping) IntegerParser(com.bakdata.conquery.models.preproc.parser.specific.IntegerParser)

Aggregations

DictionaryMapping (com.bakdata.conquery.models.dictionary.DictionaryMapping)2 IntegerStore (com.bakdata.conquery.models.events.stores.root.IntegerStore)2 StringStore (com.bakdata.conquery.models.events.stores.root.StringStore)2 IntegerParser (com.bakdata.conquery.models.preproc.parser.specific.IntegerParser)2 ProgressReporter (com.bakdata.conquery.util.progressreporter.ProgressReporter)2 NamespaceStorage (com.bakdata.conquery.io.storage.NamespaceStorage)1 ConqueryConfig (com.bakdata.conquery.models.config.ConqueryConfig)1 com.bakdata.conquery.models.datasets (com.bakdata.conquery.models.datasets)1 Dictionary (com.bakdata.conquery.models.dictionary.Dictionary)1 Bucket (com.bakdata.conquery.models.events.Bucket)1 MajorTypeId (com.bakdata.conquery.models.events.MajorTypeId)1 ColumnStore (com.bakdata.conquery.models.events.stores.root.ColumnStore)1 JSONException (com.bakdata.conquery.models.exceptions.JSONException)1 IdMutex (com.bakdata.conquery.models.identifiable.IdMutex)1 com.bakdata.conquery.models.identifiable.ids.specific (com.bakdata.conquery.models.identifiable.ids.specific)1 com.bakdata.conquery.models.messages.namespaces.specific (com.bakdata.conquery.models.messages.namespaces.specific)1 PreprocessedData (com.bakdata.conquery.models.preproc.PreprocessedData)1 PreprocessedDictionaries (com.bakdata.conquery.models.preproc.PreprocessedDictionaries)1 PreprocessedHeader (com.bakdata.conquery.models.preproc.PreprocessedHeader)1 PreprocessedReader (com.bakdata.conquery.models.preproc.PreprocessedReader)1