Search in sources :

Example 6 with Entity

use of com.bakdata.conquery.models.query.entity.Entity in project conquery by bakdata.

The method selectBucket of the class ImportJob.

/**
 * Assembles the {@link Bucket} with the given {@code bucketId} out of the stores of an import:
 * <ul>
 *   <li>remaps Entity-Ids to global ids,</li>
 *   <li>calculates the per-Entity regions (start/end) inside the Bucket,</li>
 *   <li>splits the stores so that only the rows of this Bucket are copied.</li>
 * </ul>
 *
 * @param localStarts    first row of each entity in {@code stores}, keyed by local entity id
 * @param localLengths   number of rows of each entity in {@code stores}, keyed by local entity id
 * @param stores         column stores to select this Bucket's rows from
 * @param primaryMapping mapping used to translate a global entity id back to its local id
 * @param imp            the import that is handed to the created Bucket
 * @param bucketId       number of the Bucket; its first global entity id is {@code bucketSize * bucketId}
 * @param localEntities  not used by this method
 * @return the Bucket holding the selected rows; {@code entityStarts}/{@code entityEnds} contain
 *         {@code -1} at every position for which no entity data was selected
 */
private Bucket selectBucket(Map<Integer, Integer> localStarts, Map<Integer, Integer> localLengths, ColumnStore[] stores, DictionaryMapping primaryMapping, Import imp, int bucketId, List<Integer> localEntities) {
    final int root = bucketSize * bucketId;

    final IntList selectionStart = new IntArrayList();
    final IntList selectionLength = new IntArrayList();
    final IntSet entities = new IntOpenHashSet();

    // First entity of Bucket starts at 0, the following are appended.
    // -1 marks positions that have no rows in this Bucket.
    final int[] entityStarts = new int[bucketSize];
    final int[] entityEnds = new int[bucketSize];
    Arrays.fill(entityStarts, -1);
    Arrays.fill(entityEnds, -1);

    int currentStart = 0;
    for (int position = 0; position < bucketSize; position++) {
        final int globalId = root + position;
        final int localId = primaryMapping.target2Source(globalId);

        // Presumably -1 signals that the global id has no local counterpart - confirm against DictionaryMapping.
        if (localId == -1) {
            continue;
        }
        // The entity is known to the mapping but has no rows in these stores.
        if (!localStarts.containsKey(localId)) {
            continue;
        }

        entities.add(globalId);

        final int length = localLengths.get(localId);
        selectionStart.add(localStarts.get(localId));
        selectionLength.add(length);

        entityStarts[position] = currentStart;
        entityEnds[position] = currentStart + length;
        currentStart += length;
    }

    // Materialize the selection once instead of once per store.
    // NOTE(review): assumes ColumnStore#select neither modifies nor retains the passed arrays - confirm.
    final int[] starts = selectionStart.toIntArray();
    final int[] lengths = selectionLength.toIntArray();

    // copy only the parts of the bucket we need
    final ColumnStore[] bucketStores = Arrays.stream(stores)
                                             .map(store -> store.select(starts, lengths))
                                             .toArray(ColumnStore[]::new);

    // currentStart has accumulated every selected length, so it is the total number of events.
    return new Bucket(bucketId, root, currentStart, bucketStores, entities, entityStarts, entityEnds, imp);
}
Also used : IntOpenHashSet(it.unimi.dsi.fastutil.ints.IntOpenHashSet) Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) java.util(java.util) ConqueryConfig(com.bakdata.conquery.models.config.ConqueryConfig) Getter(lombok.Getter) PreprocessedHeader(com.bakdata.conquery.models.preproc.PreprocessedHeader) RequiredArgsConstructor(lombok.RequiredArgsConstructor) PreprocessedData(com.bakdata.conquery.models.preproc.PreprocessedData) com.bakdata.conquery.models.identifiable.ids.specific(com.bakdata.conquery.models.identifiable.ids.specific) NamespaceStorage(com.bakdata.conquery.io.storage.NamespaceStorage) IdMutex(com.bakdata.conquery.models.identifiable.IdMutex) com.bakdata.conquery.models.datasets(com.bakdata.conquery.models.datasets) IntegerStore(com.bakdata.conquery.models.events.stores.root.IntegerStore) WorkerInformation(com.bakdata.conquery.models.worker.WorkerInformation) BadRequestException(javax.ws.rs.BadRequestException) JSONException(com.bakdata.conquery.models.exceptions.JSONException) DictionaryMapping(com.bakdata.conquery.models.dictionary.DictionaryMapping) IntegerParser(com.bakdata.conquery.models.preproc.parser.specific.IntegerParser) PreprocessedReader(com.bakdata.conquery.models.preproc.PreprocessedReader) ResourceUtil(com.bakdata.conquery.util.ResourceUtil) Bucket(com.bakdata.conquery.models.events.Bucket) MajorTypeId(com.bakdata.conquery.models.events.MajorTypeId) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) StringStore(com.bakdata.conquery.models.events.stores.root.StringStore) com.bakdata.conquery.models.messages.namespaces.specific(com.bakdata.conquery.models.messages.namespaces.specific) Collectors(java.util.stream.Collectors) Entity(com.bakdata.conquery.models.query.entity.Entity) ProgressReporter(com.bakdata.conquery.util.progressreporter.ProgressReporter) Slf4j(lombok.extern.slf4j.Slf4j) 
IntList(it.unimi.dsi.fastutil.ints.IntList) Response(javax.ws.rs.core.Response) IntOpenHashSet(it.unimi.dsi.fastutil.ints.IntOpenHashSet) WebApplicationException(javax.ws.rs.WebApplicationException) IntSet(it.unimi.dsi.fastutil.ints.IntSet) PreprocessedDictionaries(com.bakdata.conquery.models.preproc.PreprocessedDictionaries) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) Namespace(com.bakdata.conquery.models.worker.Namespace) InputStream(java.io.InputStream) ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) Bucket(com.bakdata.conquery.models.events.Bucket) IntSet(it.unimi.dsi.fastutil.ints.IntSet) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) IntList(it.unimi.dsi.fastutil.ints.IntList)

Example 7 with Entity

use of com.bakdata.conquery.models.query.entity.Entity in project conquery by bakdata.

The method create of the class BucketManager.

/**
 * Creates a {@link BucketManager} for the given worker and fills it from the storage.
 * <p>
 * Every Bucket found in the storage is registered, even when its bucket number is not among the
 * worker's included buckets; that case is only reported with a warning. Afterwards every stored
 * CBlock is registered.
 *
 * @param worker           worker supplying the included bucket numbers and the job manager
 * @param storage          storage whose Buckets and CBlocks are registered
 * @param entityBucketSize passed through to the created BucketManager
 * @return the populated BucketManager
 */
public static BucketManager create(Worker worker, WorkerStorage storage, int entityBucketSize) {
    // Lookup structures that are filled while walking over the storage.
    final Map<Table, Int2ObjectMap<List<Bucket>>> bucketsByTable = new HashMap<>();
    final Map<Connector, Int2ObjectMap<Map<Bucket, CBlock>>> cBlocksByConnector = new HashMap<>();
    final Int2ObjectMap<Entity> knownEntities = new Int2ObjectAVLTreeMap<>();

    final IntArraySet ownedBucketNumbers = worker.getInfo().getIncludedBuckets();
    log.trace("Trying to load these buckets that map to: {}", ownedBucketNumbers);

    for (Bucket stored : storage.getAllBuckets()) {
        final boolean assignedToWorker = ownedBucketNumbers.contains(stored.getBucket());

        // Foreign buckets are only reported; they are registered like every other bucket.
        if (!assignedToWorker) {
            log.warn("Found Bucket[{}] in Storage that does not belong to this Worker according to the Worker information.", stored.getId());
        }

        registerBucket(stored, knownEntities, bucketsByTable);
    }

    for (CBlock stored : storage.getAllCBlocks()) {
        registerCBlock(stored, cBlocksByConnector);
    }

    return new BucketManager(
            worker.getJobManager(),
            storage,
            worker,
            knownEntities,
            cBlocksByConnector,
            bucketsByTable,
            entityBucketSize
    );
}
Also used : Entity(com.bakdata.conquery.models.query.entity.Entity) ConceptTreeConnector(com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeConnector) Connector(com.bakdata.conquery.models.datasets.concepts.Connector) Table(com.bakdata.conquery.models.datasets.Table) HashMap(java.util.HashMap) Int2ObjectAVLTreeMap(it.unimi.dsi.fastutil.ints.Int2ObjectAVLTreeMap) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) IntArraySet(it.unimi.dsi.fastutil.ints.IntArraySet)

Aggregations

Entity (com.bakdata.conquery.models.query.entity.Entity)7 ArrayList (java.util.ArrayList)4 List (java.util.List)4 Getter (lombok.Getter)4 CDateSet (com.bakdata.conquery.models.common.CDateSet)3 Table (com.bakdata.conquery.models.datasets.Table)3 QueryExecutionContext (com.bakdata.conquery.models.query.QueryExecutionContext)3 Aggregator (com.bakdata.conquery.models.query.queryplan.aggregators.Aggregator)3 MultilineEntityResult (com.bakdata.conquery.models.query.results.MultilineEntityResult)3 Optional (java.util.Optional)3 Connector (com.bakdata.conquery.models.datasets.concepts.Connector)2 ConceptTreeConnector (com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeConnector)2 Bucket (com.bakdata.conquery.models.events.Bucket)2 DateContext (com.bakdata.conquery.models.forms.util.DateContext)2 ResultModifier (com.bakdata.conquery.models.forms.util.ResultModifier)2 IdMutex (com.bakdata.conquery.models.identifiable.IdMutex)2 ArrayConceptQueryPlan (com.bakdata.conquery.models.query.queryplan.ArrayConceptQueryPlan)2 QueryPlan (com.bakdata.conquery.models.query.queryplan.QueryPlan)2 SinglelineEntityResult (com.bakdata.conquery.models.query.results.SinglelineEntityResult)2 QueryUtils (com.bakdata.conquery.util.QueryUtils)2