Search in sources :

Example 6 with Bucket

use of com.bakdata.conquery.models.events.Bucket in project conquery by bakdata.

the class SerializationTests method bucketCompoundDateRange.

/**
 * Runs the {@code SerializationTestUtil} check on a {@link Bucket} whose third store is a
 * {@link DateRangeTypeCompound} that refers to the start and end columns by name.
 */
@Test
public void bucketCompoundDateRange() throws JSONException, IOException {
    // --- dataset / table / columns -------------------------------------------------
    final Dataset ds = new Dataset();
    ds.setName("datasetName");

    final Table tbl = new Table();

    final Column start = new Column();
    start.setName("startCol");
    start.setType(MajorTypeId.DATE);
    start.setTable(tbl);

    final Column end = new Column();
    end.setLabel("endLabel");
    end.setName("endCol");
    end.setType(MajorTypeId.DATE);
    end.setTable(tbl);

    final Column compound = new Column();
    compound.setName("compoundCol");
    compound.setType(MajorTypeId.DATE_RANGE);
    compound.setTable(tbl);

    tbl.setColumns(new Column[] { start, end, compound });
    tbl.setDataset(ds);
    tbl.setName("tableName");

    final Import importUnderTest = new Import(tbl);
    importUnderTest.setName("importTest");

    // --- stores --------------------------------------------------------------------
    // 0b1000 == 8: only bit index 3 is set, so BitSet#length() reports 4,
    // matching the four rows held by the start/end stores below.
    final BitSet fourthBitSet = BitSet.valueOf(new byte[] { 0b1000 });
    final BitSet noBitsSet = new BitSet();
    final DateRangeTypeCompound rangeStore = new DateRangeTypeCompound(start.getName(), end.getName(), new BitSetStore(fourthBitSet, noBitsSet, 4));

    final ColumnStore startDates = new IntegerDateStore(new ShortArrayStore(new short[] { 1, 2, 3, 4 }, Short.MIN_VALUE));
    final ColumnStore endDates = new IntegerDateStore(new ShortArrayStore(new short[] { 5, 6, 7, 8 }, Short.MIN_VALUE));

    // Store order mirrors the column order registered on the table.
    final ColumnStore[] allStores = new ColumnStore[] { startDates, endDates, rangeStore };
    final Bucket bucket = new Bucket(0, 1, 4, allStores, Collections.emptySet(), new int[0], new int[0], importUnderTest);
    // The compound store is handed its owning bucket after construction.
    rangeStore.setParent(bucket);

    // --- registry: everything the bucket references --------------------------------
    final CentralRegistry centralRegistry = new CentralRegistry();
    centralRegistry.register(ds);
    centralRegistry.register(start);
    centralRegistry.register(end);
    centralRegistry.register(compound);
    centralRegistry.register(tbl);
    centralRegistry.register(importUnderTest);
    centralRegistry.register(bucket);

    // --- serialization check with an injected Validator ----------------------------
    final Validator validator = Validators.newValidator();
    final Injectable validatorInjectable = new Injectable() {

        @Override
        public MutableInjectableValues inject(MutableInjectableValues values) {
            return values.add(Validator.class, validator);
        }
    };

    SerializationTestUtil.forType(Bucket.class)
            .registry(centralRegistry)
            .injectables(validatorInjectable)
            .test(bucket);
}
Also used : Injectable(com.bakdata.conquery.io.jackson.Injectable) CQTable(com.bakdata.conquery.apiv1.query.concept.filter.CQTable) Table(com.bakdata.conquery.models.datasets.Table) Import(com.bakdata.conquery.models.datasets.Import) BitSetStore(com.bakdata.conquery.models.events.stores.primitive.BitSetStore) Dataset(com.bakdata.conquery.models.datasets.Dataset) MutableInjectableValues(com.bakdata.conquery.io.jackson.MutableInjectableValues) BitSet(java.util.BitSet) ShortArrayStore(com.bakdata.conquery.models.events.stores.primitive.ShortArrayStore) DateRangeTypeCompound(com.bakdata.conquery.models.events.stores.specific.DateRangeTypeCompound) CentralRegistry(com.bakdata.conquery.models.identifiable.CentralRegistry) ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) Column(com.bakdata.conquery.models.datasets.Column) Bucket(com.bakdata.conquery.models.events.Bucket) IntegerDateStore(com.bakdata.conquery.models.events.stores.primitive.IntegerDateStore) Validator(javax.validation.Validator) Test(org.junit.jupiter.api.Test) IdMapSerialisationTest(com.bakdata.conquery.models.identifiable.IdMapSerialisationTest)

Example 7 with Bucket

use of com.bakdata.conquery.models.events.Bucket in project conquery by bakdata.

the class ImportJob method selectBucket.

/**
 * Assembles the {@link Bucket} with id {@code bucketId} from the given stores.
 * <ul>
 * <li>remap Entity-Ids to global</li>
 * <li>calculate per-Entity regions of the Bucket (start/end)</li>
 * <li>split stores</li>
 * </ul>
 *
 * @param localStarts    start offset in {@code stores}, keyed by local entity id
 * @param localLengths   number of rows in {@code stores}, keyed by local entity id; must contain
 *                       an entry for every key of {@code localStarts} that is selected here
 * @param stores         the stores to cut the bucket's slices out of
 * @param primaryMapping used via {@code target2Source} to translate a global entity id into a
 *                       local one; ids for which it yields {@code -1} are skipped
 * @param imp            the import passed on to the created bucket
 * @param bucketId       id of the bucket; it covers global ids
 *                       {@code [bucketSize * bucketId, bucketSize * (bucketId + 1))}
 * @param localEntities  not read by this method
 * @return the bucket holding the selected slices of every store
 */
private Bucket selectBucket(Map<Integer, Integer> localStarts, Map<Integer, Integer> localLengths, ColumnStore[] stores, DictionaryMapping primaryMapping, Import imp, int bucketId, List<Integer> localEntities) {
    final int root = bucketSize * bucketId;
    IntList selectionStart = new IntArrayList();
    IntList selectionLength = new IntArrayList();
    IntSet entities = new IntOpenHashSet();
    // First entity of Bucket starts at 0, the following are appended.
    // Positions without data keep -1 in both arrays.
    int[] entityStarts = new int[bucketSize];
    int[] entityEnds = new int[bucketSize];
    Arrays.fill(entityEnds, -1);
    Arrays.fill(entityStarts, -1);
    // Running offset inside the new bucket; after the loop it equals the sum of all selected lengths.
    int currentStart = 0;
    for (int position = 0; position < bucketSize; position++) {
        int globalId = root + position;
        int localId = primaryMapping.target2Source(globalId);
        if (localId == -1) {
            continue;
        }
        // Single lookup instead of containsKey + get.
        final Integer localStart = localStarts.get(localId);
        if (localStart == null) {
            continue;
        }
        entities.add(globalId);
        final int length = localLengths.get(localId);
        selectionStart.add(localStart.intValue());
        selectionLength.add(length);
        entityStarts[position] = currentStart;
        entityEnds[position] = currentStart + length;
        currentStart += length;
    }
    // Materialize the selection once; it is identical for every store.
    final int[] starts = selectionStart.toIntArray();
    final int[] lengths = selectionLength.toIntArray();
    // copy only the parts of the bucket we need
    final ColumnStore[] bucketStores = Arrays.stream(stores).map(store -> store.select(starts, lengths)).toArray(ColumnStore[]::new);
    return new Bucket(bucketId, root, currentStart, bucketStores, entities, entityStarts, entityEnds, imp);
}
Also used : IntOpenHashSet(it.unimi.dsi.fastutil.ints.IntOpenHashSet) Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) java.util(java.util) ConqueryConfig(com.bakdata.conquery.models.config.ConqueryConfig) Getter(lombok.Getter) PreprocessedHeader(com.bakdata.conquery.models.preproc.PreprocessedHeader) RequiredArgsConstructor(lombok.RequiredArgsConstructor) PreprocessedData(com.bakdata.conquery.models.preproc.PreprocessedData) com.bakdata.conquery.models.identifiable.ids.specific(com.bakdata.conquery.models.identifiable.ids.specific) NamespaceStorage(com.bakdata.conquery.io.storage.NamespaceStorage) IdMutex(com.bakdata.conquery.models.identifiable.IdMutex) com.bakdata.conquery.models.datasets(com.bakdata.conquery.models.datasets) IntegerStore(com.bakdata.conquery.models.events.stores.root.IntegerStore) WorkerInformation(com.bakdata.conquery.models.worker.WorkerInformation) BadRequestException(javax.ws.rs.BadRequestException) JSONException(com.bakdata.conquery.models.exceptions.JSONException) DictionaryMapping(com.bakdata.conquery.models.dictionary.DictionaryMapping) IntegerParser(com.bakdata.conquery.models.preproc.parser.specific.IntegerParser) PreprocessedReader(com.bakdata.conquery.models.preproc.PreprocessedReader) ResourceUtil(com.bakdata.conquery.util.ResourceUtil) Bucket(com.bakdata.conquery.models.events.Bucket) MajorTypeId(com.bakdata.conquery.models.events.MajorTypeId) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) StringStore(com.bakdata.conquery.models.events.stores.root.StringStore) com.bakdata.conquery.models.messages.namespaces.specific(com.bakdata.conquery.models.messages.namespaces.specific) Collectors(java.util.stream.Collectors) Entity(com.bakdata.conquery.models.query.entity.Entity) ProgressReporter(com.bakdata.conquery.util.progressreporter.ProgressReporter) Slf4j(lombok.extern.slf4j.Slf4j) 
IntList(it.unimi.dsi.fastutil.ints.IntList) Response(javax.ws.rs.core.Response) IntOpenHashSet(it.unimi.dsi.fastutil.ints.IntOpenHashSet) WebApplicationException(javax.ws.rs.WebApplicationException) IntSet(it.unimi.dsi.fastutil.ints.IntSet) PreprocessedDictionaries(com.bakdata.conquery.models.preproc.PreprocessedDictionaries) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) Namespace(com.bakdata.conquery.models.worker.Namespace) InputStream(java.io.InputStream) ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) Bucket(com.bakdata.conquery.models.events.Bucket) IntSet(it.unimi.dsi.fastutil.ints.IntSet) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) IntList(it.unimi.dsi.fastutil.ints.IntList)

Aggregations

Bucket (com.bakdata.conquery.models.events.Bucket)7 Table (com.bakdata.conquery.models.datasets.Table)4 ColumnStore (com.bakdata.conquery.models.events.stores.root.ColumnStore)3 Entity (com.bakdata.conquery.models.query.entity.Entity)3 NamespaceStorage (com.bakdata.conquery.io.storage.NamespaceStorage)2 ConqueryConfig (com.bakdata.conquery.models.config.ConqueryConfig)2 com.bakdata.conquery.models.datasets (com.bakdata.conquery.models.datasets)2 Column (com.bakdata.conquery.models.datasets.Column)2 Dictionary (com.bakdata.conquery.models.dictionary.Dictionary)2 DictionaryMapping (com.bakdata.conquery.models.dictionary.DictionaryMapping)2 MajorTypeId (com.bakdata.conquery.models.events.MajorTypeId)2 IntegerStore (com.bakdata.conquery.models.events.stores.root.IntegerStore)2 StringStore (com.bakdata.conquery.models.events.stores.root.StringStore)2 JSONException (com.bakdata.conquery.models.exceptions.JSONException)2 IdMutex (com.bakdata.conquery.models.identifiable.IdMutex)2 com.bakdata.conquery.models.identifiable.ids.specific (com.bakdata.conquery.models.identifiable.ids.specific)2 com.bakdata.conquery.models.messages.namespaces.specific (com.bakdata.conquery.models.messages.namespaces.specific)2 PreprocessedData (com.bakdata.conquery.models.preproc.PreprocessedData)2 PreprocessedDictionaries (com.bakdata.conquery.models.preproc.PreprocessedDictionaries)2 PreprocessedHeader (com.bakdata.conquery.models.preproc.PreprocessedHeader)2