Search in sources :

Example 6 with ColumnStore

use of com.bakdata.conquery.models.events.stores.root.ColumnStore in project conquery by bakdata.

the class PPColumn method findBestType.

/**
 * Delegates to this column's parser to pick a store type and returns that store.
 *
 * <p>The column name and parser are logged at info level before the decision, and the
 * chosen store is logged at debug level afterwards.
 *
 * @return the {@link ColumnStore} returned by {@code parser.findBestType()}
 */
public ColumnStore findBestType() {
    log.info("Compute best Subtype for  Column[{}] with {}", getName(), getParser());

    // NOTE(review): the original comment here read "this only creates the headers";
    // what the parser materialises at this point is not visible from this method — confirm.
    final ColumnStore bestStore = parser.findBestType();

    log.debug("\t{}: {} -> {}", getName(), getParser(), bestStore);

    return bestStore;
}
Also used : ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore)

Example 7 with ColumnStore

use of com.bakdata.conquery.models.events.stores.root.ColumnStore in project conquery by bakdata.

the class Preprocessed method write.

/**
 * Assembles header, dictionaries and data of this preprocessed result and hands them to
 * {@code writePreprocessed} together with the target file.
 *
 * @param file the file passed on to {@code writePreprocessed}
 * @throws IOException declared for the writing step
 */
public void write(File file) throws IOException {
    // Both maps start empty and are handed to calculateEntitySpans to be populated.
    final Int2IntMap starts = new Int2IntAVLTreeMap();
    final Int2IntMap lengths = new Int2IntAVLTreeMap();
    calculateEntitySpans(starts, lengths);

    log.info("Statistics = {}", lengths.values().intStream().summaryStatistics());

    // Stores first, then the primary dictionary, then the dictionaries gathered from the stores.
    final Map<String, ColumnStore> stores = combineStores(starts);
    final Dictionary primary = encodePrimaryDictionary();
    final Map<String, Dictionary> collectedDictionaries = collectDictionaries(stores);

    log.debug("Writing Headers");

    final int validityHash = descriptor.calculateValidityHash(job.getCsvDirectory(), job.getTag());
    final PreprocessedHeader preprocessedHeader =
            new PreprocessedHeader(descriptor.getName(), descriptor.getTable(), rows, columns, validityHash);
    final PreprocessedDictionaries preprocessedDictionaries =
            new PreprocessedDictionaries(primary, collectedDictionaries);
    final PreprocessedData preprocessedData = new PreprocessedData(starts, lengths, stores);

    writePreprocessed(file, preprocessedHeader, preprocessedDictionaries, preprocessedData);
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) IntSummaryStatistics(java.util.IntSummaryStatistics) Int2IntMap(it.unimi.dsi.fastutil.ints.Int2IntMap) Int2IntAVLTreeMap(it.unimi.dsi.fastutil.ints.Int2IntAVLTreeMap)

Example 8 with ColumnStore

use of com.bakdata.conquery.models.events.stores.root.ColumnStore in project conquery by bakdata.

the class SerializationTests method bucketCompoundDateRange.

/**
 * Builds a {@link Bucket} with two DATE columns and one DATE_RANGE column backed by a
 * {@link DateRangeTypeCompound}, registers every involved object in a
 * {@link CentralRegistry} and runs the {@link SerializationTestUtil} test on the bucket.
 */
@Test
public void bucketCompoundDateRange() throws JSONException, IOException {
    final Dataset ds = new Dataset();
    ds.setName("datasetName");

    final Table tbl = new Table();

    final Column start = new Column();
    start.setName("startCol");
    start.setType(MajorTypeId.DATE);
    start.setTable(tbl);

    final Column end = new Column();
    end.setLabel("endLabel");
    end.setName("endCol");
    end.setType(MajorTypeId.DATE);
    end.setTable(tbl);

    final Column compound = new Column();
    compound.setName("compoundCol");
    compound.setType(MajorTypeId.DATE_RANGE);
    compound.setTable(tbl);

    tbl.setColumns(new Column[] { start, end, compound });
    tbl.setDataset(ds);
    tbl.setName("tableName");

    final Import testImport = new Import(tbl);
    testImport.setName("importTest");

    // 0b1000 is the binary representation of 8: only the 4th bit is set,
    // which makes sure that the BitSet length is 4.
    final BitSet fourBits = BitSet.valueOf(new byte[] { 0b1000 });
    final DateRangeTypeCompound compoundStore =
            new DateRangeTypeCompound(start.getName(), end.getName(), new BitSetStore(fourBits, new BitSet(), 4));

    final ColumnStore startStore = new IntegerDateStore(new ShortArrayStore(new short[] { 1, 2, 3, 4 }, Short.MIN_VALUE));
    final ColumnStore endStore = new IntegerDateStore(new ShortArrayStore(new short[] { 5, 6, 7, 8 }, Short.MIN_VALUE));
    final ColumnStore[] allStores = new ColumnStore[] { startStore, endStore, compoundStore };

    final Bucket bucket = new Bucket(0, 1, 4, allStores, Collections.emptySet(), new int[0], new int[0], testImport);
    compoundStore.setParent(bucket);

    final CentralRegistry centralRegistry = new CentralRegistry();
    centralRegistry.register(ds);
    centralRegistry.register(start);
    centralRegistry.register(end);
    centralRegistry.register(compound);
    centralRegistry.register(tbl);
    centralRegistry.register(testImport);
    centralRegistry.register(bucket);

    final Validator validator = Validators.newValidator();

    // Makes the Validator available as an injectable value during the test.
    final Injectable validatorInjection = new Injectable() {

        @Override
        public MutableInjectableValues inject(MutableInjectableValues values) {
            return values.add(Validator.class, validator);
        }
    };

    SerializationTestUtil.forType(Bucket.class)
            .registry(centralRegistry)
            .injectables(validatorInjection)
            .test(bucket);
}
Also used : Injectable(com.bakdata.conquery.io.jackson.Injectable) CQTable(com.bakdata.conquery.apiv1.query.concept.filter.CQTable) Table(com.bakdata.conquery.models.datasets.Table) Import(com.bakdata.conquery.models.datasets.Import) BitSetStore(com.bakdata.conquery.models.events.stores.primitive.BitSetStore) Dataset(com.bakdata.conquery.models.datasets.Dataset) MutableInjectableValues(com.bakdata.conquery.io.jackson.MutableInjectableValues) BitSet(java.util.BitSet) ShortArrayStore(com.bakdata.conquery.models.events.stores.primitive.ShortArrayStore) DateRangeTypeCompound(com.bakdata.conquery.models.events.stores.specific.DateRangeTypeCompound) CentralRegistry(com.bakdata.conquery.models.identifiable.CentralRegistry) ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) Column(com.bakdata.conquery.models.datasets.Column) Bucket(com.bakdata.conquery.models.events.Bucket) IntegerDateStore(com.bakdata.conquery.models.events.stores.primitive.IntegerDateStore) Validator(javax.validation.Validator) Test(org.junit.jupiter.api.Test) IdMapSerialisationTest(com.bakdata.conquery.models.identifiable.IdMapSerialisationTest)

Example 9 with ColumnStore

use of com.bakdata.conquery.models.events.stores.root.ColumnStore in project conquery by bakdata.

the class DateRangeParserTest method onlyClosed.

/**
 * Feeds the parser the ranges {@code CDateRange.of(10, 11)} and {@code CDateRange.exactly(10)}
 * and checks that the decided store is a {@link DateRangeTypeDateRange} whose min store is an
 * {@link IntegerDateStore} wrapping either a {@link ByteArrayStore} or a {@link RebasingStore}.
 */
@Test
public void onlyClosed() {
    final DateRangeParser parser = new DateRangeParser(new ConqueryConfig());
    parser.addLine(CDateRange.of(10, 11));
    parser.addLine(CDateRange.exactly(10));

    final ColumnStore decided = parser.decideType();
    assertThat(decided).isInstanceOf(DateRangeTypeDateRange.class);

    // The casts are only reached once the instance check above has passed.
    final DateRangeTypeDateRange rangeStore = (DateRangeTypeDateRange) decided;
    final IntegerDateStore minStore = (IntegerDateStore) rangeStore.getMinStore();
    assertThat(minStore.getStore()).isInstanceOfAny(ByteArrayStore.class, RebasingStore.class);
}
Also used : ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) ConqueryConfig(com.bakdata.conquery.models.config.ConqueryConfig) IntegerDateStore(com.bakdata.conquery.models.events.stores.primitive.IntegerDateStore) Test(org.junit.jupiter.api.Test)

Example 10 with ColumnStore

use of com.bakdata.conquery.models.events.stores.root.ColumnStore in project conquery by bakdata.

the class ImportJob method selectBucket.

/**
 * Creates the {@link Bucket} with id {@code bucketId} from the import-wide stores.
 * <ul>
 *   <li>remap Entity-Ids to global</li>
 *   <li>calculate per-Entity regions of Bucket (start/end)</li>
 *   <li>split stores</li>
 * </ul>
 *
 * @param localStarts    start position per local entity id; entities without an entry are skipped
 * @param localLengths   length per local entity id; read for every entity present in {@code localStarts}
 * @param stores         the stores to cut the selected regions out of
 * @param primaryMapping used to translate a global entity id to a local one via {@code target2Source};
 *                       a result of {@code -1} means the entity is skipped
 * @param imp            the import passed on to the created bucket
 * @param bucketId       id of the bucket; its first global entity id is {@code bucketSize * bucketId}
 * @param localEntities  not read by this method
 * @return a bucket holding the selected parts of every store, with {@code -1} as start and end
 *         for positions that have no entity
 */
private Bucket selectBucket(Map<Integer, Integer> localStarts, Map<Integer, Integer> localLengths, ColumnStore[] stores, DictionaryMapping primaryMapping, Import imp, int bucketId, List<Integer> localEntities) {
    final int root = bucketSize * bucketId;
    IntList selectionStart = new IntArrayList();
    IntList selectionLength = new IntArrayList();
    IntSet entities = new IntOpenHashSet();
    // First entity of Bucket starts at 0, the following are appended.
    int[] entityStarts = new int[bucketSize];
    int[] entityEnds = new int[bucketSize];
    // -1 marks positions without an entity.
    Arrays.fill(entityEnds, -1);
    Arrays.fill(entityStarts, -1);
    int currentStart = 0;
    for (int position = 0; position < bucketSize; position++) {
        int globalId = root + position;
        int localId = primaryMapping.target2Source(globalId);
        if (localId == -1) {
            continue;
        }
        if (!localStarts.containsKey(localId)) {
            continue;
        }
        entities.add(globalId);
        final int length = localLengths.get(localId);
        selectionStart.add(localStarts.get(localId));
        selectionLength.add(length);
        entityStarts[position] = currentStart;
        entityEnds[position] = currentStart + length;
        currentStart += length;
    }
    // Convert the selection once instead of copying both lists again for every store.
    // NOTE(review): assumes ColumnStore.select does not modify the arrays it is given — confirm.
    final int[] starts = selectionStart.toIntArray();
    final int[] lengths = selectionLength.toIntArray();
    // copy only the parts of the bucket we need
    final ColumnStore[] bucketStores = Arrays.stream(stores)
            .map(store -> store.select(starts, lengths))
            .toArray(ColumnStore[]::new);
    return new Bucket(bucketId, root, selectionLength.intStream().sum(), bucketStores, entities, entityStarts, entityEnds, imp);
}
Also used : IntOpenHashSet(it.unimi.dsi.fastutil.ints.IntOpenHashSet) Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) java.util(java.util) ConqueryConfig(com.bakdata.conquery.models.config.ConqueryConfig) Getter(lombok.Getter) PreprocessedHeader(com.bakdata.conquery.models.preproc.PreprocessedHeader) RequiredArgsConstructor(lombok.RequiredArgsConstructor) PreprocessedData(com.bakdata.conquery.models.preproc.PreprocessedData) com.bakdata.conquery.models.identifiable.ids.specific(com.bakdata.conquery.models.identifiable.ids.specific) NamespaceStorage(com.bakdata.conquery.io.storage.NamespaceStorage) IdMutex(com.bakdata.conquery.models.identifiable.IdMutex) com.bakdata.conquery.models.datasets(com.bakdata.conquery.models.datasets) IntegerStore(com.bakdata.conquery.models.events.stores.root.IntegerStore) WorkerInformation(com.bakdata.conquery.models.worker.WorkerInformation) BadRequestException(javax.ws.rs.BadRequestException) JSONException(com.bakdata.conquery.models.exceptions.JSONException) DictionaryMapping(com.bakdata.conquery.models.dictionary.DictionaryMapping) IntegerParser(com.bakdata.conquery.models.preproc.parser.specific.IntegerParser) PreprocessedReader(com.bakdata.conquery.models.preproc.PreprocessedReader) ResourceUtil(com.bakdata.conquery.util.ResourceUtil) Bucket(com.bakdata.conquery.models.events.Bucket) MajorTypeId(com.bakdata.conquery.models.events.MajorTypeId) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) StringStore(com.bakdata.conquery.models.events.stores.root.StringStore) com.bakdata.conquery.models.messages.namespaces.specific(com.bakdata.conquery.models.messages.namespaces.specific) Collectors(java.util.stream.Collectors) Entity(com.bakdata.conquery.models.query.entity.Entity) ProgressReporter(com.bakdata.conquery.util.progressreporter.ProgressReporter) Slf4j(lombok.extern.slf4j.Slf4j) 
IntList(it.unimi.dsi.fastutil.ints.IntList) Response(javax.ws.rs.core.Response) IntOpenHashSet(it.unimi.dsi.fastutil.ints.IntOpenHashSet) WebApplicationException(javax.ws.rs.WebApplicationException) IntSet(it.unimi.dsi.fastutil.ints.IntSet) PreprocessedDictionaries(com.bakdata.conquery.models.preproc.PreprocessedDictionaries) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) Namespace(com.bakdata.conquery.models.worker.Namespace) InputStream(java.io.InputStream) ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) Bucket(com.bakdata.conquery.models.events.Bucket) IntSet(it.unimi.dsi.fastutil.ints.IntSet) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) IntList(it.unimi.dsi.fastutil.ints.IntList)

Aggregations

ColumnStore (com.bakdata.conquery.models.events.stores.root.ColumnStore)12 StringStore (com.bakdata.conquery.models.events.stores.root.StringStore)6 Dictionary (com.bakdata.conquery.models.dictionary.Dictionary)5 ConqueryConfig (com.bakdata.conquery.models.config.ConqueryConfig)4 IntOpenHashSet (it.unimi.dsi.fastutil.ints.IntOpenHashSet)4 DictionaryMapping (com.bakdata.conquery.models.dictionary.DictionaryMapping)3 Bucket (com.bakdata.conquery.models.events.Bucket)3 MajorTypeId (com.bakdata.conquery.models.events.MajorTypeId)3 Int2IntAVLTreeMap (it.unimi.dsi.fastutil.ints.Int2IntAVLTreeMap)3 IntArrayList (it.unimi.dsi.fastutil.ints.IntArrayList)3 IntList (it.unimi.dsi.fastutil.ints.IntList)3 NamespaceStorage (com.bakdata.conquery.io.storage.NamespaceStorage)2 com.bakdata.conquery.models.datasets (com.bakdata.conquery.models.datasets)2 IntegerDateStore (com.bakdata.conquery.models.events.stores.primitive.IntegerDateStore)2 IntegerStore (com.bakdata.conquery.models.events.stores.root.IntegerStore)2 JSONException (com.bakdata.conquery.models.exceptions.JSONException)2 IdMutex (com.bakdata.conquery.models.identifiable.IdMutex)2 com.bakdata.conquery.models.identifiable.ids.specific (com.bakdata.conquery.models.identifiable.ids.specific)2 com.bakdata.conquery.models.messages.namespaces.specific (com.bakdata.conquery.models.messages.namespaces.specific)2 PreprocessedData (com.bakdata.conquery.models.preproc.PreprocessedData)2