Search in sources :

Example 1 with ColumnValues

Use of com.bakdata.conquery.models.preproc.parser.ColumnValues in project conquery by bakdata.

The method combineStores of the class Preprocessed.

/**
 * Creates one {@link ColumnStore} per column and fills it with the raw row values,
 * laid out so that all rows of an entity are written to consecutive output indices.
 *
 * <p>Columns without collected values (a {@code null} slot in {@code values}) are left
 * exactly as {@code PPColumn::findBestType} produced them.
 *
 * @param entityStart maps each entity to the first output index at which its rows are written
 * @return the populated stores, keyed by column name
 */
@SuppressWarnings("rawtypes")
private Map<String, ColumnStore> combineStores(Int2IntMap entityStart) {
    // Choose a store per column; done in parallel, one task per column.
    Map<String, ColumnStore> storesByName = Arrays.stream(columns)
            .parallel()
            .collect(Collectors.toMap(PPColumn::getName, PPColumn::findBestType));

    // Group input row positions by entity, keeping their input order. This object can be huge!
    Int2ObjectMap<IntList> rowsByEntity = new Int2ObjectOpenHashMap<>(entityStart.size());
    final int rowCount = rowEntities.size();
    for (int row = 0; row < rowCount; row++) {
        final IntList bucket = rowsByEntity.computeIfAbsent(rowEntities.getInt(row), (unused) -> new IntArrayList());
        bucket.add(row);
    }

    for (int column = 0; column < columns.length; column++) {
        final ColumnValues rawValues = values[column];
        if (rawValues == null) {
            // Nothing was collected for this column (likely backed by a compound ColumnStore), so there is nothing to copy.
            continue;
        }

        final PPColumn descriptor = columns[column];
        final ColumnStore target = storesByName.get(descriptor.getName());

        for (Int2IntMap.Entry range : entityStart.int2IntEntrySet()) {
            final int entityId = range.getIntKey();
            // Running write position inside the target store for this entity.
            int writeAt = range.getIntValue();
            final IntList rows = rowsByEntity.getOrDefault(entityId, IntLists.emptyList());

            for (int readAt : rows) {
                if (!rawValues.isNull(readAt)) {
                    descriptor.getParser().setValue(target, writeAt, rawValues.get(readAt));
                }
                else {
                    target.setNull(writeAt);
                }
                writeAt++;
            }
        }
    }

    return storesByName;
}
Also used : Arrays(java.util.Arrays) Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) ConqueryConfig(com.bakdata.conquery.models.config.ConqueryConfig) Int2IntMap(it.unimi.dsi.fastutil.ints.Int2IntMap) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) HashMap(java.util.HashMap) Int2IntAVLTreeMap(it.unimi.dsi.fastutil.ints.Int2IntAVLTreeMap) MapTypeGuesser(com.bakdata.conquery.models.preproc.parser.specific.string.MapTypeGuesser) Map(java.util.Map) Parser(com.bakdata.conquery.models.preproc.parser.Parser) IntLists(it.unimi.dsi.fastutil.ints.IntLists) OutputStream(java.io.OutputStream) StringTypeEncoded(com.bakdata.conquery.models.events.stores.specific.string.StringTypeEncoded) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) MajorTypeId(com.bakdata.conquery.models.events.MajorTypeId) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) StringParser(com.bakdata.conquery.models.preproc.parser.specific.StringParser) StringStore(com.bakdata.conquery.models.events.stores.root.StringStore) Collectors(java.util.stream.Collectors) File(java.io.File) Slf4j(lombok.extern.slf4j.Slf4j) IntList(it.unimi.dsi.fastutil.ints.IntList) ColumnValues(com.bakdata.conquery.models.preproc.parser.ColumnValues) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) Data(lombok.Data) IntSummaryStatistics(java.util.IntSummaryStatistics) GZIPOutputStream(java.util.zip.GZIPOutputStream) Jackson(com.bakdata.conquery.io.jackson.Jackson) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) ColumnValues(com.bakdata.conquery.models.preproc.parser.ColumnValues) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) IntList(it.unimi.dsi.fastutil.ints.IntList)

Aggregations

Jackson (com.bakdata.conquery.io.jackson.Jackson)1 ConqueryConfig (com.bakdata.conquery.models.config.ConqueryConfig)1 Dictionary (com.bakdata.conquery.models.dictionary.Dictionary)1 MajorTypeId (com.bakdata.conquery.models.events.MajorTypeId)1 ColumnStore (com.bakdata.conquery.models.events.stores.root.ColumnStore)1 StringStore (com.bakdata.conquery.models.events.stores.root.StringStore)1 StringTypeEncoded (com.bakdata.conquery.models.events.stores.specific.string.StringTypeEncoded)1 ColumnValues (com.bakdata.conquery.models.preproc.parser.ColumnValues)1 Parser (com.bakdata.conquery.models.preproc.parser.Parser)1 StringParser (com.bakdata.conquery.models.preproc.parser.specific.StringParser)1 MapTypeGuesser (com.bakdata.conquery.models.preproc.parser.specific.string.MapTypeGuesser)1 JsonGenerator (com.fasterxml.jackson.core.JsonGenerator)1 Int2IntAVLTreeMap (it.unimi.dsi.fastutil.ints.Int2IntAVLTreeMap)1 Int2IntMap (it.unimi.dsi.fastutil.ints.Int2IntMap)1 Int2ObjectMap (it.unimi.dsi.fastutil.ints.Int2ObjectMap)1 Int2ObjectOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap)1 IntArrayList (it.unimi.dsi.fastutil.ints.IntArrayList)1 IntList (it.unimi.dsi.fastutil.ints.IntList)1 IntLists (it.unimi.dsi.fastutil.ints.IntLists)1 File (java.io.File)1