
Example 1 with MessageBus

Use of io.questdb.MessageBus in project questdb by bluestreak01.

The class GroupByNotKeyedVectorRecordCursorFactory, method getCursor:

@Override
public RecordCursor getCursor(SqlExecutionContext executionContext) throws SqlException {
    final MessageBus bus = executionContext.getMessageBus();
    final PageFrameCursor cursor = base.getPageFrameCursor(executionContext);
    final int vafCount = vafList.size();
    // clear state of aggregate functions
    for (int i = 0; i < vafCount; i++) {
        vafList.getQuick(i).clear();
    }
    final RingQueue<VectorAggregateTask> queue = bus.getVectorAggregateQueue();
    final Sequence pubSeq = bus.getVectorAggregatePubSeq();
    this.entryPool.clear();
    this.activeEntries.clear();
    int queuedCount = 0;
    int ownCount = 0;
    int reclaimed = 0;
    int total = 0;
    doneLatch.reset();
    // check whether this is executing via the worker pool
    final Thread thread = Thread.currentThread();
    final int workerId;
    if (thread instanceof Worker) {
        workerId = ((Worker) thread).getWorkerId();
    } else {
        workerId = 0;
    }
    PageFrame frame;
    while ((frame = cursor.next()) != null) {
        for (int i = 0; i < vafCount; i++) {
            final VectorAggregateFunction vaf = vafList.getQuick(i);
            final int columnIndex = vaf.getColumnIndex();
            // Functions like `count()` take no arguments but still need the row count,
            // which is derived as "pageSize >> shr". Since such a `vaf` has no column of its
            // own, this code falls back to column 0 and assumes that column 0 is fixed-size.
            // The assumption holds only because of our aggressive "top down columns" algorithm,
            // i.e. the algorithm that forces the page frame to provide only the columns required
            // by the SELECT. At the time of writing there is no way to return a variable-length
            // column out of a non-keyed aggregation query. This might change if we introduce
            // something like `first(string)`; when it does, we will need to rethink how the size
            // for the count is computed, either by type-checking column 0 and working out its
            // size differently, or by finding any fixed-size column and using that instead.
            final long pageAddress = columnIndex > -1 ? frame.getPageAddress(columnIndex) : 0;
            final long pageSize = columnIndex > -1 ? frame.getPageSize(columnIndex) : frame.getPageSize(0);
            final int colSizeShr = columnIndex > -1 ? frame.getColumnShiftBits(columnIndex) : frame.getColumnShiftBits(0);
            long seq = pubSeq.next();
            if (seq < 0) {
                // the queue is full: run the function on the current thread instead.
                // The vaf needs to know which column it is hitting in the frame and
                // must keep aggregating across frames until all frames are done.
                vaf.aggregate(pageAddress, pageSize, colSizeShr, workerId);
                ownCount++;
            } else {
                final VectorAggregateEntry entry = entryPool.next();
                // null pRosti means that we do not need keyed aggregation
                entry.of(queuedCount++, vaf, null, 0, pageAddress, pageSize, colSizeShr, doneLatch);
                activeEntries.add(entry);
                queue.get(seq).entry = entry;
                pubSeq.done(seq);
            }
            total++;
        }
    }
    // All frames are published; now help consume the queue. But how do we know when we
    // are done? If we simply drain the queue we may pick up aggregation tasks that belong
    // to other executions (we work in a concurrent environment). To deal with that we keep
    // our own checklist of entries and work through it,
    // starting from the back to reduce the chance of clashing with the workers.
    reclaimed = getRunWhatsLeft(queuedCount, reclaimed, workerId, activeEntries, doneLatch, LOG);
    LOG.info().$("done [total=").$(total).$(", ownCount=").$(ownCount).$(", reclaimed=").$(reclaimed).$(", queuedCount=").$(queuedCount).$(']').$();
    return this.cursor.of(cursor);
}
Also used: VectorAggregateTask(io.questdb.tasks.VectorAggregateTask) MessageBus(io.questdb.MessageBus) Worker(io.questdb.mp.Worker) Sequence(io.questdb.mp.Sequence)
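
The pattern to note here is publish-or-run-inline: when pubSeq.next() returns a negative sequence the queue has no free slot, so the producer runs the aggregation on its own thread rather than blocking, and a latch accounts for every task however it was executed. Below is a minimal, self-contained sketch of the same shape built only on java.util.concurrent; it is not QuestDB's mp machinery, and PublishOrRunInline, Task, and the queue capacity are illustrative.

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CountDownLatch;

public final class PublishOrRunInline {

    // A task is the work to run plus the latch to count down when finished.
    record Task(Runnable work, CountDownLatch done) {}

    public static void main(String[] args) throws InterruptedException {
        // Bounded queue stands in for the RingQueue; a failed offer() plays
        // the role of pubSeq.next() returning a negative sequence.
        BlockingQueue<Task> queue = new ArrayBlockingQueue<>(2);

        // One background "worker" draining the queue.
        Thread worker = new Thread(() -> {
            try {
                while (true) {
                    Task t = queue.take();
                    t.work().run();
                    t.done().countDown();
                }
            } catch (InterruptedException ignore) {
            }
        });
        worker.setDaemon(true);
        worker.start();

        int taskCount = 8;
        CountDownLatch done = new CountDownLatch(taskCount);
        int ownCount = 0;
        int queuedCount = 0;
        for (int i = 0; i < taskCount; i++) {
            int n = i;
            Runnable work = () -> System.out.println("aggregated frame " + n);
            if (queue.offer(new Task(work, done))) {
                queuedCount++;      // a worker will pick it up
            } else {
                work.run();         // queue full: do the work on this thread
                done.countDown();
                ownCount++;
            }
        }
        done.await();
        System.out.println("own=" + ownCount + ", queued=" + queuedCount);
    }
}

The design choice is that backpressure never stalls the producer: a full queue simply converts the producer into another worker, which is why the QuestDB code above tallies ownCount alongside queuedCount.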

Example 2 with MessageBus

Use of io.questdb.MessageBus in project questdb by bluestreak01.

The class GroupByRecordCursorFactory, method getCursor:

@Override
public RecordCursor getCursor(SqlExecutionContext executionContext) throws SqlException {
    // clear maps
    for (int i = 0, n = pRosti.length; i < n; i++) {
        Rosti.clear(pRosti[i]);
    }
    final MessageBus bus = executionContext.getMessageBus();
    final PageFrameCursor cursor = base.getPageFrameCursor(executionContext);
    final int vafCount = vafList.size();
    // clear state of aggregate functions
    for (int i = 0; i < vafCount; i++) {
        vafList.getQuick(i).clear();
    }
    final RingQueue<VectorAggregateTask> queue = bus.getVectorAggregateQueue();
    final Sequence pubSeq = bus.getVectorAggregatePubSeq();
    this.entryPool.clear();
    this.activeEntries.clear();
    int queuedCount = 0;
    int ownCount = 0;
    int reclaimed = 0;
    int total = 0;
    doneLatch.reset();
    // check whether this is executing via the worker pool
    final Thread thread = Thread.currentThread();
    final int workerId;
    if (thread instanceof Worker) {
        workerId = ((Worker) thread).getWorkerId();
    } else {
        workerId = 0;
    }
    PageFrame frame;
    while ((frame = cursor.next()) != null) {
        final long keyAddress = frame.getPageAddress(keyColumnIndex);
        for (int i = 0; i < vafCount; i++) {
            final VectorAggregateFunction vaf = vafList.getQuick(i);
            // when the column index is -1 we assume that the vector function has no value
            // argument and can only derive the count from the memory size
            final int columnIndex = vaf.getColumnIndex();
            // Functions like `count()` take no arguments but still need the row count,
            // which is derived as "pageSize >> shr". Since such a `vaf` has no column of its
            // own, this code falls back to column 0 and assumes that column 0 is fixed-size.
            // The assumption holds only because of our aggressive "top down columns" algorithm,
            // i.e. the algorithm that forces the page frame to provide only the columns required
            // by the SELECT. At the time of writing there is no way to return a variable-length
            // column out of a non-keyed aggregation query. This might change if we introduce
            // something like `first(string)`; when it does, we will need to rethink how the size
            // for the count is computed, either by type-checking column 0 and working out its
            // size differently, or by finding any fixed-size column and using that instead.
            final long valueAddress = columnIndex > -1 ? frame.getPageAddress(columnIndex) : 0;
            final int pageColIndex = columnIndex > -1 ? columnIndex : 0;
            final int columnSizeShr = frame.getColumnShiftBits(pageColIndex);
            final long valueAddressSize = frame.getPageSize(pageColIndex);
            long seq = pubSeq.next();
            if (seq < 0) {
                if (keyAddress == 0) {
                    vaf.aggregate(valueAddress, valueAddressSize, columnSizeShr, workerId);
                } else {
                    vaf.aggregate(pRosti[workerId], keyAddress, valueAddress, valueAddressSize, columnSizeShr, workerId);
                }
                ownCount++;
            } else {
                if (keyAddress != 0 || valueAddress != 0) {
                    final VectorAggregateEntry entry = entryPool.next();
                    if (keyAddress == 0) {
                        entry.of(queuedCount++, vaf, null, 0, valueAddress, valueAddressSize, columnSizeShr, doneLatch);
                    } else {
                        entry.of(queuedCount++, vaf, pRosti, keyAddress, valueAddress, valueAddressSize, columnSizeShr, doneLatch);
                    }
                    activeEntries.add(entry);
                    queue.get(seq).entry = entry;
                    pubSeq.done(seq);
                }
            }
            total++;
        }
    }
    // All frames are published; now help consume the queue. But how do we know when we
    // are done? If we simply drain the queue we may pick up aggregation tasks that belong
    // to other executions (we work in a concurrent environment). To deal with that we keep
    // our own checklist of entries and work through it,
    // starting from the back to reduce the chance of clashing with the workers.
    reclaimed = GroupByNotKeyedVectorRecordCursorFactory.getRunWhatsLeft(queuedCount, reclaimed, workerId, activeEntries, doneLatch, LOG);
    long pRosti0 = pRosti[0];
    if (pRosti.length > 1) {
        LOG.debug().$("merging").$();
        for (int j = 0; j < vafCount; j++) {
            final VectorAggregateFunction vaf = vafList.getQuick(j);
            for (int i = 1, n = pRosti.length; i < n; i++) {
                vaf.merge(pRosti0, pRosti[i]);
            }
            vaf.wrapUp(pRosti0);
        }
    } else {
        for (int j = 0; j < vafCount; j++) {
            vafList.getQuick(j).wrapUp(pRosti0);
        }
    }
    LOG.info().$("done [total=").$(total).$(", ownCount=").$(ownCount).$(", reclaimed=").$(reclaimed).$(", queuedCount=").$(queuedCount).$(']').$();
    return this.cursor.of(cursor);
}
Also used: VectorAggregateTask(io.questdb.tasks.VectorAggregateTask) MessageBus(io.questdb.MessageBus) Worker(io.questdb.mp.Worker) Sequence(io.questdb.mp.Sequence)
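
This keyed variant differs from Example 1 in its epilogue: each worker accumulated into its own native map (one pRosti per slot), so once the queue is drained the partials in slots 1..n-1 are merged into slot 0 and wrapUp finalizes the result there. A minimal sketch of that merge step, with plain HashMap counters standing in for the native Rosti maps; MergePartials and the sample keys are made up for illustration.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public final class MergePartials {
    public static void main(String[] args) {
        int workerCount = 4;

        // One partial result per worker, the analogue of one pRosti per slot.
        List<Map<String, Long>> partials = new ArrayList<>();
        for (int i = 0; i < workerCount; i++) {
            partials.add(new HashMap<>());
        }

        // Pretend the parallel phase ran: each worker counted some keys.
        for (int w = 0; w < workerCount; w++) {
            partials.get(w).merge("key" + (w % 2), 1L, Long::sum);
        }

        // Merge slots 1..n-1 into slot 0, mirroring vaf.merge(pRosti0, pRosti[i]).
        Map<String, Long> result = partials.get(0);
        for (int i = 1; i < workerCount; i++) {
            for (Map.Entry<String, Long> e : partials.get(i).entrySet()) {
                result.merge(e.getKey(), e.getValue(), Long::sum);
            }
        }
        // wrapUp(pRosti0) would finalize here, e.g. turn sums into averages.
        System.out.println(result); // prints {key0=2, key1=2} (order may vary)
    }
}

Merging per-worker partials avoids any locking on the hot path: workers only ever write to their own slot, and the single-threaded merge happens once, after the latch is released.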

Example 3 with MessageBus

Use of io.questdb.MessageBus in project questdb by bluestreak01.

The class LatestByAllIndexedRecordCursor, method buildTreeMap:

@Override
protected void buildTreeMap(SqlExecutionContext executionContext) throws SqlException {
    final MessageBus bus = executionContext.getMessageBus();
    final RingQueue<LatestByTask> queue = bus.getLatestByQueue();
    final Sequence pubSeq = bus.getLatestByPubSeq();
    final Sequence subSeq = bus.getLatestBySubSeq();
    int keyCount = getSymbolTable(columnIndex).size() + 1;
    rows.extend(keyCount);
    GeoHashNative.iota(rows.getAddress(), rows.getCapacity(), 0);
    final int workerCount = executionContext.getWorkerCount();
    final long chunkSize = (keyCount + workerCount - 1) / workerCount;
    final int taskCount = (int) ((keyCount + chunkSize - 1) / chunkSize);
    final long argumentsAddress = LatestByArguments.allocateMemoryArray(taskCount);
    for (long i = 0; i < taskCount; ++i) {
        final long klo = i * chunkSize;
        final long khi = Long.min(klo + chunkSize, keyCount);
        final long argsAddress = argumentsAddress + i * LatestByArguments.MEMORY_SIZE;
        LatestByArguments.setRowsAddress(argsAddress, rows.getAddress());
        LatestByArguments.setRowsCapacity(argsAddress, rows.getCapacity());
        LatestByArguments.setKeyLo(argsAddress, klo);
        LatestByArguments.setKeyHi(argsAddress, khi);
        LatestByArguments.setRowsSize(argsAddress, 0);
    }
    int hashColumnIndex = -1;
    int hashColumnType = ColumnType.UNDEFINED;
    long prefixesAddress = 0;
    long prefixesCount = 0;
    if (this.prefixes.size() > 2) {
        hashColumnIndex = (int) prefixes.get(0);
        hashColumnType = (int) prefixes.get(1);
        prefixesAddress = prefixes.getAddress() + 2 * Long.BYTES;
        prefixesCount = prefixes.size() - 2;
    }
    DataFrame frame;
    // frame metadata is based on TableReader, which carries the "full" metadata;
    // this cursor works with a subset of the columns, which warrants a column index remap
    int frameColumnIndex = columnIndexes.getQuick(columnIndex);
    final TableReader reader = this.dataFrameCursor.getTableReader();
    long foundRowCount = 0;
    while ((frame = this.dataFrameCursor.next()) != null && foundRowCount < keyCount) {
        doneLatch.reset();
        final BitmapIndexReader indexReader = frame.getBitmapIndexReader(frameColumnIndex, BitmapIndexReader.DIR_BACKWARD);
        final long rowLo = frame.getRowLo();
        final long rowHi = frame.getRowHi() - 1;
        final long keyBaseAddress = indexReader.getKeyBaseAddress();
        final long keysMemorySize = indexReader.getKeyMemorySize();
        final long valueBaseAddress = indexReader.getValueBaseAddress();
        final long valuesMemorySize = indexReader.getValueMemorySize();
        final int valueBlockCapacity = indexReader.getValueBlockCapacity();
        final long unIndexedNullCount = indexReader.getUnIndexedNullCount();
        final int partitionIndex = frame.getPartitionIndex();
        long hashColumnAddress = 0;
        // hashColumnIndex can be -1 when only the LATEST BY part is present (no prefixes to match)
        if (hashColumnIndex > -1) {
            final int columnBase = reader.getColumnBase(partitionIndex);
            final int primaryColumnIndex = TableReader.getPrimaryColumnIndex(columnBase, hashColumnIndex);
            final MemoryR column = reader.getColumn(primaryColumnIndex);
            hashColumnAddress = column.getPageAddress(0);
        }
        // -1 must be an unreachable case here
        final int hashesColumnSize = ColumnType.isGeoHash(hashColumnType) ? getPow2SizeOfGeoHashType(hashColumnType) : -1;
        int queuedCount = 0;
        for (long i = 0; i < taskCount; ++i) {
            final long argsAddress = argumentsAddress + i * LatestByArguments.MEMORY_SIZE;
            final long found = LatestByArguments.getRowsSize(argsAddress);
            final long keyHi = LatestByArguments.getKeyHi(argsAddress);
            final long keyLo = LatestByArguments.getKeyLo(argsAddress);
            // Skip range if all keys found
            if (found >= keyHi - keyLo) {
                continue;
            }
            // Update hash column address with current frame value
            LatestByArguments.setHashesAddress(argsAddress, hashColumnAddress);
            final long seq = pubSeq.next();
            if (seq < 0) {
                GeoHashNative.latestByAndFilterPrefix(keyBaseAddress, keysMemorySize, valueBaseAddress, valuesMemorySize, argsAddress, unIndexedNullCount, rowHi, rowLo, partitionIndex, valueBlockCapacity, hashColumnAddress, hashesColumnSize, prefixesAddress, prefixesCount);
            } else {
                queue.get(seq).of(keyBaseAddress, keysMemorySize, valueBaseAddress, valuesMemorySize, argsAddress, unIndexedNullCount, rowHi, rowLo, partitionIndex, valueBlockCapacity, hashColumnAddress, hashesColumnSize, prefixesAddress, prefixesCount, doneLatch);
                pubSeq.done(seq);
                queuedCount++;
            }
        }
        // drain the queue on this thread as well; this avoids a deadlock in the single-worker configuration
        while (doneLatch.getCount() > -queuedCount) {
            long seq = subSeq.next();
            if (seq > -1) {
                queue.get(seq).run();
                subSeq.done(seq);
            }
        }
        doneLatch.await(queuedCount);
        // Reset found counter
        foundRowCount = 0;
        for (int i = 0; i < taskCount; i++) {
            final long address = argumentsAddress + i * LatestByArguments.MEMORY_SIZE;
            foundRowCount += LatestByArguments.getRowsSize(address);
        }
    }
    final long rowCount = GeoHashNative.slideFoundBlocks(argumentsAddress, taskCount);
    LatestByArguments.releaseMemoryArray(argumentsAddress, taskCount);
    aLimit = rowCount;
    aIndex = indexShift;
    postProcessRows();
}
Also used: MessageBus(io.questdb.MessageBus) BitmapIndexReader(io.questdb.cairo.BitmapIndexReader) LatestByTask(io.questdb.tasks.LatestByTask) Sequence(io.questdb.mp.Sequence) DataFrame(io.questdb.cairo.sql.DataFrame) MemoryR(io.questdb.cairo.vm.api.MemoryR) TableReader(io.questdb.cairo.TableReader)
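
The task-splitting arithmetic at the top of buildTreeMap is easy to miss: chunkSize is the ceiling division of keyCount by workerCount, taskCount the ceiling division of keyCount by chunkSize, so every task gets a half-open key range [klo, khi) and only the last one may be shorter. A tiny standalone reproduction of that math; KeyRangeChunks and the sample numbers are illustrative.

public final class KeyRangeChunks {
    public static void main(String[] args) {
        // Same arithmetic as buildTreeMap: split keyCount keys into roughly
        // equal [klo, khi) ranges, one task per chunk.
        long keyCount = 10;
        int workerCount = 3;
        long chunkSize = (keyCount + workerCount - 1) / workerCount;   // ceiling division = 4
        int taskCount = (int) ((keyCount + chunkSize - 1) / chunkSize); // ceiling division = 3

        for (long i = 0; i < taskCount; i++) {
            long klo = i * chunkSize;
            long khi = Long.min(klo + chunkSize, keyCount);
            System.out.println("task " + i + ": keys [" + klo + ", " + khi + ")");
        }
        // task 0: keys [0, 4)
        // task 1: keys [4, 8)
        // task 2: keys [8, 10)
    }
}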

Aggregations

MessageBus (io.questdb.MessageBus): 3 uses
Sequence (io.questdb.mp.Sequence): 3 uses
Worker (io.questdb.mp.Worker): 2 uses
VectorAggregateTask (io.questdb.tasks.VectorAggregateTask): 2 uses
BitmapIndexReader (io.questdb.cairo.BitmapIndexReader): 1 use
TableReader (io.questdb.cairo.TableReader): 1 use
DataFrame (io.questdb.cairo.sql.DataFrame): 1 use
MemoryR (io.questdb.cairo.vm.api.MemoryR): 1 use
LatestByTask (io.questdb.tasks.LatestByTask): 1 use