Usage of io.questdb.tasks.VectorAggregateTask in the questdb project by bluestreak01.
Class GroupByNotKeyedVectorRecordCursorFactory, method getCursor.
/**
 * Runs every vector aggregate function in {@code vafList} over every page frame produced by
 * the base factory and returns this factory's cursor bound to the page frame cursor.
 * <p>
 * Each (frame, function) pair is one unit of work. A unit is published to the vector aggregate
 * queue when {@code pubSeq.next()} hands out a slot; otherwise it is executed right here on the
 * calling thread. After all frames were visited, {@code getRunWhatsLeft} is invoked with the
 * list of entries this call has queued.
 *
 * @param executionContext provides the message bus and is forwarded to the base factory
 * @return {@code this.cursor} initialised with the page frame cursor
 * @throws SqlException as declared by the overridden method
 */
@Override
public RecordCursor getCursor(SqlExecutionContext executionContext) throws SqlException {
    final MessageBus messageBus = executionContext.getMessageBus();
    final PageFrameCursor frameCursor = base.getPageFrameCursor(executionContext);
    final int functionCount = vafList.size();

    // reset the state of every aggregate function before a new run
    for (int f = 0; f < functionCount; f++) {
        vafList.getQuick(f).clear();
    }

    final RingQueue<VectorAggregateTask> taskQueue = messageBus.getVectorAggregateQueue();
    final Sequence publisher = messageBus.getVectorAggregatePubSeq();
    this.entryPool.clear();
    this.activeEntries.clear();
    doneLatch.reset();

    // when running on a pool thread use that worker's id, otherwise fall back to 0
    final Thread currentThread = Thread.currentThread();
    final int workerId = currentThread instanceof Worker ? ((Worker) currentThread).getWorkerId() : 0;

    int queuedCount = 0;
    int ownCount = 0;
    int total = 0;

    for (PageFrame frame = frameCursor.next(); frame != null; frame = frameCursor.next()) {
        for (int f = 0; f < functionCount; f++) {
            final VectorAggregateFunction function = vafList.getQuick(f);
            final int columnIndex = function.getColumnIndex();
            final boolean hasColumn = columnIndex > -1;

            // Argument-less functions such as `count()` report column index -1. They still need
            // a row count, which is passed as "pageSize >> shiftBits", so size and shift are
            // borrowed from column 0. This relies on column 0 being fixed size; the original
            // authors note this holds only because page frames carry just the columns required
            // by the select, and that it has to be revisited should variable length columns
            // ever reach this code path (e.g. `first(string)`).
            final int sizingColumn = hasColumn ? columnIndex : 0;
            final long address = hasColumn ? frame.getPageAddress(columnIndex) : 0;
            final long size = frame.getPageSize(sizingColumn);
            final int shiftBits = frame.getColumnShiftBits(sizingColumn);

            final long slot = publisher.next();
            if (slot >= 0) {
                final VectorAggregateEntry entry = entryPool.next();
                // null pRosti means that keyed aggregation is not required
                entry.of(queuedCount, function, null, 0, address, size, shiftBits, doneLatch);
                queuedCount++;
                activeEntries.add(entry);
                taskQueue.get(slot).entry = entry;
                publisher.done(slot);
            } else {
                // no slot was handed out: aggregate this frame on the calling thread
                function.aggregate(address, size, shiftBits, workerId);
                ownCount++;
            }
            total++;
        }
    }

    // The shared queue may also hold tasks of other concurrent executions, which is why this
    // call keeps its own checklist (activeEntries) and hands it over here together with the
    // latch.
    final int reclaimed = getRunWhatsLeft(queuedCount, 0, workerId, activeEntries, doneLatch, LOG);

    LOG.info().$("done [total=").$(total).$(", ownCount=").$(ownCount).$(", reclaimed=").$(reclaimed).$(", queuedCount=").$(queuedCount).$(']').$();
    return this.cursor.of(frameCursor);
}
Usage of io.questdb.tasks.VectorAggregateTask in the questdb project by bluestreak01.
Class GroupByRecordCursorFactory, method getCursor.
/**
 * Runs the keyed vector aggregation over every page frame produced by the base factory and
 * returns this factory's cursor bound to the page frame cursor.
 * <p>
 * Each (frame, function) pair is one unit of work. A unit is published to the vector aggregate
 * queue when there is an entry to publish and {@code pubSeq.next()} hands out a slot; in every
 * other case it is executed on the calling thread. Once all frames were visited,
 * {@code getRunWhatsLeft} is invoked with the entries queued by this call, after which the maps
 * {@code pRosti[1..n)} are merged into {@code pRosti[0]} and every function is wrapped up.
 *
 * @param executionContext provides the message bus and is forwarded to the base factory
 * @return {@code this.cursor} initialised with the page frame cursor
 * @throws SqlException as declared by the overridden method
 */
@Override
public RecordCursor getCursor(SqlExecutionContext executionContext) throws SqlException {
    // clear maps
    for (int i = 0, n = pRosti.length; i < n; i++) {
        Rosti.clear(pRosti[i]);
    }

    final MessageBus bus = executionContext.getMessageBus();
    final PageFrameCursor cursor = base.getPageFrameCursor(executionContext);
    final int vafCount = vafList.size();

    // clear state of aggregate functions
    for (int i = 0; i < vafCount; i++) {
        vafList.getQuick(i).clear();
    }

    final RingQueue<VectorAggregateTask> queue = bus.getVectorAggregateQueue();
    final Sequence pubSeq = bus.getVectorAggregatePubSeq();

    this.entryPool.clear();
    this.activeEntries.clear();
    int queuedCount = 0;
    int ownCount = 0;
    int total = 0;
    doneLatch.reset();

    // check if this executed via worker pool
    final Thread thread = Thread.currentThread();
    final int workerId;
    if (thread instanceof Worker) {
        workerId = ((Worker) thread).getWorkerId();
    } else {
        workerId = 0;
    }

    PageFrame frame;
    while ((frame = cursor.next()) != null) {
        final long keyAddress = frame.getPageAddress(keyColumnIndex);
        for (int i = 0; i < vafCount; i++) {
            final VectorAggregateFunction vaf = vafList.getQuick(i);
            // when column index = -1 we assume that vector function does not have value
            // argument, and it can only derive count via memory size
            final int columnIndex = vaf.getColumnIndex();
            // For functions like `count()`, that do not have arguments, we are required to provide
            // the row count in a form of "pageSize >> shr". Since `vaf` doesn't provide a column
            // this code uses column 0 and assumes that column 0 is fixed size. The assumption
            // relies on page frames carrying only the columns required by the select. Should a
            // variable length column ever reach this code (e.g. via `first(string)`), the size
            // would have to be derived differently: either by type checking column 0 or by
            // picking any fixed-size column.
            final long valueAddress = columnIndex > -1 ? frame.getPageAddress(columnIndex) : 0;
            final int pageColIndex = columnIndex > -1 ? columnIndex : 0;
            final int columnSizeShr = frame.getColumnShiftBits(pageColIndex);
            final long valueAddressSize = frame.getPageSize(pageColIndex);

            // Claim a queue slot only when there is an entry to publish. Previously the slot was
            // claimed unconditionally and, when both addresses were 0, it was neither filled nor
            // released via pubSeq.done(), and the work item was silently dropped - whereas the
            // very same item is aggregated when no slot is available. Such items now always take
            // the inline path, so the outcome no longer depends on the state of the queue.
            final boolean publishable = keyAddress != 0 || valueAddress != 0;
            final long seq = publishable ? pubSeq.next() : -1;
            if (seq < 0) {
                // run on the calling thread; without key address fall back to non-keyed aggregate
                if (keyAddress == 0) {
                    vaf.aggregate(valueAddress, valueAddressSize, columnSizeShr, workerId);
                } else {
                    vaf.aggregate(pRosti[workerId], keyAddress, valueAddress, valueAddressSize, columnSizeShr, workerId);
                }
                ownCount++;
            } else {
                final VectorAggregateEntry entry = entryPool.next();
                if (keyAddress == 0) {
                    // null pRosti means that we do not need keyed aggregation
                    entry.of(queuedCount++, vaf, null, 0, valueAddress, valueAddressSize, columnSizeShr, doneLatch);
                } else {
                    entry.of(queuedCount++, vaf, pRosti, keyAddress, valueAddress, valueAddressSize, columnSizeShr, doneLatch);
                }
                activeEntries.add(entry);
                queue.get(seq).entry = entry;
                // every claimed slot must be released, otherwise it stays unpublished
                pubSeq.done(seq);
            }
            total++;
        }
    }

    // The shared queue may also hold aggregation tasks of other concurrent executions, which is
    // why this call keeps its own checklist (activeEntries) and hands it over here together with
    // the latch.
    final int reclaimed = GroupByNotKeyedVectorRecordCursorFactory.getRunWhatsLeft(queuedCount, 0, workerId, activeEntries, doneLatch, LOG);

    final long pRosti0 = pRosti[0];
    if (pRosti.length > 1) {
        LOG.debug().$("merging").$();
        for (int j = 0; j < vafCount; j++) {
            final VectorAggregateFunction vaf = vafList.getQuick(j);
            // fold every other map into the first one before wrapping up
            for (int i = 1, n = pRosti.length; i < n; i++) {
                vaf.merge(pRosti0, pRosti[i]);
            }
            vaf.wrapUp(pRosti0);
        }
    } else {
        for (int j = 0; j < vafCount; j++) {
            vafList.getQuick(j).wrapUp(pRosti0);
        }
    }

    LOG.info().$("done [total=").$(total).$(", ownCount=").$(ownCount).$(", reclaimed=").$(reclaimed).$(", queuedCount=").$(queuedCount).$(']').$();
    return this.cursor.of(cursor);
}
Aggregations