Use of io.druid.segment.Cursor in project druid by druid-io.
The class GroupByQueryEngineV2, method process:
public static Sequence<Row> process(
    final GroupByQuery query,
    final StorageAdapter storageAdapter,
    final StupidPool<ByteBuffer> intermediateResultsBufferPool,
    final GroupByQueryConfig config
)
{
  if (storageAdapter == null) {
    throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
  }
  final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
  if (intervals.size() != 1) {
    throw new IAE("Should only have one interval, got[%s]", intervals);
  }
  final Sequence<Cursor> cursors = storageAdapter.makeCursors(
      Filters.toFilter(query.getDimFilter()),
      intervals.get(0),
      query.getVirtualColumns(),
      query.getGranularity(),
      false
  );
  final ResourceHolder<ByteBuffer> bufferHolder = intermediateResultsBufferPool.take();
  final String fudgeTimestampString = Strings.emptyToNull(
      query.getContextValue(GroupByStrategyV2.CTX_KEY_FUDGE_TIMESTAMP, "")
  );
  final DateTime fudgeTimestamp = fudgeTimestampString == null
      ? null
      : new DateTime(Long.parseLong(fudgeTimestampString));
  return Sequences.concat(
      Sequences.withBaggage(
          Sequences.map(
              cursors,
              new Function<Cursor, Sequence<Row>>()
              {
                @Override
                public Sequence<Row> apply(final Cursor cursor)
                {
                  return new BaseSequence<>(
                      new BaseSequence.IteratorMaker<Row, GroupByEngineIterator>()
                      {
                        @Override
                        public GroupByEngineIterator make()
                        {
                          ColumnSelectorPlus<GroupByColumnSelectorStrategy>[] selectorPlus =
                              DimensionHandlerUtils.createColumnSelectorPluses(STRATEGY_FACTORY, query.getDimensions(), cursor);
                          return new GroupByEngineIterator(
                              query,
                              config,
                              cursor,
                              bufferHolder.get(),
                              fudgeTimestamp,
                              createGroupBySelectorPlus(selectorPlus)
                          );
                        }

                        @Override
                        public void cleanup(GroupByEngineIterator iterFromMake)
                        {
                          iterFromMake.close();
                        }
                      }
                  );
                }
              }
          ),
          new Closeable()
          {
            @Override
            public void close() throws IOException
            {
              CloseQuietly.close(bufferHolder);
            }
          }
      )
  );
}
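The detail worth noticing in process is the lifecycle of the pooled buffer: it is taken from intermediateResultsBufferPool before any cursor is read, shared by every per-cursor iterator, and returned only when the concatenated sequence has been fully consumed or closed. Below is a minimal, self-contained sketch of that take-then-release-as-baggage pattern; the pool, iterator, and helper names are illustrative stand-ins, not Druid's API.

import java.io.Closeable;
import java.util.Arrays;
import java.util.Iterator;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

public class PooledIterationSketch
{
  // Toy pool standing in for Druid's StupidPool<ByteBuffer>.
  private static final BlockingQueue<byte[]> POOL = new ArrayBlockingQueue<>(2);

  static {
    POOL.offer(new byte[1024]);
    POOL.offer(new byte[1024]);
  }

  // Mirrors the Sequences.withBaggage idea: the baggage is closed exactly once,
  // after the consumer has finished iterating, even if iteration fails.
  private static <T> void forEachThenClose(Iterator<T> rows, Closeable baggage) throws Exception
  {
    try {
      while (rows.hasNext()) {
        System.out.println(rows.next());
      }
    }
    finally {
      baggage.close();
    }
  }

  public static void main(String[] args) throws Exception
  {
    // Like intermediateResultsBufferPool.take(): grab the buffer up front...
    final byte[] buffer = POOL.take();
    Iterator<String> rows = Arrays.asList("row1", "row2").iterator();
    // ...and return it to the pool only once the consumer is done.
    forEachThenClose(rows, () -> POOL.offer(buffer));
  }
}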
Use of io.druid.segment.Cursor in project druid by druid-io.
The class PooledTopNAlgorithm, method scanAndAggregateDefault:
/**
 * Use aggressive loop unrolling to aggregate the data.
 *
 * How this works: the aggregates are evaluated AGG_UNROLL_COUNT at a time. This was chosen to be 8 rather arbitrarily.
 * The offsets into the output buffer are precalculated and stored in aggregatorOffsets.
 *
 * For queries whose aggregate count is less than AGG_UNROLL_COUNT, the aggregates are evaluated in a switch statement.
 * See http://en.wikipedia.org/wiki/Duff's_device for more information on this kind of approach.
 *
 * This allows out-of-order execution of the code. In local tests, the JVM inlines all the way to this function.
 *
 * If there are more than AGG_UNROLL_COUNT aggregates, the remainder is handled by the switch, and the
 * blocks of AGG_UNROLL_COUNT are evaluated in a partially unrolled for-loop.
 *
 * Putting the switch first optimizes for the common case (fewer than AGG_UNROLL_COUNT aggregates) while
 * still helping queries with many aggregates, which benefit the most from any speed improvement
 * (they simply take longer to start with).
 */
private static void scanAndAggregateDefault(
    final PooledTopNParams params,
    final int[] positions,
    final BufferAggregator[] theAggregators
)
{
  if (params.getCardinality() < 0) {
    throw new UnsupportedOperationException("Cannot operate on a dimension with unknown cardinality");
  }

  final ByteBuffer resultsBuf = params.getResultsBuf();
  final int numBytesPerRecord = params.getNumBytesPerRecord();
  final int[] aggregatorSizes = params.getAggregatorSizes();
  final Cursor cursor = params.getCursor();
  final DimensionSelector dimSelector = params.getDimSelector();

  // Precalculate each aggregator's offset into the per-position output slot.
  final int[] aggregatorOffsets = new int[aggregatorSizes.length];
  for (int j = 0, offset = 0; j < aggregatorSizes.length; ++j) {
    aggregatorOffsets[j] = offset;
    offset += aggregatorSizes[j];
  }

  final int aggSize = theAggregators.length;
  final int aggExtra = aggSize % AGG_UNROLL_COUNT;
  int currentPosition = 0;

  while (!cursor.isDoneOrInterrupted()) {
    final IndexedInts dimValues = dimSelector.getRow();
    final int dimSize = dimValues.size();
    final int dimExtra = dimSize % AGG_UNROLL_COUNT;

    // Duff's-device-style remainder: handle dimSize % AGG_UNROLL_COUNT values
    // first; every case intentionally falls through to the next.
    switch (dimExtra) {
      case 7:
        currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(6), currentPosition);
        // fall through
      case 6:
        currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(5), currentPosition);
        // fall through
      case 5:
        currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(4), currentPosition);
        // fall through
      case 4:
        currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(3), currentPosition);
        // fall through
      case 3:
        currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(2), currentPosition);
        // fall through
      case 2:
        currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(1), currentPosition);
        // fall through
      case 1:
        currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(0), currentPosition);
    }

    // Aggregate the rest of the row's values in fully unrolled blocks of AGG_UNROLL_COUNT.
    for (int i = dimExtra; i < dimSize; i += AGG_UNROLL_COUNT) {
      currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i), currentPosition);
      currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 1), currentPosition);
      currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 2), currentPosition);
      currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 3), currentPosition);
      currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 4), currentPosition);
      currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 5), currentPosition);
      currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 6), currentPosition);
      currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 7), currentPosition);
    }
    cursor.advanceUninterruptibly();
  }
}
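The remainder-switch-plus-unrolled-loop shape described in the Javadoc is easier to see on a toy problem. Here is a minimal, self-contained sketch of the same scheme, summing a plain long[] with an unroll count of 4 instead of Druid's 8; it illustrates the technique and is not Druid code.

public class UnrollSketch
{
  static final int UNROLL_COUNT = 4; // Druid's AGG_UNROLL_COUNT is 8

  static long sum(long[] values)
  {
    long total = 0;
    final int extra = values.length % UNROLL_COUNT;
    // Duff's-device-style remainder: every case falls through to the next.
    switch (extra) {
      case 3:
        total += values[2];
        // fall through
      case 2:
        total += values[1];
        // fall through
      case 1:
        total += values[0];
    }
    // The rest of the array is covered by a partially unrolled loop, giving the
    // CPU independent instructions that can execute out of order.
    for (int i = extra; i < values.length; i += UNROLL_COUNT) {
      total += values[i];
      total += values[i + 1];
      total += values[i + 2];
      total += values[i + 3];
    }
    return total;
  }

  public static void main(String[] args)
  {
    System.out.println(sum(new long[]{1, 2, 3, 4, 5, 6, 7})); // prints 28
  }
}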
Use of io.druid.segment.Cursor in project druid by druid-io.
The class PooledTopNAlgorithm, method scanAndAggregate:
@Override
protected void scanAndAggregate(
    final PooledTopNParams params,
    final int[] positions,
    final BufferAggregator[] theAggregators,
    final int numProcessed
)
{
  final Cursor cursor = params.getCursor();
  if (specializeGeneric1AggPooledTopN && theAggregators.length == 1) {
    scanAndAggregateGeneric1Agg(params, positions, theAggregators[0], cursor);
  } else if (specializeGeneric2AggPooledTopN && theAggregators.length == 2) {
    scanAndAggregateGeneric2Agg(params, positions, theAggregators, cursor);
  } else {
    scanAndAggregateDefault(params, positions, theAggregators);
  }
  BaseQuery.checkInterrupted();
}
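The point of this dispatch is that the one- and two-aggregator scans give the hot loop a fixed aggregator count, so it need not iterate over an array on every row. A sketch of that specialization idea with hypothetical names (the real Generic1Agg/Generic2Agg implementations live elsewhere in PooledTopNAlgorithm):

interface Agg
{
  void aggregate(int row);
}

public class DispatchSketch
{
  // Specialized fast path: a single direct call per row, no inner loop.
  static void scanOneAgg(int rowCount, Agg agg)
  {
    for (int row = 0; row < rowCount; row++) {
      agg.aggregate(row);
    }
  }

  // Generic fallback: an extra loop (and array indexing) on every row.
  static void scanDefault(int rowCount, Agg[] aggs)
  {
    for (int row = 0; row < rowCount; row++) {
      for (Agg agg : aggs) {
        agg.aggregate(row);
      }
    }
  }

  static void scan(int rowCount, Agg[] aggs)
  {
    if (aggs.length == 1) {
      scanOneAgg(rowCount, aggs[0]); // specialized path
    } else {
      scanDefault(rowCount, aggs); // generic path
    }
  }

  public static void main(String[] args)
  {
    final long[] total = new long[1];
    scan(10, new Agg[]{row -> total[0] += row});
    System.out.println(total[0]); // prints 45
  }
}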
Use of io.druid.segment.Cursor in project druid by druid-io.
The class TimeExtractionTopNAlgorithm, method scanAndAggregate:
@Override
protected void scanAndAggregate(TopNParams params, int[] dimValSelector, Map<String, Aggregator[]> aggregatesStore, int numProcessed)
{
  if (params.getCardinality() < 0) {
    throw new UnsupportedOperationException("Cannot operate on a dimension with unknown cardinality");
  }
  final Cursor cursor = params.getCursor();
  final DimensionSelector dimSelector = params.getDimSelector();
  while (!cursor.isDone()) {
    final String key = dimSelector.lookupName(dimSelector.getRow().get(0));
    Aggregator[] theAggregators = aggregatesStore.get(key);
    if (theAggregators == null) {
      theAggregators = makeAggregators(cursor, query.getAggregatorSpecs());
      aggregatesStore.put(key, theAggregators);
    }
    for (Aggregator aggregator : theAggregators) {
      aggregator.aggregate();
    }
    cursor.advance();
  }
}
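The pattern here is lazy per-key aggregation: each row's extracted time value becomes a map key, and the aggregators for that key are created the first time the key is seen. A self-contained sketch of the same pattern over plain strings, with illustrative names in place of Druid's cursor and aggregators:

import java.util.HashMap;
import java.util.Map;

public class PerKeyAggregationSketch
{
  public static void main(String[] args)
  {
    // Each key stands in for dimSelector.lookupName(dimSelector.getRow().get(0)).
    String[] rowKeys = {"2024-01-01", "2024-01-01", "2024-01-02"};
    Map<String, long[]> countsByKey = new HashMap<>();
    for (String key : rowKeys) {
      // computeIfAbsent is the compact form of the null-check-then-put above;
      // the long[1] counter stands in for an Aggregator[] created lazily per key.
      long[] counter = countsByKey.computeIfAbsent(key, k -> new long[1]);
      counter[0]++; // stands in for aggregator.aggregate()
    }
    System.out.println(countsByKey.get("2024-01-01")[0]); // prints 2
  }
}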
Use of io.druid.segment.Cursor in project druid by druid-io.
The class TopNQueryEngine, method query:
public Sequence<Result<TopNResultValue>> query(final TopNQuery query, final StorageAdapter adapter)
{
  if (adapter == null) {
    throw new SegmentMissingException("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
  }
  final List<Interval> queryIntervals = query.getQuerySegmentSpec().getIntervals();
  final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimensionsFilter()));
  final Granularity granularity = query.getGranularity();
  final Function<Cursor, Result<TopNResultValue>> mapFn = getMapFn(query, adapter);
  Preconditions.checkArgument(queryIntervals.size() == 1, "Can only handle a single interval, got[%s]", queryIntervals);
  return Sequences.filter(
      Sequences.map(
          adapter.makeCursors(filter, queryIntervals.get(0), query.getVirtualColumns(), granularity, query.isDescending()),
          new Function<Cursor, Result<TopNResultValue>>()
          {
            @Override
            public Result<TopNResultValue> apply(Cursor input)
            {
              log.debug("Running over cursor[%s] at time[%s]", adapter.getInterval(), input.getTime());
              return mapFn.apply(input);
            }
          }
      ),
      Predicates.<Result<TopNResultValue>>notNull()
  );
}
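query builds a lazy pipeline: make one cursor per granularity bucket, map each cursor to a TopN result, and drop nulls, so a bucket that produces no result simply disappears from the output. A sketch of the same map-then-filter-nulls shape using plain Java streams as an illustrative stand-in for Druid's Sequences:

import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

public class MapFilterSketch
{
  public static void main(String[] args)
  {
    List<String> cursors = Arrays.asList("cursorA", "cursorB", "emptyCursor");
    List<String> results = cursors.stream()
        // like mapFn.apply(cursor): modeled here so an empty bucket yields null
        .map(c -> c.startsWith("empty") ? null : "result(" + c + ")")
        // like Predicates.notNull(): null results are filtered out
        .filter(Objects::nonNull)
        .collect(Collectors.toList());
    System.out.println(results); // [result(cursorA), result(cursorB)]
  }
}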