use of io.druid.segment.DimensionSelector in project druid by druid-io.
the class DictionaryBuildingStringGroupByColumnSelectorStrategy method initColumnValues.
/**
 * Reads the current row of the given selector, translates every value of that row into an id of this
 * strategy's own dictionary, and stores the translated row in {@code valuess[columnIndex]}.
 *
 * A value that is not yet known to {@code reverseDictionary} (lookup yields a negative id) is appended to
 * {@code dictionary} and registered under {@code nextId}, which is then incremented.
 *
 * @param selector    selector positioned on the current row; it is cast to {@link DimensionSelector}
 * @param columnIndex slot of {@code valuess} that receives the translated row
 * @param valuess     output array; only the element at {@code columnIndex} is written
 */
@Override
public void initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess) {
  final DimensionSelector dimensionSelector = (DimensionSelector) selector;
  final IndexedInts currentRow = dimensionSelector.getRow();
  final int[] translatedIds = new int[currentRow.size()];

  int idx = 0;
  while (idx < currentRow.size()) {
    final String name = dimensionSelector.lookupName(currentRow.get(idx));
    final int knownId = reverseDictionary.getInt(name);
    if (knownId >= 0) {
      translatedIds[idx] = knownId;
    } else {
      // First time this value is seen: grow the dictionary and hand out the next free id.
      dictionary.add(name);
      reverseDictionary.put(name, nextId);
      translatedIds[idx] = nextId;
      nextId++;
    }
    idx++;
  }

  valuess[columnIndex] = ArrayBasedIndexedInts.of(translatedIds);
}
use of io.druid.segment.DimensionSelector in project druid by druid-io.
the class PooledTopNAlgorithm method updateResults.
/**
 * Copies the aggregated values out of the results buffer into the result builder.
 *
 * For every index {@code i} of {@code positions} whose entry is non-negative, the entry is used as the
 * starting offset of that index's record in the results buffer. Each aggregator reads its value at the
 * running offset, which is advanced by that aggregator's size. The dimension value is looked up by
 * {@code i} and, when the query's output type is not {@code STRING}, passed through the value transformer
 * before being added to the builder.
 *
 * @param params         parameters providing the results buffer, aggregator sizes and dimension selector
 * @param positions      per-index buffer offsets; negative entries are skipped
 * @param theAggregators aggregators used to read values back from the buffer
 * @param resultBuilder  receives one entry per non-skipped index
 * @throws UnsupportedOperationException if {@code params.getCardinality()} is negative
 */
@Override
protected void updateResults(PooledTopNParams params, int[] positions, BufferAggregator[] theAggregators, TopNResultBuilder resultBuilder) {
  if (params.getCardinality() < 0) {
    throw new UnsupportedOperationException("Cannot operate on a dimension with unknown cardinality");
  }

  final ByteBuffer buffer = params.getResultsBuf();
  final int[] sizes = params.getAggregatorSizes();
  final DimensionSelector selector = params.getDimSelector();
  final ValueType outputType = query.getDimensionSpec().getOutputType();
  final boolean convertResult = ValueType.STRING != outputType;
  final Function<Object, Object> transformer = TopNMapFn.getValueTransformer(outputType);

  for (int dimIndex = 0; dimIndex < positions.length; dimIndex++) {
    final int recordStart = positions[dimIndex];
    if (recordStart < 0) {
      continue;
    }

    final Object[] aggValues = new Object[theAggregators.length];
    int offset = recordStart;
    for (int aggIndex = 0; aggIndex < theAggregators.length; aggIndex++) {
      aggValues[aggIndex] = theAggregators[aggIndex].get(buffer, offset);
      offset += sizes[aggIndex];
    }

    final Object rawName = selector.lookupName(dimIndex);
    final Object key = convertResult ? transformer.apply(rawName) : rawName;
    resultBuilder.addEntry((Comparable) key, dimIndex, aggValues);
  }
}
use of io.druid.segment.DimensionSelector in project druid by druid-io.
the class PooledTopNAlgorithm method scanAndAggregateDefault.
/**
* Scans the cursor until it is done or interrupted and, for every row, calls aggregateDimValue once per
* dimension value of that row, using aggressive loop unrolling.
*
* How this works: The aggregates are evaluated AGG_UNROLL_COUNT at a time. This was chosen to be 8 rather arbitrarily.
* The offsets into the output buffer are precalculated and stored in aggregatorOffsets.
*
* For queries whose aggregate count is less than AGG_UNROLL_COUNT, the aggregates are evaluated in a switch statement.
* See http://en.wikipedia.org/wiki/Duff's_device for more information on this kind of approach.
*
* This allows out of order execution of the code. In local tests, the JVM inlines all the way to this function.
*
* If there are more than AGG_UNROLL_COUNT aggregates, then the remainder is calculated with the switch, and the
* blocks of AGG_UNROLL_COUNT are calculated in a partially unrolled for-loop.
*
* Putting the switch first allows for optimization for the common case (less than AGG_UNROLL_COUNT aggs) but
* still optimizes the high quantity of aggregate queries which benefit greatly from any speed improvements
* (they simply take longer to start with).
*
* NOTE(review): the switch and the unrolled for-loop visible in this method iterate over the dimension values
* of the current row (dimSize / dimExtra). aggSize and aggExtra are only computed here and passed on to
* aggregateDimValue; the per-aggregator unrolling described above presumably happens there -- confirm against
* that method.
*
* @param params         supplies the results buffer, bytes per record, aggregator sizes, cursor and dimension selector
* @param positions      passed through unchanged to aggregateDimValue
* @param theAggregators passed through unchanged to aggregateDimValue; its length determines aggSize
* @throws UnsupportedOperationException if params.getCardinality() is negative
*/
private static void scanAndAggregateDefault(final PooledTopNParams params, final int[] positions, final BufferAggregator[] theAggregators) {
if (params.getCardinality() < 0) {
throw new UnsupportedOperationException("Cannot operate on a dimension with unknown cardinality");
}
final ByteBuffer resultsBuf = params.getResultsBuf();
final int numBytesPerRecord = params.getNumBytesPerRecord();
final int[] aggregatorSizes = params.getAggregatorSizes();
final Cursor cursor = params.getCursor();
final DimensionSelector dimSelector = params.getDimSelector();
// aggregatorOffsets[j] is the sum of the sizes of aggregators 0..j-1, i.e. a running prefix sum of aggregatorSizes.
final int[] aggregatorOffsets = new int[aggregatorSizes.length];
for (int j = 0, offset = 0; j < aggregatorSizes.length; ++j) {
aggregatorOffsets[j] = offset;
offset += aggregatorSizes[j];
}
final int aggSize = theAggregators.length;
final int aggExtra = aggSize % AGG_UNROLL_COUNT;
// Threaded through every aggregateDimValue call: each call receives the value returned by the previous one.
int currentPosition = 0;
while (!cursor.isDoneOrInterrupted()) {
final IndexedInts dimValues = dimSelector.getRow();
final int dimSize = dimValues.size();
// Number of leading values that do not fill a complete group of AGG_UNROLL_COUNT.
final int dimExtra = dimSize % AGG_UNROLL_COUNT;
// Intentional fall-through (no break statements): entering at case k handles the values at indices
// k-1 down to 0, so exactly the first dimExtra values of the row are processed here, in descending index order.
switch(dimExtra) {
case 7:
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(6), currentPosition);
case 6:
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(5), currentPosition);
case 5:
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(4), currentPosition);
case 4:
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(3), currentPosition);
case 3:
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(2), currentPosition);
case 2:
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(1), currentPosition);
case 1:
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(0), currentPosition);
}
// The remaining (dimSize - dimExtra) values are processed in ascending order, one group per iteration.
// The body is written out for exactly 8 values (i .. i + 7), so it only lines up with the loop step if
// AGG_UNROLL_COUNT is 8, as the Javadoc above states -- keep the two in sync.
for (int i = dimExtra; i < dimSize; i += AGG_UNROLL_COUNT) {
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i), currentPosition);
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 1), currentPosition);
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 2), currentPosition);
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 3), currentPosition);
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 4), currentPosition);
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 5), currentPosition);
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 6), currentPosition);
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 7), currentPosition);
}
// Move to the next row; the loop condition above is what observes completion or interruption.
cursor.advanceUninterruptibly();
}
}
use of io.druid.segment.DimensionSelector in project druid by druid-io.
the class PooledTopNAlgorithm method makeInitParams.
/**
 * Builds the {@link PooledTopNParams} for one cursor: the dimension selector's cardinality, a provider for
 * the positions array, the per-aggregator intermediate sizes, and a results buffer taken from
 * {@code bufferPool}.
 *
 * The pooled buffer is deliberately acquired last, after every check and computation that does not need
 * it. This way a failure such as the unknown-cardinality check cannot leave a buffer checked out of the
 * pool with nobody holding a reference to return it. Once acquired, the holder is handed to the returned
 * params via {@code withResultsBufHolder}.
 *
 * @param selectorPlus wrapper whose selector must be a {@link DimensionSelector}
 * @param cursor       cursor the resulting params will scan
 * @return params sized so that {@code numValuesPerPass} records of {@code numBytesPerRecord} bytes fit in
 *         the results buffer; when the aggregators need zero bytes, {@code numValuesPerPass} is the
 *         cardinality
 * @throws UnsupportedOperationException if the selector reports a negative (unknown) cardinality
 */
@Override
public PooledTopNParams makeInitParams(ColumnSelectorPlus selectorPlus, Cursor cursor) {
  final DimensionSelector dimSelector = (DimensionSelector) selectorPlus.getSelector();
  final int cardinality = dimSelector.getValueCardinality();
  if (cardinality < 0) {
    throw new UnsupportedOperationException("Cannot operate on a dimension with no dictionary");
  }

  final TopNMetricSpecBuilder<int[]> arrayProvider = new BaseArrayProvider<int[]>(dimSelector, query, capabilities) {
    // Reused across build() calls; one slot per dictionary id.
    private final int[] positions = new int[cardinality];

    @Override
    public int[] build() {
      // Ids inside [start, end) are initialised for aggregation; everything outside is marked as skipped.
      Pair<Integer, Integer> startEnd = computeStartEnd(cardinality);
      Arrays.fill(positions, 0, startEnd.lhs, SKIP_POSITION_VALUE);
      Arrays.fill(positions, startEnd.lhs, startEnd.rhs, INIT_POSITION_VALUE);
      Arrays.fill(positions, startEnd.rhs, positions.length, SKIP_POSITION_VALUE);
      return positions;
    }
  };

  final int numAggregators = query.getAggregatorSpecs().size();
  final int[] aggregatorSizes = new int[numAggregators];
  int numBytesPerRecord = 0;
  for (int i = 0; i < numAggregators; ++i) {
    aggregatorSizes[i] = query.getAggregatorSpecs().get(i).getMaxIntermediateSize();
    numBytesPerRecord += aggregatorSizes[i];
  }

  // Acquire the pooled buffer only now that nothing above can fail anymore.
  final ResourceHolder<ByteBuffer> resultsBufHolder = bufferPool.take();
  final ByteBuffer resultsBuf = resultsBufHolder.get();
  resultsBuf.clear();
  final int numBytesToWorkWith = resultsBuf.remaining();

  // Guard against division by zero when the aggregators need no intermediate space.
  final int numValuesPerPass = numBytesPerRecord > 0 ? numBytesToWorkWith / numBytesPerRecord : cardinality;

  return PooledTopNParams.builder()
      .withSelectorPlus(selectorPlus)
      .withCursor(cursor)
      .withResultsBufHolder(resultsBufHolder)
      .withResultsBuf(resultsBuf)
      .withArrayProvider(arrayProvider)
      .withNumBytesPerRecord(numBytesPerRecord)
      .withNumValuesPerPass(numValuesPerPass)
      .withAggregatorSizes(aggregatorSizes)
      .build();
}
use of io.druid.segment.DimensionSelector in project druid by druid-io.
the class TimeExtractionTopNAlgorithm method scanAndAggregate.
/**
 * Walks the cursor to its end and, for every row, aggregates into the aggregator set keyed by the name of
 * the row's first dimension value.
 *
 * The aggregator set for a key is created on first use via {@code makeAggregators} and stored in
 * {@code aggregatesStore}.
 *
 * @param params          supplies the cursor and the dimension selector
 * @param dimValSelector  not used by this implementation
 * @param aggregatesStore map from dimension value name to its aggregators; filled in as new keys appear
 * @param numProcessed    not used by this implementation
 * @throws UnsupportedOperationException if {@code params.getCardinality()} is negative
 */
@Override
protected void scanAndAggregate(TopNParams params, int[] dimValSelector, Map<String, Aggregator[]> aggregatesStore, int numProcessed) {
  if (params.getCardinality() < 0) {
    throw new UnsupportedOperationException("Cannot operate on a dimension with unknown cardinality");
  }

  final Cursor rowCursor = params.getCursor();
  final DimensionSelector selector = params.getDimSelector();

  for (; !rowCursor.isDone(); rowCursor.advance()) {
    // Only the first value of the row is used as the grouping key.
    final String bucket = selector.lookupName(selector.getRow().get(0));

    Aggregator[] bucketAggregators = aggregatesStore.get(bucket);
    if (bucketAggregators == null) {
      bucketAggregators = makeAggregators(rowCursor, query.getAggregatorSpecs());
      aggregatesStore.put(bucket, bucketAggregators);
    }

    for (int a = 0; a < bucketAggregators.length; a++) {
      bucketAggregators[a].aggregate();
    }
  }
}
Aggregations