use of io.druid.segment.data.IndexedInts in project druid by druid-io.
the class StringDimensionHandler method getEncodedKeyComponentFromColumn.
/**
 * Reads the encoded value(s) stored at one row of a column.
 *
 * @param column  the column to read from; it is cast to {@code DictionaryEncodedColumn}
 * @param currRow index of the row to read
 * @return for a multi-value column, every value of the row in stored order; otherwise a
 *         one-element array holding the row's single value
 */
@Override
public int[] getEncodedKeyComponentFromColumn(Closeable column, int currRow) {
  final DictionaryEncodedColumn encodedColumn = (DictionaryEncodedColumn) column;

  // Single-value columns yield exactly one entry.
  if (!encodedColumn.hasMultipleValues()) {
    return new int[] {encodedColumn.getSingleValueRow(currRow)};
  }

  // Multi-value columns: copy the row into a plain int array, preserving order.
  final IndexedInts rowValues = encodedColumn.getMultiValueRow(currRow);
  final int[] encoded = new int[rowValues.size()];
  int idx = 0;
  while (idx < encoded.length) {
    encoded[idx] = rowValues.get(idx);
    idx++;
  }
  return encoded;
}
use of io.druid.segment.data.IndexedInts in project druid by druid-io.
the class PooledTopNAlgorithm method scanAndAggregateDefault.
/**
 * Use aggressive loop unrolling to aggregate the data
 *
 * How this works: The aggregates are evaluated AGG_UNROLL_COUNT at a time. This was chosen to be 8 rather arbitrarily.
 * The offsets into the output buffer are precalculated and stored in aggregatorOffsets
 *
 * For queries whose aggregate count is less than AGG_UNROLL_COUNT, the aggregates are evaluated in a switch statement.
 * See http://en.wikipedia.org/wiki/Duff's_device for more information on this kind of approach
 *
 * This allows out of order execution of the code. In local tests, the JVM inlines all the way to this function.
 *
 * If there are more than AGG_UNROLL_COUNT aggregates, then the remainder is calculated with the switch, and the
 * blocks of AGG_UNROLL_COUNT are calculated in a partially unrolled for-loop.
 *
 * Putting the switch first allows for optimization for the common case (less than AGG_UNROLL_COUNT aggs) but
 * still optimizes the high quantity of aggregate queries which benefit greatly from any speed improvements
 * (they simply take longer to start with).
 *
 * NOTE(review): the switch and for-loop visible in this method unroll over the dimension values of each row;
 * the unrolling over aggregates described above presumably happens inside aggregateDimValue, which receives
 * aggSize and aggExtra - confirm against that method.
 *
 * @param params         supplies the cardinality, results buffer, bytes per record, aggregator sizes, cursor and
 *                       dimension selector used by the scan
 * @param positions      passed unchanged to every aggregateDimValue call
 * @param theAggregators passed unchanged to every aggregateDimValue call; its length determines aggSize and aggExtra
 * @throws UnsupportedOperationException if params reports a negative cardinality
 */
private static void scanAndAggregateDefault(final PooledTopNParams params, final int[] positions, final BufferAggregator[] theAggregators) {
// A negative cardinality is treated as "unknown" and rejected up front.
if (params.getCardinality() < 0) {
throw new UnsupportedOperationException("Cannot operate on a dimension with unknown cardinality");
}
final ByteBuffer resultsBuf = params.getResultsBuf();
final int numBytesPerRecord = params.getNumBytesPerRecord();
final int[] aggregatorSizes = params.getAggregatorSizes();
final Cursor cursor = params.getCursor();
final DimensionSelector dimSelector = params.getDimSelector();
// aggregatorOffsets[j] is the running sum of the sizes of aggregators 0..j-1.
final int[] aggregatorOffsets = new int[aggregatorSizes.length];
for (int j = 0, offset = 0; j < aggregatorSizes.length; ++j) {
aggregatorOffsets[j] = offset;
offset += aggregatorSizes[j];
}
final int aggSize = theAggregators.length;
// Number of aggregators left over after grouping them AGG_UNROLL_COUNT at a time.
final int aggExtra = aggSize % AGG_UNROLL_COUNT;
// Threaded through every aggregateDimValue call, which returns the updated value.
int currentPosition = 0;
while (!cursor.isDoneOrInterrupted()) {
final IndexedInts dimValues = dimSelector.getRow();
final int dimSize = dimValues.size();
// Number of values in this row that do not fill a whole block of AGG_UNROLL_COUNT.
final int dimExtra = dimSize % AGG_UNROLL_COUNT;
// The cases intentionally fall through (no break): entering at case dimExtra handles the values at
// indexes dimExtra-1 down to 0. When dimExtra is 0 no case matches and nothing runs here.
switch(dimExtra) {
case 7:
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(6), currentPosition);
case 6:
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(5), currentPosition);
case 5:
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(4), currentPosition);
case 4:
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(3), currentPosition);
case 3:
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(2), currentPosition);
case 2:
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(1), currentPosition);
case 1:
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(0), currentPosition);
}
// Remaining values, from index dimExtra onward, are processed in blocks. The body hard-codes eight calls
// per iteration (i .. i + 7) while stepping by AGG_UNROLL_COUNT, so it relies on that constant being 8.
// NOTE(review): the constant's declaration is not visible here - confirm it is 8.
for (int i = dimExtra; i < dimSize; i += AGG_UNROLL_COUNT) {
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i), currentPosition);
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 1), currentPosition);
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 2), currentPosition);
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 3), currentPosition);
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 4), currentPosition);
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 5), currentPosition);
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 6), currentPosition);
currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 7), currentPosition);
}
// Move to the next row; the loop condition above re-checks for completion or interruption.
cursor.advanceUninterruptibly();
}
}
use of io.druid.segment.data.IndexedInts in project druid by druid-io.
the class StringTopNColumnSelectorStrategy method dimExtractionScanAndAggregateWithCardinalityKnown.
/**
 * Walks the cursor to its end and, for every dimension value of every row, runs all aggregators
 * associated with that value.
 *
 * Aggregators are looked up first in {@code rowSelector} by the value's integer index. On a miss they
 * are looked up in {@code aggregatesStore} by the value's name, created and stored there if still
 * absent, and then remembered in {@code rowSelector} for later rows.
 *
 * @param query           supplies the aggregator specs used when new aggregators must be created
 * @param cursor          the cursor being scanned; advanced once per row until it reports done
 * @param selector        yields the current row's dimension value indexes and resolves them to names
 * @param rowSelector     per-index aggregator table; filled in as indexes are first encountered
 * @param aggregatesStore name-keyed aggregator table; filled in as names are first encountered
 */
private void dimExtractionScanAndAggregateWithCardinalityKnown(TopNQuery query, Cursor cursor, DimensionSelector selector, Aggregator[][] rowSelector, Map<String, Aggregator[]> aggregatesStore) {
  for (; !cursor.isDone(); cursor.advance()) {
    final IndexedInts row = selector.getRow();
    for (int pos = 0; pos < row.size(); pos++) {
      final int dictId = row.get(pos);
      Aggregator[] aggs = rowSelector[dictId];
      if (aggs == null) {
        // Index not seen yet: fall back to the name-keyed store, creating the entry on demand.
        final String name = selector.lookupName(dictId);
        Aggregator[] stored = aggregatesStore.get(name);
        if (stored == null) {
          stored = BaseTopNAlgorithm.makeAggregators(cursor, query.getAggregatorSpecs());
          aggregatesStore.put(name, stored);
        }
        aggs = stored;
        rowSelector[dictId] = aggs;
      }
      for (int a = 0; a < aggs.length; a++) {
        aggs[a].aggregate();
      }
    }
  }
}
use of io.druid.segment.data.IndexedInts in project druid by druid-io.
the class StringTopNColumnSelectorStrategy method dimExtractionScanAndAggregateWithCardinalityUnknown.
/**
 * Walks the cursor to its end and, for every dimension value of every row, runs all aggregators
 * associated with that value's name.
 *
 * Each value index is resolved to its name through the selector, and the aggregators are kept in
 * {@code aggregatesStore} under that name; a missing entry is created and stored before use.
 *
 * @param query           supplies the aggregator specs used when new aggregators must be created
 * @param cursor          the cursor being scanned; advanced once per row until it reports done
 * @param selector        yields the current row's dimension value indexes and resolves them to names
 * @param aggregatesStore name-keyed aggregator table; filled in as names are first encountered
 */
private void dimExtractionScanAndAggregateWithCardinalityUnknown(TopNQuery query, Cursor cursor, DimensionSelector selector, Map<String, Aggregator[]> aggregatesStore) {
  for (; !cursor.isDone(); cursor.advance()) {
    final IndexedInts row = selector.getRow();
    for (int pos = 0; pos < row.size(); pos++) {
      final String name = selector.lookupName(row.get(pos));
      Aggregator[] aggs = aggregatesStore.get(name);
      if (aggs == null) {
        // First time this name is seen: build its aggregators and remember them.
        aggs = BaseTopNAlgorithm.makeAggregators(cursor, query.getAggregatorSpecs());
        aggregatesStore.put(name, aggs);
      }
      for (int a = 0; a < aggs.length; a++) {
        aggs[a].aggregate();
      }
    }
  }
}
use of io.druid.segment.data.IndexedInts in project druid by druid-io.
the class CompressedVSizeIndexedSupplier method fromIterable.
/**
 * Builds a supplier from a sequence of int rows by flattening them into one value list plus a
 * list of row start offsets.
 *
 * The offset list holds the starting position of each row within the flattened values, followed
 * by one final entry equal to the total number of values.
 *
 * @param objectsIterable the rows to encode, consumed in iteration order
 * @param maxValue        passed to the values supplier as the maximum value and used to size its buffers
 * @param byteOrder       byte order handed to both underlying suppliers
 * @param compression     compression strategy handed to both underlying suppliers
 * @return a supplier wrapping the offsets (header) supplier and the values supplier
 */
public static CompressedVSizeIndexedSupplier fromIterable(Iterable<IndexedInts> objectsIterable, int maxValue, final ByteOrder byteOrder, CompressedObjectStrategy.CompressionStrategy compression) {
  final List<Integer> rowStarts = new ArrayList<>();
  final List<Integer> flattened = new ArrayList<>();
  int runningTotal = 0;

  for (IndexedInts row : objectsIterable) {
    rowStarts.add(runningTotal);
    for (int k = 0; k < row.size(); k++) {
      flattened.add(row.get(k));
    }
    runningTotal += row.size();
  }
  // Trailing entry marks the end of the last row.
  rowStarts.add(runningTotal);

  // The largest offset is the total value count, so it bounds every header entry.
  final int largestOffset = runningTotal;
  final CompressedVSizeIntsIndexedSupplier header = CompressedVSizeIntsIndexedSupplier.fromList(
      rowStarts,
      largestOffset,
      CompressedVSizeIntsIndexedSupplier.maxIntsInBufferForValue(largestOffset),
      byteOrder,
      compression
  );
  final CompressedVSizeIntsIndexedSupplier body = CompressedVSizeIntsIndexedSupplier.fromList(
      flattened,
      maxValue,
      CompressedVSizeIntsIndexedSupplier.maxIntsInBufferForValue(maxValue),
      byteOrder,
      compression
  );
  return new CompressedVSizeIndexedSupplier(header, body);
}
Aggregations