Search in sources :

Example 6 with DimensionSelector

use of io.druid.segment.DimensionSelector in project druid by druid-io.

the class DictionaryBuildingStringGroupByColumnSelectorStrategy method initColumnValues.

/**
 * Re-encodes the selector's current row using this strategy's own dictionary and stores the
 * result in {@code valuess[columnIndex]}.
 *
 * <p>Each id in the row is resolved to its string value through the selector. When the reverse
 * dictionary returns a negative id for that value, the value is appended to the dictionary and
 * assigned {@code nextId}, which is then incremented; otherwise the id already on record is
 * reused.
 *
 * @param selector    column selector; cast to {@link DimensionSelector}
 * @param columnIndex slot of {@code valuess} that receives the re-encoded row
 * @param valuess     per-column value holder, written at {@code columnIndex}
 */
@Override
public void initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess) {
    final DimensionSelector dimensionSelector = (DimensionSelector) selector;
    final IndexedInts currentRow = dimensionSelector.getRow();
    final int[] translated = new int[currentRow.size()];
    int pos = 0;
    while (pos < currentRow.size()) {
        final String name = dimensionSelector.lookupName(currentRow.get(pos));
        final int existingId = reverseDictionary.getInt(name);
        if (existingId >= 0) {
            // Value already has an id in the built dictionary.
            translated[pos] = existingId;
        } else {
            // First time this value is seen: register it under the next free id.
            dictionary.add(name);
            reverseDictionary.put(name, nextId);
            translated[pos] = nextId;
            nextId++;
        }
        pos++;
    }
    valuess[columnIndex] = ArrayBasedIndexedInts.of(translated);
}
Also used : DimensionSelector(io.druid.segment.DimensionSelector) ArrayBasedIndexedInts(io.druid.segment.data.ArrayBasedIndexedInts) IndexedInts(io.druid.segment.data.IndexedInts)

Example 7 with DimensionSelector

use of io.druid.segment.DimensionSelector in project druid by druid-io.

the class PooledTopNAlgorithm method updateResults.

/**
 * Feeds the aggregated values held in the results buffer into {@code resultBuilder}.
 *
 * <p>{@code positions} is indexed by dimension id. Entries with a negative position are skipped.
 * For every other entry the aggregators are read one after another, starting at the stored
 * position and advancing by each aggregator's size. The dimension value is looked up by id and,
 * when the query's output type is not {@code STRING}, passed through the value transformer
 * before being added.
 *
 * @param params         supplies the results buffer, aggregator sizes and dimension selector
 * @param positions      per-dimension-id start position in the results buffer
 * @param theAggregators aggregators used to read values back out of the buffer
 * @param resultBuilder  receives one entry per dimension id with a non-negative position
 * @throws UnsupportedOperationException if the reported cardinality is negative
 */
@Override
protected void updateResults(PooledTopNParams params, int[] positions, BufferAggregator[] theAggregators, TopNResultBuilder resultBuilder) {
    if (params.getCardinality() < 0) {
        throw new UnsupportedOperationException("Cannot operate on a dimension with unknown cardinality");
    }
    final ByteBuffer buffer = params.getResultsBuf();
    final int[] sizes = params.getAggregatorSizes();
    final DimensionSelector selector = params.getDimSelector();
    final ValueType outputType = query.getDimensionSpec().getOutputType();
    final boolean convert = outputType != ValueType.STRING;
    final Function<Object, Object> transformer = TopNMapFn.getValueTransformer(outputType);
    for (int dimIndex = 0; dimIndex < positions.length; dimIndex++) {
        if (positions[dimIndex] < 0) {
            continue;
        }
        final Object[] aggValues = new Object[theAggregators.length];
        int offset = positions[dimIndex];
        for (int a = 0; a < theAggregators.length; a++) {
            aggValues[a] = theAggregators[a].get(buffer, offset);
            offset += sizes[a];
        }
        final Object name = selector.lookupName(dimIndex);
        final Object key = convert ? transformer.apply(name) : name;
        resultBuilder.addEntry((Comparable) key, dimIndex, aggValues);
    }
}
Also used : DimensionSelector(io.druid.segment.DimensionSelector) ValueType(io.druid.segment.column.ValueType) ByteBuffer(java.nio.ByteBuffer)

Example 8 with DimensionSelector

use of io.druid.segment.DimensionSelector in project druid by druid-io.

the class PooledTopNAlgorithm method scanAndAggregateDefault.

/**
   * Use aggressive loop unrolling to aggregate the data
   *
   * How this works: The aggregates are evaluated AGG_UNROLL_COUNT at a time. This was chosen to be 8 rather arbitrarily.
   * The offsets into the output buffer are precalculated and stored in aggregatorOffsets
   *
   * For queries whose aggregate count is less than AGG_UNROLL_COUNT, the aggregates are evaluated in a switch statement.
   * See http://en.wikipedia.org/wiki/Duff's_device for more information on this kind of approach
   *
   * This allows out of order execution of the code. In local tests, the JVM inlines all the way to this function.
   *
   * If there are more than AGG_UNROLL_COUNT aggregates, then the remainder is calculated with the switch, and the
   * blocks of AGG_UNROLL_COUNT are calculated in a partially unrolled for-loop.
   *
   * Putting the switch first allows for optimization for the common case (less than AGG_UNROLL_COUNT aggs) but
   * still optimizes the high quantity of aggregate queries which benefit greatly from any speed improvements
   * (they simply take longer to start with).
   *
   * NOTE(review): the unrolling visible in this method is over the dimension values of each row; the unrolling
   * over aggregates described above presumably happens inside aggregateDimValue (which receives aggSize and
   * aggExtra) - confirm against that helper.
   *
   * @param params         supplies the results buffer, record size, aggregator sizes, cursor and dimension selector
   * @param positions      passed through unchanged to aggregateDimValue
   * @param theAggregators passed through unchanged to aggregateDimValue
   * @throws UnsupportedOperationException if the reported cardinality is negative
   */
private static void scanAndAggregateDefault(final PooledTopNParams params, final int[] positions, final BufferAggregator[] theAggregators) {
    if (params.getCardinality() < 0) {
        throw new UnsupportedOperationException("Cannot operate on a dimension with unknown cardinality");
    }
    final ByteBuffer resultsBuf = params.getResultsBuf();
    final int numBytesPerRecord = params.getNumBytesPerRecord();
    final int[] aggregatorSizes = params.getAggregatorSizes();
    final Cursor cursor = params.getCursor();
    final DimensionSelector dimSelector = params.getDimSelector();
    // Running sum of the aggregator sizes: aggregatorOffsets[j] is where aggregator j starts within one record.
    final int[] aggregatorOffsets = new int[aggregatorSizes.length];
    for (int j = 0, offset = 0; j < aggregatorSizes.length; ++j) {
        aggregatorOffsets[j] = offset;
        offset += aggregatorSizes[j];
    }
    final int aggSize = theAggregators.length;
    final int aggExtra = aggSize % AGG_UNROLL_COUNT;
    // Threaded through every aggregateDimValue call, which returns the updated value.
    int currentPosition = 0;
    while (!cursor.isDoneOrInterrupted()) {
        final IndexedInts dimValues = dimSelector.getRow();
        final int dimSize = dimValues.size();
        final int dimExtra = dimSize % AGG_UNROLL_COUNT;
        // Handle the first dimExtra values of the row. The cases fall through ON PURPOSE (no break), so
        // entering at case k processes indices k-1 down to 0.
        switch(dimExtra) {
            case 7:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(6), currentPosition);
            case 6:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(5), currentPosition);
            case 5:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(4), currentPosition);
            case 4:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(3), currentPosition);
            case 3:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(2), currentPosition);
            case 2:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(1), currentPosition);
            case 1:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(0), currentPosition);
        }
        // Remaining values are processed in full blocks. The body is unrolled by hand eight times (i .. i + 7),
        // so it is only correct while AGG_UNROLL_COUNT is 8.
        for (int i = dimExtra; i < dimSize; i += AGG_UNROLL_COUNT) {
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 1), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 2), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 3), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 4), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 5), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 6), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 7), currentPosition);
        }
        cursor.advanceUninterruptibly();
    }
}
Also used : DimensionSelector(io.druid.segment.DimensionSelector) IndexedInts(io.druid.segment.data.IndexedInts) Cursor(io.druid.segment.Cursor) ByteBuffer(java.nio.ByteBuffer)

Example 9 with DimensionSelector

use of io.druid.segment.DimensionSelector in project druid by druid-io.

the class PooledTopNAlgorithm method makeInitParams.

/**
 * Builds the per-cursor parameters for the pooled top-N algorithm: takes a results buffer from the
 * pool, prepares the positions-array provider, and works out how many dimension values fit into the
 * buffer in one pass.
 *
 * <p>The dimension selector is validated <em>before</em> a buffer is taken from the pool, so a
 * selector that is rejected (negative cardinality) can never leave a pooled buffer held without
 * an owner.
 *
 * @param selectorPlus wrapper whose selector is cast to {@link DimensionSelector}
 * @param cursor       cursor the resulting params are bound to
 * @return fully populated params, including the holder of the pooled results buffer
 * @throws UnsupportedOperationException if the selector reports a negative value cardinality
 */
@Override
public PooledTopNParams makeInitParams(ColumnSelectorPlus selectorPlus, Cursor cursor) {
    // Validate first: throwing after bufferPool.take() would leak the pooled buffer, because nothing
    // on that path closes the holder.
    final DimensionSelector dimSelector = (DimensionSelector) selectorPlus.getSelector();
    final int cardinality = dimSelector.getValueCardinality();
    if (cardinality < 0) {
        throw new UnsupportedOperationException("Cannot operate on a dimension with no dictionary");
    }
    final ResourceHolder<ByteBuffer> resultsBufHolder = bufferPool.take();
    final ByteBuffer resultsBuf = resultsBufHolder.get();
    resultsBuf.clear();
    final TopNMetricSpecBuilder<int[]> arrayProvider = new BaseArrayProvider<int[]>(dimSelector, query, capabilities) {

        private final int[] positions = new int[cardinality];

        @Override
        public int[] build() {
            // Ids outside [lhs, rhs) are marked as skipped; ids inside the range start uninitialised.
            Pair<Integer, Integer> startEnd = computeStartEnd(cardinality);
            Arrays.fill(positions, 0, startEnd.lhs, SKIP_POSITION_VALUE);
            Arrays.fill(positions, startEnd.lhs, startEnd.rhs, INIT_POSITION_VALUE);
            Arrays.fill(positions, startEnd.rhs, positions.length, SKIP_POSITION_VALUE);
            return positions;
        }
    };
    // The buffer was just cleared, so remaining() is everything it can hold.
    final int numBytesToWorkWith = resultsBuf.remaining();
    final int numAggregators = query.getAggregatorSpecs().size();
    final int[] aggregatorSizes = new int[numAggregators];
    int numBytesPerRecord = 0;
    for (int i = 0; i < numAggregators; ++i) {
        aggregatorSizes[i] = query.getAggregatorSpecs().get(i).getMaxIntermediateSize();
        numBytesPerRecord += aggregatorSizes[i];
    }
    // With zero-sized records (e.g. no aggregators) every value fits in a single pass; this also avoids dividing by zero.
    final int numValuesPerPass = numBytesPerRecord > 0 ? numBytesToWorkWith / numBytesPerRecord : cardinality;
    return PooledTopNParams.builder().withSelectorPlus(selectorPlus).withCursor(cursor).withResultsBufHolder(resultsBufHolder).withResultsBuf(resultsBuf).withArrayProvider(arrayProvider).withNumBytesPerRecord(numBytesPerRecord).withNumValuesPerPass(numValuesPerPass).withAggregatorSizes(aggregatorSizes).build();
}
Also used : DimensionSelector(io.druid.segment.DimensionSelector) ByteBuffer(java.nio.ByteBuffer)

Example 10 with DimensionSelector

use of io.druid.segment.DimensionSelector in project druid by druid-io.

the class TimeExtractionTopNAlgorithm method scanAndAggregate.

/**
 * Walks the cursor to its end and, for each row, aggregates into the aggregator set registered
 * under the row's dimension value.
 *
 * <p>Only the first dimension id of each row is used as the key. When {@code aggregatesStore} has
 * no entry for that key, a fresh aggregator array is created from the query's aggregator specs and
 * stored before aggregating.
 *
 * @param params          supplies the cursor and the dimension selector
 * @param dimValSelector  not read by this implementation
 * @param aggregatesStore map from dimension value to its aggregators; filled in as new values appear
 * @param numProcessed    not read by this implementation
 * @throws UnsupportedOperationException if the reported cardinality is negative
 */
@Override
protected void scanAndAggregate(TopNParams params, int[] dimValSelector, Map<String, Aggregator[]> aggregatesStore, int numProcessed) {
    if (params.getCardinality() < 0) {
        throw new UnsupportedOperationException("Cannot operate on a dimension with unknown cardinality");
    }
    final Cursor rows = params.getCursor();
    final DimensionSelector selector = params.getDimSelector();
    for (; !rows.isDone(); rows.advance()) {
        final int firstId = selector.getRow().get(0);
        final String bucket = selector.lookupName(firstId);
        Aggregator[] bucketAggregators = aggregatesStore.get(bucket);
        if (bucketAggregators == null) {
            // First row for this value: create its aggregators and remember them.
            bucketAggregators = makeAggregators(rows, query.getAggregatorSpecs());
            aggregatesStore.put(bucket, bucketAggregators);
        }
        for (int k = 0; k < bucketAggregators.length; k++) {
            bucketAggregators[k].aggregate();
        }
    }
}
Also used : DimensionSelector(io.druid.segment.DimensionSelector) Aggregator(io.druid.query.aggregation.Aggregator) Cursor(io.druid.segment.Cursor)

Aggregations

DimensionSelector (io.druid.segment.DimensionSelector)26 IndexedInts (io.druid.segment.data.IndexedInts)14 DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec)12 Test (org.junit.Test)11 Cursor (io.druid.segment.Cursor)9 ValueMatcher (io.druid.query.filter.ValueMatcher)5 ObjectColumnSelector (io.druid.segment.ObjectColumnSelector)5 FloatColumnSelector (io.druid.segment.FloatColumnSelector)4 LongColumnSelector (io.druid.segment.LongColumnSelector)4 Interval (org.joda.time.Interval)4 DimensionSpec (io.druid.query.dimension.DimensionSpec)3 ExtractionFn (io.druid.query.extraction.ExtractionFn)3 RuntimeShapeInspector (io.druid.query.monomorphicprocessing.RuntimeShapeInspector)3 TestFloatColumnSelector (io.druid.segment.TestFloatColumnSelector)3 TestLongColumnSelector (io.druid.segment.TestLongColumnSelector)3 VirtualColumns (io.druid.segment.VirtualColumns)3 ByteBuffer (java.nio.ByteBuffer)3 ImmutableList (com.google.common.collect.ImmutableList)2 MapBasedInputRow (io.druid.data.input.MapBasedInputRow)2 ExtractionDimensionSpec (io.druid.query.dimension.ExtractionDimensionSpec)2