Search in sources :

Example 11 with IndexedInts

use of io.druid.segment.data.IndexedInts in project druid by druid-io.

the class StringDimensionHandler method getEncodedKeyComponentFromColumn.

/**
 * Reads the dictionary ids stored for one row of a dictionary-encoded column.
 *
 * @param column  the column to read; it is cast to {@link DictionaryEncodedColumn}
 * @param currRow index of the row to read
 * @return a freshly allocated array holding every id of a multi-value row, or a
 *         one-element array holding the single id otherwise
 */
@Override
public int[] getEncodedKeyComponentFromColumn(Closeable column, int currRow) {
    final DictionaryEncodedColumn encodedColumn = (DictionaryEncodedColumn) column;

    // Single-value columns always produce exactly one id.
    if (!encodedColumn.hasMultipleValues()) {
        return new int[] { encodedColumn.getSingleValueRow(currRow) };
    }

    // Multi-value columns: copy the row's ids into a plain int[].
    final IndexedInts rowIds = encodedColumn.getMultiValueRow(currRow);
    final int[] copied = new int[rowIds.size()];
    int idx = 0;
    while (idx < copied.length) {
        copied[idx] = rowIds.get(idx);
        idx++;
    }
    return copied;
}
Also used : IndexedInts(io.druid.segment.data.IndexedInts) DictionaryEncodedColumn(io.druid.segment.column.DictionaryEncodedColumn)

Example 12 with IndexedInts

use of io.druid.segment.data.IndexedInts in project druid by druid-io.

the class PooledTopNAlgorithm method scanAndAggregateDefault.

/**
   * Use aggressive loop unrolling to aggregate the data
   *
   * How this works: The aggregates are evaluated AGG_UNROLL_COUNT at a time. This was chosen to be 8 rather arbitrarily.
   * The offsets into the output buffer are precalculated and stored in aggregatorOffsets
   *
   * For queries whose aggregate count is less than AGG_UNROLL_COUNT, the aggregates are evaluated in a switch statement.
   * See http://en.wikipedia.org/wiki/Duff's_device for more information on this kind of approach
   *
   * This allows out of order execution of the code. In local tests, the JVM inlines all the way to this function.
   *
   * If there are more than AGG_UNROLL_COUNT aggregates, then the remainder is calculated with the switch, and the
   * blocks of AGG_UNROLL_COUNT are calculated in a partially unrolled for-loop.
   *
   * Putting the switch first allows for optimization for the common case (less than AGG_UNROLL_COUNT aggs) but
   * still optimizes the high quantity of aggregate queries which benefit greatly from any speed improvements
   * (they simply take longer to start with).
   *
   * NOTE(review): the unrolling visible in this method is over the dimension values of each row; the per-aggregator
   * unrolling described above presumably lives in aggregateDimValue (which receives aggSize and aggExtra) -- confirm.
   *
   * @param params         supplies the cardinality, results buffer, record size, aggregator sizes, cursor and
   *                       dimension selector used by the scan
   * @param positions      passed through unchanged to aggregateDimValue
   * @param theAggregators the aggregators; their count determines aggSize and aggExtra
   * @throws UnsupportedOperationException if params.getCardinality() is negative
   */
private static void scanAndAggregateDefault(final PooledTopNParams params, final int[] positions, final BufferAggregator[] theAggregators) {
    // A negative cardinality is treated as "unknown" and is not supported by this method.
    if (params.getCardinality() < 0) {
        throw new UnsupportedOperationException("Cannot operate on a dimension with unknown cardinality");
    }
    final ByteBuffer resultsBuf = params.getResultsBuf();
    final int numBytesPerRecord = params.getNumBytesPerRecord();
    final int[] aggregatorSizes = params.getAggregatorSizes();
    final Cursor cursor = params.getCursor();
    final DimensionSelector dimSelector = params.getDimSelector();
    // aggregatorOffsets[j] is the running sum of the sizes of aggregators 0..j-1.
    final int[] aggregatorOffsets = new int[aggregatorSizes.length];
    for (int j = 0, offset = 0; j < aggregatorSizes.length; ++j) {
        aggregatorOffsets[j] = offset;
        offset += aggregatorSizes[j];
    }
    final int aggSize = theAggregators.length;
    // Number of aggregators left over after grouping them in blocks of AGG_UNROLL_COUNT.
    final int aggExtra = aggSize % AGG_UNROLL_COUNT;
    // Threaded through every aggregateDimValue call, which returns the updated value.
    int currentPosition = 0;
    while (!cursor.isDoneOrInterrupted()) {
        final IndexedInts dimValues = dimSelector.getRow();
        final int dimSize = dimValues.size();
        // Number of values in this row left over after grouping them in blocks of AGG_UNROLL_COUNT.
        final int dimExtra = dimSize % AGG_UNROLL_COUNT;
        // Intentional fall-through: entering at case dimExtra processes indices dimExtra-1 down to 0,
        // i.e. exactly the first dimExtra values of the row. dimExtra == 0 matches no case.
        switch(dimExtra) {
            case 7:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(6), currentPosition);
            case 6:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(5), currentPosition);
            case 5:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(4), currentPosition);
            case 4:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(3), currentPosition);
            case 3:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(2), currentPosition);
            case 2:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(1), currentPosition);
            case 1:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(0), currentPosition);
        }
        // The remaining dimSize - dimExtra values are a multiple of AGG_UNROLL_COUNT and are handled in full blocks.
        // NOTE(review): the body hard-codes eight calls (i .. i + 7), so it relies on AGG_UNROLL_COUNT being 8.
        for (int i = dimExtra; i < dimSize; i += AGG_UNROLL_COUNT) {
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 1), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 2), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 3), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 4), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 5), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 6), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 7), currentPosition);
        }
        // Move to the next row; the loop condition above is what checks for completion or interruption.
        cursor.advanceUninterruptibly();
    }
}
Also used : DimensionSelector(io.druid.segment.DimensionSelector) IndexedInts(io.druid.segment.data.IndexedInts) Cursor(io.druid.segment.Cursor) ByteBuffer(java.nio.ByteBuffer)

Example 13 with IndexedInts

use of io.druid.segment.data.IndexedInts in project druid by druid-io.

the class StringTopNColumnSelectorStrategy method dimExtractionScanAndAggregateWithCardinalityKnown.

/**
 * Walks the cursor to completion and, for every dimension value of every row, runs the
 * aggregators associated with that value.
 *
 * Aggregators are looked up first by dictionary id in {@code rowSelector}; on a miss they are
 * looked up by the value's name in {@code aggregatesStore}, created via
 * {@code BaseTopNAlgorithm.makeAggregators} if still absent, and then remembered in both places.
 *
 * @param query           supplies the aggregator specs used when new aggregators are created
 * @param cursor          the cursor to iterate; it is advanced until {@code isDone()} is true
 * @param selector        yields the current row's dictionary ids and resolves ids to names
 * @param rowSelector     per-dictionary-id slot table of aggregators; filled in as ids are seen
 * @param aggregatesStore name-keyed store of aggregators; filled in as new names are seen
 */
private void dimExtractionScanAndAggregateWithCardinalityKnown(TopNQuery query, Cursor cursor, DimensionSelector selector, Aggregator[][] rowSelector, Map<String, Aggregator[]> aggregatesStore) {
    for (; !cursor.isDone(); cursor.advance()) {
        final IndexedInts currentRow = selector.getRow();
        int pos = 0;
        while (pos < currentRow.size()) {
            final int dictId = currentRow.get(pos);
            pos++;

            // Populate the id-indexed slot on first sight of this dictionary id.
            if (rowSelector[dictId] == null) {
                final String valueName = selector.lookupName(dictId);
                Aggregator[] byName = aggregatesStore.get(valueName);
                if (byName == null) {
                    byName = BaseTopNAlgorithm.makeAggregators(cursor, query.getAggregatorSpecs());
                    aggregatesStore.put(valueName, byName);
                }
                rowSelector[dictId] = byName;
            }

            final Aggregator[] rowAggregators = rowSelector[dictId];
            for (Aggregator each : rowAggregators) {
                each.aggregate();
            }
        }
    }
}
Also used : IndexedInts(io.druid.segment.data.IndexedInts) Aggregator(io.druid.query.aggregation.Aggregator)

Example 14 with IndexedInts

use of io.druid.segment.data.IndexedInts in project druid by druid-io.

the class StringTopNColumnSelectorStrategy method dimExtractionScanAndAggregateWithCardinalityUnknown.

/**
 * Walks the cursor to completion and, for every dimension value of every row, runs the
 * aggregators stored under that value's name.
 *
 * Each dictionary id is resolved to its name through the selector; aggregators missing from
 * {@code aggregatesStore} are created via {@code BaseTopNAlgorithm.makeAggregators} and stored.
 *
 * @param query           supplies the aggregator specs used when new aggregators are created
 * @param cursor          the cursor to iterate; it is advanced until {@code isDone()} is true
 * @param selector        yields the current row's dictionary ids and resolves ids to names
 * @param aggregatesStore name-keyed store of aggregators; filled in as new names are seen
 */
private void dimExtractionScanAndAggregateWithCardinalityUnknown(TopNQuery query, Cursor cursor, DimensionSelector selector, Map<String, Aggregator[]> aggregatesStore) {
    for (; !cursor.isDone(); cursor.advance()) {
        final IndexedInts currentRow = selector.getRow();
        int pos = 0;
        while (pos < currentRow.size()) {
            final String valueName = selector.lookupName(currentRow.get(pos));
            pos++;

            Aggregator[] rowAggregators = aggregatesStore.get(valueName);
            if (rowAggregators == null) {
                // First time this name is seen: create its aggregators and remember them.
                rowAggregators = BaseTopNAlgorithm.makeAggregators(cursor, query.getAggregatorSpecs());
                aggregatesStore.put(valueName, rowAggregators);
            }

            for (int a = 0; a < rowAggregators.length; a++) {
                rowAggregators[a].aggregate();
            }
        }
    }
}
Also used : IndexedInts(io.druid.segment.data.IndexedInts) Aggregator(io.druid.query.aggregation.Aggregator)

Example 15 with IndexedInts

use of io.druid.segment.data.IndexedInts in project druid by druid-io.

the class CompressedVSizeIndexedSupplier method fromIterable.

/**
 * Builds a {@code CompressedVSizeIndexedSupplier} from a sequence of rows.
 *
 * All rows are flattened into a single list of values, and a parallel list records the
 * starting offset of each row followed by one final entry holding the total value count.
 * Both lists are then wrapped via {@code CompressedVSizeIntsIndexedSupplier.fromList}.
 *
 * @param objectsIterable the rows to store; iterated exactly once
 * @param maxValue        passed as the maximum value when building the values supplier
 * @param byteOrder       byte order passed to both underlying suppliers
 * @param compression     compression strategy passed to both underlying suppliers
 * @return a supplier composed of the offsets (header) supplier and the values supplier
 */
public static CompressedVSizeIndexedSupplier fromIterable(Iterable<IndexedInts> objectsIterable, int maxValue, final ByteOrder byteOrder, CompressedObjectStrategy.CompressionStrategy compression) {
    final List<Integer> rowStarts = new ArrayList<>();
    final List<Integer> flattened = new ArrayList<>();
    int runningTotal = 0;

    for (IndexedInts row : objectsIterable) {
        rowStarts.add(runningTotal);
        int idx = 0;
        while (idx < row.size()) {
            flattened.add(row.get(idx));
            idx++;
        }
        runningTotal += row.size();
    }
    // Trailing entry: the end offset of the last row, i.e. the total number of values.
    rowStarts.add(runningTotal);

    final int largestOffset = runningTotal;
    final CompressedVSizeIntsIndexedSupplier offsetsPart = CompressedVSizeIntsIndexedSupplier.fromList(
        rowStarts,
        largestOffset,
        CompressedVSizeIntsIndexedSupplier.maxIntsInBufferForValue(largestOffset),
        byteOrder,
        compression
    );
    final CompressedVSizeIntsIndexedSupplier valuesPart = CompressedVSizeIntsIndexedSupplier.fromList(
        flattened,
        maxValue,
        CompressedVSizeIntsIndexedSupplier.maxIntsInBufferForValue(maxValue),
        byteOrder,
        compression
    );
    return new CompressedVSizeIndexedSupplier(offsetsPart, valuesPart);
}
Also used : IndexedInts(io.druid.segment.data.IndexedInts) ArrayList(java.util.ArrayList) CompressedVSizeIntsIndexedSupplier(io.druid.segment.data.CompressedVSizeIntsIndexedSupplier)

Aggregations

IndexedInts (io.druid.segment.data.IndexedInts)41 DimensionSelector (io.druid.segment.DimensionSelector)14 ValueMatcher (io.druid.query.filter.ValueMatcher)9 ArrayBasedIndexedInts (io.druid.segment.data.ArrayBasedIndexedInts)8 Test (org.junit.Test)8 DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec)6 Cursor (io.druid.segment.Cursor)6 BooleanValueMatcher (io.druid.segment.filter.BooleanValueMatcher)6 ArrayList (java.util.ArrayList)5 ByteBuffer (java.nio.ByteBuffer)3 BitSet (java.util.BitSet)3 Function (com.google.common.base.Function)2 ImmutableList (com.google.common.collect.ImmutableList)2 IAE (io.druid.java.util.common.IAE)2 Aggregator (io.druid.query.aggregation.Aggregator)2 DimFilter (io.druid.query.filter.DimFilter)2 ColumnSelectorFactory (io.druid.segment.ColumnSelectorFactory)2 IdLookup (io.druid.segment.IdLookup)2 CompressedVSizeIntsIndexedSupplier (io.druid.segment.data.CompressedVSizeIntsIndexedSupplier)2 IncrementalIndexTest (io.druid.segment.data.IncrementalIndexTest)2