
Example 1 with Cursor

Use of io.druid.segment.Cursor in project druid by druid-io.

From the class GroupByQueryEngineV2, the method process:

public static Sequence<Row> process(final GroupByQuery query, final StorageAdapter storageAdapter, final StupidPool<ByteBuffer> intermediateResultsBufferPool, final GroupByQueryConfig config) {
    if (storageAdapter == null) {
        throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
    }
    final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
    if (intervals.size() != 1) {
        throw new IAE("Should only have one interval, got[%s]", intervals);
    }
    // makeCursors yields one cursor per granularity bucket within the single query interval.
    final Sequence<Cursor> cursors = storageAdapter.makeCursors(Filters.toFilter(query.getDimFilter()), intervals.get(0), query.getVirtualColumns(), query.getGranularity(), false);
    // Take an intermediate results buffer from the pool; it is handed back when the
    // baggage Closeable at the bottom of this method is closed.
    final ResourceHolder<ByteBuffer> bufferHolder = intermediateResultsBufferPool.take();
    // GroupByStrategyV2 may pass a fixed "fudge" timestamp through the query context;
    // when set, it is used as the timestamp of every result row.
    final String fudgeTimestampString = Strings.emptyToNull(query.getContextValue(GroupByStrategyV2.CTX_KEY_FUDGE_TIMESTAMP, ""));
    final DateTime fudgeTimestamp = fudgeTimestampString == null ? null : new DateTime(Long.parseLong(fudgeTimestampString));
    // One sub-sequence of rows per cursor, concatenated; withBaggage ties the pooled
    // buffer's release to the close of the overall sequence.
    return Sequences.concat(Sequences.withBaggage(Sequences.map(cursors, new Function<Cursor, Sequence<Row>>() {

        @Override
        public Sequence<Row> apply(final Cursor cursor) {
            return new BaseSequence<>(new BaseSequence.IteratorMaker<Row, GroupByEngineIterator>() {

                @Override
                public GroupByEngineIterator make() {
                    ColumnSelectorPlus<GroupByColumnSelectorStrategy>[] selectorPlus = DimensionHandlerUtils.createColumnSelectorPluses(STRATEGY_FACTORY, query.getDimensions(), cursor);
                    return new GroupByEngineIterator(query, config, cursor, bufferHolder.get(), fudgeTimestamp, createGroupBySelectorPlus(selectorPlus));
                }

                @Override
                public void cleanup(GroupByEngineIterator iterFromMake) {
                    iterFromMake.close();
                }
            });
        }
    }), new Closeable() {

        @Override
        public void close() throws IOException {
            CloseQuietly.close(bufferHolder);
        }
    }));
}
Also used: GroupByColumnSelectorPlus(io.druid.query.groupby.epinephelinae.column.GroupByColumnSelectorPlus), ColumnSelectorPlus(io.druid.query.ColumnSelectorPlus), Closeable(java.io.Closeable), BaseSequence(io.druid.java.util.common.guava.BaseSequence), Sequence(io.druid.java.util.common.guava.Sequence), IAE(io.druid.java.util.common.IAE), Cursor(io.druid.segment.Cursor), ByteBuffer(java.nio.ByteBuffer), DateTime(org.joda.time.DateTime), ISE(io.druid.java.util.common.ISE), Interval(org.joda.time.Interval)
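
What makes this pattern safe is the pairing of make() with cleanup(): BaseSequence guarantees that cleanup(iterFromMake) runs once iteration ends, so the per-cursor GroupByEngineIterator is always closed, while withBaggage ties the pooled buffer to the outer sequence. Below is a minimal, self-contained sketch of that make/cleanup contract; the types (IteratorMaker, MakeCleanupDemo) are hypothetical simplifications, not Druid's API.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class MakeCleanupDemo {

    // Simplified analogue of BaseSequence.IteratorMaker: whoever calls make()
    // must call cleanup() when iteration ends, no matter how it ends.
    interface IteratorMaker<T, I extends Iterator<T>> {
        I make();
        void cleanup(I iterFromMake);
    }

    static <T, I extends Iterator<T>> List<T> drain(IteratorMaker<T, I> maker) {
        final I iter = maker.make();
        try {
            final List<T> out = new ArrayList<>();
            while (iter.hasNext()) {
                out.add(iter.next());
            }
            return out;
        } finally {
            // Always runs, mirroring iterFromMake.close() in the Druid code.
            maker.cleanup(iter);
        }
    }

    public static void main(String[] args) {
        List<Integer> drained = drain(new IteratorMaker<Integer, Iterator<Integer>>() {
            @Override
            public Iterator<Integer> make() {
                return Arrays.asList(1, 2, 3).iterator();
            }

            @Override
            public void cleanup(Iterator<Integer> iterFromMake) {
                System.out.println("cleanup called");
            }
        });
        System.out.println(drained);  // prints [1, 2, 3] after "cleanup called"
    }
}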

Example 2 with Cursor

Use of io.druid.segment.Cursor in project druid by druid-io.

From the class PooledTopNAlgorithm, the method scanAndAggregateDefault:

/**
   * Use aggressive loop unrolling to aggregate the data.
   *
   * How this works: the aggregates are evaluated AGG_UNROLL_COUNT at a time. This was chosen to be 8 somewhat
   * arbitrarily. The offsets into the output buffer are precalculated and stored in aggregatorOffsets.
   *
   * For queries whose aggregate count is less than AGG_UNROLL_COUNT, the aggregates are evaluated in a switch
   * statement. See http://en.wikipedia.org/wiki/Duff's_device for more information on this kind of approach.
   *
   * This allows out-of-order execution of the code. In local tests, the JVM inlines all the way to this function.
   *
   * If there are more than AGG_UNROLL_COUNT aggregates, the remainder is calculated with the switch, and the
   * blocks of AGG_UNROLL_COUNT are calculated in a partially unrolled for-loop.
   *
   * Putting the switch first allows for optimization of the common case (fewer than AGG_UNROLL_COUNT aggs) while
   * still speeding up queries with many aggregates, which benefit greatly from any improvement
   * (they simply take longer to start with).
   */
private static void scanAndAggregateDefault(final PooledTopNParams params, final int[] positions, final BufferAggregator[] theAggregators) {
    if (params.getCardinality() < 0) {
        throw new UnsupportedOperationException("Cannot operate on a dimension with unknown cardinality");
    }
    final ByteBuffer resultsBuf = params.getResultsBuf();
    final int numBytesPerRecord = params.getNumBytesPerRecord();
    final int[] aggregatorSizes = params.getAggregatorSizes();
    final Cursor cursor = params.getCursor();
    final DimensionSelector dimSelector = params.getDimSelector();
    final int[] aggregatorOffsets = new int[aggregatorSizes.length];
    for (int j = 0, offset = 0; j < aggregatorSizes.length; ++j) {
        aggregatorOffsets[j] = offset;
        offset += aggregatorSizes[j];
    }
    final int aggSize = theAggregators.length;
    final int aggExtra = aggSize % AGG_UNROLL_COUNT;
    int currentPosition = 0;
    // Scan every row the cursor exposes; a multi-value dimension contributes several
    // values per row, each of which must be aggregated.
    while (!cursor.isDoneOrInterrupted()) {
        final IndexedInts dimValues = dimSelector.getRow();
        final int dimSize = dimValues.size();
        final int dimExtra = dimSize % AGG_UNROLL_COUNT;
        // Intentional fall-through: the switch consumes the leading dimSize % 8 values,
        // so the unrolled loop below only ever sees exact blocks of 8.
        switch (dimExtra) {
            case 7:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(6), currentPosition);
            case 6:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(5), currentPosition);
            case 5:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(4), currentPosition);
            case 4:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(3), currentPosition);
            case 3:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(2), currentPosition);
            case 2:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(1), currentPosition);
            case 1:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(0), currentPosition);
        }
        for (int i = dimExtra; i < dimSize; i += AGG_UNROLL_COUNT) {
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 1), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 2), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 3), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 4), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 5), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 6), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 7), currentPosition);
        }
        cursor.advanceUninterruptibly();
    }
}
Also used: DimensionSelector(io.druid.segment.DimensionSelector), IndexedInts(io.druid.segment.data.IndexedInts), Cursor(io.druid.segment.Cursor), ByteBuffer(java.nio.ByteBuffer)
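
To see the pattern from the javadoc in isolation, here is a minimal, self-contained sketch (a hypothetical class, no Druid types) that sums an array the same way: a fall-through switch consumes the size % 8 remainder, then a loop processes exact blocks of 8.

public class UnrolledSumDemo {

    private static final int UNROLL_COUNT = 8;

    static long sum(long[] values) {
        final int size = values.length;
        final int extra = size % UNROLL_COUNT;
        long total = 0;
        // Deliberate fall-through, as in scanAndAggregateDefault: "case 3" adds
        // values[2], then falls into "case 2" (values[1]) and "case 1" (values[0]).
        switch (extra) {
            case 7: total += values[6];
            case 6: total += values[5];
            case 5: total += values[4];
            case 4: total += values[3];
            case 3: total += values[2];
            case 2: total += values[1];
            case 1: total += values[0];
        }
        // The remainder is consumed; what is left divides evenly into blocks of 8,
        // so the loop body can be fully unrolled.
        for (int i = extra; i < size; i += UNROLL_COUNT) {
            total += values[i];
            total += values[i + 1];
            total += values[i + 2];
            total += values[i + 3];
            total += values[i + 4];
            total += values[i + 5];
            total += values[i + 6];
            total += values[i + 7];
        }
        return total;
    }

    public static void main(String[] args) {
        final long[] values = new long[19];
        for (int i = 0; i < values.length; i++) {
            values[i] = i + 1;
        }
        System.out.println(sum(values));  // 190, i.e. 19 * 20 / 2
    }
}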

Example 3 with Cursor

Use of io.druid.segment.Cursor in project druid by druid-io.

From the class PooledTopNAlgorithm, the method scanAndAggregate:

@Override
protected void scanAndAggregate(final PooledTopNParams params, final int[] positions, final BufferAggregator[] theAggregators, final int numProcessed) {
    final Cursor cursor = params.getCursor();
    // Route to a hand-specialized scan when the matching specialization is enabled;
    // otherwise fall back to the general unrolled implementation.
    if (specializeGeneric1AggPooledTopN && theAggregators.length == 1) {
        scanAndAggregateGeneric1Agg(params, positions, theAggregators[0], cursor);
    } else if (specializeGeneric2AggPooledTopN && theAggregators.length == 2) {
        scanAndAggregateGeneric2Agg(params, positions, theAggregators, cursor);
    } else {
        scanAndAggregateDefault(params, positions, theAggregators);
    }
    BaseQuery.checkInterrupted();
}
Also used: Cursor(io.druid.segment.Cursor)
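
A short illustrative sketch of this dispatch shape follows; the types are hypothetical, and the JIT rationale is the usual reading of such specializations rather than something stated in this excerpt. Routing a single-aggregator scan to its own method keeps the hot call site monomorphic, the easiest case for the JVM to inline and devirtualize.

public class SpecializedDispatchDemo {

    // Hypothetical stand-in for BufferAggregator.
    interface Aggregator {
        void aggregate(int row);
    }

    // Hot loop with a single aggregator: one call site, typically one receiver
    // type at runtime.
    static void scanOneAgg(int rows, Aggregator agg) {
        for (int row = 0; row < rows; row++) {
            agg.aggregate(row);
        }
    }

    // General fallback: the inner call site may see many receiver types.
    static void scanDefault(int rows, Aggregator[] aggs) {
        for (int row = 0; row < rows; row++) {
            for (Aggregator agg : aggs) {
                agg.aggregate(row);
            }
        }
    }

    static void scan(int rows, Aggregator[] aggs) {
        if (aggs.length == 1) {
            scanOneAgg(rows, aggs[0]);  // specialized path
        } else {
            scanDefault(rows, aggs);    // general path
        }
    }

    public static void main(String[] args) {
        final long[] count = new long[1];
        scan(1000, new Aggregator[]{ row -> count[0]++ });
        System.out.println(count[0]);  // 1000
    }
}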

Example 4 with Cursor

Use of io.druid.segment.Cursor in project druid by druid-io.

From the class TimeExtractionTopNAlgorithm, the method scanAndAggregate:

@Override
protected void scanAndAggregate(TopNParams params, int[] dimValSelector, Map<String, Aggregator[]> aggregatesStore, int numProcessed) {
    if (params.getCardinality() < 0) {
        throw new UnsupportedOperationException("Cannot operate on a dimension with unknown cardinality");
    }
    final Cursor cursor = params.getCursor();
    final DimensionSelector dimSelector = params.getDimSelector();
    while (!cursor.isDone()) {
        // Decode the row's dimension value to its String key: the output of the
        // time-extraction function identifies the aggregation bucket.
        final String key = dimSelector.lookupName(dimSelector.getRow().get(0));
        Aggregator[] theAggregators = aggregatesStore.get(key);
        // Lazily create the aggregators the first time each key is seen.
        if (theAggregators == null) {
            theAggregators = makeAggregators(cursor, query.getAggregatorSpecs());
            aggregatesStore.put(key, theAggregators);
        }
        for (Aggregator aggregator : theAggregators) {
            aggregator.aggregate();
        }
        cursor.advance();
    }
}
Also used: DimensionSelector(io.druid.segment.DimensionSelector), Aggregator(io.druid.query.aggregation.Aggregator), Cursor(io.druid.segment.Cursor)
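
The loop follows a common scan-and-aggregate shape: decode the row's key, create that key's aggregators on first sight, fold the row in, advance. Below is a self-contained sketch of the same shape over plain arrays; the Aggregator interface here is a hypothetical stand-in, not Druid's.

import java.util.HashMap;
import java.util.Map;

public class PerKeyAggregationDemo {

    // Hypothetical stand-in for io.druid.query.aggregation.Aggregator.
    interface Aggregator {
        void aggregate(long value);
        long get();
    }

    static Map<String, Aggregator> aggregate(String[] keys, long[] values) {
        final Map<String, Aggregator> store = new HashMap<>();
        // Array iteration stands in for the cursor's isDone()/advance() loop.
        for (int row = 0; row < keys.length; row++) {
            Aggregator agg = store.get(keys[row]);
            if (agg == null) {
                // First time this key is seen: create its aggregator lazily.
                agg = new Aggregator() {
                    private long sum;
                    @Override public void aggregate(long value) { sum += value; }
                    @Override public long get() { return sum; }
                };
                store.put(keys[row], agg);
            }
            agg.aggregate(values[row]);
        }
        return store;
    }

    public static void main(String[] args) {
        Map<String, Aggregator> out = aggregate(
                new String[]{"a", "b", "a"}, new long[]{1, 2, 3});
        System.out.println(out.get("a").get());  // 4
        System.out.println(out.get("b").get());  // 2
    }
}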

Example 5 with Cursor

Use of io.druid.segment.Cursor in project druid by druid-io.

From the class TopNQueryEngine, the method query:

public Sequence<Result<TopNResultValue>> query(final TopNQuery query, final StorageAdapter adapter) {
    if (adapter == null) {
        throw new SegmentMissingException("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
    }
    final List<Interval> queryIntervals = query.getQuerySegmentSpec().getIntervals();
    final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimensionsFilter()));
    final Granularity granularity = query.getGranularity();
    final Function<Cursor, Result<TopNResultValue>> mapFn = getMapFn(query, adapter);
    Preconditions.checkArgument(queryIntervals.size() == 1, "Can only handle a single interval, got[%s]", queryIntervals);
    // Map each granularity-bucket cursor to a TopN result, then drop the null results
    // produced by empty buckets.
    return Sequences.filter(Sequences.map(adapter.makeCursors(filter, queryIntervals.get(0), query.getVirtualColumns(), granularity, query.isDescending()), new Function<Cursor, Result<TopNResultValue>>() {

        @Override
        public Result<TopNResultValue> apply(Cursor input) {
            log.debug("Running over interval[%s], cursor[%s]", adapter.getInterval(), input.getTime());
            return mapFn.apply(input);
        }
    }), Predicates.<Result<TopNResultValue>>notNull());
}
Also used: Function(com.google.common.base.Function), Filter(io.druid.query.filter.Filter), SegmentMissingException(io.druid.segment.SegmentMissingException), Granularity(io.druid.java.util.common.granularity.Granularity), Cursor(io.druid.segment.Cursor), Interval(org.joda.time.Interval), Result(io.druid.query.Result)
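
Stripped of Druid's Sequence machinery, the shape here is map-then-filter: each cursor becomes at most one result, and nulls from empty cursors are dropped before the caller sees them. A rough java.util.stream analogue follows (illustrative only; Druid's Sequences are lazy and resource-aware in ways plain streams are not).

import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class MapFilterDemo {
    public static void main(String[] args) {
        // Stand-ins for cursors; suppose "cursor-2" covers an empty bucket.
        List<String> results = Stream.of("cursor-1", "cursor-2", "cursor-3")
                .map(cursor -> cursor.endsWith("2") ? null : "result for " + cursor)
                .filter(Objects::nonNull)  // mirrors Predicates.notNull() above
                .collect(Collectors.toList());
        System.out.println(results);  // [result for cursor-1, result for cursor-3]
    }
}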

Aggregations

Cursor (io.druid.segment.Cursor): 31 usages
ArrayList (java.util.ArrayList): 17 usages
StorageAdapter (io.druid.segment.StorageAdapter): 16 usages
List (java.util.List): 16 usages
Benchmark (org.openjdk.jmh.annotations.Benchmark): 15 usages
BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode): 15 usages
OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit): 15 usages
QueryableIndexStorageAdapter (io.druid.segment.QueryableIndexStorageAdapter): 14 usages
DimFilter (io.druid.query.filter.DimFilter): 11 usages
Filter (io.druid.query.filter.Filter): 11 usages
BoundDimFilter (io.druid.query.filter.BoundDimFilter): 9 usages
OrDimFilter (io.druid.query.filter.OrDimFilter): 9 usages
DimensionSelector (io.druid.segment.DimensionSelector): 9 usages
Interval (org.joda.time.Interval): 9 usages
DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec): 8 usages
AndDimFilter (io.druid.query.filter.AndDimFilter): 8 usages
SelectorDimFilter (io.druid.query.filter.SelectorDimFilter): 7 usages
IndexedInts (io.druid.segment.data.IndexedInts): 6 usages
SelectorFilter (io.druid.segment.filter.SelectorFilter): 6 usages
AndFilter (io.druid.segment.filter.AndFilter): 5 usages