Search in sources :

Example 11 with ColumnSelectorFactory

use of org.apache.druid.segment.ColumnSelectorFactory in project druid by druid-io.

the class RowBasedGrouperHelper method createResultRowBasedColumnSelectorFactory.

/**
 * Builds a {@link ColumnSelectorFactory} over result rows produced by the given groupBy "query".
 *
 * <p>Row timestamps are read from position 0 when the query's result rows carry a timestamp; otherwise the query's
 * universal timestamp is reported for every row. Columns that are absent from the result signature read as null.
 * Capabilities are taken from the result signature, except that "has multiple values" is always reported as false.
 *
 * @param query        a groupBy query
 * @param supplier     supplier of result rows from the query
 * @param finalization whether the column capabilities reported by this factory should reflect finalized types
 * @return a selector factory that reads whichever row "supplier" currently provides
 */
public static ColumnSelectorFactory createResultRowBasedColumnSelectorFactory(final GroupByQuery query, final Supplier<ResultRow> supplier, final RowSignature.Finalization finalization) {
    final RowSignature resultSignature = query.getResultRowSignature(finalization);

    final RowAdapter<ResultRow> rowAdapter = new RowAdapter<ResultRow>() {

        @Override
        public ToLongFunction<ResultRow> timestampFunction() {
            if (!query.getResultRowHasTimestamp()) {
                // No per-row timestamp column: every row shares the query's universal timestamp.
                final long universalMillis = query.getUniversalTimestamp().getMillis();
                return ignored -> universalMillis;
            }
            // The timestamp, when present, is stored at position 0 of the row.
            return resultRow -> resultRow.getLong(0);
        }

        @Override
        public Function<ResultRow, Object> columnFunction(final String columnName) {
            final int position = resultSignature.indexOf(columnName);
            if (position >= 0) {
                return resultRow -> resultRow.get(position);
            }
            // Unknown column: always read as null.
            return ignored -> null;
        }
    };

    // Wrap the signature so that hasMultipleValues is reported as false. (groupBy does not return multiple values.)
    final ColumnInspector singleValuedInspector = new ColumnInspector() {

        @Nullable
        @Override
        public ColumnCapabilities getColumnCapabilities(String column) {
            final ColumnCapabilities capabilities = resultSignature.getColumnCapabilities(column);
            final boolean needsOverride = capabilities != null && !capabilities.hasMultipleValues().isFalse();
            if (needsOverride) {
                // Copy before mutating so the signature's own capabilities object is left untouched.
                return ColumnCapabilitiesImpl.copyOf(capabilities).setHasMultipleValues(false);
            }
            return capabilities;
        }
    };

    return RowBasedColumnSelectorFactory.create(rowAdapter, supplier::get, singleValuedInspector, false);
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) Arrays(java.util.Arrays) Comparators(org.apache.druid.java.util.common.guava.Comparators) IntArrayUtils(org.apache.druid.common.utils.IntArrayUtils) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) AllGranularity(org.apache.druid.java.util.common.granularity.AllGranularity) IndexedInts(org.apache.druid.segment.data.IndexedInts) ByteBuffer(java.nio.ByteBuffer) Pair(org.apache.druid.java.util.common.Pair) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) BaseFloatColumnValueSelector(org.apache.druid.segment.BaseFloatColumnValueSelector) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) RowAdapter(org.apache.druid.segment.RowAdapter) ColumnSelectorStrategyFactory(org.apache.druid.query.dimension.ColumnSelectorStrategyFactory) JsonValue(com.fasterxml.jackson.annotation.JsonValue) GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) BufferComparator(org.apache.druid.query.groupby.epinephelinae.Grouper.BufferComparator) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) IAE(org.apache.druid.java.util.common.IAE) ToLongFunction(java.util.function.ToLongFunction) Longs(com.google.common.primitives.Longs) RowBasedColumnSelectorFactory(org.apache.druid.segment.RowBasedColumnSelectorFactory) ResultRow(org.apache.druid.query.groupby.ResultRow) Predicate(java.util.function.Predicate) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) ValueType(org.apache.druid.segment.column.ValueType) Collectors(java.util.stream.Collectors) List(java.util.List) ColumnCapabilitiesImpl(org.apache.druid.segment.column.ColumnCapabilitiesImpl) 
BooleanValueMatcher(org.apache.druid.segment.filter.BooleanValueMatcher) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) BaseDoubleColumnValueSelector(org.apache.druid.segment.BaseDoubleColumnValueSelector) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) Accumulator(org.apache.druid.java.util.common.guava.Accumulator) IntStream(java.util.stream.IntStream) ColumnSelectorPlus(org.apache.druid.query.ColumnSelectorPlus) ComparableList(org.apache.druid.segment.data.ComparableList) Supplier(com.google.common.base.Supplier) BaseQuery(org.apache.druid.query.BaseQuery) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) ColumnSelectorStrategy(org.apache.druid.query.dimension.ColumnSelectorStrategy) StringComparators(org.apache.druid.query.ordering.StringComparators) ComparableStringArray(org.apache.druid.segment.data.ComparableStringArray) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) DimensionSelector(org.apache.druid.segment.DimensionSelector) Nullable(javax.annotation.Nullable) ValueMatcher(org.apache.druid.query.filter.ValueMatcher) ColumnInspector(org.apache.druid.segment.ColumnInspector) StringComparator(org.apache.druid.query.ordering.StringComparator) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) DateTime(org.joda.time.DateTime) Ints(com.google.common.primitives.Ints) BaseLongColumnValueSelector(org.apache.druid.segment.BaseLongColumnValueSelector) Object2IntMap(it.unimi.dsi.fastutil.objects.Object2IntMap) NullHandling(org.apache.druid.common.config.NullHandling) RowSignature(org.apache.druid.segment.column.RowSignature) Closeable(java.io.Closeable) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) 
ColumnType(org.apache.druid.segment.column.ColumnType) Preconditions(com.google.common.base.Preconditions) BitSet(java.util.BitSet) IntArrays(it.unimi.dsi.fastutil.ints.IntArrays) Comparator(java.util.Comparator) Filters(org.apache.druid.segment.filter.Filters) ReferenceCountingResourceHolder(org.apache.druid.collections.ReferenceCountingResourceHolder) Filter(org.apache.druid.query.filter.Filter) RowAdapter(org.apache.druid.segment.RowAdapter) ColumnInspector(org.apache.druid.segment.ColumnInspector) RowSignature(org.apache.druid.segment.column.RowSignature) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities)

Example 12 with ColumnSelectorFactory

use of org.apache.druid.segment.ColumnSelectorFactory in project druid by druid-io.

the class GroupByQueryEngineV2 method processNonVectorized.

/**
 * Runs the non-vectorized groupBy engine: one result-row sequence is produced per cursor obtained from
 * "storageAdapter", and the per-cursor sequences are flattened into a single sequence.
 *
 * <p>For each cursor, an array-based iterator is used when {@code getCardinalityForArrayAggregation} returns a
 * non-negative value; otherwise a hash-based iterator is used. The iterator is closed when the sequence is cleaned up.
 */
private static Sequence<ResultRow> processNonVectorized(final GroupByQuery query, final StorageAdapter storageAdapter, final ByteBuffer processingBuffer, @Nullable final DateTime fudgeTimestamp, final GroupByQueryConfig querySpecificConfig, @Nullable final Filter filter, final Interval interval) {
    return storageAdapter
        .makeCursors(filter, interval, query.getVirtualColumns(), query.getGranularity(), false, null)
        .flatMap(cursor -> {
            final BaseSequence.IteratorMaker<ResultRow, GroupByEngineIterator<?>> iteratorMaker = new BaseSequence.IteratorMaker<ResultRow, GroupByEngineIterator<?>>() {

                @Override
                public GroupByEngineIterator make() {
                    final ColumnSelectorFactory selectorFactory = cursor.getColumnSelectorFactory();
                    final ColumnSelectorPlus<GroupByColumnSelectorStrategy>[] basePluses = DimensionHandlerUtils.createColumnSelectorPluses(STRATEGY_FACTORY, query.getDimensions(), selectorFactory);
                    final GroupByColumnSelectorPlus[] groupByPluses = createGroupBySelectorPlus(basePluses, query.getResultRowDimensionStart());
                    final int arrayCardinality = getCardinalityForArrayAggregation(querySpecificConfig, query, storageAdapter, processingBuffer);
                    final boolean noExplodingDimensions = hasNoExplodingDimensions(selectorFactory, query.getDimensions());

                    // A negative cardinality means array-based aggregation is not used; fall back to hashing.
                    if (arrayCardinality < 0) {
                        return new HashAggregateIterator(query, querySpecificConfig, cursor, processingBuffer, fudgeTimestamp, groupByPluses, noExplodingDimensions);
                    }
                    return new ArrayAggregateIterator(query, querySpecificConfig, cursor, processingBuffer, fudgeTimestamp, groupByPluses, noExplodingDimensions, arrayCardinality);
                }

                @Override
                public void cleanup(GroupByEngineIterator iterFromMake) {
                    iterFromMake.close();
                }
            };
            return new BaseSequence<>(iteratorMaker);
        });
}
Also used : GroupByColumnSelectorPlus(org.apache.druid.query.groupby.epinephelinae.column.GroupByColumnSelectorPlus) ColumnSelectorPlus(org.apache.druid.query.ColumnSelectorPlus) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) Cursor(org.apache.druid.segment.Cursor) GroupByColumnSelectorPlus(org.apache.druid.query.groupby.epinephelinae.column.GroupByColumnSelectorPlus)

Example 13 with ColumnSelectorFactory

use of org.apache.druid.segment.ColumnSelectorFactory in project druid by druid-io.

the class SketchAggregatorFactoryTest method testFactorizeSized.

/**
 * Checks that {@code factorizeWithSize} reports an initial size of 48 bytes for both the
 * {@code AGGREGATOR_16384} and {@code AGGREGATOR_32768} constants.
 */
@Test
public void testFactorizeSized() {
    // Selector factory mock that hands out a mock value selector for any column name, any number of times.
    final ColumnSelectorFactory selectorFactoryMock = EasyMock.mock(ColumnSelectorFactory.class);
    EasyMock.expect(selectorFactoryMock.makeColumnValueSelector(EasyMock.anyString()))
            .andReturn(EasyMock.createMock(ColumnValueSelector.class))
            .anyTimes();
    EasyMock.replay(selectorFactoryMock);

    final AggregatorAndSize sizedFor16384 = AGGREGATOR_16384.factorizeWithSize(selectorFactoryMock);
    Assert.assertEquals(48, sizedFor16384.getInitialSizeBytes());

    final AggregatorAndSize sizedFor32768 = AGGREGATOR_32768.factorizeWithSize(selectorFactoryMock);
    Assert.assertEquals(48, sizedFor32768.getInitialSizeBytes());
}
Also used : AggregatorAndSize(org.apache.druid.query.aggregation.AggregatorAndSize) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) Test(org.junit.Test)

Example 14 with ColumnSelectorFactory

use of org.apache.druid.segment.ColumnSelectorFactory in project druid by druid-io.

the class ExpressionAggregationBenchmark method compute.

/**
 * Aggregates every row of {@code index} with a buffer aggregator built by "aggregatorFactory" and returns the
 * aggregated value.
 *
 * <p>The aggregator writes its state at position 0 of {@code aggregationBuffer}. It is closed in a {@code finally}
 * block so that it is released even when initialization, aggregation or cursor advancement throws.
 *
 * @param aggregatorFactory creates the aggregator to run from a cursor's column selector factory
 * @return the aggregated value; the aggregator's result is cast to {@link Double}
 * @throws java.util.NoSuchElementException if no cursor is produced (raised by {@code Iterables.getOnlyElement})
 * @throws IllegalArgumentException         if more than one cursor is produced (raised by
 *                                          {@code Iterables.getOnlyElement})
 */
private double compute(final Function<ColumnSelectorFactory, BufferAggregator> aggregatorFactory) {
    final QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);
    final Sequence<Cursor> cursors = adapter.makeCursors(null, index.getDataInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
    final List<Double> results = cursors.map(cursor -> {
        final BufferAggregator bufferAggregator = aggregatorFactory.apply(cursor.getColumnSelectorFactory());
        try {
            bufferAggregator.init(aggregationBuffer, 0);
            while (!cursor.isDone()) {
                bufferAggregator.aggregate(aggregationBuffer, 0);
                cursor.advance();
            }
            // Read the result before closing; the finally block then releases the aggregator.
            return (Double) bufferAggregator.get(aggregationBuffer, 0);
        } finally {
            bufferAggregator.close();
        }
    }).toList();
    // Exactly one cursor is expected here (presumably because of Granularities.ALL — confirm against the adapter).
    return Iterables.getOnlyElement(results);
}
Also used : Iterables(com.google.common.collect.Iterables) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Measurement(org.openjdk.jmh.annotations.Measurement) Intervals(org.apache.druid.java.util.common.Intervals) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) SegmentGenerator(org.apache.druid.segment.generator.SegmentGenerator) Blackhole(org.openjdk.jmh.infra.Blackhole) Scope(org.openjdk.jmh.annotations.Scope) Warmup(org.openjdk.jmh.annotations.Warmup) Function(java.util.function.Function) JavaScriptConfig(org.apache.druid.js.JavaScriptConfig) ByteBuffer(java.nio.ByteBuffer) BaseFloatColumnValueSelector(org.apache.druid.segment.BaseFloatColumnValueSelector) BufferAggregator(org.apache.druid.query.aggregation.BufferAggregator) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) ImmutableList(com.google.common.collect.ImmutableList) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit) TearDown(org.openjdk.jmh.annotations.TearDown) Sequence(org.apache.druid.java.util.common.guava.Sequence) Setup(org.openjdk.jmh.annotations.Setup) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) GeneratorColumnSchema(org.apache.druid.segment.generator.GeneratorColumnSchema) Mode(org.openjdk.jmh.annotations.Mode) Closer(org.apache.druid.java.util.common.io.Closer) Param(org.openjdk.jmh.annotations.Param) QueryableIndex(org.apache.druid.segment.QueryableIndex) JavaScriptAggregatorFactory(org.apache.druid.query.aggregation.JavaScriptAggregatorFactory) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) ValueType(org.apache.druid.segment.column.ValueType) TestExprMacroTable(org.apache.druid.query.expression.TestExprMacroTable) State(org.openjdk.jmh.annotations.State) 
Benchmark(org.openjdk.jmh.annotations.Benchmark) Granularities(org.apache.druid.java.util.common.granularity.Granularities) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) Cursor(org.apache.druid.segment.Cursor) NullHandling(org.apache.druid.common.config.NullHandling) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) Level(org.openjdk.jmh.annotations.Level) Fork(org.openjdk.jmh.annotations.Fork) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) Cursor(org.apache.druid.segment.Cursor) BufferAggregator(org.apache.druid.query.aggregation.BufferAggregator)

Example 15 with ColumnSelectorFactory

use of org.apache.druid.segment.ColumnSelectorFactory in project druid by druid-io.

the class RowBasedGrouperHelper method getResultRowPredicate.

/**
 * Builds a predicate over result rows of "subquery" that keeps only the rows accepted by the intervals and
 * dimension filter of "query".
 *
 * <p>A row passes when its timestamp (if the subquery's rows carry one) lies in at least one of the outer query's
 * intervals and the outer query's filter, if any, matches the row.
 *
 * @param query    outer query
 * @param subquery inner query
 * @return predicate to apply to each result row of "subquery"
 */
private static Predicate<ResultRow> getResultRowPredicate(final GroupByQuery query, final GroupByQuery subquery) {
    final List<Interval> queryIntervals = query.getIntervals();
    final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimFilter()));

    // The matcher reads whichever row was most recently placed in this holder.
    final SettableSupplier<ResultRow> currentRow = new SettableSupplier<>();
    final ColumnSelectorFactory subqueryRowFactory = RowBasedGrouperHelper.createResultRowBasedColumnSelectorFactory(subquery, currentRow, RowSignature.Finalization.UNKNOWN);
    final ColumnSelectorFactory columnSelectorFactory = query.getVirtualColumns().wrap(subqueryRowFactory);

    final ValueMatcher filterMatcher;
    if (filter != null) {
        filterMatcher = filter.makeMatcher(columnSelectorFactory);
    } else {
        // No filter on the outer query: every row matches.
        filterMatcher = BooleanValueMatcher.of(true);
    }

    final boolean universalTimestampOutsideIntervals = subquery.getUniversalTimestamp() != null
        && queryIntervals.stream().noneMatch(candidate -> candidate.contains(subquery.getUniversalTimestamp()));
    if (universalTimestampOutsideIntervals) {
        // Every row shares a timestamp that no query interval contains, so nothing can match.
        // (A universal timestamp that does fall inside the query intervals needs no special handling.)
        return row -> false;
    }

    return row -> {
        if (subquery.getResultRowHasTimestamp()) {
            // The row's timestamp is at position 0; reject the row unless some query interval contains it.
            final boolean withinSomeInterval = queryIntervals.stream().anyMatch(candidate -> candidate.contains(row.getLong(0)));
            if (!withinSomeInterval) {
                return false;
            }
        }
        currentRow.set(row);
        return filterMatcher.matches();
    };
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) Arrays(java.util.Arrays) Comparators(org.apache.druid.java.util.common.guava.Comparators) IntArrayUtils(org.apache.druid.common.utils.IntArrayUtils) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) AllGranularity(org.apache.druid.java.util.common.granularity.AllGranularity) IndexedInts(org.apache.druid.segment.data.IndexedInts) ByteBuffer(java.nio.ByteBuffer) Pair(org.apache.druid.java.util.common.Pair) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) BaseFloatColumnValueSelector(org.apache.druid.segment.BaseFloatColumnValueSelector) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) RowAdapter(org.apache.druid.segment.RowAdapter) ColumnSelectorStrategyFactory(org.apache.druid.query.dimension.ColumnSelectorStrategyFactory) JsonValue(com.fasterxml.jackson.annotation.JsonValue) GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) BufferComparator(org.apache.druid.query.groupby.epinephelinae.Grouper.BufferComparator) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) IAE(org.apache.druid.java.util.common.IAE) ToLongFunction(java.util.function.ToLongFunction) Longs(com.google.common.primitives.Longs) RowBasedColumnSelectorFactory(org.apache.druid.segment.RowBasedColumnSelectorFactory) ResultRow(org.apache.druid.query.groupby.ResultRow) Predicate(java.util.function.Predicate) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) ValueType(org.apache.druid.segment.column.ValueType) Collectors(java.util.stream.Collectors) List(java.util.List) 
ColumnCapabilitiesImpl(org.apache.druid.segment.column.ColumnCapabilitiesImpl) BooleanValueMatcher(org.apache.druid.segment.filter.BooleanValueMatcher) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) BaseDoubleColumnValueSelector(org.apache.druid.segment.BaseDoubleColumnValueSelector) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) Accumulator(org.apache.druid.java.util.common.guava.Accumulator) IntStream(java.util.stream.IntStream) ColumnSelectorPlus(org.apache.druid.query.ColumnSelectorPlus) ComparableList(org.apache.druid.segment.data.ComparableList) Supplier(com.google.common.base.Supplier) BaseQuery(org.apache.druid.query.BaseQuery) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) ColumnSelectorStrategy(org.apache.druid.query.dimension.ColumnSelectorStrategy) StringComparators(org.apache.druid.query.ordering.StringComparators) ComparableStringArray(org.apache.druid.segment.data.ComparableStringArray) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) DimensionSelector(org.apache.druid.segment.DimensionSelector) Nullable(javax.annotation.Nullable) ValueMatcher(org.apache.druid.query.filter.ValueMatcher) ColumnInspector(org.apache.druid.segment.ColumnInspector) StringComparator(org.apache.druid.query.ordering.StringComparator) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) DateTime(org.joda.time.DateTime) Ints(com.google.common.primitives.Ints) BaseLongColumnValueSelector(org.apache.druid.segment.BaseLongColumnValueSelector) Object2IntMap(it.unimi.dsi.fastutil.objects.Object2IntMap) NullHandling(org.apache.druid.common.config.NullHandling) RowSignature(org.apache.druid.segment.column.RowSignature) 
Closeable(java.io.Closeable) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) ColumnType(org.apache.druid.segment.column.ColumnType) Preconditions(com.google.common.base.Preconditions) BitSet(java.util.BitSet) IntArrays(it.unimi.dsi.fastutil.ints.IntArrays) Comparator(java.util.Comparator) Filters(org.apache.druid.segment.filter.Filters) ReferenceCountingResourceHolder(org.apache.druid.collections.ReferenceCountingResourceHolder) Filter(org.apache.druid.query.filter.Filter) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) RowBasedColumnSelectorFactory(org.apache.druid.segment.RowBasedColumnSelectorFactory) BooleanValueMatcher(org.apache.druid.segment.filter.BooleanValueMatcher) ValueMatcher(org.apache.druid.query.filter.ValueMatcher) Filter(org.apache.druid.query.filter.Filter) Interval(org.joda.time.Interval)

Aggregations

ColumnSelectorFactory (org.apache.druid.segment.ColumnSelectorFactory)21 ColumnValueSelector (org.apache.druid.segment.ColumnValueSelector)10 Cursor (org.apache.druid.segment.Cursor)9 DimensionSelector (org.apache.druid.segment.DimensionSelector)8 Test (org.junit.Test)8 DimensionSpec (org.apache.druid.query.dimension.DimensionSpec)7 RowBasedColumnSelectorFactory (org.apache.druid.segment.RowBasedColumnSelectorFactory)7 List (java.util.List)6 IndexedInts (org.apache.druid.segment.data.IndexedInts)6 QueryableIndexStorageAdapter (org.apache.druid.segment.QueryableIndexStorageAdapter)5 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)4 ByteBuffer (java.nio.ByteBuffer)4 Arrays (java.util.Arrays)4 Nullable (javax.annotation.Nullable)4 NullHandling (org.apache.druid.common.config.NullHandling)4 ValueMatcher (org.apache.druid.query.filter.ValueMatcher)4 RuntimeShapeInspector (org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector)4 BaseFloatColumnValueSelector (org.apache.druid.segment.BaseFloatColumnValueSelector)4 ColumnCapabilitiesImpl (org.apache.druid.segment.column.ColumnCapabilitiesImpl)4 ValueType (org.apache.druid.segment.column.ValueType)4