
Example 41 with DimensionSpec

Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

The class RowBasedGrouperHelper, method makeGrouperIterator.

public static CloseableGrouperIterator<RowBasedKey, ResultRow> makeGrouperIterator(final Grouper<RowBasedKey> grouper, final GroupByQuery query, @Nullable final List<DimensionSpec> dimsToInclude, final Closeable closeable) {
    final boolean includeTimestamp = query.getResultRowHasTimestamp();
    final BitSet dimsToIncludeBitSet = new BitSet(query.getDimensions().size());
    final int resultRowDimensionStart = query.getResultRowDimensionStart();
    final BitSet groupingAggregatorsBitSet = new BitSet(query.getAggregatorSpecs().size());
    final Object[] groupingAggregatorValues = new Long[query.getAggregatorSpecs().size()];
    if (dimsToInclude != null) {
        for (DimensionSpec dimensionSpec : dimsToInclude) {
            String outputName = dimensionSpec.getOutputName();
            final int dimIndex = query.getResultRowSignature().indexOf(outputName);
            if (dimIndex >= 0) {
                dimsToIncludeBitSet.set(dimIndex - resultRowDimensionStart);
            }
        }
        // keyDimensionNames are the input column names of the dimensions. They are required because aggregators
        // are not aware of the output column names.
        // Since we exclude certain dimensions from the result row, the value of any grouping_id aggregator has to
        // change to reflect the new set of grouping dimensions that aggregation is being done upon. We mark the
        // indices that hold grouping aggregators and update the value for each row at those indices.
        Set<String> keyDimensionNames = dimsToInclude.stream().map(DimensionSpec::getDimension).collect(Collectors.toSet());
        for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
            AggregatorFactory aggregatorFactory = query.getAggregatorSpecs().get(i);
            if (aggregatorFactory instanceof GroupingAggregatorFactory) {
                groupingAggregatorsBitSet.set(i);
                groupingAggregatorValues[i] = ((GroupingAggregatorFactory) aggregatorFactory).withKeyDimensions(keyDimensionNames).getValue();
            }
        }
    }
    return new CloseableGrouperIterator<>(grouper.iterator(true), entry -> {
        final ResultRow resultRow = ResultRow.create(query.getResultRowSizeWithoutPostAggregators());
        // Add timestamp, maybe.
        if (includeTimestamp) {
            final DateTime timestamp = query.getGranularity().toDateTime(((long) (entry.getKey().getKey()[0])));
            resultRow.set(0, timestamp.getMillis());
        }
        // Add dimensions.
        for (int i = resultRowDimensionStart; i < entry.getKey().getKey().length; i++) {
            if (dimsToInclude == null || dimsToIncludeBitSet.get(i - resultRowDimensionStart)) {
                final Object dimVal = entry.getKey().getKey()[i];
                resultRow.set(i, dimVal instanceof String ? NullHandling.emptyToNullIfNeeded((String) dimVal) : dimVal);
            }
        }
        // Add aggregations.
        final int resultRowAggregatorStart = query.getResultRowAggregatorStart();
        for (int i = 0; i < entry.getValues().length; i++) {
            if (dimsToInclude != null && groupingAggregatorsBitSet.get(i)) {
                // Override with a new value, reflecting the new set of grouping dimensions
                resultRow.set(resultRowAggregatorStart + i, groupingAggregatorValues[i]);
            } else {
                resultRow.set(resultRowAggregatorStart + i, entry.getValues()[i]);
            }
        }
        return resultRow;
    }, closeable);
}
Also used: ResultRow(org.apache.druid.query.groupby.ResultRow) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) BitSet(java.util.BitSet) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DateTime(org.joda.time.DateTime)
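
To make the BitSet bookkeeping above concrete, here is a minimal, self-contained sketch with hypothetical dimension names and a plain Object[] standing in for ResultRow. Note one simplification: the real method looks up indexes in the full result-row signature and shifts by resultRowDimensionStart when setting bits; here the name list already holds only dimensions, so no shift is needed at marking time.

import java.util.Arrays;
import java.util.BitSet;
import java.util.List;

public class DimsToIncludeSketch {
    public static void main(String[] args) {
        // Dimension output names in result-row order; dimensions start at offset 1 (after the timestamp).
        List<String> dimOutputNames = List.of("country", "device", "browser");
        int resultRowDimensionStart = 1;

        // The subset to keep, playing the role of dimsToInclude above.
        List<String> dimsToInclude = List.of("country", "browser");

        // Mark the offsets (relative to the dimension block) that should survive.
        BitSet dimsToIncludeBitSet = new BitSet(dimOutputNames.size());
        for (String outputName : dimsToInclude) {
            int dimIndex = dimOutputNames.indexOf(outputName);
            if (dimIndex >= 0) {
                dimsToIncludeBitSet.set(dimIndex);
            }
        }

        // Copy only the marked dimensions into the output row, as the iterator lambda does.
        Object[] groupingKey = {1609459200000L, "US", "phone", "firefox"};
        Object[] resultRow = new Object[groupingKey.length];
        resultRow[0] = groupingKey[0]; // timestamp
        for (int i = resultRowDimensionStart; i < groupingKey.length; i++) {
            if (dimsToIncludeBitSet.get(i - resultRowDimensionStart)) {
                resultRow[i] = groupingKey[i];
            }
        }
        System.out.println(Arrays.toString(resultRow)); // [1609459200000, US, null, firefox]
    }
}

The excluded slot is simply left null, which is why the real method also has to recompute grouping_id aggregator values: the effective set of grouping dimensions has changed.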

Example 42 with DimensionSpec

Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

The class GroupByQueryEngineV2, method convertRowTypesToOutputTypes.

public static void convertRowTypesToOutputTypes(final List<DimensionSpec> dimensionSpecs, final ResultRow resultRow, final int resultRowDimensionStart) {
    for (int i = 0; i < dimensionSpecs.size(); i++) {
        DimensionSpec dimSpec = dimensionSpecs.get(i);
        final int resultRowIndex = resultRowDimensionStart + i;
        final ColumnType outputType = dimSpec.getOutputType();
        resultRow.set(resultRowIndex, DimensionHandlerUtils.convertObjectToType(resultRow.get(resultRowIndex), outputType));
    }
}
Also used: DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnType(org.apache.druid.segment.column.ColumnType)
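
A rough, self-contained sketch of the same positional conversion step, with a hypothetical list of coercion functions standing in for DimensionHandlerUtils.convertObjectToType and a plain Object[] standing in for ResultRow:

import java.util.Arrays;
import java.util.List;
import java.util.function.Function;

public class RowTypeConversionSketch {
    // Stand-in for the convertObjectToType call: one coercion per dimension, applied in place.
    static void convertRowTypes(Object[] resultRow, List<Function<Object, Object>> coercions, int dimensionStart) {
        for (int i = 0; i < coercions.size(); i++) {
            int resultRowIndex = dimensionStart + i;
            resultRow[resultRowIndex] = coercions.get(i).apply(resultRow[resultRowIndex]);
        }
    }

    public static void main(String[] args) {
        // Row layout: [timestamp, dim0, dim1]; dim0 arrived as a String, dim1 as a Double.
        Object[] row = {1609459200000L, "42", 3.0};
        convertRowTypes(row, List.of(
            v -> Long.parseLong((String) v),   // dim0 declared LONG
            v -> ((Number) v).longValue()      // dim1 declared LONG
        ), 1);
        System.out.println(Arrays.toString(row)); // [1609459200000, 42, 3]
    }
}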

Example 43 with DimensionSpec

Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

The class GroupByQueryEngineV2, method getCardinalityForArrayAggregation.

/**
 * Returns the cardinality of array needed to do array-based aggregation, or -1 if array-based aggregation
 * is impossible.
 */
public static int getCardinalityForArrayAggregation(GroupByQueryConfig querySpecificConfig, GroupByQuery query, StorageAdapter storageAdapter, ByteBuffer buffer) {
    if (querySpecificConfig.isForceHashAggregation()) {
        return -1;
    }
    final List<DimensionSpec> dimensions = query.getDimensions();
    final ColumnCapabilities columnCapabilities;
    final int cardinality;
    // Find cardinality
    if (dimensions.isEmpty()) {
        columnCapabilities = null;
        cardinality = 1;
    } else if (dimensions.size() == 1) {
        // Only real columns can use array-based aggregation, since virtual columns cannot report their cardinality.
        // We must also check for a virtual column shadowing a real column of the same name, which the
        // StorageAdapter alone would miss (it only knows about real columns).
        if (query.getVirtualColumns().exists(Iterables.getOnlyElement(dimensions).getDimension())) {
            return -1;
        }
        // Cannot group on array types, since not all indexes are known up front to allocate appropriate values.
        if (dimensions.get(0).getOutputType().isArray()) {
            return -1;
        }
        final String columnName = Iterables.getOnlyElement(dimensions).getDimension();
        columnCapabilities = storageAdapter.getColumnCapabilities(columnName);
        cardinality = storageAdapter.getDimensionCardinality(columnName);
    } else {
        // Cannot use array-based aggregation with more than one dimension.
        return -1;
    }
    // Choose array-based aggregation if the grouping key is a single string dimension of a known cardinality
    if (Types.is(columnCapabilities, ValueType.STRING) && cardinality > 0) {
        final AggregatorFactory[] aggregatorFactories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]);
        final long requiredBufferCapacity = BufferArrayGrouper.requiredBufferCapacity(cardinality, aggregatorFactories);
        // Check that all keys and aggregated values can be contained in the buffer
        if (requiredBufferCapacity < 0 || requiredBufferCapacity > buffer.capacity()) {
            return -1;
        } else {
            return cardinality;
        }
    } else {
        return -1;
    }
}
Also used: DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities)
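
The capacity check at the end can be illustrated standalone. This sketch assumes a simplified fixed per-bucket size; the real number comes from BufferArrayGrouper.requiredBufferCapacity and the aggregators' own size estimates, so treat the formula here as illustrative only:

import java.nio.ByteBuffer;

public class ArrayAggregationCapacitySketch {
    // Simplified stand-in for BufferArrayGrouper.requiredBufferCapacity: one bucket per
    // dictionary id plus an assumed fixed per-bucket payload. Returns -1 on overflow,
    // matching the "requiredBufferCapacity < 0" guard above.
    static long requiredBufferCapacity(int cardinality, int bytesPerBucket) {
        long required = (long) (cardinality + 1) * bytesPerBucket; // +1 bucket for the null row
        return required > Integer.MAX_VALUE ? -1 : required;
    }

    static int cardinalityIfItFits(int cardinality, int bytesPerBucket, ByteBuffer buffer) {
        long required = requiredBufferCapacity(cardinality, bytesPerBucket);
        if (required < 0 || required > buffer.capacity()) {
            return -1; // fall back to hash-based aggregation
        }
        return cardinality;
    }

    public static void main(String[] args) {
        ByteBuffer buffer = ByteBuffer.allocate(1 << 20); // 1 MiB processing buffer
        System.out.println(cardinalityIfItFits(10_000, 16, buffer));     // fits: 10000
        System.out.println(cardinalityIfItFits(10_000_000, 16, buffer)); // too big: -1
    }
}

The design point is that array-based aggregation trades memory proportional to the dictionary cardinality for the elimination of hash-table lookups, so it is only chosen when the whole array provably fits in the processing buffer.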

Example 44 with DimensionSpec

Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

The class GroupByQueryQueryToolChest, method getCacheStrategy.

@Override
public CacheStrategy<ResultRow, Object, GroupByQuery> getCacheStrategy(final GroupByQuery query) {
    return new CacheStrategy<ResultRow, Object, GroupByQuery>() {

        private static final byte CACHE_STRATEGY_VERSION = 0x1;

        private final List<AggregatorFactory> aggs = query.getAggregatorSpecs();

        private final List<DimensionSpec> dims = query.getDimensions();

        @Override
        public boolean isCacheable(GroupByQuery query, boolean willMergeRunners) {
            return strategySelector.strategize(query).isCacheable(willMergeRunners);
        }

        @Override
        public byte[] computeCacheKey(GroupByQuery query) {
            CacheKeyBuilder builder = new CacheKeyBuilder(GROUPBY_QUERY)
                    .appendByte(CACHE_STRATEGY_VERSION)
                    .appendCacheable(query.getGranularity())
                    .appendCacheable(query.getDimFilter())
                    .appendCacheables(query.getAggregatorSpecs())
                    .appendCacheables(query.getDimensions())
                    .appendCacheable(query.getVirtualColumns());
            if (query.isApplyLimitPushDown()) {
                builder.appendCacheable(query.getLimitSpec());
            }
            return builder.build();
        }

        @Override
        public byte[] computeResultLevelCacheKey(GroupByQuery query) {
            final CacheKeyBuilder builder = new CacheKeyBuilder(GROUPBY_QUERY)
                    .appendByte(CACHE_STRATEGY_VERSION)
                    .appendCacheable(query.getGranularity())
                    .appendCacheable(query.getDimFilter())
                    .appendCacheables(query.getAggregatorSpecs())
                    .appendCacheables(query.getDimensions())
                    .appendCacheable(query.getVirtualColumns())
                    .appendCacheable(query.getHavingSpec())
                    .appendCacheable(query.getLimitSpec())
                    .appendCacheables(query.getPostAggregatorSpecs());
            if (query.getSubtotalsSpec() != null && !query.getSubtotalsSpec().isEmpty()) {
                for (List<String> subTotalSpec : query.getSubtotalsSpec()) {
                    builder.appendStrings(subTotalSpec);
                }
            }
            return builder.build();
        }

        @Override
        public TypeReference<Object> getCacheObjectClazz() {
            return OBJECT_TYPE_REFERENCE;
        }

        @Override
        public Function<ResultRow, Object> prepareForCache(boolean isResultLevelCache) {
            final boolean resultRowHasTimestamp = query.getResultRowHasTimestamp();
            return new Function<ResultRow, Object>() {

                @Override
                public Object apply(ResultRow resultRow) {
                    final List<Object> retVal = new ArrayList<>(1 + dims.size() + aggs.size());
                    int inPos = 0;
                    if (resultRowHasTimestamp) {
                        retVal.add(resultRow.getLong(inPos++));
                    } else {
                        retVal.add(query.getUniversalTimestamp().getMillis());
                    }
                    for (int i = 0; i < dims.size(); i++) {
                        retVal.add(resultRow.get(inPos++));
                    }
                    for (int i = 0; i < aggs.size(); i++) {
                        retVal.add(resultRow.get(inPos++));
                    }
                    if (isResultLevelCache) {
                        for (int i = 0; i < query.getPostAggregatorSpecs().size(); i++) {
                            retVal.add(resultRow.get(inPos++));
                        }
                    }
                    return retVal;
                }
            };
        }

        @Override
        public Function<Object, ResultRow> pullFromCache(boolean isResultLevelCache) {
            final boolean resultRowHasTimestamp = query.getResultRowHasTimestamp();
            final int dimensionStart = query.getResultRowDimensionStart();
            final int aggregatorStart = query.getResultRowAggregatorStart();
            final int postAggregatorStart = query.getResultRowPostAggregatorStart();
            return new Function<Object, ResultRow>() {

                private final Granularity granularity = query.getGranularity();

                @Override
                public ResultRow apply(Object input) {
                    Iterator<Object> results = ((List<Object>) input).iterator();
                    DateTime timestamp = granularity.toDateTime(((Number) results.next()).longValue());
                    final int size = isResultLevelCache ? query.getResultRowSizeWithPostAggregators() : query.getResultRowSizeWithoutPostAggregators();
                    final ResultRow resultRow = ResultRow.create(size);
                    if (resultRowHasTimestamp) {
                        resultRow.set(0, timestamp.getMillis());
                    }
                    final Iterator<DimensionSpec> dimsIter = dims.iterator();
                    int dimPos = 0;
                    while (dimsIter.hasNext() && results.hasNext()) {
                        final DimensionSpec dimensionSpec = dimsIter.next();
                        // Must convert generic Jackson-deserialized type into the proper type.
                        resultRow.set(dimensionStart + dimPos, DimensionHandlerUtils.convertObjectToType(results.next(), dimensionSpec.getOutputType()));
                        dimPos++;
                    }
                    CacheStrategy.fetchAggregatorsFromCache(aggs, results, isResultLevelCache, (aggName, aggPosition, aggValueObject) -> {
                        resultRow.set(aggregatorStart + aggPosition, aggValueObject);
                    });
                    if (isResultLevelCache) {
                        Iterator<PostAggregator> postItr = query.getPostAggregatorSpecs().iterator();
                        int postPos = 0;
                        while (postItr.hasNext() && results.hasNext()) {
                            // Advance both cursors; otherwise every post-aggregator lands in the same slot.
                            postItr.next();
                            resultRow.set(postAggregatorStart + postPos, results.next());
                            postPos++;
                        }
                    }
                    if (dimsIter.hasNext() || results.hasNext()) {
                        throw new ISE("Found left over objects while reading from cache!! dimsIter[%s] results[%s]", dimsIter.hasNext(), results.hasNext());
                    }
                    return resultRow;
                }
            };
        }
    };
}
Also used: DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) CacheKeyBuilder(org.apache.druid.query.cache.CacheKeyBuilder) ArrayList(java.util.ArrayList) Granularity(org.apache.druid.java.util.common.granularity.Granularity) DateTime(org.joda.time.DateTime) Function(com.google.common.base.Function) List(java.util.List) ISE(org.apache.druid.java.util.common.ISE) CacheStrategy(org.apache.druid.query.CacheStrategy)
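
The cache entry is just a flat list, [timestamp, dims..., aggs..., (postAggs...)]. A hedged round-trip sketch of that layout with hypothetical sizes and values, independent of the CacheStrategy machinery:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class GroupByCacheLayoutSketch {
    public static void main(String[] args) {
        int dimCount = 2, aggCount = 1, postAggCount = 1;
        boolean resultLevelCache = true;

        // prepareForCache: positional write, mirroring the inPos cursor above.
        List<Object> cached = new ArrayList<>();
        cached.add(1609459200000L);        // timestamp
        cached.add("US");                  // dim 0
        cached.add("firefox");             // dim 1
        cached.add(123L);                  // agg 0
        if (resultLevelCache) {
            cached.add(1.23);              // post-agg 0, result-level cache only
        }

        // pullFromCache: positional read back into a row, mirroring the iterator walk above.
        Iterator<Object> results = cached.iterator();
        Object[] row = new Object[1 + dimCount + aggCount + (resultLevelCache ? postAggCount : 0)];
        int pos = 0;
        row[pos++] = results.next(); // timestamp
        for (int i = 0; i < dimCount; i++) {
            row[pos++] = results.next(); // dims (the real code also coerces to the declared output type)
        }
        for (int i = 0; i < aggCount; i++) {
            row[pos++] = results.next(); // aggs (the real code lets each AggregatorFactory deserialize)
        }
        if (resultLevelCache) {
            for (int i = 0; i < postAggCount; i++) {
                row[pos++] = results.next();
            }
        }
        if (results.hasNext()) {
            throw new IllegalStateException("Found left over objects while reading from cache");
        }
        System.out.println(java.util.Arrays.toString(row));
    }
}

Because the layout is purely positional, both sides must agree on the exact counts and order, which is why the real pullFromCache throws an ISE if anything is left over.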

Example 45 with DimensionSpec

Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

The class GroupByQueryQueryToolChest, method extractionsToRewrite.

/**
 * This function checks the query for dimensions which can be optimized by applying the dimension extraction
 * as the final step of the query instead of on every event.
 *
 * @param query The query to check for optimizations
 *
 * @return The set of dimensions (as offsets into {@code query.getDimensions()}) which can be extracted at the last
 * second upon query completion.
 */
private static BitSet extractionsToRewrite(GroupByQuery query) {
    final BitSet retVal = new BitSet();
    final List<DimensionSpec> dimensions = query.getDimensions();
    for (int i = 0; i < dimensions.size(); i++) {
        final DimensionSpec dimensionSpec = dimensions.get(i);
        if (dimensionSpec.getExtractionFn() != null && ExtractionFn.ExtractionType.ONE_TO_ONE.equals(dimensionSpec.getExtractionFn().getExtractionType())) {
            retVal.set(i);
        }
    }
    return retVal;
}
Also used: DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) BitSet(java.util.BitSet)
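
A caller would typically walk the returned BitSet with nextSetBit and apply the deferred extraction once per result row. A sketch of that idiom with hypothetical dimension values and extraction functions (String::toUpperCase plays the part of a one-to-one ExtractionFn):

import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import java.util.function.Function;

public class DeferredExtractionSketch {
    public static void main(String[] args) {
        // Hypothetical: dimension 1 has a one-to-one extraction fn whose application was deferred.
        BitSet extractionsToRewrite = new BitSet();
        extractionsToRewrite.set(1);

        List<Function<String, String>> extractionFns =
            List.of(Function.identity(), String::toUpperCase, Function.identity());

        String[] row = {"US", "firefox", "phone"};
        // Standard idiom for iterating the set bits of a BitSet.
        for (int i = extractionsToRewrite.nextSetBit(0); i >= 0; i = extractionsToRewrite.nextSetBit(i + 1)) {
            row[i] = extractionFns.get(i).apply(row[i]);
        }
        System.out.println(Arrays.toString(row)); // [US, FIREFOX, phone]
    }
}

Deferring is only safe for one-to-one extraction functions, since grouping on the raw values and extracting afterwards then produces the same groups.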

Aggregations

DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 53 usages
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 27 usages
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 20 usages
ArrayList (java.util.ArrayList): 19 usages
HashMap (java.util.HashMap): 16 usages
Nullable (javax.annotation.Nullable): 15 usages
Test (org.junit.Test): 15 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 14 usages
MapBasedRow (org.apache.druid.data.input.MapBasedRow): 12 usages
Row (org.apache.druid.data.input.Row): 12 usages
ISE (org.apache.druid.java.util.common.ISE): 12 usages
PostAggregator (org.apache.druid.query.aggregation.PostAggregator): 11 usages
Map (java.util.Map): 10 usages
ColumnType (org.apache.druid.segment.column.ColumnType): 10 usages
List (java.util.List): 9 usages
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 9 usages
LongMeanAveragerFactory (org.apache.druid.query.movingaverage.averagers.LongMeanAveragerFactory): 9 usages
HashSet (java.util.HashSet): 8 usages
Function (com.google.common.base.Function): 7 usages
ImmutableList (com.google.common.collect.ImmutableList): 7 usages