Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.
The class RowBasedGrouperHelper, method makeGrouperIterator.
public static CloseableGrouperIterator<RowBasedKey, ResultRow> makeGrouperIterator(
    final Grouper<RowBasedKey> grouper,
    final GroupByQuery query,
    @Nullable final List<DimensionSpec> dimsToInclude,
    final Closeable closeable
)
{
  final boolean includeTimestamp = query.getResultRowHasTimestamp();
  final BitSet dimsToIncludeBitSet = new BitSet(query.getDimensions().size());
  final int resultRowDimensionStart = query.getResultRowDimensionStart();
  final BitSet groupingAggregatorsBitSet = new BitSet(query.getAggregatorSpecs().size());
  final Object[] groupingAggregatorValues = new Long[query.getAggregatorSpecs().size()];

  if (dimsToInclude != null) {
    for (DimensionSpec dimensionSpec : dimsToInclude) {
      String outputName = dimensionSpec.getOutputName();
      final int dimIndex = query.getResultRowSignature().indexOf(outputName);
      if (dimIndex >= 0) {
        dimsToIncludeBitSet.set(dimIndex - resultRowDimensionStart);
      }
    }

    // keyDimensionNames are the input column names of the dimensions. They are required since aggregators are not
    // aware of the output column names.
    // As we exclude certain dimensions from the result row, the value of any grouping_id aggregator has to change
    // to reflect the new set of grouping dimensions that aggregation is being done upon. We mark the indices which
    // hold grouping aggregators and update the value for each row at those indices.
    Set<String> keyDimensionNames = dimsToInclude.stream()
                                                 .map(DimensionSpec::getDimension)
                                                 .collect(Collectors.toSet());
    for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
      AggregatorFactory aggregatorFactory = query.getAggregatorSpecs().get(i);
      if (aggregatorFactory instanceof GroupingAggregatorFactory) {
        groupingAggregatorsBitSet.set(i);
        groupingAggregatorValues[i] = ((GroupingAggregatorFactory) aggregatorFactory)
            .withKeyDimensions(keyDimensionNames)
            .getValue();
      }
    }
  }

  return new CloseableGrouperIterator<>(
      grouper.iterator(true),
      entry -> {
        final ResultRow resultRow = ResultRow.create(query.getResultRowSizeWithoutPostAggregators());

        // Add timestamp, maybe.
        if (includeTimestamp) {
          final DateTime timestamp = query.getGranularity().toDateTime((long) (entry.getKey().getKey()[0]));
          resultRow.set(0, timestamp.getMillis());
        }

        // Add dimensions.
        for (int i = resultRowDimensionStart; i < entry.getKey().getKey().length; i++) {
          if (dimsToInclude == null || dimsToIncludeBitSet.get(i - resultRowDimensionStart)) {
            final Object dimVal = entry.getKey().getKey()[i];
            resultRow.set(i, dimVal instanceof String ? NullHandling.emptyToNullIfNeeded((String) dimVal) : dimVal);
          }
        }

        // Add aggregations.
        final int resultRowAggregatorStart = query.getResultRowAggregatorStart();
        for (int i = 0; i < entry.getValues().length; i++) {
          if (dimsToInclude != null && groupingAggregatorsBitSet.get(i)) {
            // Override with a new value, reflecting the new set of grouping dimensions.
            resultRow.set(resultRowAggregatorStart + i, groupingAggregatorValues[i]);
          } else {
            resultRow.set(resultRowAggregatorStart + i, entry.getValues()[i]);
          }
        }

        return resultRow;
      },
      closeable
  );
}
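The grouping_id recomputation above mirrors SQL's GROUPING() semantics: each declared grouping dimension contributes one bit, set when that dimension is rolled up (absent from the current grouping set). Here is a minimal standalone sketch of that idea, with illustrative names only; it is not Druid's actual GroupingAggregatorFactory implementation:

import java.util.List;
import java.util.Set;

public class GroupingIdSketch
{
  // Compute a GROUPING()-style id: one bit per declared dimension, in declaration
  // order, where the bit is 1 when the dimension is NOT part of the grouping set.
  static long groupingId(List<String> declaredDimensions, Set<String> keyDimensions)
  {
    long id = 0;
    for (String dim : declaredDimensions) {
      id = (id << 1) | (keyDimensions.contains(dim) ? 0 : 1);
    }
    return id;
  }

  public static void main(String[] args)
  {
    // GROUP BY (dim1, dim2, dim3) with a subtotal over (dim1): dim2 and dim3 are rolled up.
    long id = groupingId(List.of("dim1", "dim2", "dim3"), Set.of("dim1"));
    System.out.println(Long.toBinaryString(id)); // prints "11" (binary 011)
  }
}

This is why the values are precomputed once per aggregator outside the row loop: the grouping id depends only on which dimensions survive dimsToInclude, not on the row contents.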
Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.
The class GroupByQueryEngineV2, method convertRowTypesToOutputTypes.
public static void convertRowTypesToOutputTypes(
    final List<DimensionSpec> dimensionSpecs,
    final ResultRow resultRow,
    final int resultRowDimensionStart
)
{
  for (int i = 0; i < dimensionSpecs.size(); i++) {
    DimensionSpec dimSpec = dimensionSpecs.get(i);
    final int resultRowIndex = resultRowDimensionStart + i;
    final ColumnType outputType = dimSpec.getOutputType();
    resultRow.set(
        resultRowIndex,
        DimensionHandlerUtils.convertObjectToType(resultRow.get(resultRowIndex), outputType)
    );
  }
}
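For context, DimensionHandlerUtils.convertObjectToType coerces values that arrive with the wrong runtime type (numbers deserialized as Integer, numeric strings, and so on) into the dimension's declared output type. A rough standalone analogue for a LONG-typed output follows; the helper name is hypothetical and Druid's real implementation covers far more types and null-handling modes:

public class CoercionSketch
{
  // Hypothetical analogue of coercing a deserialized value to a LONG-typed
  // dimension; not Druid's DimensionHandlerUtils, which handles many more cases.
  static Long coerceToLong(Object obj)
  {
    if (obj == null) {
      return null;
    } else if (obj instanceof Number) {
      return ((Number) obj).longValue();
    } else if (obj instanceof String) {
      return Long.parseLong(((String) obj).trim());
    }
    throw new IllegalArgumentException("Cannot coerce " + obj.getClass() + " to long");
  }

  public static void main(String[] args)
  {
    System.out.println(coerceToLong(123));   // Integer from JSON -> 123
    System.out.println(coerceToLong("456")); // numeric string -> 456
  }
}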
Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.
The class GroupByQueryEngineV2, method getCardinalityForArrayAggregation.
/**
* Returns the cardinality of array needed to do array-based aggregation, or -1 if array-based aggregation
* is impossible.
*/
public static int getCardinalityForArrayAggregation(
    GroupByQueryConfig querySpecificConfig,
    GroupByQuery query,
    StorageAdapter storageAdapter,
    ByteBuffer buffer
)
{
  if (querySpecificConfig.isForceHashAggregation()) {
    return -1;
  }

  final List<DimensionSpec> dimensions = query.getDimensions();
  final ColumnCapabilities columnCapabilities;
  final int cardinality;

  // Find cardinality.
  if (dimensions.isEmpty()) {
    columnCapabilities = null;
    cardinality = 1;
  } else if (dimensions.size() == 1) {
    // Only real columns can use array-based aggregation. Check for a virtual column with the same name, since
    // virtual columns can shadow real columns and the StorageAdapter only knows about real columns.
    if (query.getVirtualColumns().exists(Iterables.getOnlyElement(dimensions).getDimension())) {
      return -1;
    }
    // Array-typed dimensions cannot use array-based aggregation, since the full set of values is not known up front
    // to allocate appropriate slots.
    if (dimensions.get(0).getOutputType().isArray()) {
      return -1;
    }
    final String columnName = Iterables.getOnlyElement(dimensions).getDimension();
    columnCapabilities = storageAdapter.getColumnCapabilities(columnName);
    cardinality = storageAdapter.getDimensionCardinality(columnName);
  } else {
    // Cannot use array-based aggregation with more than one dimension.
    return -1;
  }

  // Choose array-based aggregation if the grouping key is a single string dimension of a known cardinality.
  if (Types.is(columnCapabilities, ValueType.STRING) && cardinality > 0) {
    final AggregatorFactory[] aggregatorFactories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]);
    final long requiredBufferCapacity = BufferArrayGrouper.requiredBufferCapacity(cardinality, aggregatorFactories);

    // Check that all keys and aggregated values can be contained in the buffer.
    if (requiredBufferCapacity < 0 || requiredBufferCapacity > buffer.capacity()) {
      return -1;
    } else {
      return cardinality;
    }
  } else {
    return -1;
  }
}
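The requiredBufferCapacity check doubles as an overflow guard: array-based aggregation reserves roughly one fixed-size record per possible key value, so the product can exceed the long range (going negative) long before it fits any buffer. A back-of-envelope version of that sizing, for illustration only; the layout is an assumption and the real BufferArrayGrouper differs in detail:

public class ArrayGrouperSizingSketch
{
  // Assumed layout: one slot per key value (plus one for the missing-value key)
  // and one "used" flag bit per slot. Illustrative only, not Druid's exact math.
  static long approximateRequiredCapacity(int cardinality, long perRowAggregationBytes)
  {
    long slots = (long) cardinality + 1;                      // +1 for the missing-value slot
    long usedFlagBytes = (slots + Byte.SIZE - 1) / Byte.SIZE; // one bit per slot, rounded up
    long total = usedFlagBytes + slots * perRowAggregationBytes;
    return total < 0 ? -1 : total;                            // negative signals overflow, as in the check above
  }

  public static void main(String[] args)
  {
    // A 1M-cardinality string dimension with 16 bytes of aggregation state per row:
    System.out.println(approximateRequiredCapacity(1_000_000, 16)); // ~16 MB plus flag bytes
  }
}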
Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.
The class GroupByQueryQueryToolChest, method getCacheStrategy.
@Override
public CacheStrategy<ResultRow, Object, GroupByQuery> getCacheStrategy(final GroupByQuery query)
{
  return new CacheStrategy<ResultRow, Object, GroupByQuery>()
  {
    private static final byte CACHE_STRATEGY_VERSION = 0x1;
    private final List<AggregatorFactory> aggs = query.getAggregatorSpecs();
    private final List<DimensionSpec> dims = query.getDimensions();

    @Override
    public boolean isCacheable(GroupByQuery query, boolean willMergeRunners)
    {
      return strategySelector.strategize(query).isCacheable(willMergeRunners);
    }

    @Override
    public byte[] computeCacheKey(GroupByQuery query)
    {
      CacheKeyBuilder builder = new CacheKeyBuilder(GROUPBY_QUERY)
          .appendByte(CACHE_STRATEGY_VERSION)
          .appendCacheable(query.getGranularity())
          .appendCacheable(query.getDimFilter())
          .appendCacheables(query.getAggregatorSpecs())
          .appendCacheables(query.getDimensions())
          .appendCacheable(query.getVirtualColumns());
      if (query.isApplyLimitPushDown()) {
        builder.appendCacheable(query.getLimitSpec());
      }
      return builder.build();
    }

    @Override
    public byte[] computeResultLevelCacheKey(GroupByQuery query)
    {
      final CacheKeyBuilder builder = new CacheKeyBuilder(GROUPBY_QUERY)
          .appendByte(CACHE_STRATEGY_VERSION)
          .appendCacheable(query.getGranularity())
          .appendCacheable(query.getDimFilter())
          .appendCacheables(query.getAggregatorSpecs())
          .appendCacheables(query.getDimensions())
          .appendCacheable(query.getVirtualColumns())
          .appendCacheable(query.getHavingSpec())
          .appendCacheable(query.getLimitSpec())
          .appendCacheables(query.getPostAggregatorSpecs());
      if (query.getSubtotalsSpec() != null && !query.getSubtotalsSpec().isEmpty()) {
        for (List<String> subTotalSpec : query.getSubtotalsSpec()) {
          builder.appendStrings(subTotalSpec);
        }
      }
      return builder.build();
    }

    @Override
    public TypeReference<Object> getCacheObjectClazz()
    {
      return OBJECT_TYPE_REFERENCE;
    }

    @Override
    public Function<ResultRow, Object> prepareForCache(boolean isResultLevelCache)
    {
      final boolean resultRowHasTimestamp = query.getResultRowHasTimestamp();

      return new Function<ResultRow, Object>()
      {
        @Override
        public Object apply(ResultRow resultRow)
        {
          final List<Object> retVal = new ArrayList<>(1 + dims.size() + aggs.size());
          int inPos = 0;

          if (resultRowHasTimestamp) {
            retVal.add(resultRow.getLong(inPos++));
          } else {
            retVal.add(query.getUniversalTimestamp().getMillis());
          }

          for (int i = 0; i < dims.size(); i++) {
            retVal.add(resultRow.get(inPos++));
          }
          for (int i = 0; i < aggs.size(); i++) {
            retVal.add(resultRow.get(inPos++));
          }
          if (isResultLevelCache) {
            for (int i = 0; i < query.getPostAggregatorSpecs().size(); i++) {
              retVal.add(resultRow.get(inPos++));
            }
          }

          return retVal;
        }
      };
    }

    @Override
    public Function<Object, ResultRow> pullFromCache(boolean isResultLevelCache)
    {
      final boolean resultRowHasTimestamp = query.getResultRowHasTimestamp();
      final int dimensionStart = query.getResultRowDimensionStart();
      final int aggregatorStart = query.getResultRowAggregatorStart();
      final int postAggregatorStart = query.getResultRowPostAggregatorStart();

      return new Function<Object, ResultRow>()
      {
        private final Granularity granularity = query.getGranularity();

        @Override
        public ResultRow apply(Object input)
        {
          Iterator<Object> results = ((List<Object>) input).iterator();

          DateTime timestamp = granularity.toDateTime(((Number) results.next()).longValue());

          final int size = isResultLevelCache
                           ? query.getResultRowSizeWithPostAggregators()
                           : query.getResultRowSizeWithoutPostAggregators();
          final ResultRow resultRow = ResultRow.create(size);

          if (resultRowHasTimestamp) {
            resultRow.set(0, timestamp.getMillis());
          }

          final Iterator<DimensionSpec> dimsIter = dims.iterator();
          int dimPos = 0;
          while (dimsIter.hasNext() && results.hasNext()) {
            final DimensionSpec dimensionSpec = dimsIter.next();

            // Must convert generic Jackson-deserialized type into the proper type.
            resultRow.set(
                dimensionStart + dimPos,
                DimensionHandlerUtils.convertObjectToType(results.next(), dimensionSpec.getOutputType())
            );

            dimPos++;
          }

          CacheStrategy.fetchAggregatorsFromCache(
              aggs,
              results,
              isResultLevelCache,
              (aggName, aggPosition, aggValueObject) -> {
                resultRow.set(aggregatorStart + aggPosition, aggValueObject);
              }
          );

          if (isResultLevelCache) {
            Iterator<PostAggregator> postItr = query.getPostAggregatorSpecs().iterator();
            int postPos = 0;
            while (postItr.hasNext() && results.hasNext()) {
              postItr.next();
              resultRow.set(postAggregatorStart + postPos, results.next());
              postPos++;
            }
          }
          if (dimsIter.hasNext() || results.hasNext()) {
            throw new ISE(
                "Found left over objects while reading from cache!! dimsIter[%s] results[%s]",
                dimsIter.hasNext(),
                results.hasNext()
            );
          }

          return resultRow;
        }
      };
    }
  };
}
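The key invariant here is that prepareForCache and pullFromCache agree on a flat positional layout: [timestamp, dim values..., agg values..., post-agg values...], with the last group present only for the result-level cache. A tiny round-trip illustration of that layout in plain Java (the concrete values and field counts are hypothetical):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class CacheLayoutSketch
{
  public static void main(String[] args)
  {
    // Flat positional layout of one cached result row:
    // [timestamp, dim values..., agg values..., post-agg values...]
    List<Object> cached = new ArrayList<>();
    cached.add(1609459200000L); // timestamp millis
    cached.add("shoes");        // one dimension
    cached.add(42L);            // one aggregator
    cached.add(21.0);           // one post-aggregator (result-level cache only)

    // Pulling depends on reading positions back in exactly the same order,
    // which is why the code above throws ISE on any leftover objects.
    Iterator<Object> it = cached.iterator();
    long timestamp = ((Number) it.next()).longValue();
    Object dim = it.next();
    Object agg = it.next();
    Object postAgg = it.next();
    System.out.println(timestamp + " " + dim + " " + agg + " " + postAgg);
  }
}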
Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.
The class GroupByQueryQueryToolChest, method extractionsToRewrite.
/**
* This function checks the query for dimensions which can be optimized by applying the dimension extraction
* as the final step of the query instead of on every event.
*
* @param query The query to check for optimizations
*
* @return The set of dimensions (as offsets into {@code query.getDimensions()}) which can be extracted at the last
* second upon query completion.
*/
private static BitSet extractionsToRewrite(GroupByQuery query)
{
  final BitSet retVal = new BitSet();
  final List<DimensionSpec> dimensions = query.getDimensions();
  for (int i = 0; i < dimensions.size(); i++) {
    final DimensionSpec dimensionSpec = dimensions.get(i);
    if (dimensionSpec.getExtractionFn() != null
        && ExtractionFn.ExtractionType.ONE_TO_ONE.equals(dimensionSpec.getExtractionFn().getExtractionType())) {
      retVal.set(i);
    }
  }
  return retVal;
}
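The ONE_TO_ONE restriction matters because the deferred rewrite only maps each finished result row's key; it does not re-aggregate. With an injective (one-to-one) function, distinct raw keys stay distinct after mapping, so the result matches applying the function per event. A minimal sketch of that distinction in plain Java (illustrative names and functions, not Druid's ExtractionFn API):

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.UnaryOperator;

public class ExtractionRewriteSketch
{
  public static void main(String[] args)
  {
    // Result rows grouped on the raw column: key -> count.
    Map<String, Long> grouped = new LinkedHashMap<>();
    grouped.put("a", 2L);
    grouped.put("b", 1L);

    // One-to-one fn: keys stay distinct, so mapping finished rows is safe.
    UnaryOperator<String> oneToOne = k -> k + "_x";
    grouped.forEach((k, v) -> System.out.println(oneToOne.apply(k) + " -> " + v));
    // a_x -> 2, b_x -> 1

    // A many-to-one fn (e.g., every key -> "same") would leave multiple rows
    // with the same key and per-group counts that were never merged, which is
    // why only ONE_TO_ONE extraction fns are eligible for this rewrite.
  }
}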