Use of org.apache.druid.query.aggregation.PostAggregator in project druid by druid-io.
The class ArrayOfDoublesSketchToNumEntriesPostAggregatorTest, method testToString.
@Test
public void testToString() {
PostAggregator postAgg = new ArrayOfDoublesSketchToNumEntriesPostAggregator("a", new ConstantPostAggregator("", 0));
Assert.assertEquals("ArrayOfDoublesSketchToNumEntriesPostAggregator{name='a', field=ConstantPostAggregator{name='', constantValue=0}}", postAgg.toString());
}
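The test above only exercises toString(). The general PostAggregator contract is that compute() derives a new value from a map of already-aggregated values keyed by output name. A minimal sketch of that contract, using the core ArithmeticPostAggregator, FieldAccessPostAggregator, and ConstantPostAggregator classes from org.apache.druid.query.aggregation.post with hypothetical names and values (not part of the test above):
@Test
public void testComputeContractSketch() {
// Hypothetical already-aggregated values, keyed by aggregator output name.
Map<String, Object> aggregated = new HashMap<>();
aggregated.put("rows", 10L);
// "rows" * 2, expressed as a post-aggregation over the map above.
PostAggregator rowsTimesTwo = new ArithmeticPostAggregator("rowsTimesTwo", "*", Arrays.asList(new FieldAccessPostAggregator("rows", "rows"), new ConstantPostAggregator("two", 2)));
// ArithmeticPostAggregator operates on doubles, so the computed value is 20.0.
Assert.assertEquals(20.0, ((Number) rowsTimesTwo.compute(aggregated)).doubleValue(), 0.0);
}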
Use of org.apache.druid.query.aggregation.PostAggregator in project druid by druid-io.
The class GroupByStrategyV2, method mergeResults.
@Override
public Sequence<ResultRow> mergeResults(final QueryRunner<ResultRow> baseRunner, final GroupByQuery query, final ResponseContext responseContext) {
// Merge streams using ResultMergeQueryRunner, then apply postaggregators, then apply limit (which may
// involve materialization)
final ResultMergeQueryRunner<ResultRow> mergingQueryRunner = new ResultMergeQueryRunner<>(baseRunner, this::createResultComparator, this::createMergeFn);
// Set up downstream context.
final ImmutableMap.Builder<String, Object> context = ImmutableMap.builder();
context.put("finalize", false);
context.put(GroupByQueryConfig.CTX_KEY_STRATEGY, GroupByStrategySelector.STRATEGY_V2);
context.put(CTX_KEY_OUTERMOST, false);
Granularity granularity = query.getGranularity();
List<DimensionSpec> dimensionSpecs = query.getDimensions();
// the CTX_TIMESTAMP_RESULT_FIELD is set in DruidQuery.java
final String timestampResultField = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD);
final boolean hasTimestampResultField = (timestampResultField != null && !timestampResultField.isEmpty()) && query.getContextBoolean(CTX_KEY_OUTERMOST, true) && !query.isApplyLimitPushDown();
int timestampResultFieldIndex = 0;
if (hasTimestampResultField) {
// For SQL like "GROUP BY city_id, TIME_FLOOR(__time TO DAY)",
// the originally translated query has granularity=all and dimensions [d0, d1].
// The better plan is granularity=day and dimensions [d0],
// but then the ResultRow structure changes from [d0, d1] to [__time, d0];
// that structure should be fixed back to [d0, d1] (actually [d0, __time]) before postAggs are called.
//
// The above is the general idea of this optimization.
// From a coding perspective, however, granularity=all and the "d0" dimension are referenced in many places,
// e.g. subtotals, having, grouping sets, and post-aggregators,
// so many places would need to be fixed if the "d0" dimension were removed from query.dimensions,
// and the same goes for the granularity change.
// To keep the change manageable, the optimization is implemented as a groupBy engine-level internal processing change.
// Most of the code is in GroupByStrategyV2 and concerns the processing handoff between the broker and compute nodes.
// Basic logic such as nested queries and subtotals is kept unchanged;
// it will still see granularity=all and the "d0" dimension.
//
// The trade-off is that GroupByStrategyV2 behaves differently depending on the query context values set in DruidQuery.
// In other words, the query generated by "EXPLAIN PLAN FOR SELECT ..." does not match the native query ACTUALLY being executed;
// the granularity and dimensions are slightly different.
// Part of the query planning logic is now handled in GroupByStrategyV2, not only in DruidQuery.toGroupByQuery().
final Granularity timestampResultFieldGranularity = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_GRANULARITY);
dimensionSpecs = query.getDimensions().stream().filter(dimensionSpec -> !dimensionSpec.getOutputName().equals(timestampResultField)).collect(Collectors.toList());
granularity = timestampResultFieldGranularity;
// when timestampResultField is the last dimension, sortByDimsFirst should be set to true;
// otherwise the downstream results are sorted by the row's timestamp first, which breaks the expected final ordering
timestampResultFieldIndex = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_INDEX);
if (!query.getContextSortByDimsFirst() && timestampResultFieldIndex == query.getDimensions().size() - 1) {
context.put(GroupByQuery.CTX_KEY_SORT_BY_DIMS_FIRST, true);
}
// when sortByDimsFirst=true and timestampResultField is the first dimension,
// this is effectively equivalent to sortByDimsFirst=false
if (query.getContextSortByDimsFirst() && timestampResultFieldIndex == 0) {
context.put(GroupByQuery.CTX_KEY_SORT_BY_DIMS_FIRST, false);
}
// when hasTimestampResultField=true and timestampResultField is neither the first nor the last dimension,
// the DefaultLimitSpec will always do the reordering
}
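// Illustration of the reshaping described above (hypothetical values): with granularity=day,
// a downstream row for city_id=7 on 2021-01-01 arrives as [1609459200000, 7] (i.e. [__time, d0]);
// moveOrReplicateTimestampInRow later rewrites it to [7, 1609459200000] (i.e. [d0, __time]) so the
// outermost result matches the original [d0, d1] layout before postAggs are applied.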
final int timestampResultFieldIndexInOriginalDimensions = timestampResultFieldIndex;
if (query.getUniversalTimestamp() != null && !hasTimestampResultField) {
// universalTimestamp works only when granularity is "all",
// and hasTimestampResultField likewise applies only when granularity is "all".
// fudgeTimestamp should not be used when hasTimestampResultField=true, because the row's actual timestamp is used.
context.put(CTX_KEY_FUDGE_TIMESTAMP, String.valueOf(query.getUniversalTimestamp().getMillis()));
}
// The having spec shouldn't be passed down, so we need to convey the existing limit push down status
context.put(GroupByQueryConfig.CTX_KEY_APPLY_LIMIT_PUSH_DOWN, query.isApplyLimitPushDown());
// Always request array result rows when passing the query downstream.
context.put(GroupByQueryConfig.CTX_KEY_ARRAY_RESULT_ROWS, true);
final GroupByQuery newQuery = new GroupByQuery(
query.getDataSource(),
query.getQuerySegmentSpec(),
query.getVirtualColumns(),
query.getDimFilter(),
granularity,
dimensionSpecs,
query.getAggregatorSpecs(),
// Don't apply postaggregators on compute nodes
ImmutableList.of(),
// Don't do the "having" clause until the end of this method.
null,
// Potentially push the limit down the stack (limit pushdown is only supported for DefaultLimitSpec);
// when pushing down, the limit must be extended to include the offset (the offset will be applied higher up).
query.isApplyLimitPushDown() ? ((DefaultLimitSpec) query.getLimitSpec()).withOffsetToLimit() : null,
query.getSubtotalsSpec(),
query.getContext()).withOverriddenContext(context.build());
final Sequence<ResultRow> mergedResults = mergingQueryRunner.run(QueryPlus.wrap(newQuery), responseContext);
if (!query.getContextBoolean(CTX_KEY_OUTERMOST, true) || query.getContextBoolean(GroupByQueryConfig.CTX_KEY_EXECUTING_NESTED_QUERY, false)) {
return mergedResults;
} else if (query.getPostAggregatorSpecs().isEmpty()) {
if (!hasTimestampResultField) {
return mergedResults;
}
return Sequences.map(mergedResults, row -> {
final ResultRow resultRow = ResultRow.create(query.getResultRowSizeWithoutPostAggregators());
moveOrReplicateTimestampInRow(query, timestampResultFieldIndexInOriginalDimensions, row, resultRow);
return resultRow;
});
} else {
return Sequences.map(mergedResults, row -> {
// This function's purpose is to apply PostAggregators.
final ResultRow rowWithPostAggregations = ResultRow.create(query.getResultRowSizeWithPostAggregators());
// Copy everything that comes before the postaggregations.
if (hasTimestampResultField) {
moveOrReplicateTimestampInRow(query, timestampResultFieldIndexInOriginalDimensions, row, rowWithPostAggregations);
} else {
for (int i = 0; i < query.getResultRowPostAggregatorStart(); i++) {
rowWithPostAggregations.set(i, row.get(i));
}
}
// Compute postaggregations. We need to do this with a result-row map because PostAggregator.compute
// expects a map. Some further design adjustment may eliminate the need for it, and speed up this function.
final Map<String, Object> mapForPostAggregationComputation = rowWithPostAggregations.toMap(query);
for (int i = 0; i < query.getPostAggregatorSpecs().size(); i++) {
final PostAggregator postAggregator = query.getPostAggregatorSpecs().get(i);
final Object value = postAggregator.compute(mapForPostAggregationComputation);
rowWithPostAggregations.set(query.getResultRowPostAggregatorStart() + i, value);
mapForPostAggregationComputation.put(postAggregator.getName(), value);
}
return rowWithPostAggregations;
});
}
}
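The post-aggregation loop at the end of mergeResults writes each computed value back into mapForPostAggregationComputation, so later post-aggregators can reference earlier ones by output name. A minimal standalone sketch of that chaining behavior, using the core ArithmeticPostAggregator, FieldAccessPostAggregator, and ConstantPostAggregator classes with hypothetical names and values rather than a full GroupByQuery:
Map<String, Object> mapForPostAggs = new HashMap<>();
mapForPostAggs.put("sum", 10L);
mapForPostAggs.put("rows", 4L);
List<PostAggregator> postAggs = Arrays.asList(
new ArithmeticPostAggregator("avg", "/", Arrays.asList(new FieldAccessPostAggregator("sum", "sum"), new FieldAccessPostAggregator("rows", "rows"))),
// References "avg", which is produced by the previous post-aggregator rather than by an aggregator.
new ArithmeticPostAggregator("avgTimesTen", "*", Arrays.asList(new FieldAccessPostAggregator("avg", "avg"), new ConstantPostAggregator("ten", 10))));
for (PostAggregator postAgg : postAggs) {
mapForPostAggs.put(postAgg.getName(), postAgg.compute(mapForPostAggs));
}
// mapForPostAggs now also contains avg=2.5 and avgTimesTen=25.0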
Use of org.apache.druid.query.aggregation.PostAggregator in project druid by druid-io.
The class TimeseriesQueryQueryToolChest, method getCacheStrategy.
@Override
public CacheStrategy<Result<TimeseriesResultValue>, Object, TimeseriesQuery> getCacheStrategy(final TimeseriesQuery query) {
return new CacheStrategy<Result<TimeseriesResultValue>, Object, TimeseriesQuery>() {
private final List<AggregatorFactory> aggs = query.getAggregatorSpecs();
@Override
public boolean isCacheable(TimeseriesQuery query, boolean willMergeRunners) {
return true;
}
@Override
public byte[] computeCacheKey(TimeseriesQuery query) {
return new CacheKeyBuilder(TIMESERIES_QUERY).appendBoolean(query.isDescending()).appendBoolean(query.isSkipEmptyBuckets()).appendCacheable(query.getGranularity()).appendCacheable(query.getDimensionsFilter()).appendCacheables(query.getAggregatorSpecs()).appendCacheable(query.getVirtualColumns()).appendInt(query.getLimit()).build();
}
@Override
public byte[] computeResultLevelCacheKey(TimeseriesQuery query) {
final CacheKeyBuilder builder = new CacheKeyBuilder(TIMESERIES_QUERY).appendBoolean(query.isDescending()).appendBoolean(query.isSkipEmptyBuckets()).appendCacheable(query.getGranularity()).appendCacheable(query.getDimensionsFilter()).appendCacheables(query.getAggregatorSpecs()).appendCacheable(query.getVirtualColumns()).appendCacheables(query.getPostAggregatorSpecs()).appendInt(query.getLimit()).appendString(query.getTimestampResultField()).appendBoolean(query.isGrandTotal());
return builder.build();
}
@Override
public TypeReference<Object> getCacheObjectClazz() {
return OBJECT_TYPE_REFERENCE;
}
@Override
public Function<Result<TimeseriesResultValue>, Object> prepareForCache(boolean isResultLevelCache) {
return input -> {
TimeseriesResultValue results = input.getValue();
final List<Object> retVal = Lists.newArrayListWithCapacity(1 + aggs.size());
// Timestamp can be null if grandTotal is true.
if (isResultLevelCache) {
retVal.add(input.getTimestamp() == null ? null : input.getTimestamp().getMillis());
} else {
retVal.add(Preconditions.checkNotNull(input.getTimestamp(), "timestamp of input[%s]", input).getMillis());
}
for (AggregatorFactory agg : aggs) {
retVal.add(results.getMetric(agg.getName()));
}
if (isResultLevelCache) {
for (PostAggregator postAgg : query.getPostAggregatorSpecs()) {
retVal.add(results.getMetric(postAgg.getName()));
}
}
return retVal;
};
}
@Override
public Function<Object, Result<TimeseriesResultValue>> pullFromCache(boolean isResultLevelCache) {
return new Function<Object, Result<TimeseriesResultValue>>() {
private final Granularity granularity = query.getGranularity();
@Override
public Result<TimeseriesResultValue> apply(Object input) {
List<Object> results = (List<Object>) input;
final Map<String, Object> retVal = Maps.newLinkedHashMap();
Iterator<Object> resultIter = results.iterator();
final Number timestampNumber = (Number) resultIter.next();
final DateTime timestamp;
if (isResultLevelCache) {
timestamp = timestampNumber == null ? null : granularity.toDateTime(timestampNumber.longValue());
} else {
timestamp = granularity.toDateTime(Preconditions.checkNotNull(timestampNumber, "timestamp").longValue());
}
CacheStrategy.fetchAggregatorsFromCache(aggs, resultIter, isResultLevelCache, (aggName, aggPosition, aggValueObject) -> {
retVal.put(aggName, aggValueObject);
});
if (isResultLevelCache) {
Iterator<PostAggregator> postItr = query.getPostAggregatorSpecs().iterator();
while (postItr.hasNext() && resultIter.hasNext()) {
retVal.put(postItr.next().getName(), resultIter.next());
}
}
return new Result<>(timestamp, new TimeseriesResultValue(retVal));
}
};
}
};
}
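For reference, prepareForCache above serializes each result as a flat list in a fixed order, and pullFromCache consumes it in the same order. A minimal sketch of that layout, with hypothetical aggregator and post-aggregator names ("rows", "sum", "avg") and values:
// Layout: [timestamp millis, aggregator values in aggs order, then post-aggregator values
// in query order]; the post-aggregator values are stored only for the result-level cache.
List<Object> cachedRow = Arrays.asList(1609459200000L, 10L, 42.0, 4.2);
Iterator<Object> it = cachedRow.iterator();
DateTime timestamp = Granularities.ALL.toDateTime(((Number) it.next()).longValue());
Map<String, Object> values = Maps.newLinkedHashMap();
values.put("rows", it.next());
values.put("sum", it.next());
values.put("avg", it.next()); // present only when reading from the result-level cache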
Use of org.apache.druid.query.aggregation.PostAggregator in project druid by druid-io.
The class QueriesTest, method testVerifyAggregationsMissingVal.
@Test
public void testVerifyAggregationsMissingVal() {
List<AggregatorFactory> aggFactories = Arrays.asList(new CountAggregatorFactory("count"), new DoubleSumAggregatorFactory("idx", "index"), new DoubleSumAggregatorFactory("rev", "revenue"));
List<PostAggregator> postAggs = Collections.singletonList(new ArithmeticPostAggregator("addStuff", "+", Arrays.asList(new FieldAccessPostAggregator("idx", "idx2"), new FieldAccessPostAggregator("count", "count"))));
boolean exceptionOccurred = false;
try {
Queries.prepareAggregations(ImmutableList.of(), aggFactories, postAggs);
} catch (IllegalArgumentException e) {
exceptionOccurred = true;
}
Assert.assertTrue(exceptionOccurred);
}
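The IllegalArgumentException is expected here because the FieldAccessPostAggregator reads "idx2", which no aggregator produces. For contrast, a minimal sketch of the valid case, reusing the aggFactories defined above and accessing the existing "idx" output, which should complete without throwing:
List<PostAggregator> validPostAggs = Collections.singletonList(new ArithmeticPostAggregator("addStuff", "+", Arrays.asList(new FieldAccessPostAggregator("idx", "idx"), new FieldAccessPostAggregator("count", "count"))));
Queries.prepareAggregations(ImmutableList.of(), aggFactories, validPostAggs);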
Use of org.apache.druid.query.aggregation.PostAggregator in project druid by druid-io.
The class StandardDeviationPostAggregatorTest, method testToString.
@Test
public void testToString() {
PostAggregator postAgg = new StandardDeviationPostAggregator("post", "test_field", "population");
Assert.assertEquals("StandardDeviationPostAggregator{name='post', fieldName='test_field', estimator='population', isVariancePop=true}", postAgg.toString());
}
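The isVariancePop=true in the expected string comes from the "population" estimator argument; with any other estimator value the post-aggregator computes the sample standard deviation. A small complementary sketch, assuming the same three-argument constructor used above accepts a null estimator:
PostAggregator samplePostAgg = new StandardDeviationPostAggregator("post", "test_field", null);
// With a null (non-"population") estimator, isVariancePop is false and the sample
// standard deviation is computed instead of the population one.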