Use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.
From the class TopNQueryQueryToolChest, the method getCacheStrategy:
@Override
public CacheStrategy<Result<TopNResultValue>, Object, TopNQuery> getCacheStrategy(final TopNQuery query) {
  return new CacheStrategy<Result<TopNResultValue>, Object, TopNQuery>() {

    private final List<AggregatorFactory> aggs = Lists.newArrayList(query.getAggregatorSpecs());

    private final List<PostAggregator> postAggs = AggregatorUtil.pruneDependentPostAgg(
        query.getPostAggregatorSpecs(),
        query.getTopNMetricSpec().getMetricName(query.getDimensionSpec())
    );

    @Override
    public boolean isCacheable(TopNQuery query, boolean willMergeRunners) {
      return true;
    }

    @Override
    public byte[] computeCacheKey(TopNQuery query) {
      final CacheKeyBuilder builder = new CacheKeyBuilder(TOPN_QUERY)
          .appendCacheable(query.getDimensionSpec())
          .appendCacheable(query.getTopNMetricSpec())
          .appendInt(query.getThreshold())
          .appendCacheable(query.getGranularity())
          .appendCacheable(query.getDimensionsFilter())
          .appendCacheablesIgnoringOrder(query.getAggregatorSpecs())
          .appendCacheable(query.getVirtualColumns());

      final List<PostAggregator> postAggregators = prunePostAggregators(query);
      if (!postAggregators.isEmpty()) {
        // Append post aggregators only when they are used as sort keys.
        // Note that appending an empty list produces a different cache key from not appending it.
        builder.appendCacheablesIgnoringOrder(postAggregators);
      }

      return builder.build();
    }

    @Override
    public TypeReference<Object> getCacheObjectClazz() {
      return OBJECT_TYPE_REFERENCE;
    }

    @Override
    public Function<Result<TopNResultValue>, Object> prepareForCache() {
      return new Function<Result<TopNResultValue>, Object>() {

        private final String[] aggFactoryNames = extractFactoryName(query.getAggregatorSpecs());

        @Override
        public Object apply(final Result<TopNResultValue> input) {
          List<DimensionAndMetricValueExtractor> results = Lists.newArrayList(input.getValue());
          final List<Object> retVal = Lists.newArrayListWithCapacity(results.size() + 1);

          // make sure to preserve timezone information when caching results
          retVal.add(input.getTimestamp().getMillis());

          for (DimensionAndMetricValueExtractor result : results) {
            List<Object> vals = Lists.newArrayListWithCapacity(aggFactoryNames.length + 2);
            vals.add(result.getDimensionValue(query.getDimensionSpec().getOutputName()));
            for (String aggName : aggFactoryNames) {
              vals.add(result.getMetric(aggName));
            }
            retVal.add(vals);
          }
          return retVal;
        }
      };
    }

    @Override
    public Function<Object, Result<TopNResultValue>> pullFromCache() {
      return new Function<Object, Result<TopNResultValue>>() {

        private final Granularity granularity = query.getGranularity();

        @Override
        public Result<TopNResultValue> apply(Object input) {
          List<Object> results = (List<Object>) input;
          List<Map<String, Object>> retVal = Lists.newArrayListWithCapacity(results.size());

          Iterator<Object> inputIter = results.iterator();
          DateTime timestamp = granularity.toDateTime(((Number) inputIter.next()).longValue());

          while (inputIter.hasNext()) {
            List<Object> result = (List<Object>) inputIter.next();
            Map<String, Object> vals = Maps.newLinkedHashMap();

            Iterator<AggregatorFactory> aggIter = aggs.iterator();
            Iterator<Object> resultIter = result.iterator();

            vals.put(query.getDimensionSpec().getOutputName(), resultIter.next());

            while (aggIter.hasNext() && resultIter.hasNext()) {
              final AggregatorFactory factory = aggIter.next();
              vals.put(factory.getName(), factory.deserialize(resultIter.next()));
            }

            for (PostAggregator postAgg : postAggs) {
              vals.put(postAgg.getName(), postAgg.compute(vals));
            }

            retVal.add(vals);
          }
          return new Result<>(timestamp, new TopNResultValue(retVal));
        }
      };
    }
  };
}
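The two cache functions above are symmetric: prepareForCache flattens each row into [dimensionValue, agg1, agg2, ...] with the bucket timestamp prepended, and pullFromCache rebuilds the rows, deserializes the aggregator values, and recomputes the pruned post-aggregators. Below is a minimal sketch of that round trip; toolChest, topNQuery, and someResult are assumptions made for the example and are not part of the excerpt above.

// Hypothetical caller of the strategy; not the engine's actual caching code.
CacheStrategy<Result<TopNResultValue>, Object, TopNQuery> strategy = toolChest.getCacheStrategy(topNQuery);

byte[] cacheKey = strategy.computeCacheKey(topNQuery);          // per-segment cache key
Object cached = strategy.prepareForCache().apply(someResult);   // compact, cache-friendly form
Result<TopNResultValue> restored = strategy.pullFromCache().apply(cached);  // rows rebuilt, post-aggs recomputed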
Use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.
From the class TopNBinaryFn, the method apply:
@Override
public Result<TopNResultValue> apply(Result<TopNResultValue> arg1, Result<TopNResultValue> arg2) {
  if (arg1 == null) {
    return merger.getResult(arg2, comparator);
  }
  if (arg2 == null) {
    return merger.getResult(arg1, comparator);
  }

  Map<Object, DimensionAndMetricValueExtractor> retVals = new LinkedHashMap<>();

  TopNResultValue arg1Vals = arg1.getValue();
  TopNResultValue arg2Vals = arg2.getValue();

  for (DimensionAndMetricValueExtractor arg1Val : arg1Vals) {
    retVals.put(arg1Val.getDimensionValue(dimension), arg1Val);
  }

  for (DimensionAndMetricValueExtractor arg2Val : arg2Vals) {
    final Object dimensionValue = arg2Val.getDimensionValue(dimension);
    DimensionAndMetricValueExtractor arg1Val = retVals.get(dimensionValue);

    if (arg1Val != null) {
      // size of map = aggregator + topNDim + postAgg (If sorting is done on post agg field)
      Map<String, Object> retVal = new LinkedHashMap<>(aggregations.size() + 2);

      retVal.put(dimension, dimensionValue);
      for (AggregatorFactory factory : aggregations) {
        final String metricName = factory.getName();
        retVal.put(metricName, factory.combine(arg1Val.getMetric(metricName), arg2Val.getMetric(metricName)));
      }

      for (PostAggregator pf : postAggregations) {
        retVal.put(pf.getName(), pf.compute(retVal));
      }

      retVals.put(dimensionValue, new DimensionAndMetricValueExtractor(retVal));
    } else {
      retVals.put(dimensionValue, arg2Val);
    }
  }

  final DateTime timestamp;
  if (gran instanceof AllGranularity) {
    timestamp = arg1.getTimestamp();
  } else {
    timestamp = gran.bucketStart(arg1.getTimestamp());
  }

  TopNResultBuilder bob = topNMetricSpec.getResultBuilder(
      timestamp,
      dimSpec,
      threshold,
      comparator,
      aggregations,
      postAggregations
  );
  for (DimensionAndMetricValueExtractor extractor : retVals.values()) {
    bob.addEntry(extractor);
  }
  return bob.build();
}
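When the same dimension value appears on both sides, each aggregator is merged with AggregatorFactory.combine and the post-aggregators are then recomputed on the merged row, since post-aggregated values cannot be combined directly. Below is a hedged sketch of that per-row merge, using a made-up long-sum aggregator and literal partial values; postAggregations refers to the field used in the method above.

// Illustration only: fold two partial rows for the dimension value "spot".
AggregatorFactory count = new LongSumAggregatorFactory("count", "count");

Map<String, Object> merged = new LinkedHashMap<>();
merged.put("market", "spot");                  // the topN dimension value
merged.put("count", count.combine(10L, 32L));  // partial sums folded to 42L

// post-aggregators always see the already-merged aggregator values
for (PostAggregator postAgg : postAggregations) {
  merged.put(postAgg.getName(), postAgg.compute(merged));
}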
Use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.
From the class TopNQueryQueryToolChest, the method makePreComputeManipulatorFn:
@Override
public Function<Result<TopNResultValue>, Result<TopNResultValue>> makePreComputeManipulatorFn(final TopNQuery query, final MetricManipulationFn fn) {
  return new Function<Result<TopNResultValue>, Result<TopNResultValue>>() {

    private String dimension = query.getDimensionSpec().getOutputName();
    private final List<PostAggregator> prunedAggs = prunePostAggregators(query);
    private final AggregatorFactory[] aggregatorFactories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]);
    private final String[] aggFactoryNames = extractFactoryName(query.getAggregatorSpecs());

    @Override
    public Result<TopNResultValue> apply(Result<TopNResultValue> result) {
      List<Map<String, Object>> serializedValues = Lists.newArrayList(
          Iterables.transform(
              result.getValue(),
              new Function<DimensionAndMetricValueExtractor, Map<String, Object>>() {
                @Override
                public Map<String, Object> apply(DimensionAndMetricValueExtractor input) {
                  final Map<String, Object> values = Maps.newHashMapWithExpectedSize(
                      aggregatorFactories.length + prunedAggs.size() + 1
                  );

                  for (int i = 0; i < aggregatorFactories.length; ++i) {
                    final String aggName = aggFactoryNames[i];
                    values.put(aggName, fn.manipulate(aggregatorFactories[i], input.getMetric(aggName)));
                  }

                  for (PostAggregator postAgg : prunedAggs) {
                    final String name = postAgg.getName();
                    Object calculatedPostAgg = input.getMetric(name);
                    if (calculatedPostAgg != null) {
                      values.put(name, calculatedPostAgg);
                    } else {
                      values.put(name, postAgg.compute(values));
                    }
                  }

                  values.put(dimension, input.getDimensionValue(dimension));

                  return values;
                }
              }
          )
      );
      return new Result<TopNResultValue>(result.getTimestamp(), new TopNResultValue(serializedValues));
    }
  };
}
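makePreComputeManipulatorFn runs the supplied MetricManipulationFn over every aggregated metric (typically to deserialize or finalize complex metrics coming back from historicals) and fills in any pruned post-aggregator that is missing from the row. Below is a sketch of the smallest possible manipulation function; toolChest and topNQuery are assumptions for the example, and production callers usually pass a deserializing or finalizing implementation instead.

// An identity manipulator: leaves every metric value untouched.
MetricManipulationFn identity = new MetricManipulationFn() {
  @Override
  public Object manipulate(AggregatorFactory factory, Object object) {
    return object;
  }
};

Function<Result<TopNResultValue>, Result<TopNResultValue>> manipulator =
    toolChest.makePreComputeManipulatorFn(topNQuery, identity);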
Use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.
From the class ApproximateHistogramTopNQueryTest, the method testTopNWithApproximateHistogramAgg:
@Test
public void testTopNWithApproximateHistogramAgg() {
  ApproximateHistogramAggregatorFactory factory = new ApproximateHistogramAggregatorFactory(
      "apphisto", "index", 10, 5, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY
  );
  TopNQuery query = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.dataSource)
      .granularity(QueryRunnerTestHelper.allGran)
      .dimension(QueryRunnerTestHelper.marketDimension)
      .metric(QueryRunnerTestHelper.dependentPostAggMetric)
      .threshold(4)
      .intervals(QueryRunnerTestHelper.fullOnInterval)
      .aggregators(Lists.<AggregatorFactory>newArrayList(Iterables.concat(QueryRunnerTestHelper.commonAggregators, Lists.newArrayList(new DoubleMaxAggregatorFactory("maxIndex", "index"), new DoubleMinAggregatorFactory("minIndex", "index"), factory))))
      .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant, QueryRunnerTestHelper.dependentPostAgg, new QuantilePostAggregator("quantile", "apphisto", 0.5f)))
      .build();
  List<Result<TopNResultValue>> expectedResults = Arrays.asList(new Result<TopNResultValue>(new DateTime("2011-01-12T00:00:00.000Z"), new TopNResultValue(Arrays.<Map<String, Object>>asList(
      ImmutableMap.<String, Object>builder().put(QueryRunnerTestHelper.marketDimension, "total_market").put("rows", 186L).put("index", 215679.82879638672D).put("addRowsIndexConstant", 215866.82879638672D).put(QueryRunnerTestHelper.dependentPostAggMetric, 216053.82879638672D).put("uniques", QueryRunnerTestHelper.UNIQUES_2).put("maxIndex", 1743.9217529296875D).put("minIndex", 792.3260498046875D).put("quantile", 1085.6775f).put("apphisto", new Histogram(new float[] { 554.4271240234375f, 792.3260498046875f, 1030.2249755859375f, 1268.1239013671875f, 1506.0228271484375f, 1743.9217529296875f }, new double[] { 0.0D, 39.42073059082031D, 103.29110717773438D, 34.93659591674805D, 8.351564407348633D })).build(),
      ImmutableMap.<String, Object>builder().put(QueryRunnerTestHelper.marketDimension, "upfront").put("rows", 186L).put("index", 192046.1060180664D).put("addRowsIndexConstant", 192233.1060180664D).put(QueryRunnerTestHelper.dependentPostAggMetric, 192420.1060180664D).put("uniques", QueryRunnerTestHelper.UNIQUES_2).put("maxIndex", 1870.06103515625D).put("minIndex", 545.9906005859375D).put("quantile", 880.9881f).put("apphisto", new Histogram(new float[] { 214.97299194335938f, 545.9906005859375f, 877.0081787109375f, 1208.0257568359375f, 1539.0433349609375f, 1870.06103515625f }, new double[] { 0.0D, 67.53287506103516D, 72.22068786621094D, 31.984678268432617D, 14.261756896972656D })).build(),
      ImmutableMap.<String, Object>builder().put(QueryRunnerTestHelper.marketDimension, "spot").put("rows", 837L).put("index", 95606.57232284546D).put("addRowsIndexConstant", 96444.57232284546D).put(QueryRunnerTestHelper.dependentPostAggMetric, 97282.57232284546D).put("uniques", QueryRunnerTestHelper.UNIQUES_9).put("maxIndex", 277.2735290527344D).put("minIndex", 59.02102279663086D).put("quantile", 101.78856f).put("apphisto", new Histogram(new float[] { 4.457897186279297f, 59.02102279663086f, 113.58415222167969f, 168.14727783203125f, 222.7104034423828f, 277.2735290527344f }, new double[] { 0.0D, 462.4309997558594D, 357.5404968261719D, 15.022850036621094D, 2.0056631565093994D })).build()))));
  HashMap<String, Object> context = new HashMap<String, Object>();
  TestHelper.assertExpectedResults(expectedResults, runner.run(query, context));
}
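The "quantile" value in the expected rows is not stored in the segment; it is derived at query time by pairing the "apphisto" histogram aggregator with a QuantilePostAggregator. That pairing, extracted from the query built above:

// Aggregate "index" into an approximate histogram, then read its median at query time.
AggregatorFactory apphisto = new ApproximateHistogramAggregatorFactory(
    "apphisto", "index", 10, 5, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY);
PostAggregator median = new QuantilePostAggregator("quantile", "apphisto", 0.5f);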
Use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.
From the class QuantileSqlAggregatorTest, the method testQuantileOnFloatAndLongs:
@Test
public void testQuantileOnFloatAndLongs() throws Exception {
  try (final DruidPlanner planner = plannerFactory.createPlanner(null)) {
    final String sql = "SELECT\n"
        + "APPROX_QUANTILE(m1, 0.01),\n"
        + "APPROX_QUANTILE(m1, 0.5, 50),\n"
        + "APPROX_QUANTILE(m1, 0.98, 200),\n"
        + "APPROX_QUANTILE(m1, 0.99),\n"
        + "APPROX_QUANTILE(m1, 0.99) FILTER(WHERE dim1 = 'abc'),\n"
        + "APPROX_QUANTILE(m1, 0.999) FILTER(WHERE dim1 <> 'abc'),\n"
        + "APPROX_QUANTILE(m1, 0.999) FILTER(WHERE dim1 = 'abc'),\n"
        + "APPROX_QUANTILE(cnt, 0.5)\n"
        + "FROM foo";
    final PlannerResult plannerResult = planner.plan(sql);

    // Verify results
    final List<Object[]> results = Sequences.toList(plannerResult.run(), new ArrayList<Object[]>());
    final List<Object[]> expectedResults = ImmutableList.of(
        new Object[] { 1.0, 3.0, 5.880000114440918, 5.940000057220459, 6.0, 4.994999885559082, 6.0, 1.0 }
    );
    Assert.assertEquals(expectedResults.size(), results.size());
    for (int i = 0; i < expectedResults.size(); i++) {
      Assert.assertArrayEquals(expectedResults.get(i), results.get(i));
    }

    // Verify query
    Assert.assertEquals(
        Druids.newTimeseriesQueryBuilder()
              .dataSource(CalciteTests.DATASOURCE1)
              .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
              .granularity(Granularities.ALL)
              .aggregators(ImmutableList.of(
                  new ApproximateHistogramAggregatorFactory("a0:agg", "m1", null, null, null, null),
                  new ApproximateHistogramAggregatorFactory("a2:agg", "m1", 200, null, null, null),
                  new FilteredAggregatorFactory(new ApproximateHistogramAggregatorFactory("a4:agg", "m1", null, null, null, null), new SelectorDimFilter("dim1", "abc", null)),
                  new FilteredAggregatorFactory(new ApproximateHistogramAggregatorFactory("a5:agg", "m1", null, null, null, null), new NotDimFilter(new SelectorDimFilter("dim1", "abc", null))),
                  new ApproximateHistogramAggregatorFactory("a7:agg", "cnt", null, null, null, null)
              ))
              .postAggregators(ImmutableList.<PostAggregator>of(
                  new QuantilePostAggregator("a0", "a0:agg", 0.01f),
                  new QuantilePostAggregator("a1", "a0:agg", 0.50f),
                  new QuantilePostAggregator("a2", "a2:agg", 0.98f),
                  new QuantilePostAggregator("a3", "a0:agg", 0.99f),
                  new QuantilePostAggregator("a4", "a4:agg", 0.99f),
                  new QuantilePostAggregator("a5", "a5:agg", 0.999f),
                  new QuantilePostAggregator("a6", "a4:agg", 0.999f),
                  new QuantilePostAggregator("a7", "a7:agg", 0.50f)
              ))
              .context(ImmutableMap.<String, Object>of("skipEmptyBuckets", true))
              .build(),
        Iterables.getOnlyElement(queryLogHook.getRecordedQueries())
    );
  }
}
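Note how the expected native query reuses one histogram per distinct column and resolution: the APPROX_QUANTILE calls over m1 at 0.01, 0.99, and 0.5 (the latter with an explicit resolution of 50, which appears to match the default) all read the single "a0:agg" aggregator and differ only in their QuantilePostAggregator. The shared mapping, copied from the expected query above:

// Three SQL quantiles over m1 backed by the same histogram aggregator.
PostAggregator p01 = new QuantilePostAggregator("a0", "a0:agg", 0.01f);
PostAggregator p50 = new QuantilePostAggregator("a1", "a0:agg", 0.50f);
PostAggregator p99 = new QuantilePostAggregator("a3", "a0:agg", 0.99f);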