Search in sources:

Example 96 with PostAggregator

Use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.

Source: the method getCacheStrategy of the class TopNQueryQueryToolChest.

/**
 * Builds the {@link CacheStrategy} used to serialize TopN results into the query cache
 * and to reconstruct {@code Result<TopNResultValue>} objects when reading them back.
 *
 * The cached form of a result is a flat {@code List<Object>} whose first element is the
 * result timestamp in millis, followed by one {@code List<Object>} per row of the form
 * {@code [dimensionValue, agg1, agg2, ...]}. Post-aggregators are NOT cached; they are
 * recomputed in {@code pullFromCache} from the deserialized aggregator values.
 */
@Override
public CacheStrategy<Result<TopNResultValue>, Object, TopNQuery> getCacheStrategy(final TopNQuery query) {
    return new CacheStrategy<Result<TopNResultValue>, Object, TopNQuery>() {

        // Snapshot of the query's aggregator specs, iterated in order during deserialization.
        private final List<AggregatorFactory> aggs = Lists.newArrayList(query.getAggregatorSpecs());

        // Post-aggregators pruned (via AggregatorUtil.pruneDependentPostAgg) against the
        // topN metric's name — presumably only those the sort metric depends on; these are
        // the ones recomputed when pulling rows out of the cache.
        private final List<PostAggregator> postAggs = AggregatorUtil.pruneDependentPostAgg(query.getPostAggregatorSpecs(), query.getTopNMetricSpec().getMetricName(query.getDimensionSpec()));

        @Override
        public boolean isCacheable(TopNQuery query, boolean willMergeRunners) {
            return true;
        }

        // Builds the cache key from every query component that affects results.
        // NOTE(review): the append order here is part of the cache-key format — do not reorder.
        @Override
        public byte[] computeCacheKey(TopNQuery query) {
            final CacheKeyBuilder builder = new CacheKeyBuilder(TOPN_QUERY).appendCacheable(query.getDimensionSpec()).appendCacheable(query.getTopNMetricSpec()).appendInt(query.getThreshold()).appendCacheable(query.getGranularity()).appendCacheable(query.getDimensionsFilter()).appendCacheablesIgnoringOrder(query.getAggregatorSpecs()).appendCacheable(query.getVirtualColumns());
            final List<PostAggregator> postAggregators = prunePostAggregators(query);
            if (!postAggregators.isEmpty()) {
                // Append post aggregators only when they are used as sort keys.
                // Note that appending an empty list produces a different cache key from not appending it.
                builder.appendCacheablesIgnoringOrder(postAggregators);
            }
            return builder.build();
        }

        @Override
        public TypeReference<Object> getCacheObjectClazz() {
            return OBJECT_TYPE_REFERENCE;
        }

        // Serializer: Result -> [timestampMillis, [dimValue, agg1, ...], [dimValue, agg1, ...], ...]
        @Override
        public Function<Result<TopNResultValue>, Object> prepareForCache() {
            return new Function<Result<TopNResultValue>, Object>() {

                private final String[] aggFactoryNames = extractFactoryName(query.getAggregatorSpecs());

                @Override
                public Object apply(final Result<TopNResultValue> input) {
                    List<DimensionAndMetricValueExtractor> results = Lists.newArrayList(input.getValue());
                    final List<Object> retVal = Lists.newArrayListWithCapacity(results.size() + 1);
                    // make sure to preserve timezone information when caching results
                    retVal.add(input.getTimestamp().getMillis());
                    for (DimensionAndMetricValueExtractor result : results) {
                        // One flat list per row: dimension value first, then aggregator values
                        // in aggFactoryNames order (post-aggs are intentionally not stored).
                        List<Object> vals = Lists.newArrayListWithCapacity(aggFactoryNames.length + 2);
                        vals.add(result.getDimensionValue(query.getDimensionSpec().getOutputName()));
                        for (String aggName : aggFactoryNames) {
                            vals.add(result.getMetric(aggName));
                        }
                        retVal.add(vals);
                    }
                    return retVal;
                }
            };
        }

        // Deserializer: inverse of prepareForCache, recomputing pruned post-aggregators.
        @Override
        public Function<Object, Result<TopNResultValue>> pullFromCache() {
            return new Function<Object, Result<TopNResultValue>>() {

                private final Granularity granularity = query.getGranularity();

                @Override
                public Result<TopNResultValue> apply(Object input) {
                    List<Object> results = (List<Object>) input;
                    List<Map<String, Object>> retVal = Lists.newArrayListWithCapacity(results.size());
                    Iterator<Object> inputIter = results.iterator();
                    // First element is the cached timestamp in millis (see prepareForCache).
                    DateTime timestamp = granularity.toDateTime(((Number) inputIter.next()).longValue());
                    while (inputIter.hasNext()) {
                        List<Object> result = (List<Object>) inputIter.next();
                        // LinkedHashMap: row column order matters downstream, so keep insertion order.
                        Map<String, Object> vals = Maps.newLinkedHashMap();
                        Iterator<AggregatorFactory> aggIter = aggs.iterator();
                        Iterator<Object> resultIter = result.iterator();
                        vals.put(query.getDimensionSpec().getOutputName(), resultIter.next());
                        // Walk aggregators and cached values in lockstep; each factory knows
                        // how to deserialize its own intermediate representation.
                        while (aggIter.hasNext() && resultIter.hasNext()) {
                            final AggregatorFactory factory = aggIter.next();
                            vals.put(factory.getName(), factory.deserialize(resultIter.next()));
                        }
                        // Post-aggs were not cached; recompute them from the restored values.
                        for (PostAggregator postAgg : postAggs) {
                            vals.put(postAgg.getName(), postAgg.compute(vals));
                        }
                        retVal.add(vals);
                    }
                    return new Result<>(timestamp, new TopNResultValue(retVal));
                }
            };
        }
    };
}
Also used: PostAggregator(io.druid.query.aggregation.PostAggregator) CacheKeyBuilder(io.druid.query.cache.CacheKeyBuilder) Granularity(io.druid.java.util.common.granularity.Granularity) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) Function(com.google.common.base.Function) List(java.util.List) Map(java.util.Map) CacheStrategy(io.druid.query.CacheStrategy)

Example 97 with PostAggregator

Use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.

Source: the method apply of the class TopNBinaryFn.

/**
 * Merges two partial TopN results for the same time bucket into one.
 * Entries sharing a dimension value have their aggregators combined and their
 * post-aggregators recomputed; entries unique to one side pass through unchanged.
 */
@Override
public Result<TopNResultValue> apply(Result<TopNResultValue> arg1, Result<TopNResultValue> arg2) {
    // With only one side present, that side (finalized by the merger) is the answer.
    if (arg1 == null) {
        return merger.getResult(arg2, comparator);
    }
    if (arg2 == null) {
        return merger.getResult(arg1, comparator);
    }
    // Index the left-hand rows by dimension value, preserving encounter order.
    Map<Object, DimensionAndMetricValueExtractor> mergedByDim = new LinkedHashMap<>();
    for (DimensionAndMetricValueExtractor leftRow : arg1.getValue()) {
        mergedByDim.put(leftRow.getDimensionValue(dimension), leftRow);
    }
    // Fold in the right-hand rows, combining on dimension-value collisions.
    for (DimensionAndMetricValueExtractor rightRow : arg2.getValue()) {
        final Object dimValue = rightRow.getDimensionValue(dimension);
        final DimensionAndMetricValueExtractor leftRow = mergedByDim.get(dimValue);
        if (leftRow == null) {
            // Dimension value only seen on the right: take it as-is.
            mergedByDim.put(dimValue, rightRow);
        } else {
            // size of map = aggregator + topNDim + postAgg (If sorting is done on post agg field)
            final Map<String, Object> combined = new LinkedHashMap<>(aggregations.size() + 2);
            combined.put(dimension, dimValue);
            for (AggregatorFactory factory : aggregations) {
                final String metricName = factory.getName();
                combined.put(metricName, factory.combine(leftRow.getMetric(metricName), rightRow.getMetric(metricName)));
            }
            // Post-aggregators are derived values; recompute them from the combined row.
            for (PostAggregator pf : postAggregations) {
                combined.put(pf.getName(), pf.compute(combined));
            }
            mergedByDim.put(dimValue, new DimensionAndMetricValueExtractor(combined));
        }
    }
    // "All" granularity keeps the raw timestamp; otherwise snap to the bucket start.
    final DateTime timestamp = (gran instanceof AllGranularity)
            ? arg1.getTimestamp()
            : gran.bucketStart(arg1.getTimestamp());
    final TopNResultBuilder bob = topNMetricSpec.getResultBuilder(timestamp, dimSpec, threshold, comparator, aggregations, postAggregations);
    for (DimensionAndMetricValueExtractor extractor : mergedByDim.values()) {
        bob.addEntry(extractor);
    }
    return bob.build();
}
Also used: PostAggregator(io.druid.query.aggregation.PostAggregator) AllGranularity(io.druid.java.util.common.granularity.AllGranularity) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) DateTime(org.joda.time.DateTime) LinkedHashMap(java.util.LinkedHashMap)

Example 98 with PostAggregator

Use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.

Source: the method makePreComputeManipulatorFn of the class TopNQueryQueryToolChest.

/**
 * Returns a function that rewrites each TopN result row by running every aggregator
 * value through the supplied {@link MetricManipulationFn}, carrying pruned
 * post-aggregator values forward (computing them only when absent from the input row).
 */
@Override
public Function<Result<TopNResultValue>, Result<TopNResultValue>> makePreComputeManipulatorFn(final TopNQuery query, final MetricManipulationFn fn) {
    return new Function<Result<TopNResultValue>, Result<TopNResultValue>>() {

        // Output column name of the query's dimension.
        private String dimOutputName = query.getDimensionSpec().getOutputName();

        // Post-aggregators that survive pruning for this query.
        private final List<PostAggregator> sortingPostAggs = prunePostAggregators(query);

        // Aggregator factories and their names, index-aligned for the manipulation loop.
        private final AggregatorFactory[] factories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]);

        private final String[] factoryNames = extractFactoryName(query.getAggregatorSpecs());

        @Override
        public Result<TopNResultValue> apply(Result<TopNResultValue> result) {
            final List<Map<String, Object>> manipulatedRows = Lists.newArrayList();
            for (DimensionAndMetricValueExtractor input : result.getValue()) {
                final Map<String, Object> row = Maps.newHashMapWithExpectedSize(factories.length + sortingPostAggs.size() + 1);
                // Run every aggregator value through the manipulation function.
                for (int i = 0; i < factories.length; ++i) {
                    final String aggName = factoryNames[i];
                    row.put(aggName, fn.manipulate(factories[i], input.getMetric(aggName)));
                }
                // Reuse a post-agg value already present on the row; otherwise compute it
                // from the manipulated aggregator values accumulated so far.
                for (PostAggregator postAgg : sortingPostAggs) {
                    final String name = postAgg.getName();
                    final Object precomputed = input.getMetric(name);
                    row.put(name, precomputed != null ? precomputed : postAgg.compute(row));
                }
                row.put(dimOutputName, input.getDimensionValue(dimOutputName));
                manipulatedRows.add(row);
            }
            return new Result<TopNResultValue>(result.getTimestamp(), new TopNResultValue(manipulatedRows));
        }
    };
}
Also used: PostAggregator(io.druid.query.aggregation.PostAggregator) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Result(io.druid.query.Result) Function(com.google.common.base.Function) List(java.util.List) Map(java.util.Map)

Aggregations

PostAggregator (io.druid.query.aggregation.PostAggregator)98 Test (org.junit.Test)72 Result (io.druid.query.Result)51 DateTime (org.joda.time.DateTime)47 HyperUniqueFinalizingPostAggregator (io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator)43 ExtractionDimensionSpec (io.druid.query.dimension.ExtractionDimensionSpec)27 DoubleMaxAggregatorFactory (io.druid.query.aggregation.DoubleMaxAggregatorFactory)26 DoubleMinAggregatorFactory (io.druid.query.aggregation.DoubleMinAggregatorFactory)26 ArithmeticPostAggregator (io.druid.query.aggregation.post.ArithmeticPostAggregator)25 AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)24 FieldAccessPostAggregator (io.druid.query.aggregation.post.FieldAccessPostAggregator)24 ConstantPostAggregator (io.druid.query.aggregation.post.ConstantPostAggregator)23 DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec)23 HashMap (java.util.HashMap)23 Row (io.druid.data.input.Row)15 RegexDimExtractionFn (io.druid.query.extraction.RegexDimExtractionFn)14 LookupExtractionFn (io.druid.query.lookup.LookupExtractionFn)13 DimensionSpec (io.druid.query.dimension.DimensionSpec)12 CountAggregator (io.druid.query.aggregation.CountAggregator)10 ExpressionPostAggregator (io.druid.query.aggregation.post.ExpressionPostAggregator)10