Search in sources :

Example 6 with PostAggregator

use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.

the class GroupByStrategyV1 method processSubqueryResult.

/**
 * Runs the outer {@code query} over the rows produced by the inner {@code subquery}.
 *
 * <p>The subquery rows are first loaded into an incremental index whose aggregator columns are the
 * columns required by the outer query's aggregators. The outer query is then processed against that
 * index once per interval, and the concatenated rows are loaded into a second incremental index from
 * which the final, post-aggregated and limited result is produced.
 *
 * @param subquery       the inner group-by query whose results are supplied in {@code subqueryResult}
 * @param query          the outer group-by query to evaluate on top of the subquery results
 * @param resource       not referenced by this implementation
 * @param subqueryResult the rows produced by running {@code subquery}
 * @return the rows of the outer query, with the outer result index attached to the sequence as baggage
 * @throws IAE if two different outer aggregators require a column with the same name
 */
@Override
public Sequence<Row> processSubqueryResult(GroupByQuery subquery, GroupByQuery query, GroupByQueryResource resource, Sequence<Row> subqueryResult) {
    final Set<AggregatorFactory> aggs = Sets.newHashSet();
    // Nested group-bys work by first running the inner query and then materializing the results in an incremental
    // index which the outer query is then run against. To build the incremental index, we use the fieldNames from
    // the aggregators for the outer query to define the column names so that the index will match the query. If
    // there are multiple types of aggregators in the outer query referencing the same fieldName, we will try to build
    // multiple columns of the same name using different aggregator types and will fail. Here, we permit multiple
    // aggregators of the same type referencing the same fieldName (and skip creating identical columns for the
    // subsequent ones) and return an error if the aggregator types are different.
    final Set<String> dimensionNames = Sets.newHashSet();
    for (DimensionSpec dimension : subquery.getDimensions()) {
        dimensionNames.add(dimension.getOutputName());
    }
    for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
        for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) {
            if (dimensionNames.contains(transferAgg.getName())) {
                // This name is already an output dimension of the subquery, so no aggregator column is
                // created for it in the inner index.
                continue;
            }
            // Reject a column name that is already registered with a non-equal aggregator.
            if (Iterables.any(aggs, new Predicate<AggregatorFactory>() {

                @Override
                public boolean apply(AggregatorFactory agg) {
                    return agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg);
                }
            })) {
                throw new IAE("Inner aggregator can currently only be referenced by a single type of outer aggregator" + " for '%s'", transferAgg.getName());
            }
            aggs.add(transferAgg);
        }
    }
    // We need the inner incremental index to have all the columns required by the outer query
    final GroupByQuery innerQuery = new GroupByQuery.Builder(subquery).setAggregatorSpecs(Lists.newArrayList(aggs)).setInterval(subquery.getIntervals()).setPostAggregatorSpecs(Lists.<PostAggregator>newArrayList()).build();
    final GroupByQuery outerQuery = new GroupByQuery.Builder(query).setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec())).build();
    final IncrementalIndex innerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(innerQuery.withOverriddenContext(ImmutableMap.<String, Object>of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)), configSupplier.get(), bufferPool, subqueryResult, false);
    //Outer query might have multiple intervals, but they are expected to be non-overlapping and sorted which
    //is ensured by QuerySegmentSpec.
    //GroupByQueryEngine can only process one interval at a time, so we need to call it once per interval
    //and concatenate the results.
    final IncrementalIndex outerQueryResultIndex;
    try {
        outerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(outerQuery, configSupplier.get(), bufferPool, Sequences.concat(Sequences.map(Sequences.simple(outerQuery.getIntervals()), new Function<Interval, Sequence<Row>>() {

            @Override
            public Sequence<Row> apply(Interval interval) {
                return process(outerQuery.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(interval))), new IncrementalIndexStorageAdapter(innerQueryResultIndex));
            }
        })), true);
    } finally {
        // The inner index is only needed while the outer index is being built; close it even when that
        // build fails so it is not leaked.
        innerQueryResultIndex.close();
    }
    // NOTE(review): the outer index is presumably released when the returned sequence is done with its
    // baggage - confirm against Sequences.withBaggage.
    return Sequences.withBaggage(outerQuery.applyLimit(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)), outerQueryResultIndex);
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) PostAggregator(io.druid.query.aggregation.PostAggregator) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) MultipleIntervalSegmentSpec(io.druid.query.spec.MultipleIntervalSegmentSpec) Sequence(io.druid.java.util.common.guava.Sequence) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) IAE(io.druid.java.util.common.IAE) GroupByQuery(io.druid.query.groupby.GroupByQuery) IncrementalIndexStorageAdapter(io.druid.segment.incremental.IncrementalIndexStorageAdapter) Interval(org.joda.time.Interval)

Example 7 with PostAggregator

use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.

the class GroupByStrategyV2 method mergeResults.

/**
 * Merges the rows produced by {@code baseRunner}, then computes post-aggregators and applies the
 * query's limit.
 *
 * <p>The runner is invoked with a copy of {@code query} that has no post-aggregators, no having
 * clause and no limit, and whose context marks it as not outermost and not finalized. Rows coming
 * back are only rewritten when {@code query} itself is the outermost query.
 *
 * @param baseRunner      runner that supplies the rows to merge
 * @param query           the group-by query being answered
 * @param responseContext response context handed through to the runner
 * @return merged rows with post-aggregations and the limit applied
 */
@Override
public Sequence<Row> mergeResults(final QueryRunner<Row> baseRunner, final GroupByQuery query, final Map<String, Object> responseContext) {
    // Order of work: merge the streams, add post-aggregations, then apply the limit (which may
    // need to materialize the rows).
    final ResultMergeQueryRunner<Row> merger = new ResultMergeQueryRunner<Row>(baseRunner) {

        @Override
        protected Ordering<Row> makeOrdering(Query<Row> queryParam) {
            final GroupByQuery groupBy = (GroupByQuery) queryParam;
            return groupBy.getRowOrdering(true);
        }

        @Override
        protected BinaryFn<Row, Row, Row> createMergeFn(Query<Row> queryParam) {
            final GroupByQuery groupBy = (GroupByQuery) queryParam;
            return new GroupByBinaryFnV2(groupBy);
        }
    };

    // A non-null value replaces the timestamp of every output row of the outermost query.
    final DateTime fudgeTimestamp = getUniversalTimestamp(query);
    final String fudgeTimestampText = fudgeTimestamp == null ? "" : String.valueOf(fudgeTimestamp.getMillis());

    final Map<String, Object> contextOverrides = ImmutableMap.<String, Object>of(
        "finalize", false,
        GroupByQueryConfig.CTX_KEY_STRATEGY, GroupByStrategySelector.STRATEGY_V2,
        CTX_KEY_FUDGE_TIMESTAMP, fudgeTimestampText,
        CTX_KEY_OUTERMOST, false
    );

    // Post-aggregators are left out here and the two null arguments stand in for the having clause
    // and the limit; post-aggregation and the limit are applied at the end of this method.
    final GroupByQuery mergeQuery = new GroupByQuery(
        query.getDataSource(),
        query.getQuerySegmentSpec(),
        query.getVirtualColumns(),
        query.getDimFilter(),
        query.getGranularity(),
        query.getDimensions(),
        query.getAggregatorSpecs(),
        ImmutableList.<PostAggregator>of(),
        null,
        null,
        query.getContext()
    ).withOverriddenContext(contextOverrides);

    final Sequence<Row> mergedRows = merger.run(mergeQuery, responseContext);

    final Function<Row, Row> finishRow = new Function<Row, Row>() {

        @Override
        public Row apply(final Row row) {
            // Only the outermost query post-aggregates and rewrites timestamps.
            final boolean outermost = query.getContextBoolean(CTX_KEY_OUTERMOST, true);
            if (!outermost) {
                return row;
            }
            final boolean noPostAggs = query.getPostAggregatorSpecs().isEmpty();
            if (noPostAggs && fudgeTimestamp == null) {
                return row;
            }
            final Map<String, Object> event = ((MapBasedRow) row).getEvent();
            final Map<String, Object> resultEvent;
            if (noPostAggs) {
                // Only the timestamp changes, so the existing event map is reused.
                resultEvent = event;
            } else {
                // Copy first, then add post-aggregations in order so later ones can read earlier ones.
                resultEvent = Maps.newLinkedHashMap(event);
                for (PostAggregator postAggregator : query.getPostAggregatorSpecs()) {
                    resultEvent.put(postAggregator.getName(), postAggregator.compute(resultEvent));
                }
            }
            final DateTime resultTimestamp = fudgeTimestamp == null ? row.getTimestamp() : fudgeTimestamp;
            return new MapBasedRow(resultTimestamp, resultEvent);
        }
    };

    return query.applyLimit(Sequences.map(mergedRows, finishRow));
}
Also used : ResultMergeQueryRunner(io.druid.query.ResultMergeQueryRunner) GroupByBinaryFnV2(io.druid.query.groupby.epinephelinae.GroupByBinaryFnV2) Query(io.druid.query.Query) GroupByQuery(io.druid.query.groupby.GroupByQuery) PostAggregator(io.druid.query.aggregation.PostAggregator) DateTime(org.joda.time.DateTime) MapBasedRow(io.druid.data.input.MapBasedRow) Function(com.google.common.base.Function) GroupByQuery(io.druid.query.groupby.GroupByQuery) Row(io.druid.data.input.Row) MapBasedRow(io.druid.data.input.MapBasedRow)

Example 8 with PostAggregator

use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.

the class TopNNumericResultBuilder method addEntry.

/**
 * Builds the metric map for one dimension value, computes its post-aggregations and offers the entry
 * to the priority queue.
 *
 * @param dimName     the dimension value; stored in the map under the dimension spec's output name
 * @param dimValIndex passed through to the {@code DimValHolder} unchanged
 * @param metricVals  aggregator values, positionally matching {@code aggFactoryNames}
 * @return this builder
 * @throws IllegalArgumentException if {@code metricVals} and {@code aggFactoryNames} differ in length
 */
@Override
public TopNNumericResultBuilder addEntry(Comparable dimName, Object dimValIndex, Object[] metricVals) {
    Preconditions.checkArgument(metricVals.length == aggFactoryNames.length, "metricVals must be the same length as aggFactories");
    // Sized for every aggregator value, every post-aggregator value and the dimension value itself.
    final Map<String, Object> metricValues = Maps.newHashMapWithExpectedSize(metricVals.length + postAggs.size() + 1);
    metricValues.put(dimSpec.getOutputName(), dimName);
    // Hand-unrolled copy of metricVals into the map. The switch deliberately falls through so that the
    // first `extra` entries are copied (highest index first); the loop below then copies the rest
    // eight at a time.
    // NOTE(review): this only covers every index if LOOP_UNROLL_COUNT is 8 - confirm where it is declared.
    final int extra = metricVals.length % LOOP_UNROLL_COUNT;
    switch(extra) {
        case 7:
            metricValues.put(aggFactoryNames[6], metricVals[6]);
        case 6:
            metricValues.put(aggFactoryNames[5], metricVals[5]);
        case 5:
            metricValues.put(aggFactoryNames[4], metricVals[4]);
        case 4:
            metricValues.put(aggFactoryNames[3], metricVals[3]);
        case 3:
            metricValues.put(aggFactoryNames[2], metricVals[2]);
        case 2:
            metricValues.put(aggFactoryNames[1], metricVals[1]);
        case 1:
            metricValues.put(aggFactoryNames[0], metricVals[0]);
    }
    // Remaining entries, starting at index `extra`, in groups of eight.
    for (int i = extra; i < metricVals.length; i += LOOP_UNROLL_COUNT) {
        metricValues.put(aggFactoryNames[i + 0], metricVals[i + 0]);
        metricValues.put(aggFactoryNames[i + 1], metricVals[i + 1]);
        metricValues.put(aggFactoryNames[i + 2], metricVals[i + 2]);
        metricValues.put(aggFactoryNames[i + 3], metricVals[i + 3]);
        metricValues.put(aggFactoryNames[i + 4], metricVals[i + 4]);
        metricValues.put(aggFactoryNames[i + 5], metricVals[i + 5]);
        metricValues.put(aggFactoryNames[i + 6], metricVals[i + 6]);
        metricValues.put(aggFactoryNames[i + 7], metricVals[i + 7]);
    }
    // Order matters here, do not unroll: each post-aggregator is computed from the map as it stands,
    // which already holds the results of the post-aggregators before it.
    for (PostAggregator postAgg : postAggs) {
        metricValues.put(postAgg.getName(), postAgg.compute(metricValues));
    }
    // The value stored under metricName decides whether the entry is queued at all.
    Object topNMetricVal = metricValues.get(metricName);
    if (shouldAdd(topNMetricVal)) {
        DimValHolder dimValHolder = new DimValHolder.Builder().withTopNMetricVal(topNMetricVal).withDimName(dimName).withDimValIndex(dimValIndex).withMetricValues(metricValues).build();
        pQueue.add(dimValHolder);
    }
    // Keep the queue at no more than `threshold` entries by polling one off when it grows past that.
    if (this.pQueue.size() > this.threshold) {
        pQueue.poll();
    }
    return this;
}
Also used : PostAggregator(io.druid.query.aggregation.PostAggregator)

Example 9 with PostAggregator

use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.

the class TopNQueryQueryToolChest method makePostComputeManipulatorFn.

/**
 * Creates a function that rebuilds every entry of a top-N result as a plain map holding the
 * manipulated aggregator values, the post-aggregator values and the dimension value.
 *
 * @param query the top-N query whose dimension, aggregators and post-aggregators define the map keys
 * @param fn    manipulation applied to each aggregator value
 * @return a function producing a new result with the same timestamp and the rebuilt entries
 */
@Override
public Function<Result<TopNResultValue>, Result<TopNResultValue>> makePostComputeManipulatorFn(final TopNQuery query, final MetricManipulationFn fn) {
    return new Function<Result<TopNResultValue>, Result<TopNResultValue>>() {

        private String outputDimension = query.getDimensionSpec().getOutputName();

        private final AggregatorFactory[] factories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]);

        private final String[] factoryNames = extractFactoryName(query.getAggregatorSpecs());

        private final PostAggregator[] postAggs = query.getPostAggregatorSpecs().toArray(new PostAggregator[0]);

        @Override
        public Result<TopNResultValue> apply(Result<TopNResultValue> result) {
            final List<Map<String, Object>> rebuiltEntries = Lists.newArrayList();
            for (DimensionAndMetricValueExtractor entry : result.getValue()) {
                rebuiltEntries.add(rebuildEntry(entry));
            }
            return new Result<>(result.getTimestamp(), new TopNResultValue(rebuiltEntries));
        }

        // Builds the output map for a single result entry.
        private Map<String, Object> rebuildEntry(DimensionAndMetricValueExtractor entry) {
            final int expectedSize = factories.length + query.getPostAggregatorSpecs().size() + 1;
            final Map<String, Object> values = Maps.newHashMapWithExpectedSize(expectedSize);

            // Step 1: the aggregator values as stored in the entry, so post-aggregators can read them.
            for (String aggName : factoryNames) {
                values.put(aggName, entry.getMetric(aggName));
            }

            // Step 2: post-aggregators; a value already present in the entry is kept, otherwise it is
            // computed from the map built so far.
            for (PostAggregator postAgg : postAggs) {
                final String postAggName = postAgg.getName();
                final Object existing = entry.getMetric(postAggName);
                if (existing == null) {
                    values.put(postAggName, postAgg.compute(values));
                } else {
                    values.put(postAggName, existing);
                }
            }

            // Step 3: overwrite the aggregator values with their manipulated form.
            for (int idx = 0; idx < factoryNames.length; idx++) {
                final String aggName = factoryNames[idx];
                final Object manipulated = fn.manipulate(factories[idx], entry.getMetric(aggName));
                values.put(aggName, manipulated);
            }

            values.put(outputDimension, entry.getDimensionValue(outputDimension));
            return values;
        }
    };
}
Also used : Function(com.google.common.base.Function) PostAggregator(io.druid.query.aggregation.PostAggregator) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Map(java.util.Map) Result(io.druid.query.Result)

Example 10 with PostAggregator

use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.

the class NumericTopNMetricSpec method verifyPreconditions.

/**
 * Checks that {@code metric} names one of the supplied aggregators or post-aggregators.
 *
 * @param aggregatorSpecs     the query's aggregators; must be non-null and non-empty
 * @param postAggregatorSpecs the query's post-aggregators; {@code null} is treated as "none"
 * @throws NullPointerException     if {@code metric} or {@code aggregatorSpecs} is null
 * @throws IllegalArgumentException if {@code aggregatorSpecs} is empty, or if neither list contains
 *                                  an entry whose name equals {@code metric}
 */
@Override
public void verifyPreconditions(List<AggregatorFactory> aggregatorSpecs, List<PostAggregator> postAggregatorSpecs) {
    Preconditions.checkNotNull(metric, "metric can't be null");
    Preconditions.checkNotNull(aggregatorSpecs, "aggregations cannot be null");
    Preconditions.checkArgument(aggregatorSpecs.size() > 0, "Must have at least one AggregatorFactory");
    final AggregatorFactory aggregator = Iterables.tryFind(aggregatorSpecs, new Predicate<AggregatorFactory>() {

        @Override
        public boolean apply(AggregatorFactory input) {
            return input.getName().equals(metric);
        }
    }).orNull();
    // A missing post-aggregator list means there is nothing to search; without this guard
    // Iterables.tryFind would fail with a message-less NullPointerException.
    final PostAggregator postAggregator = postAggregatorSpecs == null ? null : Iterables.tryFind(postAggregatorSpecs, new Predicate<PostAggregator>() {

        @Override
        public boolean apply(PostAggregator input) {
            return input.getName().equals(metric);
        }
    }).orNull();
    Preconditions.checkArgument(aggregator != null || postAggregator != null, "Must have an AggregatorFactory or PostAggregator for metric[%s], gave[%s] and [%s]", metric, aggregatorSpecs, postAggregatorSpecs);
}
Also used : PostAggregator(io.druid.query.aggregation.PostAggregator) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Predicate(com.google.common.base.Predicate)

Aggregations

PostAggregator (io.druid.query.aggregation.PostAggregator)98 Test (org.junit.Test)72 Result (io.druid.query.Result)51 DateTime (org.joda.time.DateTime)47 HyperUniqueFinalizingPostAggregator (io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator)43 ExtractionDimensionSpec (io.druid.query.dimension.ExtractionDimensionSpec)27 DoubleMaxAggregatorFactory (io.druid.query.aggregation.DoubleMaxAggregatorFactory)26 DoubleMinAggregatorFactory (io.druid.query.aggregation.DoubleMinAggregatorFactory)26 ArithmeticPostAggregator (io.druid.query.aggregation.post.ArithmeticPostAggregator)25 AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)24 FieldAccessPostAggregator (io.druid.query.aggregation.post.FieldAccessPostAggregator)24 ConstantPostAggregator (io.druid.query.aggregation.post.ConstantPostAggregator)23 DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec)23 HashMap (java.util.HashMap)23 Row (io.druid.data.input.Row)15 RegexDimExtractionFn (io.druid.query.extraction.RegexDimExtractionFn)14 LookupExtractionFn (io.druid.query.lookup.LookupExtractionFn)13 DimensionSpec (io.druid.query.dimension.DimensionSpec)12 CountAggregator (io.druid.query.aggregation.CountAggregator)10 ExpressionPostAggregator (io.druid.query.aggregation.post.ExpressionPostAggregator)10