Search in sources :

Example 11 with GroupByQuery

Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.

In class ApproximateHistogramGroupByQueryTest, method testGroupByWithApproximateHistogramAgg.

/**
 * Runs a groupBy on the market dimension (aliased to "marketalias") with an approximate-histogram
 * aggregator named "apphisto" and a quantile post-aggregator that reads it, then compares the
 * single returned row against a hard-coded expectation.
 */
@Test
public void testGroupByWithApproximateHistogramAgg() {
    // Histogram aggregator over "index"; the post-aggregator below refers to it by the name "apphisto".
    ApproximateHistogramAggregatorFactory histogramAgg = new ApproximateHistogramAggregatorFactory(
            "apphisto",
            "index",
            10,
            5,
            Float.NEGATIVE_INFINITY,
            Float.POSITIVE_INFINITY
    );

    List<DimensionSpec> dimensions = Arrays.<DimensionSpec>asList(
            new DefaultDimensionSpec(QueryRunnerTestHelper.marketDimension, "marketalias")
    );
    // Descending on the aliased dimension; the trailing 1 is passed straight to DefaultLimitSpec.
    DefaultLimitSpec limitSpec = new DefaultLimitSpec(
            Lists.newArrayList(new OrderByColumnSpec("marketalias", OrderByColumnSpec.Direction.DESCENDING)),
            1
    );
    List<PostAggregator> postAggregators = Arrays.<PostAggregator>asList(
            new QuantilePostAggregator("quantile", "apphisto", 0.5f)
    );

    GroupByQuery groupBy = new GroupByQuery.Builder()
            .setDataSource(QueryRunnerTestHelper.dataSource)
            .setGranularity(QueryRunnerTestHelper.allGran)
            .setDimensions(dimensions)
            .setInterval(QueryRunnerTestHelper.fullOnInterval)
            .setLimitSpec(limitSpec)
            .setAggregatorSpecs(Lists.newArrayList(QueryRunnerTestHelper.rowsCount, histogramAgg))
            .setPostAggregatorSpecs(postAggregators)
            .build();

    // Expected histogram: six break points and five bucket counts.
    Histogram expectedHistogram = new Histogram(
            new float[] { 214.97299194335938f, 545.9906005859375f, 877.0081787109375f, 1208.0257568359375f, 1539.0433349609375f, 1870.06103515625f },
            new double[] { 0.0, 67.53287506103516, 72.22068786621094, 31.984678268432617, 14.261756896972656 }
    );
    Row expectedRow = GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01T00:00:00.000Z",
            "marketalias", "upfront",
            "rows", 186L,
            "quantile", 880.9881f,
            "apphisto", expectedHistogram
    );
    List<Row> expected = Arrays.asList(expectedRow);

    Iterable<Row> actual = GroupByQueryRunnerTestHelper.runQuery(factory, runner, groupBy);
    TestHelper.assertExpectedObjects(expected, actual, "approx-histo");
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) DefaultLimitSpec(io.druid.query.groupby.orderby.DefaultLimitSpec) PostAggregator(io.druid.query.aggregation.PostAggregator) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) OrderByColumnSpec(io.druid.query.groupby.orderby.OrderByColumnSpec) GroupByQuery(io.druid.query.groupby.GroupByQuery) Row(io.druid.data.input.Row) Test(org.junit.Test) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest)

Example 12 with GroupByQuery

Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.

In class ApproximateHistogramGroupByQueryTest, method testGroupByWithSameNameComplexPostAgg.

/**
 * Builds a groupBy query whose aggregator and post-aggregator are both named "quantile" and
 * expects an {@link IllegalArgumentException} to be thrown somewhere in this method.
 * NOTE(review): the exception presumably comes from building or running the query because of the
 * duplicate output name - confirm; the statements after that point would then never execute.
 */
@Test(expected = IllegalArgumentException.class)
public void testGroupByWithSameNameComplexPostAgg() {
    // The aggregator deliberately uses the same output name as the post-aggregator below.
    ApproximateHistogramAggregatorFactory histogramAgg = new ApproximateHistogramAggregatorFactory(
            "quantile",
            "index",
            10,
            5,
            Float.NEGATIVE_INFINITY,
            Float.POSITIVE_INFINITY
    );

    GroupByQuery groupBy = new GroupByQuery.Builder()
            .setDataSource(QueryRunnerTestHelper.dataSource)
            .setGranularity(QueryRunnerTestHelper.allGran)
            .setDimensions(
                    Arrays.<DimensionSpec>asList(
                            new DefaultDimensionSpec(QueryRunnerTestHelper.marketDimension, "marketalias")
                    )
            )
            .setInterval(QueryRunnerTestHelper.fullOnInterval)
            .setLimitSpec(
                    new DefaultLimitSpec(
                            Lists.newArrayList(
                                    new OrderByColumnSpec("marketalias", OrderByColumnSpec.Direction.DESCENDING)
                            ),
                            1
                    )
            )
            .setAggregatorSpecs(Lists.newArrayList(QueryRunnerTestHelper.rowsCount, histogramAgg))
            .setPostAggregatorSpecs(
                    Arrays.<PostAggregator>asList(new QuantilePostAggregator("quantile", "quantile", 0.5f))
            )
            .build();

    Row expectedRow = GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01T00:00:00.000Z",
            "marketalias", "upfront",
            "rows", 186L,
            "quantile", 880.9881f
    );
    List<Row> expected = Arrays.asList(expectedRow);

    Iterable<Row> actual = GroupByQueryRunnerTestHelper.runQuery(factory, runner, groupBy);
    TestHelper.assertExpectedObjects(expected, actual, "approx-histo");
}
Also used : OrderByColumnSpec(io.druid.query.groupby.orderby.OrderByColumnSpec) DimensionSpec(io.druid.query.dimension.DimensionSpec) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) GroupByQuery(io.druid.query.groupby.GroupByQuery) DefaultLimitSpec(io.druid.query.groupby.orderby.DefaultLimitSpec) PostAggregator(io.druid.query.aggregation.PostAggregator) Row(io.druid.data.input.Row) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) Test(org.junit.Test) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest)

Example 13 with GroupByQuery

Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.

In class VarianceGroupByQueryTest, method testGroupByVarianceOnly.

/**
 * Groups by "quality" (aliased to "alias") at day granularity with only the index variance
 * aggregator and its stddev post-aggregator, and checks the rows for 2011-04-01 and 2011-04-02.
 */
@Test
public void testGroupByVarianceOnly() {
    List<DimensionSpec> dimensions = Lists.<DimensionSpec>newArrayList(
            new DefaultDimensionSpec("quality", "alias")
    );

    GroupByQuery groupBy = GroupByQuery.builder()
            .setDataSource(QueryRunnerTestHelper.dataSource)
            .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
            .setDimensions(dimensions)
            .setAggregatorSpecs(Arrays.<AggregatorFactory>asList(VarianceTestHelper.indexVarianceAggr))
            .setPostAggregatorSpecs(Arrays.<PostAggregator>asList(VarianceTestHelper.stddevOfIndexPostAggr))
            .setGranularity(QueryRunnerTestHelper.dayGran)
            .build();

    // Column order for every add(...) call below: timestamp, then alias, index_stddev, index_var.
    VarianceTestHelper.RowBuilder rows = new VarianceTestHelper.RowBuilder(
            new String[] { "alias", "index_stddev", "index_var" }
    );
    List<Row> expected = rows
            .add("2011-04-01", "automotive", 0d, 0d)
            .add("2011-04-01", "business", 0d, 0d)
            .add("2011-04-01", "entertainment", 0d, 0d)
            .add("2011-04-01", "health", 0d, 0d)
            .add("2011-04-01", "mezzanine", 737.0179286322613d, 543195.4271253889d)
            .add("2011-04-01", "news", 0d, 0d)
            .add("2011-04-01", "premium", 726.6322593583996d, 527994.4403402924d)
            .add("2011-04-01", "technology", 0d, 0d)
            .add("2011-04-01", "travel", 0d, 0d)
            .add("2011-04-02", "automotive", 0d, 0d)
            .add("2011-04-02", "business", 0d, 0d)
            .add("2011-04-02", "entertainment", 0d, 0d)
            .add("2011-04-02", "health", 0d, 0d)
            .add("2011-04-02", "mezzanine", 611.3420766546617d, 373739.13468843425d)
            .add("2011-04-02", "news", 0d, 0d)
            .add("2011-04-02", "premium", 621.3898134843073d, 386125.30030206224d)
            .add("2011-04-02", "technology", 0d, 0d)
            .add("2011-04-02", "travel", 0d, 0d)
            .build();

    Iterable<Row> actual = GroupByQueryRunnerTestHelper.runQuery(factory, runner, groupBy);
    TestHelper.assertExpectedObjects(expected, actual, "");
}
Also used : GroupByQuery(io.druid.query.groupby.GroupByQuery) Row(io.druid.data.input.Row) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) Test(org.junit.Test) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest)

Example 14 with GroupByQuery

Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.

In class VarianceGroupByQueryTest, method testPostAggHavingSpec.

/**
 * Checks a having clause on the stddev post-aggregator: first the filtered result on its own,
 * then the same query with an ascending order-by on that metric and a limit of 2.
 */
@Test
public void testPostAggHavingSpec() {
    // One builder is reused for both expectations; each build() is called in the original order.
    VarianceTestHelper.RowBuilder rowBuilder = new VarianceTestHelper.RowBuilder(
            new String[] { "alias", "rows", "index", "index_var", "index_stddev" }
    );
    List<Row> havingExpected = rowBuilder
            .add("2011-04-01", "automotive", 2L, 269L, 299.0009819048282, 17.29164485827847)
            .add("2011-04-01", "mezzanine", 6L, 4420L, 254083.76447001836, 504.06722217380724)
            .add("2011-04-01", "premium", 6L, 4416L, 252279.2020389339, 502.27403082275106)
            .build();

    // 3 rows
    HavingSpec stddevAbove15 = new OrHavingSpec(
            ImmutableList.<HavingSpec>of(
                    new GreaterThanHavingSpec(VarianceTestHelper.stddevOfIndexMetric, 15L)
            )
    );
    GroupByQuery havingQuery = GroupByQuery.builder()
            .setDataSource(VarianceTestHelper.dataSource)
            .setInterval("2011-04-02/2011-04-04")
            .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
            .setAggregatorSpecs(
                    Arrays.asList(
                            VarianceTestHelper.rowsCount,
                            VarianceTestHelper.indexLongSum,
                            VarianceTestHelper.indexVarianceAggr
                    )
            )
            .setPostAggregatorSpecs(ImmutableList.<PostAggregator>of(VarianceTestHelper.stddevOfIndexPostAggr))
            .setGranularity(new PeriodGranularity(new Period("P1M"), null, null))
            .setHavingSpec(stddevAbove15)
            .build();

    Iterable<Row> havingActual = GroupByQueryRunnerTestHelper.runQuery(factory, runner, havingQuery);
    TestHelper.assertExpectedObjects(havingExpected, havingActual, "");

    // Second phase: same query, ordered ascending by the stddev metric and limited to two rows.
    GroupByQuery limitedQuery = havingQuery.withLimitSpec(
            new DefaultLimitSpec(
                    Arrays.<OrderByColumnSpec>asList(OrderByColumnSpec.asc(VarianceTestHelper.stddevOfIndexMetric)),
                    2
            )
    );
    List<Row> limitedExpected = rowBuilder
            .add("2011-04-01", "automotive", 2L, 269L, 299.0009819048282, 17.29164485827847)
            .add("2011-04-01", "premium", 6L, 4416L, 252279.2020389339, 502.27403082275106)
            .build();

    Iterable<Row> limitedActual = GroupByQueryRunnerTestHelper.runQuery(factory, runner, limitedQuery);
    TestHelper.assertExpectedObjects(limitedExpected, limitedActual, "");
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) OrHavingSpec(io.druid.query.groupby.having.OrHavingSpec) PostAggregator(io.druid.query.aggregation.PostAggregator) DefaultLimitSpec(io.druid.query.groupby.orderby.DefaultLimitSpec) PeriodGranularity(io.druid.java.util.common.granularity.PeriodGranularity) Period(org.joda.time.Period) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) GroupByQuery(io.druid.query.groupby.GroupByQuery) GreaterThanHavingSpec(io.druid.query.groupby.having.GreaterThanHavingSpec) Row(io.druid.data.input.Row) Test(org.junit.Test) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest)

Example 15 with GroupByQuery

Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.

In class GroupByMergingQueryRunnerV2, method run.

/**
 * Runs a groupBy query over every runner in {@code queryables} and merges their rows through one
 * shared {@link Grouper} that is backed by a merge buffer taken from {@code mergeBufferPool}.
 *
 * <p>If the query is by-segment, or the chained-execution context flag is already set, the call is
 * handed to a {@link ChainedExecutionQueryRunner} instead and no merge buffer is taken here.
 *
 * <p>Otherwise all work happens inside the returned sequence's {@code make()}: temporary storage,
 * the merge buffer and the grouper are registered in {@code resources} as they are created, and are
 * closed in reverse order either by the iterator's close hook or, if set-up throws, by the catch
 * block before the exception is rethrown.
 *
 * @param queryParam      the query to run; cast to {@link GroupByQuery}
 * @param responseContext passed unchanged to each underlying runner
 * @return a sequence of merged rows
 */
@Override
public Sequence<Row> run(final Query queryParam, final Map responseContext) {
    final GroupByQuery query = (GroupByQuery) queryParam;
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    // CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION is here because realtime servers use nested mergeRunners calls
    // (one for the entire query and one for each sink). We only want the outer call to actually do merging with a
    // merge buffer, otherwise the query will allocate too many merge buffers. This is potentially sub-optimal as it
    // will involve materializing the results for each sink before starting to feed them into the outer merge buffer.
    // I'm not sure of a better way to do this without tweaking how realtime servers do queries.
    final boolean forceChainedExecution = query.getContextBoolean(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, false);
    // Copy of the query with the flag set; this is what the per-runner tasks below are given, so any
    // nested call of this method takes the chained-execution branch.
    final GroupByQuery queryForRunners = query.withOverriddenContext(ImmutableMap.<String, Object>of(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, true));
    if (BaseQuery.getContextBySegment(query, false) || forceChainedExecution) {
        return new ChainedExecutionQueryRunner(exec, queryWatcher, queryables).run(query, responseContext);
    }
    final boolean isSingleThreaded = querySpecificConfig.isSingleThreaded();
    // One combining factory per aggregator of the query, in the same order.
    final AggregatorFactory[] combiningAggregatorFactories = new AggregatorFactory[query.getAggregatorSpecs().size()];
    for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
        combiningAggregatorFactories[i] = query.getAggregatorSpecs().get(i).getCombiningFactory();
    }
    // Directory name combines a random UUID with the query id.
    final File temporaryStorageDirectory = new File(processingTmpDir, String.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId()));
    final int priority = BaseQuery.getContextPriority(query, 0);
    // Figure out timeoutAt time now, so we can apply the timeout to both the mergeBufferPool.take and the actual
    // query processing together.
    final Number queryTimeout = query.getContextValue(QueryContextKeys.TIMEOUT, null);
    // The timeout value is added to a millisecond clock; with no timeout in the context the deadline
    // is JodaUtils.MAX_INSTANT.
    final long timeoutAt = queryTimeout == null ? JodaUtils.MAX_INSTANT : System.currentTimeMillis() + queryTimeout.longValue();
    return new BaseSequence<>(new BaseSequence.IteratorMaker<Row, CloseableGrouperIterator<RowBasedKey, Row>>() {

        @Override
        public CloseableGrouperIterator<RowBasedKey, Row> make() {
            // Everything that must be closed, in acquisition order; always closed in reverse.
            final List<ReferenceCountingResourceHolder> resources = Lists.newArrayList();
            try {
                final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(temporaryStorageDirectory, querySpecificConfig.getMaxOnDiskStorage());
                final ReferenceCountingResourceHolder<LimitedTemporaryStorage> temporaryStorageHolder = ReferenceCountingResourceHolder.fromCloseable(temporaryStorage);
                resources.add(temporaryStorageHolder);
                final ReferenceCountingResourceHolder<ByteBuffer> mergeBufferHolder;
                try {
                    // This will potentially block if there are no merge buffers left in the pool.
                    final long timeout = timeoutAt - System.currentTimeMillis();
                    // A deadline that has already passed, or a null result from take(), is reported as a
                    // TimeoutException, which the catch below wraps in a QueryInterruptedException.
                    if (timeout <= 0 || (mergeBufferHolder = mergeBufferPool.take(timeout)) == null) {
                        throw new TimeoutException();
                    }
                    resources.add(mergeBufferHolder);
                } catch (Exception e) {
                    throw new QueryInterruptedException(e);
                }
                Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> pair = RowBasedGrouperHelper.createGrouperAccumulatorPair(query, false, null, config, Suppliers.ofInstance(mergeBufferHolder.get()), concurrencyHint, temporaryStorage, spillMapper, combiningAggregatorFactories);
                final Grouper<RowBasedKey> grouper = pair.lhs;
                final Accumulator<Grouper<RowBasedKey>, Row> accumulator = pair.rhs;
                grouper.init();
                final ReferenceCountingResourceHolder<Grouper<RowBasedKey>> grouperHolder = ReferenceCountingResourceHolder.fromCloseable(grouper);
                resources.add(grouperHolder);
                // Submit one task per runner; Lists.newArrayList forces the transform so every task is
                // submitted before allAsList combines the futures.
                ListenableFuture<List<Boolean>> futures = Futures.allAsList(Lists.newArrayList(Iterables.transform(queryables, new Function<QueryRunner<Row>, ListenableFuture<Boolean>>() {

                    @Override
                    public ListenableFuture<Boolean> apply(final QueryRunner<Row> input) {
                        if (input == null) {
                            throw new ISE("Null queryRunner! Looks to be some segment unmapping action happening");
                        }
                        ListenableFuture<Boolean> future = exec.submit(new AbstractPrioritizedCallable<Boolean>(priority) {

                            @Override
                            public Boolean call() throws Exception {
                                // Hold a reference on the merge buffer and the grouper for the duration of
                                // this task; try-with-resources gives both back when the task ends.
                                try (Releaser bufferReleaser = mergeBufferHolder.increment();
                                    Releaser grouperReleaser = grouperHolder.increment()) {
                                    final Object retVal = input.run(queryForRunners, responseContext).accumulate(grouper, accumulator);
                                    // Return true if OK, false if resources were exhausted.
                                    return retVal == grouper;
                                } catch (QueryInterruptedException e) {
                                    // Rethrown as-is, without the error log used for other exceptions.
                                    throw e;
                                } catch (Exception e) {
                                    log.error(e, "Exception with one of the sequences!");
                                    throw Throwables.propagate(e);
                                }
                            }
                        });
                        // Single-threaded mode: wait on this task's own future here, inside apply(),
                        // instead of waiting on the combined future afterwards.
                        if (isSingleThreaded) {
                            waitForFutureCompletion(query, Futures.allAsList(ImmutableList.of(future)), timeoutAt - System.currentTimeMillis());
                        }
                        return future;
                    }
                })));
                if (!isSingleThreaded) {
                    waitForFutureCompletion(query, futures, timeoutAt - System.currentTimeMillis());
                }
                // The iterator is given a close hook that closes every registered resource, newest first.
                return RowBasedGrouperHelper.makeGrouperIterator(grouper, query, new Closeable() {

                    @Override
                    public void close() throws IOException {
                        for (Closeable closeable : Lists.reverse(resources)) {
                            CloseQuietly.close(closeable);
                        }
                    }
                });
            } catch (Throwable e) {
                // Exception caught while setting up the iterator; release resources.
                for (Closeable closeable : Lists.reverse(resources)) {
                    CloseQuietly.close(closeable);
                }
                throw e;
            }
        }

        @Override
        public void cleanup(CloseableGrouperIterator<RowBasedKey, Row> iterFromMake) {
            // Closing the iterator is the only clean-up done here.
            iterFromMake.close();
        }
    });
}
Also used : Accumulator(io.druid.java.util.common.guava.Accumulator) Closeable(java.io.Closeable) ChainedExecutionQueryRunner(io.druid.query.ChainedExecutionQueryRunner) Function(com.google.common.base.Function) GroupByQuery(io.druid.query.groupby.GroupByQuery) Releaser(io.druid.collections.Releaser) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) ISE(io.druid.java.util.common.ISE) TimeoutException(java.util.concurrent.TimeoutException) QueryInterruptedException(io.druid.query.QueryInterruptedException) Pair(io.druid.java.util.common.Pair) GroupByQueryConfig(io.druid.query.groupby.GroupByQueryConfig) RowBasedKey(io.druid.query.groupby.epinephelinae.RowBasedGrouperHelper.RowBasedKey) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) BaseSequence(io.druid.java.util.common.guava.BaseSequence) TimeoutException(java.util.concurrent.TimeoutException) CancellationException(java.util.concurrent.CancellationException) QueryInterruptedException(io.druid.query.QueryInterruptedException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) ChainedExecutionQueryRunner(io.druid.query.ChainedExecutionQueryRunner) QueryRunner(io.druid.query.QueryRunner) ReferenceCountingResourceHolder(io.druid.collections.ReferenceCountingResourceHolder) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Row(io.druid.data.input.Row) File(java.io.File)

Aggregations

GroupByQuery (io.druid.query.groupby.GroupByQuery)26 Row (io.druid.data.input.Row)19 DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec)15 Test (org.junit.Test)13 GroupByQueryRunnerTest (io.druid.query.groupby.GroupByQueryRunnerTest)12 DimensionSpec (io.druid.query.dimension.DimensionSpec)10 LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory)8 ArrayList (java.util.ArrayList)7 InputRow (io.druid.data.input.InputRow)6 AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)6 List (java.util.List)6 QueryRunner (io.druid.query.QueryRunner)5 PostAggregator (io.druid.query.aggregation.PostAggregator)5 Interval (org.joda.time.Interval)5 Function (com.google.common.base.Function)4 FinalizeResultsQueryRunner (io.druid.query.FinalizeResultsQueryRunner)4 CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory)4 MultipleIntervalSegmentSpec (io.druid.query.spec.MultipleIntervalSegmentSpec)4 IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment)4 ISE (io.druid.java.util.common.ISE)3