Search in sources :

Example 1 with HyperUniqueFinalizingPostAggregator

use of org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator in project druid by druid-io.

the class CalciteCorrelatedQueryTest method testCorrelatedSubquery.

@Test
@Parameters(source = QueryContextForJoinProvider.class)
public void testCorrelatedSubquery(Map<String, Object> queryContext) throws Exception {
    cannotVectorize();
    queryContext = withLeftDirectAccessEnabled(queryContext);
    testQuery("select country, ANY_VALUE(\n" + "        select avg(\"users\") from (\n" + "            select floor(__time to day), count(distinct user) \"users\" from visits f where f.country = visits.country group by 1\n" + "        )\n" + "     ) as \"DAU\"\n" + "from visits \n" + "group by 1", queryContext, ImmutableList.of(GroupByQuery.builder().setDataSource(join(new TableDataSource(CalciteTests.USERVISITDATASOURCE), new QueryDataSource(GroupByQuery.builder().setDataSource(GroupByQuery.builder().setDataSource(CalciteTests.USERVISITDATASOURCE).setQuerySegmentSpec(querySegmentSpec(Intervals.ETERNITY)).setVirtualColumns(new ExpressionVirtualColumn("v0", "timestamp_floor(\"__time\",'P1D',null,'UTC')", ColumnType.LONG, TestExprMacroTable.INSTANCE)).setDimFilter(not(selector("country", null, null))).setDimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.LONG), new DefaultDimensionSpec("country", "d1")).setAggregatorSpecs(new CardinalityAggregatorFactory("a0:a", null, Collections.singletonList(new DefaultDimensionSpec("user", "user")), false, true)).setPostAggregatorSpecs(Collections.singletonList(new HyperUniqueFinalizingPostAggregator("a0", "a0:a"))).setContext(withTimestampResultContext(queryContext, "d0", Granularities.DAY)).setGranularity(new AllGranularity()).build()).setQuerySegmentSpec(querySegmentSpec(Intervals.ETERNITY)).setDimensions(new DefaultDimensionSpec("d1", "_d0")).setAggregatorSpecs(new LongSumAggregatorFactory("_a0:sum", "a0"), useDefault ? new CountAggregatorFactory("_a0:count") : new FilteredAggregatorFactory(new CountAggregatorFactory("_a0:count"), not(selector("a0", null, null)))).setPostAggregatorSpecs(Collections.singletonList(new ArithmeticPostAggregator("_a0", "quotient", Arrays.asList(new FieldAccessPostAggregator(null, "_a0:sum"), new FieldAccessPostAggregator(null, "_a0:count"))))).setGranularity(new AllGranularity()).setContext(queryContext).build()), "j0.", equalsCondition(makeColumnExpression("country"), makeColumnExpression("j0._d0")), JoinType.LEFT)).setQuerySegmentSpec(querySegmentSpec(Intervals.ETERNITY)).setDimensions(new DefaultDimensionSpec("country", "d0")).setAggregatorSpecs(new LongAnyAggregatorFactory("a0", "j0._a0")).setGranularity(new AllGranularity()).setContext(queryContext).build()), ImmutableList.of(new Object[] { "India", 2L }, new Object[] { "USA", 1L }, new Object[] { "canada", 3L }));
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) ArithmeticPostAggregator(org.apache.druid.query.aggregation.post.ArithmeticPostAggregator) FieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) AllGranularity(org.apache.druid.java.util.common.granularity.AllGranularity) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) HyperUniqueFinalizingPostAggregator(org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) TableDataSource(org.apache.druid.query.TableDataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) LongAnyAggregatorFactory(org.apache.druid.query.aggregation.any.LongAnyAggregatorFactory) CardinalityAggregatorFactory(org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory) Parameters(junitparams.Parameters) Test(org.junit.Test)

Example 2 with HyperUniqueFinalizingPostAggregator

use of org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator in project druid by druid-io.

the class GroupByQueryRunnerTest method testGroupByWithLimitOnFinalizedHyperUnique.

@Test
public void testGroupByWithLimitOnFinalizedHyperUnique() {
    GroupByQuery query = makeQueryBuilder().setDataSource(QueryRunnerTestHelper.DATA_SOURCE).setGranularity(QueryRunnerTestHelper.ALL_GRAN).setDimensions(new DefaultDimensionSpec(QueryRunnerTestHelper.MARKET_DIMENSION, QueryRunnerTestHelper.MARKET_DIMENSION)).setInterval(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC).setLimitSpec(new DefaultLimitSpec(Collections.singletonList(new OrderByColumnSpec(QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, OrderByColumnSpec.Direction.DESCENDING)), 3)).setAggregatorSpecs(QueryRunnerTestHelper.QUALITY_UNIQUES).setPostAggregatorSpecs(Collections.singletonList(new HyperUniqueFinalizingPostAggregator(QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUE_METRIC))).build();
    List<ResultRow> expectedResults = Arrays.asList(makeRow(query, "1970-01-01T00:00:00.000Z", "market", "spot", QueryRunnerTestHelper.UNIQUE_METRIC, QueryRunnerTestHelper.UNIQUES_9, QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUES_9), makeRow(query, "1970-01-01T00:00:00.000Z", "market", "total_market", QueryRunnerTestHelper.UNIQUE_METRIC, QueryRunnerTestHelper.UNIQUES_2, QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUES_2), makeRow(query, "1970-01-01T00:00:00.000Z", "market", "upfront", QueryRunnerTestHelper.UNIQUE_METRIC, QueryRunnerTestHelper.UNIQUES_2, QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUES_2));
    Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, results, "order-limit");
}
Also used : OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) HyperUniqueFinalizingPostAggregator(org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 3 with HyperUniqueFinalizingPostAggregator

use of org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator in project druid by druid-io.

the class GroupByQueryRunnerTest method testGroupByWithOrderOnHyperUnique.

@Test
public void testGroupByWithOrderOnHyperUnique() {
    GroupByQuery query = makeQueryBuilder().setDataSource(QueryRunnerTestHelper.DATA_SOURCE).setGranularity(QueryRunnerTestHelper.ALL_GRAN).setDimensions(new DefaultDimensionSpec(QueryRunnerTestHelper.MARKET_DIMENSION, QueryRunnerTestHelper.MARKET_DIMENSION)).setInterval(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC).setLimitSpec(new DefaultLimitSpec(Collections.singletonList(new OrderByColumnSpec(QueryRunnerTestHelper.UNIQUE_METRIC, OrderByColumnSpec.Direction.DESCENDING)), 3)).setAggregatorSpecs(QueryRunnerTestHelper.QUALITY_UNIQUES).setPostAggregatorSpecs(Collections.singletonList(new HyperUniqueFinalizingPostAggregator(QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUE_METRIC))).build();
    List<ResultRow> expectedResults = Arrays.asList(makeRow(query, "1970-01-01T00:00:00.000Z", "market", "spot", QueryRunnerTestHelper.UNIQUE_METRIC, QueryRunnerTestHelper.UNIQUES_9, QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUES_9), makeRow(query, "1970-01-01T00:00:00.000Z", "market", "total_market", QueryRunnerTestHelper.UNIQUE_METRIC, QueryRunnerTestHelper.UNIQUES_2, QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUES_2), makeRow(query, "1970-01-01T00:00:00.000Z", "market", "upfront", QueryRunnerTestHelper.UNIQUE_METRIC, QueryRunnerTestHelper.UNIQUES_2, QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUES_2));
    Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, results, "order-limit");
}
Also used : OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) HyperUniqueFinalizingPostAggregator(org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 4 with HyperUniqueFinalizingPostAggregator

use of org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator in project druid by druid-io.

the class GroupByQueryRunnerTest method testGroupByWithHavingOnHyperUnique.

@Test
public void testGroupByWithHavingOnHyperUnique() {
    GroupByQuery query = makeQueryBuilder().setDataSource(QueryRunnerTestHelper.DATA_SOURCE).setGranularity(QueryRunnerTestHelper.ALL_GRAN).setDimensions(new DefaultDimensionSpec(QueryRunnerTestHelper.MARKET_DIMENSION, QueryRunnerTestHelper.MARKET_DIMENSION)).setInterval(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC).setLimitSpec(new DefaultLimitSpec(Collections.singletonList(new OrderByColumnSpec(QueryRunnerTestHelper.UNIQUE_METRIC, OrderByColumnSpec.Direction.DESCENDING)), 3)).setHavingSpec(new GreaterThanHavingSpec(QueryRunnerTestHelper.UNIQUE_METRIC, 8)).setAggregatorSpecs(QueryRunnerTestHelper.QUALITY_UNIQUES).setPostAggregatorSpecs(Collections.singletonList(new HyperUniqueFinalizingPostAggregator(QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUE_METRIC))).build();
    List<ResultRow> expectedResults = Collections.singletonList(makeRow(query, "1970-01-01T00:00:00.000Z", "market", "spot", QueryRunnerTestHelper.UNIQUE_METRIC, QueryRunnerTestHelper.UNIQUES_9, QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUES_9));
    Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, results, "order-limit");
}
Also used : OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) GreaterThanHavingSpec(org.apache.druid.query.groupby.having.GreaterThanHavingSpec) HyperUniqueFinalizingPostAggregator(org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 5 with HyperUniqueFinalizingPostAggregator

use of org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator in project druid by druid-io.

the class BuiltinApproxCountDistinctSqlAggregator method toDruidAggregation.

@Nullable
@Override
public Aggregation toDruidAggregation(final PlannerContext plannerContext, final RowSignature rowSignature, final VirtualColumnRegistry virtualColumnRegistry, final RexBuilder rexBuilder, final String name, final AggregateCall aggregateCall, final Project project, final List<Aggregation> existingAggregations, final boolean finalizeAggregations) {
    // Don't use Aggregations.getArgumentsForSimpleAggregator, since it won't let us use direct column access
    // for string columns.
    final RexNode rexNode = Expressions.fromFieldAccess(rowSignature, project, Iterables.getOnlyElement(aggregateCall.getArgList()));
    final DruidExpression arg = Expressions.toDruidExpression(plannerContext, rowSignature, rexNode);
    if (arg == null) {
        return null;
    }
    final AggregatorFactory aggregatorFactory;
    final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name;
    if (arg.isDirectColumnAccess() && rowSignature.getColumnType(arg.getDirectColumn()).map(type -> type.is(ValueType.COMPLEX)).orElse(false)) {
        aggregatorFactory = new HyperUniquesAggregatorFactory(aggregatorName, arg.getDirectColumn(), false, true);
    } else {
        final RelDataType dataType = rexNode.getType();
        final ColumnType inputType = Calcites.getColumnTypeForRelDataType(dataType);
        if (inputType == null) {
            throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", dataType.getSqlTypeName(), aggregatorName);
        }
        final DimensionSpec dimensionSpec;
        if (arg.isSimpleExtraction()) {
            dimensionSpec = arg.getSimpleExtraction().toDimensionSpec(null, inputType);
        } else {
            String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(arg, dataType);
            dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType);
        }
        aggregatorFactory = new CardinalityAggregatorFactory(aggregatorName, null, ImmutableList.of(dimensionSpec), false, true);
    }
    return Aggregation.create(Collections.singletonList(aggregatorFactory), finalizeAggregations ? new HyperUniqueFinalizingPostAggregator(name, aggregatorFactory.getName()) : null);
}
Also used : DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnType(org.apache.druid.segment.column.ColumnType) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) RelDataType(org.apache.calcite.rel.type.RelDataType) ISE(org.apache.druid.java.util.common.ISE) HyperUniqueFinalizingPostAggregator(org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) CardinalityAggregatorFactory(org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CardinalityAggregatorFactory(org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)

Aggregations

HyperUniqueFinalizingPostAggregator (org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator)7 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)7 Test (org.junit.Test)6 DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec)4 OrderByColumnSpec (org.apache.druid.query.groupby.orderby.OrderByColumnSpec)4 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)4 CardinalityAggregatorFactory (org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory)3 QueryDataSource (org.apache.druid.query.QueryDataSource)2 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)2 FilteredAggregatorFactory (org.apache.druid.query.aggregation.FilteredAggregatorFactory)2 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)2 ArithmeticPostAggregator (org.apache.druid.query.aggregation.post.ArithmeticPostAggregator)2 FieldAccessPostAggregator (org.apache.druid.query.aggregation.post.FieldAccessPostAggregator)2 GreaterThanHavingSpec (org.apache.druid.query.groupby.having.GreaterThanHavingSpec)2 Nullable (javax.annotation.Nullable)1 Parameters (junitparams.Parameters)1 RelDataType (org.apache.calcite.rel.type.RelDataType)1 RexNode (org.apache.calcite.rex.RexNode)1 ISE (org.apache.druid.java.util.common.ISE)1 AllGranularity (org.apache.druid.java.util.common.granularity.AllGranularity)1