use of org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator in project druid by druid-io.
the class CalciteCorrelatedQueryTest method testCorrelatedSubquery.
@Test
@Parameters(source = QueryContextForJoinProvider.class)
public void testCorrelatedSubquery(Map<String, Object> queryContext) throws Exception {
cannotVectorize();
queryContext = withLeftDirectAccessEnabled(queryContext);
testQuery("select country, ANY_VALUE(\n" + " select avg(\"users\") from (\n" + " select floor(__time to day), count(distinct user) \"users\" from visits f where f.country = visits.country group by 1\n" + " )\n" + " ) as \"DAU\"\n" + "from visits \n" + "group by 1", queryContext, ImmutableList.of(GroupByQuery.builder().setDataSource(join(new TableDataSource(CalciteTests.USERVISITDATASOURCE), new QueryDataSource(GroupByQuery.builder().setDataSource(GroupByQuery.builder().setDataSource(CalciteTests.USERVISITDATASOURCE).setQuerySegmentSpec(querySegmentSpec(Intervals.ETERNITY)).setVirtualColumns(new ExpressionVirtualColumn("v0", "timestamp_floor(\"__time\",'P1D',null,'UTC')", ColumnType.LONG, TestExprMacroTable.INSTANCE)).setDimFilter(not(selector("country", null, null))).setDimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.LONG), new DefaultDimensionSpec("country", "d1")).setAggregatorSpecs(new CardinalityAggregatorFactory("a0:a", null, Collections.singletonList(new DefaultDimensionSpec("user", "user")), false, true)).setPostAggregatorSpecs(Collections.singletonList(new HyperUniqueFinalizingPostAggregator("a0", "a0:a"))).setContext(withTimestampResultContext(queryContext, "d0", Granularities.DAY)).setGranularity(new AllGranularity()).build()).setQuerySegmentSpec(querySegmentSpec(Intervals.ETERNITY)).setDimensions(new DefaultDimensionSpec("d1", "_d0")).setAggregatorSpecs(new LongSumAggregatorFactory("_a0:sum", "a0"), useDefault ? new CountAggregatorFactory("_a0:count") : new FilteredAggregatorFactory(new CountAggregatorFactory("_a0:count"), not(selector("a0", null, null)))).setPostAggregatorSpecs(Collections.singletonList(new ArithmeticPostAggregator("_a0", "quotient", Arrays.asList(new FieldAccessPostAggregator(null, "_a0:sum"), new FieldAccessPostAggregator(null, "_a0:count"))))).setGranularity(new AllGranularity()).setContext(queryContext).build()), "j0.", equalsCondition(makeColumnExpression("country"), makeColumnExpression("j0._d0")), JoinType.LEFT)).setQuerySegmentSpec(querySegmentSpec(Intervals.ETERNITY)).setDimensions(new DefaultDimensionSpec("country", "d0")).setAggregatorSpecs(new LongAnyAggregatorFactory("a0", "j0._a0")).setGranularity(new AllGranularity()).setContext(queryContext).build()), ImmutableList.of(new Object[] { "India", 2L }, new Object[] { "USA", 1L }, new Object[] { "canada", 3L }));
}
use of org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator in project druid by druid-io.
the class GroupByQueryRunnerTest method testGroupByWithLimitOnFinalizedHyperUnique.
@Test
public void testGroupByWithLimitOnFinalizedHyperUnique() {
GroupByQuery query = makeQueryBuilder().setDataSource(QueryRunnerTestHelper.DATA_SOURCE).setGranularity(QueryRunnerTestHelper.ALL_GRAN).setDimensions(new DefaultDimensionSpec(QueryRunnerTestHelper.MARKET_DIMENSION, QueryRunnerTestHelper.MARKET_DIMENSION)).setInterval(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC).setLimitSpec(new DefaultLimitSpec(Collections.singletonList(new OrderByColumnSpec(QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, OrderByColumnSpec.Direction.DESCENDING)), 3)).setAggregatorSpecs(QueryRunnerTestHelper.QUALITY_UNIQUES).setPostAggregatorSpecs(Collections.singletonList(new HyperUniqueFinalizingPostAggregator(QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUE_METRIC))).build();
List<ResultRow> expectedResults = Arrays.asList(makeRow(query, "1970-01-01T00:00:00.000Z", "market", "spot", QueryRunnerTestHelper.UNIQUE_METRIC, QueryRunnerTestHelper.UNIQUES_9, QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUES_9), makeRow(query, "1970-01-01T00:00:00.000Z", "market", "total_market", QueryRunnerTestHelper.UNIQUE_METRIC, QueryRunnerTestHelper.UNIQUES_2, QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUES_2), makeRow(query, "1970-01-01T00:00:00.000Z", "market", "upfront", QueryRunnerTestHelper.UNIQUE_METRIC, QueryRunnerTestHelper.UNIQUES_2, QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUES_2));
Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "order-limit");
}
use of org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator in project druid by druid-io.
the class GroupByQueryRunnerTest method testGroupByWithOrderOnHyperUnique.
@Test
public void testGroupByWithOrderOnHyperUnique() {
GroupByQuery query = makeQueryBuilder().setDataSource(QueryRunnerTestHelper.DATA_SOURCE).setGranularity(QueryRunnerTestHelper.ALL_GRAN).setDimensions(new DefaultDimensionSpec(QueryRunnerTestHelper.MARKET_DIMENSION, QueryRunnerTestHelper.MARKET_DIMENSION)).setInterval(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC).setLimitSpec(new DefaultLimitSpec(Collections.singletonList(new OrderByColumnSpec(QueryRunnerTestHelper.UNIQUE_METRIC, OrderByColumnSpec.Direction.DESCENDING)), 3)).setAggregatorSpecs(QueryRunnerTestHelper.QUALITY_UNIQUES).setPostAggregatorSpecs(Collections.singletonList(new HyperUniqueFinalizingPostAggregator(QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUE_METRIC))).build();
List<ResultRow> expectedResults = Arrays.asList(makeRow(query, "1970-01-01T00:00:00.000Z", "market", "spot", QueryRunnerTestHelper.UNIQUE_METRIC, QueryRunnerTestHelper.UNIQUES_9, QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUES_9), makeRow(query, "1970-01-01T00:00:00.000Z", "market", "total_market", QueryRunnerTestHelper.UNIQUE_METRIC, QueryRunnerTestHelper.UNIQUES_2, QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUES_2), makeRow(query, "1970-01-01T00:00:00.000Z", "market", "upfront", QueryRunnerTestHelper.UNIQUE_METRIC, QueryRunnerTestHelper.UNIQUES_2, QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUES_2));
Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "order-limit");
}
use of org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator in project druid by druid-io.
the class GroupByQueryRunnerTest method testGroupByWithHavingOnHyperUnique.
@Test
public void testGroupByWithHavingOnHyperUnique() {
GroupByQuery query = makeQueryBuilder().setDataSource(QueryRunnerTestHelper.DATA_SOURCE).setGranularity(QueryRunnerTestHelper.ALL_GRAN).setDimensions(new DefaultDimensionSpec(QueryRunnerTestHelper.MARKET_DIMENSION, QueryRunnerTestHelper.MARKET_DIMENSION)).setInterval(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC).setLimitSpec(new DefaultLimitSpec(Collections.singletonList(new OrderByColumnSpec(QueryRunnerTestHelper.UNIQUE_METRIC, OrderByColumnSpec.Direction.DESCENDING)), 3)).setHavingSpec(new GreaterThanHavingSpec(QueryRunnerTestHelper.UNIQUE_METRIC, 8)).setAggregatorSpecs(QueryRunnerTestHelper.QUALITY_UNIQUES).setPostAggregatorSpecs(Collections.singletonList(new HyperUniqueFinalizingPostAggregator(QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUE_METRIC))).build();
List<ResultRow> expectedResults = Collections.singletonList(makeRow(query, "1970-01-01T00:00:00.000Z", "market", "spot", QueryRunnerTestHelper.UNIQUE_METRIC, QueryRunnerTestHelper.UNIQUES_9, QueryRunnerTestHelper.HYPER_UNIQUE_FINALIZING_POST_AGG_METRIC, QueryRunnerTestHelper.UNIQUES_9));
Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "order-limit");
}
use of org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator in project druid by druid-io.
the class BuiltinApproxCountDistinctSqlAggregator method toDruidAggregation.
@Nullable
@Override
public Aggregation toDruidAggregation(final PlannerContext plannerContext, final RowSignature rowSignature, final VirtualColumnRegistry virtualColumnRegistry, final RexBuilder rexBuilder, final String name, final AggregateCall aggregateCall, final Project project, final List<Aggregation> existingAggregations, final boolean finalizeAggregations) {
// Don't use Aggregations.getArgumentsForSimpleAggregator, since it won't let us use direct column access
// for string columns.
final RexNode rexNode = Expressions.fromFieldAccess(rowSignature, project, Iterables.getOnlyElement(aggregateCall.getArgList()));
final DruidExpression arg = Expressions.toDruidExpression(plannerContext, rowSignature, rexNode);
if (arg == null) {
return null;
}
final AggregatorFactory aggregatorFactory;
final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name;
if (arg.isDirectColumnAccess() && rowSignature.getColumnType(arg.getDirectColumn()).map(type -> type.is(ValueType.COMPLEX)).orElse(false)) {
aggregatorFactory = new HyperUniquesAggregatorFactory(aggregatorName, arg.getDirectColumn(), false, true);
} else {
final RelDataType dataType = rexNode.getType();
final ColumnType inputType = Calcites.getColumnTypeForRelDataType(dataType);
if (inputType == null) {
throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", dataType.getSqlTypeName(), aggregatorName);
}
final DimensionSpec dimensionSpec;
if (arg.isSimpleExtraction()) {
dimensionSpec = arg.getSimpleExtraction().toDimensionSpec(null, inputType);
} else {
String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(arg, dataType);
dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType);
}
aggregatorFactory = new CardinalityAggregatorFactory(aggregatorName, null, ImmutableList.of(dimensionSpec), false, true);
}
return Aggregation.create(Collections.singletonList(aggregatorFactory), finalizeAggregations ? new HyperUniqueFinalizingPostAggregator(name, aggregatorFactory.getName()) : null);
}
Aggregations