Use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class CalciteQueryTest, method testExactCountDistinctUsingSubqueryWithWhereToOuterFilter.
@Test
public void testExactCountDistinctUsingSubqueryWithWhereToOuterFilter() throws Exception {
// Cannot vectorize topN operator.
cannotVectorize();
testQuery("SELECT\n" + " SUM(cnt),\n" + " COUNT(*)\n" + "FROM (SELECT dim2, SUM(cnt) AS cnt FROM druid.foo GROUP BY dim2 LIMIT 1)" + "WHERE cnt > 0", ImmutableList.of(GroupByQuery.builder().setDataSource(new QueryDataSource(new TopNQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(querySegmentSpec(Filtration.eternity())).granularity(Granularities.ALL).dimension(new DefaultDimensionSpec("dim2", "d0")).aggregators(new LongSumAggregatorFactory("a0", "cnt")).metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)).threshold(1).context(QUERY_CONTEXT_DEFAULT).build())).setDimFilter(bound("a0", "0", null, true, false, null, StringComparators.NUMERIC)).setInterval(querySegmentSpec(Filtration.eternity())).setGranularity(Granularities.ALL).setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("_a0", "a0"), new CountAggregatorFactory("_a1"))).setContext(QUERY_CONTEXT_DEFAULT).build()), NullHandling.replaceWithDefault() ? ImmutableList.of(new Object[] { 3L, 1L }) : ImmutableList.of(new Object[] { 2L, 1L }));
}
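The expected native query above shows the core pattern this page documents: any native query can act as the data source of another by wrapping it in QueryDataSource. Below is a minimal standalone sketch of that nesting outside the test harness; the datasource name "foo" and column names are illustrative, and the builder calls mirror the ones used in the expected query (assuming druid-processing on the classpath).

import java.util.Collections;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.QueryDataSource;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;

public class QueryDataSourceSketch
{
  public static GroupByQuery nestedExample()
  {
    // Inner query: SUM(cnt) per dim2 over all time, reading from the "foo" table.
    final GroupByQuery inner = GroupByQuery.builder()
        .setDataSource(new TableDataSource("foo"))
        .setInterval(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
        .setGranularity(Granularities.ALL)
        .setDimensions(new DefaultDimensionSpec("dim2", "d0"))
        .setAggregatorSpecs(new LongSumAggregatorFactory("a0", "cnt"))
        .build();

    // Outer query: re-aggregate the inner query's rows by wrapping it in QueryDataSource.
    return GroupByQuery.builder()
        .setDataSource(new QueryDataSource(inner))
        .setInterval(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
        .setGranularity(Granularities.ALL)
        .setAggregatorSpecs(
            new LongSumAggregatorFactory("_a0", "a0"),
            new CountAggregatorFactory("_a1")
        )
        .build();
  }
}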
Use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class CalciteQueryTest, method testExactCountDistinctOfSemiJoinResult.
@Test
public void testExactCountDistinctOfSemiJoinResult() throws Exception {
// Cannot vectorize due to extraction dimension spec.
cannotVectorize();
testQuery("SELECT COUNT(*)\n" + "FROM (\n" + " SELECT DISTINCT dim2\n" + " FROM druid.foo\n" + " WHERE SUBSTRING(dim2, 1, 1) IN (\n" + " SELECT SUBSTRING(dim1, 1, 1) FROM druid.foo WHERE dim1 <> ''\n" + " ) AND __time >= '2000-01-01' AND __time < '2002-01-01'\n" + ")", ImmutableList.of(GroupByQuery.builder().setDataSource(new QueryDataSource(GroupByQuery.builder().setDataSource(join(new TableDataSource(CalciteTests.DATASOURCE1), new QueryDataSource(GroupByQuery.builder().setDataSource(CalciteTests.DATASOURCE1).setInterval(querySegmentSpec(Filtration.eternity())).setGranularity(Granularities.ALL).setDimFilter(not(selector("dim1", "", null))).setDimensions(dimensions(new ExtractionDimensionSpec("dim1", "d0", new SubstringDimExtractionFn(0, 1)))).setContext(QUERY_CONTEXT_DEFAULT).build()), "j0.", equalsCondition(makeExpression("substring(\"dim2\", 0, 1)"), DruidExpression.ofColumn(ColumnType.STRING, "j0.d0")), JoinType.INNER)).setInterval(querySegmentSpec(Intervals.of("2000-01-01/2002-01-01"))).setGranularity(Granularities.ALL).setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))).setContext(QUERY_CONTEXT_DEFAULT).build())).setInterval(querySegmentSpec(Filtration.eternity())).setGranularity(Granularities.ALL).setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))).setContext(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { 2L }));
}
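Here join(...), equalsCondition(...), and makeExpression(...) are test helpers; the underlying construct is a JoinDataSource whose right side is the QueryDataSource built from the IN subquery. Below is a hedged sketch of the equivalent direct construction, with the argument order taken from the JoinDataSource.create call in the DruidJoinQueryRel excerpt later on this page; the inner parameter is a placeholder for a subquery built as in the test.

import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.JoinDataSource;
import org.apache.druid.query.QueryDataSource;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.segment.join.JoinType;

public class JoinDataSourceSketch
{
  // `inner` stands for the semi-join subquery built as in the test above.
  public static DataSource semiJoin(final GroupByQuery inner)
  {
    return JoinDataSource.create(
        new TableDataSource("foo"),                 // left: the base table
        new QueryDataSource(inner),                 // right: the subquery's results
        "j0.",                                      // prefix applied to right-hand columns
        "(substring(\"dim2\", 0, 1) == \"j0.d0\")", // join condition as a Druid expression
        JoinType.INNER,
        null,                                       // no left filter pushed into the join
        ExprMacroTable.nil()
    );
  }
}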
Use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class CalciteArraysQueryTest, method testArrayAggAsArrayFromJoin.
@Test
public void testArrayAggAsArrayFromJoin() throws Exception {
cannotVectorize();
List<Object[]> expectedResults;
if (useDefault) {
  expectedResults = ImmutableList.of(
      new Object[]{"a", "[\"10.1\",\"2\"]", "10.1,2"},
      new Object[]{"a", "[\"10.1\",\"2\"]", "10.1,2"},
      new Object[]{"a", "[\"10.1\",\"2\"]", "10.1,2"},
      new Object[]{"b", "[\"1\",\"abc\",\"def\"]", "1,abc,def"},
      new Object[]{"b", "[\"1\",\"abc\",\"def\"]", "1,abc,def"},
      new Object[]{"b", "[\"1\",\"abc\",\"def\"]", "1,abc,def"}
  );
} else {
  expectedResults = ImmutableList.of(
      new Object[]{"a", "[\"\",\"10.1\",\"2\"]", ",10.1,2"},
      new Object[]{"a", "[\"\",\"10.1\",\"2\"]", ",10.1,2"},
      new Object[]{"a", "[\"\",\"10.1\",\"2\"]", ",10.1,2"},
      new Object[]{"b", "[\"1\",\"abc\",\"def\"]", "1,abc,def"},
      new Object[]{"b", "[\"1\",\"abc\",\"def\"]", "1,abc,def"},
      new Object[]{"b", "[\"1\",\"abc\",\"def\"]", "1,abc,def"}
  );
}
testQuery("SELECT numfoo.dim4, j.arr, ARRAY_TO_STRING(j.arr, ',') FROM numfoo INNER JOIN (SELECT dim4, ARRAY_AGG(DISTINCT dim1) as arr FROM numfoo WHERE dim1 is not null GROUP BY 1) as j ON numfoo.dim4 = j.dim4", ImmutableList.of(Druids.newScanQueryBuilder().dataSource(join(new TableDataSource(CalciteTests.DATASOURCE3), new QueryDataSource(GroupByQuery.builder().setDataSource(CalciteTests.DATASOURCE3).setInterval(querySegmentSpec(Filtration.eternity())).setGranularity(Granularities.ALL).setDimFilter(not(selector("dim1", null, null))).setDimensions(new DefaultDimensionSpec("dim4", "_d0")).setAggregatorSpecs(aggregators(new ExpressionLambdaAggregatorFactory("a0", ImmutableSet.of("dim1"), "__acc", "ARRAY<STRING>[]", "ARRAY<STRING>[]", true, true, false, "array_set_add(\"__acc\", \"dim1\")", "array_set_add_all(\"__acc\", \"a0\")", null, null, ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE))).setContext(QUERY_CONTEXT_DEFAULT).build()), "j0.", "(\"dim4\" == \"j0._d0\")", JoinType.INNER, null)).virtualColumns(expressionVirtualColumn("v0", "array_to_string(\"j0.a0\",',')", ColumnType.STRING)).intervals(querySegmentSpec(Filtration.eternity())).columns("dim4", "j0.a0", "v0").context(QUERY_CONTEXT_DEFAULT).resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST).legacy(false).build()), expectedResults);
}
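The SQL ARRAY_AGG(DISTINCT dim1) plans to the ExpressionLambdaAggregatorFactory above. The following annotated copy of that constructor call is a best-effort reading of what each positional argument means, inferred from the factory's JSON property names; treat the comments as assumptions rather than authoritative documentation.

AggregatorFactory arrayAgg = new ExpressionLambdaAggregatorFactory(
    "a0",                                   // output column name
    ImmutableSet.of("dim1"),                // input fields available to the expressions
    "__acc",                                // accumulator identifier usable in the expressions
    "ARRAY<STRING>[]",                      // initial per-segment accumulator: an empty string array
    "ARRAY<STRING>[]",                      // initial accumulator for the combining phase
    true,                                   // isNullUnlessAggregated: null result if no rows aggregated
    true,                                   // shouldAggregateNullInputs
    false,                                  // shouldCombineAggregateNullInputs
    "array_set_add(\"__acc\", \"dim1\")",   // per-row fold: set-add keeps values DISTINCT
    "array_set_add_all(\"__acc\", \"a0\")", // combine expression merging partial aggregates
    null,                                   // compare expression (default comparison)
    null,                                   // finalize expression (return the accumulator as-is)
    ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES,
    TestExprMacroTable.INSTANCE
);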
Use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class CalciteQueryTest, method testMinMaxAvgDailyCountWithLimit.
@Test
public void testMinMaxAvgDailyCountWithLimit() throws Exception {
// Cannot vectorize due to virtual columns.
cannotVectorize();
testQuery("SELECT * FROM (" + " SELECT max(cnt), min(cnt), avg(cnt), TIME_EXTRACT(max(t), 'EPOCH') last_time, count(1) num_days FROM (\n" + " SELECT TIME_FLOOR(__time, 'P1D') AS t, count(1) cnt\n" + " FROM \"foo\"\n" + " GROUP BY 1\n" + " )" + ") LIMIT 1\n", ImmutableList.of(GroupByQuery.builder().setDataSource(new QueryDataSource(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).granularity(new PeriodGranularity(Period.days(1), null, DateTimeZone.UTC)).intervals(querySegmentSpec(Filtration.eternity())).aggregators(new CountAggregatorFactory("a0")).context(getTimeseriesContextWithFloorTime(TIMESERIES_CONTEXT_BY_GRAN, "d0")).build())).setInterval(querySegmentSpec(Filtration.eternity())).setGranularity(Granularities.ALL).setAggregatorSpecs(useDefault ? aggregators(new LongMaxAggregatorFactory("_a0", "a0"), new LongMinAggregatorFactory("_a1", "a0"), new LongSumAggregatorFactory("_a2:sum", "a0"), new CountAggregatorFactory("_a2:count"), new LongMaxAggregatorFactory("_a3", "d0"), new CountAggregatorFactory("_a4")) : aggregators(new LongMaxAggregatorFactory("_a0", "a0"), new LongMinAggregatorFactory("_a1", "a0"), new LongSumAggregatorFactory("_a2:sum", "a0"), new FilteredAggregatorFactory(new CountAggregatorFactory("_a2:count"), not(selector("a0", null, null))), new LongMaxAggregatorFactory("_a3", "d0"), new CountAggregatorFactory("_a4"))).setPostAggregatorSpecs(ImmutableList.of(new ArithmeticPostAggregator("_a2", "quotient", ImmutableList.of(new FieldAccessPostAggregator(null, "_a2:sum"), new FieldAccessPostAggregator(null, "_a2:count"))), expressionPostAgg("p0", "timestamp_extract(\"_a3\",'EPOCH','UTC')"))).setContext(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { 1L, 1L, 1L, 978480000L, 6L }));
}
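Druid has no native AVG aggregator, which is why avg(cnt) surfaces in the expected query as the _a2:sum and _a2:count pair plus a "quotient" arithmetic post-aggregator (with the count null-filtered under SQL-compliant null handling). A minimal sketch of that decomposition, using the same classes as the expected query above:

// AVG(x) is planned as SUM(x) and COUNT(x) plus a quotient post-aggregator.
LongSumAggregatorFactory sum = new LongSumAggregatorFactory("_a2:sum", "a0");
CountAggregatorFactory count = new CountAggregatorFactory("_a2:count");
ArithmeticPostAggregator avg = new ArithmeticPostAggregator(
    "_a2",        // output name for the average
    "quotient",   // divide the first field by the second
    ImmutableList.of(
        new FieldAccessPostAggregator(null, "_a2:sum"),
        new FieldAccessPostAggregator(null, "_a2:count")
    )
);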
Use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class DruidJoinQueryRel, method toDruidQuery.
@Override
public DruidQuery toDruidQuery(final boolean finalizeAggregations) {
final DruidRel<?> leftDruidRel = (DruidRel<?>) left;
final DruidQuery leftQuery = Preconditions.checkNotNull(leftDruidRel.toDruidQuery(false), "leftQuery");
final RowSignature leftSignature = leftQuery.getOutputRowSignature();
final DataSource leftDataSource;
final DruidRel<?> rightDruidRel = (DruidRel<?>) right;
final DruidQuery rightQuery = Preconditions.checkNotNull(rightDruidRel.toDruidQuery(false), "rightQuery");
final RowSignature rightSignature = rightQuery.getOutputRowSignature();
final DataSource rightDataSource;
if (computeLeftRequiresSubquery(leftDruidRel)) {
leftDataSource = new QueryDataSource(leftQuery.getQuery());
if (leftFilter != null) {
throw new ISE("Filter on left table is supposed to be null if left child is a query source");
}
} else {
leftDataSource = leftQuery.getDataSource();
}
if (computeRightRequiresSubquery(rightDruidRel)) {
rightDataSource = new QueryDataSource(rightQuery.getQuery());
} else {
rightDataSource = rightQuery.getDataSource();
}
final Pair<String, RowSignature> prefixSignaturePair = computeJoinRowSignature(leftSignature, rightSignature);
VirtualColumnRegistry virtualColumnRegistry = VirtualColumnRegistry.create(prefixSignaturePair.rhs, getPlannerContext().getExprMacroTable());
getPlannerContext().setJoinExpressionVirtualColumnRegistry(virtualColumnRegistry);
// Generate the condition for this join as a Druid expression.
final DruidExpression condition = Expressions.toDruidExpression(getPlannerContext(), prefixSignaturePair.rhs, joinRel.getCondition());
// Unset the registry to avoid any virtual column registry leaks in case there are multiple Druid queries for the SQL.
// This should be fixed soon by interface changes to SqlOperatorConversion and the Expressions bridge class.
getPlannerContext().setJoinExpressionVirtualColumnRegistry(null);
// The null check below quiets static code analysis.
if (condition == null) {
throw new CannotBuildQueryException(joinRel, joinRel.getCondition());
}
return partialQuery.build(
    JoinDataSource.create(
        leftDataSource,
        rightDataSource,
        prefixSignaturePair.lhs,
        condition.getExpression(),
        toDruidJoinType(joinRel.getJoinType()),
        getDimFilter(getPlannerContext(), leftSignature, leftFilter),
        getPlannerContext().getExprMacroTable()
    ),
    prefixSignaturePair.rhs,
    getPlannerContext(),
    getCluster().getRexBuilder(),
    finalizeAggregations,
    virtualColumnRegistry
);
}
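The left and right branches above share one decision: a child rel that must run as a subquery gets wrapped in a QueryDataSource, while any other child contributes its own data source directly. A condensed sketch of that decision as a hypothetical helper (requiresSubquery stands in for computeLeftRequiresSubquery / computeRightRequiresSubquery; not part of the actual class):

// Hypothetical condensation of the branching in toDruidQuery above.
private static DataSource wrapIfNeeded(final DruidQuery query, final boolean requiresSubquery)
{
  return requiresSubquery
         ? new QueryDataSource(query.getQuery()) // run the child as a subquery
         : query.getDataSource();                // inline the child's own data source
}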