Use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class CalciteJoinQueryTest, method testJoinOnGroupByInsteadOfTimeseriesWithFloorOnTime.
@Test
public void testJoinOnGroupByInsteadOfTimeseriesWithFloorOnTime() throws Exception {
// Cannot vectorize JOIN operator.
cannotVectorize();
testQuery(
    "SELECT CAST(__time AS BIGINT), m1, ANY_VALUE(dim3, 100) FROM foo WHERE (CAST(TIME_FLOOR(__time, 'PT1H') AS BIGINT), m1) IN\n"
        + " (\n"
        + " SELECT CAST(TIME_FLOOR(__time, 'PT1H') AS BIGINT) + 0 AS t1, MIN(m1) AS t2 FROM foo WHERE dim3 = 'b'\n"
        + " AND __time BETWEEN '1994-04-29 00:00:00' AND '2020-01-11 00:00:00' GROUP BY 1\n"
        + " )\n"
        + "GROUP BY 1, 2\n",
    ImmutableList.of(
        GroupByQuery.builder()
            .setDataSource(
                join(
                    new TableDataSource(CalciteTests.DATASOURCE1),
                    new QueryDataSource(
                        GroupByQuery.builder()
                            .setDataSource(CalciteTests.DATASOURCE1)
                            .setInterval(querySegmentSpec(Intervals.of("1994-04-29/2020-01-11T00:00:00.001Z")))
                            .setVirtualColumns(expressionVirtualColumn("v0", "(timestamp_floor(\"__time\",'PT1H',null,'UTC') + 0)", ColumnType.LONG))
                            .setDimFilter(selector("dim3", "b", null))
                            .setGranularity(Granularities.ALL)
                            .setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.LONG)))
                            .setAggregatorSpecs(aggregators(new FloatMinAggregatorFactory("a0", "m1")))
                            .setContext(QUERY_CONTEXT_DEFAULT)
                            .build()
                    ),
                    "j0.",
                    "((timestamp_floor(\"__time\",'PT1H',null,'UTC') == \"j0.d0\") && (\"m1\" == \"j0.a0\"))",
                    JoinType.INNER
                )
            )
            .setInterval(querySegmentSpec(Filtration.eternity()))
            .setDimensions(
                new DefaultDimensionSpec("__time", "d0", ColumnType.LONG),
                new DefaultDimensionSpec("m1", "d1", ColumnType.FLOAT)
            )
            .setGranularity(Granularities.ALL)
            .setAggregatorSpecs(aggregators(new StringAnyAggregatorFactory("a0", "dim3", 100)))
            .setContext(QUERY_CONTEXT_DEFAULT)
            .build()
    ),
    ImmutableList.of(
        new Object[] { 946684800000L, 1.0f, "[a, b]" },
        new Object[] { 946771200000L, 2.0f, "[b, c]" }
    )
);
}
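All of these expected plans follow the same basic shape: the SQL subquery is compiled to a native query, wrapped in a QueryDataSource, and joined to the base table under a column prefix. The stripped-down sketch below (not part of the original test) shows just that wrapping step; it assumes the join(), querySegmentSpec(), dimensions() helpers and the QUERY_CONTEXT_DEFAULT constant inherited from the test's base class are in scope.

// Illustrative sketch only: wrap a native subquery in a QueryDataSource, then join it
// to the base table. join(), querySegmentSpec(), dimensions() and QUERY_CONTEXT_DEFAULT
// are assumed to be the helpers inherited from the test superclass.
GroupByQuery innerQuery = GroupByQuery.builder()
    .setDataSource(CalciteTests.DATASOURCE1)
    .setInterval(querySegmentSpec(Filtration.eternity()))
    .setGranularity(Granularities.ALL)
    .setDimensions(dimensions(new DefaultDimensionSpec("m1", "d0", ColumnType.FLOAT)))
    .setContext(QUERY_CONTEXT_DEFAULT)
    .build();
DataSource joined = join(
    new TableDataSource(CalciteTests.DATASOURCE1), // left side: the raw table
    new QueryDataSource(innerQuery), // right side: the subquery's results
    "j0.", // prefix applied to right-side columns
    "(\"m1\" == \"j0.d0\")", // join condition as a native expression
    JoinType.INNER
);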
Use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class CalciteJoinQueryTest, method testSemiAndAntiJoinSimultaneouslyUsingWhereInSubquery.
@Test
@Parameters(source = QueryContextForJoinProvider.class)
public void testSemiAndAntiJoinSimultaneouslyUsingWhereInSubquery(Map<String, Object> queryContext) throws Exception {
cannotVectorize();
testQuery(
    "SELECT dim1, COUNT(*) FROM foo\n"
        + "WHERE dim1 IN ('abc', 'def')\n"
        + "AND __time IN (SELECT MAX(__time) FROM foo)\n"
        + "AND __time NOT IN (SELECT MIN(__time) FROM foo)\n"
        + "GROUP BY 1",
    queryContext,
    ImmutableList.of(
        GroupByQuery.builder()
            .setDataSource(
                join(
                    join(
                        join(
                            new TableDataSource(CalciteTests.DATASOURCE1),
                            new QueryDataSource(
                                Druids.newTimeseriesQueryBuilder()
                                    .dataSource(CalciteTests.DATASOURCE1)
                                    .intervals(querySegmentSpec(Filtration.eternity()))
                                    .granularity(Granularities.ALL)
                                    .aggregators(new LongMaxAggregatorFactory("a0", "__time"))
                                    .context(QUERY_CONTEXT_DEFAULT)
                                    .build()
                            ),
                            "j0.",
                            "(\"__time\" == \"j0.a0\")",
                            JoinType.INNER
                        ),
                        new QueryDataSource(
                            GroupByQuery.builder()
                                .setDataSource(
                                    new QueryDataSource(
                                        Druids.newTimeseriesQueryBuilder()
                                            .dataSource(CalciteTests.DATASOURCE1)
                                            .intervals(querySegmentSpec(Filtration.eternity()))
                                            .granularity(Granularities.ALL)
                                            .aggregators(new LongMinAggregatorFactory("a0", "__time"))
                                            .context(QUERY_CONTEXT_DEFAULT)
                                            .build()
                                    )
                                )
                                .setInterval(querySegmentSpec(Filtration.eternity()))
                                .setGranularity(Granularities.ALL)
                                .setAggregatorSpecs(
                                    new CountAggregatorFactory("_a0"),
                                    NullHandling.sqlCompatible()
                                        ? new FilteredAggregatorFactory(new CountAggregatorFactory("_a1"), not(selector("a0", null, null)))
                                        : new CountAggregatorFactory("_a1")
                                )
                                .setContext(QUERY_CONTEXT_DEFAULT)
                                .build()
                        ),
                        "_j0.",
                        "1",
                        JoinType.INNER
                    ),
                    new QueryDataSource(
                        Druids.newTimeseriesQueryBuilder()
                            .dataSource(CalciteTests.DATASOURCE1)
                            .intervals(querySegmentSpec(Filtration.eternity()))
                            .granularity(Granularities.ALL)
                            .aggregators(new LongMinAggregatorFactory("a0", "__time"))
                            .postAggregators(expressionPostAgg("p0", "1"))
                            .context(QUERY_CONTEXT_DEFAULT)
                            .build()
                    ),
                    "__j0.",
                    "(\"__time\" == \"__j0.a0\")",
                    JoinType.LEFT
                )
            )
            .setInterval(querySegmentSpec(Filtration.eternity()))
            .setGranularity(Granularities.ALL)
            .setDimFilter(
                and(
                    in("dim1", ImmutableList.of("abc", "def"), null),
                    or(
                        selector("_j0._a0", "0", null),
                        and(selector("__j0.p0", null, null), expressionFilter("(\"_j0._a1\" >= \"_j0._a0\")"))
                    )
                )
            )
            .setDimensions(dimensions(new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING)))
            .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0")))
            .setContext(queryContext)
            .build()
    ),
    ImmutableList.of(new Object[] { "abc", 1L })
);
}
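Two details of this plan are worth noting: the joins nest left-deep, so each subquery gets its own column prefix ("j0.", "_j0.", "__j0."), and a QueryDataSource can wrap any native query type, including a TimeseriesQuery; it can even serve as the datasource of another subquery, as the MIN(__time) timeseries nested inside the counting GroupByQuery shows. A minimal, illustrative sketch of wrapping a timeseries aggregate, under the same helper-method assumptions as above:

// Sketch only: a TimeseriesQuery (here, MAX(__time) over the whole table) wrapped in a
// QueryDataSource so the outer query can join against the single aggregated row.
// querySegmentSpec() and QUERY_CONTEXT_DEFAULT are assumed to come from the test superclass.
QueryDataSource maxTimeSubquery = new QueryDataSource(
    Druids.newTimeseriesQueryBuilder()
        .dataSource(CalciteTests.DATASOURCE1)
        .intervals(querySegmentSpec(Filtration.eternity()))
        .granularity(Granularities.ALL)
        .aggregators(new LongMaxAggregatorFactory("a0", "__time"))
        .context(QUERY_CONTEXT_DEFAULT)
        .build()
);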
Use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class CalciteJoinQueryTest, method testLeftJoinSubqueryWithNullKeyFilter.
@Test
@Parameters(source = QueryContextForJoinProvider.class)
public void testLeftJoinSubqueryWithNullKeyFilter(Map<String, Object> queryContext) throws Exception {
// Cannot vectorize due to 'concat' expression.
cannotVectorize();
ScanQuery nullCompatibleModePlan = newScanQueryBuilder()
    .dataSource(
        join(
            new TableDataSource(CalciteTests.DATASOURCE1),
            new QueryDataSource(
                GroupByQuery.builder()
                    .setDataSource(new LookupDataSource("lookyloo"))
                    .setInterval(querySegmentSpec(Filtration.eternity()))
                    .setGranularity(Granularities.ALL)
                    .setVirtualColumns(expressionVirtualColumn("v0", "concat(\"k\",'')", ColumnType.STRING))
                    .setDimensions(new DefaultDimensionSpec("v0", "d0"))
                    .build()
            ),
            "j0.",
            equalsCondition(makeColumnExpression("dim1"), makeColumnExpression("j0.d0")),
            JoinType.INNER
        )
    )
    .intervals(querySegmentSpec(Filtration.eternity()))
    .columns("dim1", "j0.d0")
    .context(queryContext)
    .build();
ScanQuery nonNullCompatibleModePlan = newScanQueryBuilder()
    .dataSource(
        join(
            new TableDataSource(CalciteTests.DATASOURCE1),
            new QueryDataSource(
                GroupByQuery.builder()
                    .setDataSource(new LookupDataSource("lookyloo"))
                    .setInterval(querySegmentSpec(Filtration.eternity()))
                    .setGranularity(Granularities.ALL)
                    .setVirtualColumns(expressionVirtualColumn("v0", "concat(\"k\",'')", ColumnType.STRING))
                    .setDimensions(new DefaultDimensionSpec("v0", "d0"))
                    .build()
            ),
            "j0.",
            equalsCondition(makeColumnExpression("dim1"), makeColumnExpression("j0.d0")),
            JoinType.LEFT
        )
    )
    .intervals(querySegmentSpec(Filtration.eternity()))
    .columns("dim1", "j0.d0")
    .filters(new NotDimFilter(new SelectorDimFilter("j0.d0", null, null)))
    .context(queryContext)
    .build();
boolean isJoinFilterRewriteEnabled = queryContext.getOrDefault(JOIN_FILTER_REWRITE_ENABLE_KEY, true).toString().equals("true");
testQuery(
    "SELECT dim1, l1.k\n"
        + "FROM foo\n"
        + "LEFT JOIN (select k || '' as k from lookup.lookyloo group by 1) l1 ON foo.dim1 = l1.k\n"
        + "WHERE l1.k IS NOT NULL\n",
    queryContext,
    ImmutableList.of(NullHandling.sqlCompatible() ? nullCompatibleModePlan : nonNullCompatibleModePlan),
    NullHandling.sqlCompatible() || !isJoinFilterRewriteEnabled
        ? ImmutableList.of(new Object[] { "abc", "abc" })
        : ImmutableList.of(
            // this result is incorrect. TODO: fix this result when the JoinFilterAnalyzer bug is fixed
            new Object[] { "10.1", "" },
            new Object[] { "2", "" },
            new Object[] { "1", "" },
            new Object[] { "def", "" },
            new Object[] { "abc", "abc" }
        )
);
}
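In this case the right-hand side of the join is a QueryDataSource over a GroupByQuery whose own datasource is a LookupDataSource, and the two candidate plans differ only in join type (INNER vs. LEFT) and the post-join IS NOT NULL filter, selected by NullHandling.sqlCompatible(). A sketch of just the lookup-backed subquery, under the same assumptions about inherited test helpers:

// Sketch only: GROUP BY over a lookup table, wrapped so it can be joined to foo.
// expressionVirtualColumn() and querySegmentSpec() are assumed to be the test-superclass helpers used above.
QueryDataSource lookupKeys = new QueryDataSource(
    GroupByQuery.builder()
        .setDataSource(new LookupDataSource("lookyloo"))
        .setInterval(querySegmentSpec(Filtration.eternity()))
        .setGranularity(Granularities.ALL)
        .setVirtualColumns(expressionVirtualColumn("v0", "concat(\"k\",'')", ColumnType.STRING))
        .setDimensions(new DefaultDimensionSpec("v0", "d0"))
        .build()
);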
Use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class CalciteJoinQueryTest, method testUsingSubqueryAsPartOfOrFilter.
@Test
@Parameters(source = QueryContextForJoinProvider.class)
public void testUsingSubqueryAsPartOfOrFilter(Map<String, Object> queryContext) throws Exception {
// Cannot vectorize JOIN operator.
cannotVectorize();
testQuery(
    "SELECT dim1, dim2, COUNT(*) FROM druid.foo\n"
        + "WHERE dim1 = 'xxx' OR dim2 IN (SELECT dim1 FROM druid.foo WHERE dim1 LIKE '%bc')\n"
        + "group by dim1, dim2 ORDER BY dim2",
    queryContext,
    ImmutableList.of(
        GroupByQuery.builder()
            .setDataSource(
                join(
                    join(
                        new TableDataSource(CalciteTests.DATASOURCE1),
                        new QueryDataSource(
                            Druids.newTimeseriesQueryBuilder()
                                .dataSource(CalciteTests.DATASOURCE1)
                                .intervals(querySegmentSpec(Filtration.eternity()))
                                .filters(new LikeDimFilter("dim1", "%bc", null, null))
                                .granularity(Granularities.ALL)
                                .aggregators(new CountAggregatorFactory("a0"))
                                .context(QUERY_CONTEXT_DEFAULT)
                                .build()
                        ),
                        "j0.",
                        "1",
                        JoinType.INNER
                    ),
                    new QueryDataSource(
                        GroupByQuery.builder()
                            .setDataSource(CalciteTests.DATASOURCE1)
                            .setInterval(querySegmentSpec(Filtration.eternity()))
                            .setGranularity(Granularities.ALL)
                            .setVirtualColumns(expressionVirtualColumn("v0", "1", ColumnType.LONG))
                            .setDimFilter(new LikeDimFilter("dim1", "%bc", null, null))
                            .setDimensions(dimensions(new DefaultDimensionSpec("dim1", "d0"), new DefaultDimensionSpec("v0", "d1", ColumnType.LONG)))
                            .setContext(queryContext)
                            .build()
                    ),
                    "_j0.",
                    equalsCondition(makeColumnExpression("dim2"), makeColumnExpression("_j0.d0")),
                    JoinType.LEFT
                )
            )
            .setInterval(querySegmentSpec(Filtration.eternity()))
            .setGranularity(Granularities.ALL)
            .setDimFilter(
                or(
                    selector("dim1", "xxx", null),
                    and(
                        not(selector("j0.a0", "0", null)),
                        not(selector("_j0.d1", null, null)),
                        not(selector("dim2", null, null))
                    )
                )
            )
            .setDimensions(dimensions(new DefaultDimensionSpec("dim1", "d0"), new DefaultDimensionSpec("dim2", "d1")))
            .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0")))
            .setLimitSpec(new DefaultLimitSpec(ImmutableList.of(new OrderByColumnSpec("d1", OrderByColumnSpec.Direction.ASCENDING)), Integer.MAX_VALUE))
            .setContext(queryContext)
            .build()
    ),
    ImmutableList.of(new Object[] { "def", "abc", 1L })
);
}
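The OR of a plain predicate with an IN subquery yields two QueryDataSource join inputs: a cross join (condition "1") against a one-row timeseries COUNT(*) so the outer filter can tell whether the subquery matched anything, and a LEFT join against the distinct matching dim1 values. A sketch of the cross-join half, again assuming the test base class's join() helper:

// Sketch only: joining on the constant condition "1" cross-joins every base row with the
// single COUNT(*) row, making "j0.a0" available to the outer filter.
DataSource crossJoined = join(
    new TableDataSource(CalciteTests.DATASOURCE1),
    new QueryDataSource(
        Druids.newTimeseriesQueryBuilder()
            .dataSource(CalciteTests.DATASOURCE1)
            .intervals(querySegmentSpec(Filtration.eternity()))
            .filters(new LikeDimFilter("dim1", "%bc", null, null))
            .granularity(Granularities.ALL)
            .aggregators(new CountAggregatorFactory("a0"))
            .context(QUERY_CONTEXT_DEFAULT)
            .build()
    ),
    "j0.",
    "1", // constant-true condition, i.e. a cross join
    JoinType.INNER
);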
Use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class CalciteJoinQueryTest, method testJoinOuterGroupByAndSubqueryHasLimit.
@Test
public void testJoinOuterGroupByAndSubqueryHasLimit() throws Exception {
// Cannot vectorize JOIN operator.
cannotVectorize();
testQuery(
    "SELECT dim2, AVG(m2) FROM (SELECT * FROM foo AS t1 INNER JOIN foo AS t2 ON t1.m1 = t2.m1 LIMIT 10) AS t3 GROUP BY dim2",
    ImmutableList.of(
        GroupByQuery.builder()
            .setDataSource(
                newScanQueryBuilder()
                    .dataSource(
                        join(
                            new TableDataSource(CalciteTests.DATASOURCE1),
                            new QueryDataSource(
                                newScanQueryBuilder()
                                    .dataSource(CalciteTests.DATASOURCE1)
                                    .intervals(querySegmentSpec(Filtration.eternity()))
                                    .columns(ImmutableList.of("m1"))
                                    .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                                    .context(QUERY_CONTEXT_DEFAULT)
                                    .build()
                            ),
                            "j0.",
                            equalsCondition(DruidExpression.ofColumn(ColumnType.FLOAT, "m1"), DruidExpression.ofColumn(ColumnType.FLOAT, "j0.m1")),
                            JoinType.INNER
                        )
                    )
                    .intervals(querySegmentSpec(Filtration.eternity()))
                    .limit(10)
                    .columns("dim2", "m2")
                    .context(QUERY_CONTEXT_DEFAULT)
                    .build()
            )
            .setInterval(querySegmentSpec(Filtration.eternity()))
            .setDimensions(new DefaultDimensionSpec("dim2", "d0", ColumnType.STRING))
            .setGranularity(Granularities.ALL)
            .setAggregatorSpecs(
                useDefault
                    ? aggregators(new DoubleSumAggregatorFactory("a0:sum", "m2"), new CountAggregatorFactory("a0:count"))
                    : aggregators(
                        new DoubleSumAggregatorFactory("a0:sum", "m2"),
                        new FilteredAggregatorFactory(new CountAggregatorFactory("a0:count"), not(selector("m2", null, null)))
                    )
            )
            .setPostAggregatorSpecs(
                ImmutableList.of(
                    new ArithmeticPostAggregator(
                        "a0",
                        "quotient",
                        ImmutableList.of(new FieldAccessPostAggregator(null, "a0:sum"), new FieldAccessPostAggregator(null, "a0:count"))
                    )
                )
            )
            .setContext(QUERY_CONTEXT_DEFAULT)
            .build()
    ),
    NullHandling.sqlCompatible()
        ? ImmutableList.of(
            new Object[] { null, 4.0 },
            new Object[] { "", 3.0 },
            new Object[] { "a", 2.5 },
            new Object[] { "abc", 5.0 }
        )
        : ImmutableList.of(
            new Object[] { "", 3.6666666666666665 },
            new Object[] { "a", 2.5 },
            new Object[] { "abc", 5.0 }
        )
);
}
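QueryDataSource also shows up one level further out here: because the inner join carries a LIMIT, it is planned as a ScanQuery, and that scan in turn becomes the datasource of the outer GroupByQuery (the expected plan passes the scan query straight to setDataSource). The sketch below makes the wrapping explicit; limitedScan is a hypothetical variable standing in for the limit-10 scan built in the expected plan, and the explicit wrapping is assumed to be equivalent to the builder overload used above.

// Sketch only: the limited scan over the join is itself wrapped as a QueryDataSource and
// fed to the outer aggregation. "limitedScan" is a hypothetical variable standing in for
// the newScanQueryBuilder()...limit(10)...build() query from the expected plan.
GroupByQuery outer = GroupByQuery.builder()
    .setDataSource(new QueryDataSource(limitedScan))
    .setInterval(querySegmentSpec(Filtration.eternity()))
    .setGranularity(Granularities.ALL)
    .setDimensions(new DefaultDimensionSpec("dim2", "d0", ColumnType.STRING))
    .setContext(QUERY_CONTEXT_DEFAULT)
    .build();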
Aggregations