Search in sources :

Example 1 with CardinalityAggregatorFactory

use of org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory in project druid by druid-io.

the class CalciteQueryTest method testApproxCountDistinct.

/**
 * Exercises APPROX_COUNT_DISTINCT in several forms (plain column, filtered, extraction function,
 * expression, native hyperUnique column) and checks both the planned timeseries query and the
 * returned row under either null-handling mode.
 */
@Test
public void testApproxCountDistinct() throws Exception {
    // Cannot vectorize due to virtual columns.
    cannotVectorize();
    testQuery(
        "SELECT\n"
            + "  SUM(cnt),\n"
            // uppercase
            + "  APPROX_COUNT_DISTINCT(dim2),\n"
            // lowercase; also, filtered
            + "  approx_count_distinct(dim2) FILTER(WHERE dim2 <> ''),\n"
            // on extractionFn
            + "  APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n"
            // on expression
            + "  APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1) || 'x'),\n"
            // on native hyperUnique column
            + "  approx_count_distinct(unique_dim1)\n"
            + "FROM druid.foo",
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                // The "|| 'x'" expression is planned as virtual column v0.
                .virtualColumns(
                    expressionVirtualColumn(
                        "v0",
                        "concat(substring(\"dim2\", 0, 1),'x')",
                        ColumnType.STRING))
                .aggregators(
                    aggregators(
                        new LongSumAggregatorFactory("a0", "cnt"),
                        new CardinalityAggregatorFactory(
                            "a1",
                            null,
                            dimensions(new DefaultDimensionSpec("dim2", "dim2")),
                            false,
                            true),
                        new FilteredAggregatorFactory(
                            new CardinalityAggregatorFactory(
                                "a2",
                                null,
                                dimensions(new DefaultDimensionSpec("dim2", "dim2")),
                                false,
                                true),
                            not(selector("dim2", "", null))),
                        new CardinalityAggregatorFactory(
                            "a3",
                            null,
                            dimensions(
                                new ExtractionDimensionSpec(
                                    "dim2",
                                    "dim2",
                                    ColumnType.STRING,
                                    new SubstringDimExtractionFn(0, 1))),
                            false,
                            true),
                        new CardinalityAggregatorFactory(
                            "a4",
                            null,
                            dimensions(new DefaultDimensionSpec("v0", "v0", ColumnType.STRING)),
                            false,
                            true),
                        new HyperUniquesAggregatorFactory("a5", "unique_dim1", false, true)))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()),
        // Exactly one result row; only the two SUBSTRING-based counts depend on the null-handling mode.
        ImmutableList.of(
            NullHandling.replaceWithDefault()
                ? new Object[] {6L, 3L, 2L, 2L, 2L, 6L}
                : new Object[] {6L, 3L, 2L, 1L, 1L, 6L}));
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) SubstringDimExtractionFn(org.apache.druid.query.extraction.SubstringDimExtractionFn) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) CardinalityAggregatorFactory(org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Example 2 with CardinalityAggregatorFactory

use of org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory in project druid by druid-io.

the class CalciteQueryTest method testCountDistinctOfSubstring.

/**
 * Checks that COUNT(DISTINCT SUBSTRING(dim1, 1, 1)) with a "dim1 is not empty" filter is planned as
 * a timeseries query with a cardinality aggregator over an extraction dimension spec, and that it
 * returns a single row containing 4.
 */
@Test
public void testCountDistinctOfSubstring() throws Exception {
    // Cannot vectorize due to extraction dimension spec.
    cannotVectorize();
    testQuery(
        "SELECT COUNT(DISTINCT SUBSTRING(dim1, 1, 1)) FROM druid.foo WHERE dim1 <> ''",
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .filters(not(selector("dim1", "", null)))
                .granularity(Granularities.ALL)
                .aggregators(
                    aggregators(
                        new CardinalityAggregatorFactory(
                            "a0",
                            null,
                            dimensions(
                                new ExtractionDimensionSpec(
                                    "dim1",
                                    null,
                                    new SubstringDimExtractionFn(0, 1))),
                            false,
                            true)))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()),
        ImmutableList.of(new Object[] {4L}));
}
Also used : SubstringDimExtractionFn(org.apache.druid.query.extraction.SubstringDimExtractionFn) CardinalityAggregatorFactory(org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Example 3 with CardinalityAggregatorFactory

use of org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory in project druid by druid-io.

the class CalciteJoinQueryTest method testSemiJoinWithOuterTimeExtractAggregateWithOrderBy.

/**
 * Checks that a semi-join (dim2 IN subquery) combined with an outer COUNT(DISTINCT dim1) grouped and
 * ordered by EXTRACT(MONTH FROM __time) is planned as a group-by over an inner join against the
 * subquery, using a virtual column for the month and a numeric ascending limit spec.
 */
@Test
public void testSemiJoinWithOuterTimeExtractAggregateWithOrderBy() throws Exception {
    // Cannot vectorize due to virtual columns.
    cannotVectorize();
    testQuery(
        "SELECT COUNT(DISTINCT dim1), EXTRACT(MONTH FROM __time) FROM druid.foo\n"
            + " WHERE dim2 IN (\n"
            + "   SELECT dim2\n"
            + "   FROM druid.foo\n"
            + "   WHERE dim1 = 'def'\n"
            // End this line with a newline like the others, so the '' literal is not glued to GROUP BY.
            + " ) AND dim1 <> ''\n"
            + "GROUP BY EXTRACT(MONTH FROM __time)\n"
            + "ORDER BY EXTRACT(MONTH FROM __time)",
        ImmutableList.of(
            GroupByQuery.builder()
                .setDataSource(
                    join(
                        new TableDataSource(CalciteTests.DATASOURCE1),
                        // Right-hand side of the join: the IN (...) subquery as a group-by on dim2.
                        new QueryDataSource(
                            GroupByQuery.builder()
                                .setDataSource(CalciteTests.DATASOURCE1)
                                .setInterval(querySegmentSpec(Filtration.eternity()))
                                .setGranularity(Granularities.ALL)
                                .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0")))
                                .setDimFilter(selector("dim1", "def", null))
                                .setContext(QUERY_CONTEXT_DEFAULT)
                                .build()),
                        "j0.",
                        equalsCondition(makeColumnExpression("dim2"), makeColumnExpression("j0.d0")),
                        JoinType.INNER))
                .setVirtualColumns(
                    expressionVirtualColumn(
                        "v0",
                        "timestamp_extract(\"__time\",'MONTH','UTC')",
                        ColumnType.LONG))
                .setDimFilter(not(selector("dim1", "", null)))
                .setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.LONG)))
                .setInterval(querySegmentSpec(Filtration.eternity()))
                .setGranularity(Granularities.ALL)
                .setAggregatorSpecs(
                    aggregators(
                        new CardinalityAggregatorFactory(
                            "a0",
                            null,
                            ImmutableList.of(new DefaultDimensionSpec("dim1", "dim1", ColumnType.STRING)),
                            false,
                            true)))
                .setLimitSpec(
                    new DefaultLimitSpec(
                        ImmutableList.of(
                            new OrderByColumnSpec(
                                "d0",
                                OrderByColumnSpec.Direction.ASCENDING,
                                StringComparators.NUMERIC)),
                        Integer.MAX_VALUE))
                .setContext(QUERY_CONTEXT_DEFAULT)
                .build()),
        ImmutableList.of(new Object[] {1L, 1L}));
}
Also used : OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) TableDataSource(org.apache.druid.query.TableDataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) CardinalityAggregatorFactory(org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Test(org.junit.Test)

Example 4 with CardinalityAggregatorFactory

use of org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory in project druid by druid-io.

the class CalciteJoinQueryTest method testCountDistinctOfLookupUsingJoinOperator.

/**
 * Checks that COUNT(DISTINCT) over a lookup value reached through an explicit LEFT JOIN is planned
 * as a timeseries query on a join data source with a cardinality aggregator on the joined column.
 *
 * @param queryContext query context supplied by the parameter provider and passed to testQuery
 */
@Test
@Parameters(source = QueryContextForJoinProvider.class)
public void testCountDistinctOfLookupUsingJoinOperator(Map<String, Object> queryContext) throws Exception {
    // Cannot yet vectorize the JOIN operator.
    cannotVectorize();
    testQuery(
        "SELECT COUNT(DISTINCT lookyloo.v)\n"
            + "FROM foo LEFT JOIN lookup.lookyloo ON foo.dim1 = lookyloo.k",
        queryContext,
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(
                    join(
                        new TableDataSource(CalciteTests.DATASOURCE1),
                        new LookupDataSource("lookyloo"),
                        "j0.",
                        equalsCondition(makeColumnExpression("dim1"), makeColumnExpression("j0.k")),
                        JoinType.LEFT))
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                .aggregators(
                    aggregators(
                        new CardinalityAggregatorFactory(
                            "a0",
                            null,
                            ImmutableList.of(DefaultDimensionSpec.of("j0.v")),
                            false,
                            true)))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()),
        // Expected distinct count differs between the two null-handling modes.
        ImmutableList.of(new Object[] {NullHandling.replaceWithDefault() ? 2L : 1L}));
}
Also used : GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) TableDataSource(org.apache.druid.query.TableDataSource) LookupDataSource(org.apache.druid.query.LookupDataSource) CardinalityAggregatorFactory(org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory) Parameters(junitparams.Parameters) Test(org.junit.Test)

Example 5 with CardinalityAggregatorFactory

use of org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory in project druid by druid-io.

the class CalciteCorrelatedQueryTest method testCorrelatedSubquery.

/**
 * Checks planning of a correlated subquery (average of per-day distinct users for each country) as
 * a LEFT join between the visits table and a nested group-by, and checks the per-country results.
 *
 * @param queryContext query context supplied by the parameter provider; it is replaced by the
 *     result of withLeftDirectAccessEnabled before use
 */
@Test
@Parameters(source = QueryContextForJoinProvider.class)
public void testCorrelatedSubquery(Map<String, Object> queryContext) throws Exception {
    cannotVectorize();
    queryContext = withLeftDirectAccessEnabled(queryContext);
    testQuery(
        "select country, ANY_VALUE(\n"
            + "        select avg(\"users\") from (\n"
            + "            select floor(__time to day), count(distinct user) \"users\" from visits f where f.country = visits.country group by 1\n"
            + "        )\n"
            + "     ) as \"DAU\"\n"
            + "from visits \n"
            + "group by 1",
        queryContext,
        ImmutableList.of(
            GroupByQuery.builder()
                .setDataSource(
                    join(
                        new TableDataSource(CalciteTests.USERVISITDATASOURCE),
                        new QueryDataSource(
                            // Middle query: per-country sum and count over the innermost result.
                            GroupByQuery.builder()
                                .setDataSource(
                                    // Innermost query: distinct users per (day, country).
                                    GroupByQuery.builder()
                                        .setDataSource(CalciteTests.USERVISITDATASOURCE)
                                        .setQuerySegmentSpec(querySegmentSpec(Intervals.ETERNITY))
                                        .setVirtualColumns(
                                            new ExpressionVirtualColumn(
                                                "v0",
                                                "timestamp_floor(\"__time\",'P1D',null,'UTC')",
                                                ColumnType.LONG,
                                                TestExprMacroTable.INSTANCE))
                                        .setDimFilter(not(selector("country", null, null)))
                                        .setDimensions(
                                            new DefaultDimensionSpec("v0", "d0", ColumnType.LONG),
                                            new DefaultDimensionSpec("country", "d1"))
                                        .setAggregatorSpecs(
                                            new CardinalityAggregatorFactory(
                                                "a0:a",
                                                null,
                                                Collections.singletonList(new DefaultDimensionSpec("user", "user")),
                                                false,
                                                true))
                                        .setPostAggregatorSpecs(
                                            Collections.singletonList(
                                                new HyperUniqueFinalizingPostAggregator("a0", "a0:a")))
                                        .setContext(withTimestampResultContext(queryContext, "d0", Granularities.DAY))
                                        .setGranularity(new AllGranularity())
                                        .build())
                                .setQuerySegmentSpec(querySegmentSpec(Intervals.ETERNITY))
                                .setDimensions(new DefaultDimensionSpec("d1", "_d0"))
                                .setAggregatorSpecs(
                                    new LongSumAggregatorFactory("_a0:sum", "a0"),
                                    // NOTE(review): useDefault is declared outside this method; presumably it
                                    // reflects default-value null handling — confirm against the enclosing class.
                                    useDefault
                                        ? new CountAggregatorFactory("_a0:count")
                                        : new FilteredAggregatorFactory(
                                            new CountAggregatorFactory("_a0:count"),
                                            not(selector("a0", null, null))))
                                .setPostAggregatorSpecs(
                                    Collections.singletonList(
                                        new ArithmeticPostAggregator(
                                            "_a0",
                                            "quotient",
                                            Arrays.asList(
                                                new FieldAccessPostAggregator(null, "_a0:sum"),
                                                new FieldAccessPostAggregator(null, "_a0:count")))))
                                .setGranularity(new AllGranularity())
                                .setContext(queryContext)
                                .build()),
                        "j0.",
                        equalsCondition(makeColumnExpression("country"), makeColumnExpression("j0._d0")),
                        JoinType.LEFT))
                .setQuerySegmentSpec(querySegmentSpec(Intervals.ETERNITY))
                .setDimensions(new DefaultDimensionSpec("country", "d0"))
                .setAggregatorSpecs(new LongAnyAggregatorFactory("a0", "j0._a0"))
                .setGranularity(new AllGranularity())
                .setContext(queryContext)
                .build()),
        ImmutableList.of(
            new Object[] {"India", 2L},
            new Object[] {"USA", 1L},
            new Object[] {"canada", 3L}));
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) ArithmeticPostAggregator(org.apache.druid.query.aggregation.post.ArithmeticPostAggregator) FieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) AllGranularity(org.apache.druid.java.util.common.granularity.AllGranularity) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) HyperUniqueFinalizingPostAggregator(org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) TableDataSource(org.apache.druid.query.TableDataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) LongAnyAggregatorFactory(org.apache.druid.query.aggregation.any.LongAnyAggregatorFactory) CardinalityAggregatorFactory(org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory) Parameters(junitparams.Parameters) Test(org.junit.Test)

Aggregations

CardinalityAggregatorFactory (org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory): 22; Test (org.junit.Test): 21; DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 13; InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 13; ExtractionDimensionSpec (org.apache.druid.query.dimension.ExtractionDimensionSpec): 6; Result (org.apache.druid.query.Result): 5; LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 5; HyperUniquesAggregatorFactory (org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory): 5; QueryDataSource (org.apache.druid.query.QueryDataSource): 3; TableDataSource (org.apache.druid.query.TableDataSource): 3; ExpressionLambdaAggregatorFactory (org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory): 3; FilteredAggregatorFactory (org.apache.druid.query.aggregation.FilteredAggregatorFactory): 3; HyperUniqueFinalizingPostAggregator (org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator): 3; DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 3; SubstringDimExtractionFn (org.apache.druid.query.extraction.SubstringDimExtractionFn): 3; ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn): 3; Parameters (junitparams.Parameters): 2; GlobalTableDataSource (org.apache.druid.query.GlobalTableDataSource): 2; CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 2; LongAnyAggregatorFactory (org.apache.druid.query.aggregation.any.LongAnyAggregatorFactory): 2