Use of org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory in project druid by druid-io.
Class CalciteArraysQueryTest, method testArrayAgg.
/**
 * Checks the native query and results produced for {@code ARRAY_AGG(dim1)},
 * {@code ARRAY_AGG(DISTINCT dim1)} and a filtered {@code ARRAY_AGG(DISTINCT dim1)}.
 *
 * <p>The expected plan is one timeseries query over {@code CalciteTests.DATASOURCE1} with three
 * {@link ExpressionLambdaAggregatorFactory} aggregators; the third is wrapped in a
 * {@link FilteredAggregatorFactory} selecting {@code dim1 = 'shazbot'}. The expected row differs
 * depending on {@code useDefault}.
 */
@Test
public void testArrayAgg() throws Exception {
  cannotVectorize();

  final String sql =
      "SELECT ARRAY_AGG(dim1), ARRAY_AGG(DISTINCT dim1), ARRAY_AGG(DISTINCT dim1) FILTER(WHERE dim1 = 'shazbot') FROM foo WHERE dim1 is not null";

  // ARRAY_AGG(dim1): expressions built on array_append / array_concat.
  final ExpressionLambdaAggregatorFactory arrayAgg = new ExpressionLambdaAggregatorFactory(
      "a0",
      ImmutableSet.of("dim1"),
      "__acc",
      "ARRAY<STRING>[]",
      "ARRAY<STRING>[]",
      true,
      true,
      false,
      "array_append(\"__acc\", \"dim1\")",
      "array_concat(\"__acc\", \"a0\")",
      null,
      null,
      ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES,
      TestExprMacroTable.INSTANCE
  );

  // ARRAY_AGG(DISTINCT dim1): expressions built on array_set_add / array_set_add_all.
  final ExpressionLambdaAggregatorFactory arrayAggDistinct = new ExpressionLambdaAggregatorFactory(
      "a1",
      ImmutableSet.of("dim1"),
      "__acc",
      "ARRAY<STRING>[]",
      "ARRAY<STRING>[]",
      true,
      true,
      false,
      "array_set_add(\"__acc\", \"dim1\")",
      "array_set_add_all(\"__acc\", \"a1\")",
      null,
      null,
      ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES,
      TestExprMacroTable.INSTANCE
  );

  // Same distinct aggregation as "a1", but restricted to rows where dim1 is 'shazbot'.
  final FilteredAggregatorFactory arrayAggDistinctFiltered = new FilteredAggregatorFactory(
      new ExpressionLambdaAggregatorFactory(
          "a2",
          ImmutableSet.of("dim1"),
          "__acc",
          "ARRAY<STRING>[]",
          "ARRAY<STRING>[]",
          true,
          true,
          false,
          "array_set_add(\"__acc\", \"dim1\")",
          "array_set_add_all(\"__acc\", \"a2\")",
          null,
          null,
          ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES,
          TestExprMacroTable.INSTANCE
      ),
      selector("dim1", "shazbot", null)
  );

  final List<Object[]> expectedResults;
  if (useDefault) {
    expectedResults = ImmutableList.of(
        new Object[] {
            "[\"10.1\",\"2\",\"1\",\"def\",\"abc\"]",
            "[\"1\",\"10.1\",\"2\",\"abc\",\"def\"]",
            null
        }
    );
  } else {
    expectedResults = ImmutableList.of(
        new Object[] {
            "[\"\",\"10.1\",\"2\",\"1\",\"def\",\"abc\"]",
            "[\"\",\"1\",\"10.1\",\"2\",\"abc\",\"def\"]",
            null
        }
    );
  }

  testQuery(
      sql,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                .filters(not(selector("dim1", null, null)))
                .aggregators(aggregators(arrayAgg, arrayAggDistinct, arrayAggDistinctFiltered))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      expectedResults
  );
}
Use of org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory in project druid by druid-io.
Class CalciteArraysQueryTest, method testArrayAggGroupByArrayContainsSubquery.
/**
 * Checks the native query and results for a {@code GROUP BY} whose {@code WHERE} clause applies
 * {@code ARRAY_CONTAINS} to an {@code ARRAY_AGG(DISTINCT dim1)} scalar subquery.
 *
 * <p>The expected plan is a group-by query over a LEFT join between the base table and a timeseries
 * subquery (prefix {@code "j0."}), filtered by an {@code array_contains} expression on
 * {@code "j0.a0"}. The expected rows differ depending on {@code useDefault}.
 */
@Test
public void testArrayAggGroupByArrayContainsSubquery() throws Exception {
  cannotVectorize();

  final List<Object[]> expectedResults = useDefault
      ? ImmutableList.of(
          new Object[] { "", 3L },
          new Object[] { "a", 1L },
          new Object[] { "abc", 1L }
      )
      : ImmutableList.of(
          new Object[] { null, 2L },
          new Object[] { "", 1L },
          new Object[] { "a", 2L },
          new Object[] { "abc", 1L }
      );

  // Aggregator expected for the inner "SELECT ARRAY_AGG(DISTINCT dim1)".
  final ExpressionLambdaAggregatorFactory distinctDim1 = new ExpressionLambdaAggregatorFactory(
      "a0",
      ImmutableSet.of("dim1"),
      "__acc",
      "ARRAY<STRING>[]",
      "ARRAY<STRING>[]",
      true,
      true,
      false,
      "array_set_add(\"__acc\", \"dim1\")",
      "array_set_add_all(\"__acc\", \"a0\")",
      null,
      null,
      ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES,
      TestExprMacroTable.INSTANCE
  );

  // Right-hand side of the join: the scalar subquery as a timeseries query.
  final QueryDataSource distinctDim1Subquery = new QueryDataSource(
      Druids.newTimeseriesQueryBuilder()
            .dataSource(CalciteTests.DATASOURCE1)
            .intervals(querySegmentSpec(Filtration.eternity()))
            .granularity(Granularities.ALL)
            .filters(not(selector("dim1", null, null)))
            .aggregators(aggregators(distinctDim1))
            .context(QUERY_CONTEXT_DEFAULT)
            .build()
  );

  final ExpressionDimFilter containsFilter = new ExpressionDimFilter(
      "array_contains(\"j0.a0\",\"dim1\")",
      TestExprMacroTable.INSTANCE
  );

  final GroupByQuery expectedQuery =
      GroupByQuery.builder()
                  .setDataSource(
                      join(
                          new TableDataSource(CalciteTests.DATASOURCE1),
                          distinctDim1Subquery,
                          "j0.",
                          "1",
                          JoinType.LEFT,
                          null
                      )
                  )
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setDimFilter(containsFilter)
                  .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0")))
                  .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0")))
                  .setGranularity(Granularities.ALL)
                  .setLimitSpec(NoopLimitSpec.instance())
                  .setContext(QUERY_CONTEXT_DEFAULT)
                  .build();

  testQuery(
      "SELECT dim2, COUNT(*) FROM foo WHERE ARRAY_CONTAINS((SELECT ARRAY_AGG(DISTINCT dim1) FROM foo WHERE dim1 is not null), dim1) GROUP BY 1",
      ImmutableList.of(expectedQuery),
      expectedResults
  );
}
Use of org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory in project druid by druid-io.
Class CalciteArraysQueryTest, method testArrayAggArrayContainsSubquery.
/**
 * Checks the native query and results for a plain {@code SELECT dim1,dim2} whose {@code WHERE}
 * clause applies {@code ARRAY_CONTAINS} to an {@code ARRAY_AGG(DISTINCT dim1)} scalar subquery.
 *
 * <p>The expected plan is a scan query over a LEFT join between the base table and a timeseries
 * subquery (prefix {@code "j0."}), filtered by an {@code array_contains} expression on
 * {@code "j0.a0"}. The expected rows differ depending on {@code useDefault}.
 */
@Test
public void testArrayAggArrayContainsSubquery() throws Exception {
  cannotVectorize();

  final List<Object[]> expectedResults = useDefault
      ? ImmutableList.of(
          new Object[] { "10.1", "" },
          new Object[] { "2", "" },
          new Object[] { "1", "a" },
          new Object[] { "def", "abc" },
          new Object[] { "abc", "" }
      )
      : ImmutableList.of(
          new Object[] { "", "a" },
          new Object[] { "10.1", null },
          new Object[] { "2", "" },
          new Object[] { "1", "a" },
          new Object[] { "def", "abc" },
          new Object[] { "abc", null }
      );

  // Aggregator expected for the inner "SELECT ARRAY_AGG(DISTINCT dim1)".
  final ExpressionLambdaAggregatorFactory distinctDim1 = new ExpressionLambdaAggregatorFactory(
      "a0",
      ImmutableSet.of("dim1"),
      "__acc",
      "ARRAY<STRING>[]",
      "ARRAY<STRING>[]",
      true,
      true,
      false,
      "array_set_add(\"__acc\", \"dim1\")",
      "array_set_add_all(\"__acc\", \"a0\")",
      null,
      null,
      ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES,
      TestExprMacroTable.INSTANCE
  );

  // Right-hand side of the join: the scalar subquery as a timeseries query.
  final QueryDataSource distinctDim1Subquery = new QueryDataSource(
      Druids.newTimeseriesQueryBuilder()
            .dataSource(CalciteTests.DATASOURCE1)
            .intervals(querySegmentSpec(Filtration.eternity()))
            .granularity(Granularities.ALL)
            .filters(not(selector("dim1", null, null)))
            .aggregators(aggregators(distinctDim1))
            .context(QUERY_CONTEXT_DEFAULT)
            .build()
  );

  final ExpressionDimFilter containsFilter = new ExpressionDimFilter(
      "array_contains(\"j0.a0\",\"dim1\")",
      TestExprMacroTable.INSTANCE
  );

  final ScanQuery expectedQuery =
      Druids.newScanQueryBuilder()
            .dataSource(
                join(
                    new TableDataSource(CalciteTests.DATASOURCE1),
                    distinctDim1Subquery,
                    "j0.",
                    "1",
                    JoinType.LEFT,
                    null
                )
            )
            .intervals(querySegmentSpec(Filtration.eternity()))
            .filters(containsFilter)
            .columns("dim1", "dim2")
            .context(QUERY_CONTEXT_DEFAULT)
            .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
            .legacy(false)
            .build();

  testQuery(
      "SELECT dim1,dim2 FROM foo WHERE ARRAY_CONTAINS((SELECT ARRAY_AGG(DISTINCT dim1) FROM foo WHERE dim1 is not null), dim1)",
      ImmutableList.of(expectedQuery),
      expectedResults
  );
}
Use of org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory in project druid by druid-io.
Class CalciteArraysQueryTest, method testArrayAggMaxBytes.
/**
 * Checks the native query and results for {@code ARRAY_AGG(l1, 128)} and
 * {@code ARRAY_AGG(DISTINCT l1, 128)}.
 *
 * <p>The expected plan is one timeseries query over {@code CalciteTests.DATASOURCE3} with two
 * {@link ExpressionLambdaAggregatorFactory} aggregators, each constructed with
 * {@code new HumanReadableBytes(128)} in place of the default max size. The expected row differs
 * depending on {@code useDefault}.
 */
@Test
public void testArrayAggMaxBytes() throws Exception {
  cannotVectorize();

  // ARRAY_AGG(l1, 128): expressions built on array_append / array_concat.
  final ExpressionLambdaAggregatorFactory arrayAgg = new ExpressionLambdaAggregatorFactory(
      "a0",
      ImmutableSet.of("l1"),
      "__acc",
      "ARRAY<LONG>[]",
      "ARRAY<LONG>[]",
      true,
      true,
      false,
      "array_append(\"__acc\", \"l1\")",
      "array_concat(\"__acc\", \"a0\")",
      null,
      null,
      new HumanReadableBytes(128),
      TestExprMacroTable.INSTANCE
  );

  // ARRAY_AGG(DISTINCT l1, 128): expressions built on array_set_add / array_set_add_all.
  final ExpressionLambdaAggregatorFactory arrayAggDistinct = new ExpressionLambdaAggregatorFactory(
      "a1",
      ImmutableSet.of("l1"),
      "__acc",
      "ARRAY<LONG>[]",
      "ARRAY<LONG>[]",
      true,
      true,
      false,
      "array_set_add(\"__acc\", \"l1\")",
      "array_set_add_all(\"__acc\", \"a1\")",
      null,
      null,
      new HumanReadableBytes(128),
      TestExprMacroTable.INSTANCE
  );

  final List<Object[]> expectedResults;
  if (useDefault) {
    expectedResults = ImmutableList.of(
        new Object[] { "[7,325323,0,0,0,0]", "[0,7,325323]" }
    );
  } else {
    expectedResults = ImmutableList.of(
        new Object[] { "[7,325323,0,null,null,null]", "[null,0,7,325323]" }
    );
  }

  testQuery(
      "SELECT ARRAY_AGG(l1, 128), ARRAY_AGG(DISTINCT l1, 128) FROM numfoo",
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                .aggregators(aggregators(arrayAgg, arrayAggDistinct))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      expectedResults
  );
}
Use of org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory in project druid by druid-io.
Class TimeseriesQueryRunnerTest, method testTimeseriesWithExpressionAggregator.
/**
 * Runs a day-granularity timeseries query with four {@link ExpressionLambdaAggregatorFactory}
 * aggregators ({@code diy_count}, {@code diy_sum}, {@code diy_decomposed_sum} and
 * {@code array_agg_distinct}) and compares the output with the expected per-day values for
 * 2011-04-01 and 2011-04-02.
 */
@Test
public void testTimeseriesWithExpressionAggregator() {
  // expression agg cannot vectorize
  cannotVectorize();

  final ExpressionLambdaAggregatorFactory diyCount = new ExpressionLambdaAggregatorFactory(
      "diy_count",
      ImmutableSet.of(),
      null,
      "0",
      null,
      false,
      false,
      false,
      "__acc + 1",
      "__acc + diy_count",
      null,
      null,
      null,
      TestExprMacroTable.INSTANCE
  );

  final ExpressionLambdaAggregatorFactory diySum = new ExpressionLambdaAggregatorFactory(
      "diy_sum",
      ImmutableSet.of("index"),
      null,
      "0.0",
      null,
      null,
      false,
      false,
      "__acc + index",
      null,
      null,
      null,
      null,
      TestExprMacroTable.INSTANCE
  );

  final ExpressionLambdaAggregatorFactory diyDecomposedSum = new ExpressionLambdaAggregatorFactory(
      "diy_decomposed_sum",
      ImmutableSet.of("index"),
      null,
      "0.0",
      "ARRAY<DOUBLE>[]",
      null,
      false,
      false,
      "__acc + index",
      "array_concat(__acc, diy_decomposed_sum)",
      null,
      "fold((x, acc) -> x + acc, o, 0.0)",
      null,
      TestExprMacroTable.INSTANCE
  );

  final ExpressionLambdaAggregatorFactory arrayAggDistinct = new ExpressionLambdaAggregatorFactory(
      "array_agg_distinct",
      ImmutableSet.of(QueryRunnerTestHelper.MARKET_DIMENSION),
      "acc",
      "[]",
      null,
      null,
      true,
      false,
      "array_set_add(acc, market)",
      "array_set_add_all(acc, array_agg_distinct)",
      null,
      null,
      null,
      TestExprMacroTable.INSTANCE
  );

  final TimeseriesQuery query =
      Druids.newTimeseriesQueryBuilder()
            .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
            .granularity(QueryRunnerTestHelper.DAY_GRAN)
            .intervals(QueryRunnerTestHelper.FIRST_TO_THIRD)
            .aggregators(Arrays.asList(diyCount, diySum, diyDecomposedSum, arrayAggDistinct))
            .descending(descending)
            .context(makeContext())
            .build();

  final Result<TimeseriesResultValue> firstDay = new Result<>(
      DateTimes.of("2011-04-01"),
      new TimeseriesResultValue(
          ImmutableMap.of(
              "diy_count", 13L,
              "diy_sum", 6626.151569,
              "diy_decomposed_sum", 6626.151569,
              "array_agg_distinct", new String[] { "spot", "total_market", "upfront" }
          )
      )
  );

  final Result<TimeseriesResultValue> secondDay = new Result<>(
      DateTimes.of("2011-04-02"),
      new TimeseriesResultValue(
          ImmutableMap.of(
              "diy_count", 13L,
              "diy_sum", 5833.209718,
              "diy_decomposed_sum", 5833.209718,
              "array_agg_distinct", new String[] { "spot", "total_market", "upfront" }
          )
      )
  );

  final List<Result<TimeseriesResultValue>> expectedResults = Arrays.asList(firstDay, secondDay);

  final Iterable<Result<TimeseriesResultValue>> results = runner.run(QueryPlus.wrap(query)).toList();
  assertExpectedResults(expectedResults, results);
}
Aggregations