Use of org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory in project druid by druid-io.
The class TopNQueryRunnerTest, method testExpressionAggregatorComplex.
@Test
public void testExpressionAggregatorComplex()
{
  // sorted by array hyperunique expression
  TopNQuery query = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .granularity(QueryRunnerTestHelper.ALL_GRAN)
      .dimension(QueryRunnerTestHelper.MARKET_DIMENSION)
      .metric("carExpr")
      .threshold(4)
      .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
      .aggregators(
          ImmutableList.of(
              new CardinalityAggregatorFactory(
                  "car",
                  ImmutableList.of(new DefaultDimensionSpec("quality", "quality")),
                  false
              ),
              new ExpressionLambdaAggregatorFactory(
                  "carExpr",
                  ImmutableSet.of("quality"),
                  null,
                  "hyper_unique()",
                  null,
                  null,
                  false,
                  false,
                  "hyper_unique_add(quality, __acc)",
                  "hyper_unique_add(carExpr, __acc)",
                  null,
                  "hyper_unique_estimate(o)",
                  null,
                  TestExprMacroTable.INSTANCE
              )
          )
      )
      .build();
  List<Result<TopNResultValue>> expectedResults = Collections.singletonList(
      new Result<>(
          DateTimes.of("2011-01-12T00:00:00.000Z"),
          new TopNResultValue(
              Arrays.<Map<String, Object>>asList(
                  ImmutableMap.<String, Object>builder()
                              .put(QueryRunnerTestHelper.MARKET_DIMENSION, "spot")
                              .put("car", 9.019833517963864)
                              .put("carExpr", 9.019833517963864)
                              .build(),
                  ImmutableMap.<String, Object>builder()
                              .put(QueryRunnerTestHelper.MARKET_DIMENSION, "total_market")
                              .put("car", 2.000977198748901)
                              .put("carExpr", 2.000977198748901)
                              .build(),
                  ImmutableMap.<String, Object>builder()
                              .put(QueryRunnerTestHelper.MARKET_DIMENSION, "upfront")
                              .put("car", 2.000977198748901)
                              .put("carExpr", 2.000977198748901)
                              .build()
              )
          )
      )
  );
  assertExpectedResults(expectedResults, query);
}
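The long positional argument list makes these constructions hard to read inline. Below is a rough key, a sketch that assumes the parameter order implied by the calls in these tests (verify against the ExpressionLambdaAggregatorFactory constructor in your Druid version); the holder class name is hypothetical.

import com.google.common.collect.ImmutableSet;
import org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory;
import org.apache.druid.query.expression.TestExprMacroTable;

final class ExpressionLambdaArgsSketch
{
  // Hypothetical holder for illustration only; argument annotations are assumptions.
  static final ExpressionLambdaAggregatorFactory FACTORY = new ExpressionLambdaAggregatorFactory(
      "carExpr",                          // name: output column of the aggregator
      ImmutableSet.of("quality"),         // fields: input columns the expressions may read
      null,                               // accumulatorIdentifier: null falls back to "__acc"
      "hyper_unique()",                   // initialValue: accumulator for the per-segment fold phase
      null,                               // initialCombineValue: null reuses initialValue
      null,                               // isNullUnlessAggregated: null takes the default
      false,                              // shouldAggregateNullInputs
      false,                              // shouldCombineAggregateNullInputs
      "hyper_unique_add(quality, __acc)", // fold: folds one input row into the accumulator
      "hyper_unique_add(carExpr, __acc)", // combine: merges partial results, read via the output name
      null,                               // compare: null derives an ordering from the result type
      "hyper_unique_estimate(o)",         // finalize: maps the accumulated value "o" to the output
      null,                               // maxSizeBytes: null takes the default size limit
      TestExprMacroTable.INSTANCE         // macro table used to parse the expressions
  );
}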
Use of org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory in project druid by druid-io.
The class TimeseriesQueryRunnerTest, method testTimeseriesWithExpressionAggregatorTooBig.
@Test
public void testTimeseriesWithExpressionAggregatorTooBig()
{
  // expression agg cannot vectorize
  cannotVectorize();
  if (!vectorize) {
    // size bytes when it overshoots varies slightly between algorithms
    expectedException.expectMessage("Exceeded memory usage when aggregating type [ARRAY<STRING>]");
  }
  TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
      .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .granularity(Granularities.DAY)
      .intervals(QueryRunnerTestHelper.FIRST_TO_THIRD)
      .aggregators(
          Collections.singletonList(
              new ExpressionLambdaAggregatorFactory(
                  "array_agg_distinct",
                  ImmutableSet.of(QueryRunnerTestHelper.MARKET_DIMENSION),
                  "acc",
                  "[]",
                  null,
                  null,
                  true,
                  false,
                  "array_set_add(acc, market)",
                  "array_set_add_all(acc, array_agg_distinct)",
                  null,
                  null,
                  HumanReadableBytes.valueOf(10),
                  TestExprMacroTable.INSTANCE
              )
          )
      )
      .descending(descending)
      .context(makeContext())
      .build();
  runner.run(QueryPlus.wrap(query)).toList();
}
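The HumanReadableBytes.valueOf(10) argument is the point of this test: it caps the aggregator's accumulator at 10 bytes, so accumulating an ARRAY<STRING> of market values overflows and the engine raises the expected error. A minimal sketch of the same aggregator with a roomier cap follows; the 1024-byte figure and the holder class are illustrative assumptions, not project defaults.

import com.google.common.collect.ImmutableSet;
import org.apache.druid.java.util.common.HumanReadableBytes;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory;
import org.apache.druid.query.expression.TestExprMacroTable;

final class RoomyArrayAggSketch
{
  // Same aggregator as the test, but with an (illustrative) 1 KB budget so it can
  // hold the distinct "market" values instead of overflowing at 10 bytes.
  static final AggregatorFactory ROOMY = new ExpressionLambdaAggregatorFactory(
      "array_agg_distinct",
      ImmutableSet.of("market"),
      "acc",                                        // custom accumulator name instead of the "__acc" default
      "[]",                                         // start from an empty array
      null,
      null,
      true,
      false,
      "array_set_add(acc, market)",                 // fold: set-add keeps only distinct values
      "array_set_add_all(acc, array_agg_distinct)", // combine: merge partial sets via the output name
      null,
      null,
      HumanReadableBytes.valueOf(1024),             // per-aggregator size cap; the test uses 10 to force overflow
      TestExprMacroTable.INSTANCE
  );
}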
Use of org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory in project druid by druid-io.
The class GroupByTimeseriesQueryRunnerTest, method testTimeseriesWithExpressionAggregatorTooBig.
@Override
public void testTimeseriesWithExpressionAggregatorTooBig()
{
  cannotVectorize();
  if (!vectorize) {
    // size bytes when it overshoots varies slightly between algorithms
    expectedException.expectMessage("Unable to serialize [ARRAY<STRING>]");
  }
  TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
      .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .granularity(Granularities.DAY)
      .intervals(QueryRunnerTestHelper.FIRST_TO_THIRD)
      .aggregators(
          Collections.singletonList(
              new ExpressionLambdaAggregatorFactory(
                  "array_agg_distinct",
                  ImmutableSet.of(QueryRunnerTestHelper.MARKET_DIMENSION),
                  "acc",
                  "[]",
                  null,
                  null,
                  true,
                  false,
                  "array_set_add(acc, market)",
                  "array_set_add_all(acc, array_agg_distinct)",
                  null,
                  null,
                  HumanReadableBytes.valueOf(10),
                  TestExprMacroTable.INSTANCE
              )
          )
      )
      .descending(descending)
      .context(makeContext())
      .build();
  runner.run(QueryPlus.wrap(query)).toList();
}
Use of org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory in project druid by druid-io.
The class CalciteQueryTest, method testStringAgg.
@Test
public void testStringAgg() throws Exception
{
  cannotVectorize();
  testQuery(
      "SELECT STRING_AGG(dim1,','), STRING_AGG(DISTINCT dim1, ','), STRING_AGG(DISTINCT dim1,',') FILTER(WHERE dim1 = 'shazbot') FROM foo WHERE dim1 is not null",
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                .filters(not(selector("dim1", null, null)))
                .aggregators(
                    aggregators(
                        new FilteredAggregatorFactory(
                            new ExpressionLambdaAggregatorFactory(
                                "a0",
                                ImmutableSet.of("dim1"),
                                "__acc",
                                "[]",
                                "[]",
                                true,
                                false,
                                false,
                                "array_append(\"__acc\", \"dim1\")",
                                "array_concat(\"__acc\", \"a0\")",
                                null,
                                "if(array_length(o) == 0, null, array_to_string(o, ','))",
                                ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES,
                                TestExprMacroTable.INSTANCE
                            ),
                            not(selector("dim1", null, null))
                        ),
                        new FilteredAggregatorFactory(
                            new ExpressionLambdaAggregatorFactory(
                                "a1",
                                ImmutableSet.of("dim1"),
                                "__acc",
                                "[]",
                                "[]",
                                true,
                                false,
                                false,
                                "array_set_add(\"__acc\", \"dim1\")",
                                "array_set_add_all(\"__acc\", \"a1\")",
                                null,
                                "if(array_length(o) == 0, null, array_to_string(o, ','))",
                                ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES,
                                TestExprMacroTable.INSTANCE
                            ),
                            not(selector("dim1", null, null))
                        ),
                        new FilteredAggregatorFactory(
                            new ExpressionLambdaAggregatorFactory(
                                "a2",
                                ImmutableSet.of("dim1"),
                                "__acc",
                                "[]",
                                "[]",
                                true,
                                false,
                                false,
                                "array_set_add(\"__acc\", \"dim1\")",
                                "array_set_add_all(\"__acc\", \"a2\")",
                                null,
                                "if(array_length(o) == 0, null, array_to_string(o, ','))",
                                ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES,
                                TestExprMacroTable.INSTANCE
                            ),
                            and(not(selector("dim1", null, null)), selector("dim1", "shazbot", null))
                        )
                    )
                )
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      ImmutableList.of(
          useDefault
          ? new Object[]{"10.1,2,1,def,abc", "1,10.1,2,abc,def", ""}
          : new Object[]{",10.1,2,1,def,abc", ",1,10.1,2,abc,def", null}
      )
  );
}
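The three aggregators differ only in their fold/combine pair and wrapping filter: plain STRING_AGG appends with duplicates, the DISTINCT variants accumulate a set, and the FILTER clause becomes an extra condition on the FilteredAggregatorFactory. A side-by-side sketch of the expression strings, using the generated aggregator names a0/a1 from the test above (the holder class is hypothetical):

final class StringAggFoldSketch
{
  // STRING_AGG(dim1, ','): keep duplicates by appending each row to the accumulator array.
  static final String PLAIN_FOLD       = "array_append(\"__acc\", \"dim1\")";
  static final String PLAIN_COMBINE    = "array_concat(\"__acc\", \"a0\")";

  // STRING_AGG(DISTINCT dim1, ','): set semantics drop duplicate values.
  static final String DISTINCT_FOLD    = "array_set_add(\"__acc\", \"dim1\")";
  static final String DISTINCT_COMBINE = "array_set_add_all(\"__acc\", \"a1\")";
}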
Use of org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory in project druid by druid-io.
The class CalciteQueryTest, method testStringAggExpression.
@Test
public void testStringAggExpression() throws Exception
{
  cannotVectorize();
  testQuery(
      "SELECT STRING_AGG(DISTINCT CONCAT(dim1, dim2), ','), STRING_AGG(DISTINCT CONCAT(dim1, dim2), CONCAT('|', '|')) FROM foo",
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                .virtualColumns(expressionVirtualColumn("v0", "concat(\"dim1\",\"dim2\")", ColumnType.STRING))
                .aggregators(
                    aggregators(
                        new FilteredAggregatorFactory(
                            new ExpressionLambdaAggregatorFactory(
                                "a0",
                                ImmutableSet.of("v0"),
                                "__acc",
                                "[]",
                                "[]",
                                true,
                                false,
                                false,
                                "array_set_add(\"__acc\", \"v0\")",
                                "array_set_add_all(\"__acc\", \"a0\")",
                                null,
                                "if(array_length(o) == 0, null, array_to_string(o, ','))",
                                ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES,
                                TestExprMacroTable.INSTANCE
                            ),
                            not(selector("v0", null, null))
                        ),
                        new FilteredAggregatorFactory(
                            new ExpressionLambdaAggregatorFactory(
                                "a1",
                                ImmutableSet.of("v0"),
                                "__acc",
                                "[]",
                                "[]",
                                true,
                                false,
                                false,
                                "array_set_add(\"__acc\", \"v0\")",
                                "array_set_add_all(\"__acc\", \"a1\")",
                                null,
                                "if(array_length(o) == 0, null, array_to_string(o, '||'))",
                                ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES,
                                TestExprMacroTable.INSTANCE
                            ),
                            not(selector("v0", null, null))
                        )
                    )
                )
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      ImmutableList.of(
          useDefault
          ? new Object[]{"10.1,1a,2,a,abc,defabc", "10.1||1a||2||a||abc||defabc"}
          : new Object[]{"1a,2,a,defabc", "1a||2||a||defabc"}
      )
  );
}
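Note where the separator ends up: it appears only in the finalize expression, and the SQL planner constant-folded CONCAT('|', '|') to the literal '||' before the native query was built. A sketch of the two finalize expressions from the test above, where o is the accumulated array (the holder class is hypothetical):

final class StringAggFinalizeSketch
{
  // Finalize for STRING_AGG(..., ','): join the accumulated array "o" with the
  // separator, returning null when the accumulator is empty.
  static final String FINALIZE_COMMA = "if(array_length(o) == 0, null, array_to_string(o, ','))";

  // CONCAT('|', '|') was constant-folded to the literal '||' during planning.
  static final String FINALIZE_PIPES = "if(array_length(o) == 0, null, array_to_string(o, '||'))";
}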