use of org.apache.druid.query.aggregation.FilteredAggregatorFactory in project druid by druid-io.
the class BaseFilterTest method selectCountUsingVectorizedFilteredAggregator.
private long selectCountUsingVectorizedFilteredAggregator(final DimFilter dimFilter) {
Preconditions.checkState(makeFilter(dimFilter).canVectorizeMatcher(adapter), "Cannot vectorize filter: %s", dimFilter);
try (final VectorCursor cursor = makeVectorCursor(null)) {
final FilteredAggregatorFactory aggregatorFactory = new FilteredAggregatorFactory(new CountAggregatorFactory("count"), maybeOptimize(dimFilter));
final VectorAggregator aggregator = aggregatorFactory.factorizeVector(cursor.getColumnSelectorFactory());
final ByteBuffer buf = ByteBuffer.allocate(aggregatorFactory.getMaxIntermediateSizeWithNulls() * 2);
// Use two slots: one for each form of aggregate.
aggregator.init(buf, 0);
aggregator.init(buf, aggregatorFactory.getMaxIntermediateSizeWithNulls());
for (; !cursor.isDone(); cursor.advance()) {
aggregator.aggregate(buf, 0, 0, cursor.getCurrentVectorSize());
final int[] positions = new int[cursor.getCurrentVectorSize()];
Arrays.fill(positions, aggregatorFactory.getMaxIntermediateSizeWithNulls());
final int[] allRows = new int[cursor.getCurrentVectorSize()];
for (int i = 0; i < allRows.length; i++) {
allRows[i] = i;
}
aggregator.aggregate(buf, cursor.getCurrentVectorSize(), positions, allRows, 0);
}
final long val1 = (long) aggregator.get(buf, 0);
final long val2 = (long) aggregator.get(buf, aggregatorFactory.getMaxIntermediateSizeWithNulls());
if (val1 != val2) {
throw new ISE("Oh no, val1[%d] != val2[%d]", val1, val2);
}
return val1;
}
}
use of org.apache.druid.query.aggregation.FilteredAggregatorFactory in project druid by druid-io.
the class CalciteQueryTest method testApproxCountDistinct.
@Test
public void testApproxCountDistinct() throws Exception {
// Cannot vectorize due to virtual columns.
cannotVectorize();
testQuery("SELECT\n" + " SUM(cnt),\n" + // uppercase
" APPROX_COUNT_DISTINCT(dim2),\n" + // lowercase; also, filtered
" approx_count_distinct(dim2) FILTER(WHERE dim2 <> ''),\n" + // on extractionFn
" APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n" + // on expression
" APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1) || 'x'),\n" + // on native hyperUnique column
" approx_count_distinct(unique_dim1)\n" + "FROM druid.foo", ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(querySegmentSpec(Filtration.eternity())).granularity(Granularities.ALL).virtualColumns(expressionVirtualColumn("v0", "concat(substring(\"dim2\", 0, 1),'x')", ColumnType.STRING)).aggregators(aggregators(new LongSumAggregatorFactory("a0", "cnt"), new CardinalityAggregatorFactory("a1", null, dimensions(new DefaultDimensionSpec("dim2", "dim2")), false, true), new FilteredAggregatorFactory(new CardinalityAggregatorFactory("a2", null, dimensions(new DefaultDimensionSpec("dim2", "dim2")), false, true), not(selector("dim2", "", null))), new CardinalityAggregatorFactory("a3", null, dimensions(new ExtractionDimensionSpec("dim2", "dim2", ColumnType.STRING, new SubstringDimExtractionFn(0, 1))), false, true), new CardinalityAggregatorFactory("a4", null, dimensions(new DefaultDimensionSpec("v0", "v0", ColumnType.STRING)), false, true), new HyperUniquesAggregatorFactory("a5", "unique_dim1", false, true))).context(QUERY_CONTEXT_DEFAULT).build()), NullHandling.replaceWithDefault() ? ImmutableList.of(new Object[] { 6L, 3L, 2L, 2L, 2L, 6L }) : ImmutableList.of(new Object[] { 6L, 3L, 2L, 1L, 1L, 6L }));
}
use of org.apache.druid.query.aggregation.FilteredAggregatorFactory in project druid by druid-io.
the class CalciteQueryTest method testCountAndAverageByConstantVirtualColumn.
@Test
public void testCountAndAverageByConstantVirtualColumn() throws Exception {
List<VirtualColumn> virtualColumns;
List<AggregatorFactory> aggs;
if (useDefault) {
aggs = ImmutableList.of(new FilteredAggregatorFactory(new CountAggregatorFactory("a0"), not(selector("v0", null, null))), new LongSumAggregatorFactory("a1:sum", null, "325323", TestExprMacroTable.INSTANCE), new CountAggregatorFactory("a1:count"));
virtualColumns = ImmutableList.of(expressionVirtualColumn("v0", "'10.1'", ColumnType.STRING));
} else {
aggs = ImmutableList.of(new FilteredAggregatorFactory(new CountAggregatorFactory("a0"), not(selector("v0", null, null))), new LongSumAggregatorFactory("a1:sum", "v1"), new FilteredAggregatorFactory(new CountAggregatorFactory("a1:count"), not(selector("v1", null, null))));
virtualColumns = ImmutableList.of(expressionVirtualColumn("v0", "'10.1'", ColumnType.STRING), expressionVirtualColumn("v1", "325323", ColumnType.LONG));
}
testQuery("SELECT dim5, COUNT(dim1), AVG(l1) FROM druid.numfoo WHERE dim1 = '10.1' AND l1 = 325323 GROUP BY dim5", ImmutableList.of(GroupByQuery.builder().setDataSource(CalciteTests.DATASOURCE3).setInterval(querySegmentSpec(Filtration.eternity())).setDimFilter(and(selector("dim1", "10.1", null), selector("l1", "325323", null))).setGranularity(Granularities.ALL).setVirtualColumns(VirtualColumns.create(virtualColumns)).setDimensions(new DefaultDimensionSpec("dim5", "_d0", ColumnType.STRING)).setAggregatorSpecs(aggs).setPostAggregatorSpecs(ImmutableList.of(new ArithmeticPostAggregator("a1", "quotient", ImmutableList.of(new FieldAccessPostAggregator(null, "a1:sum"), new FieldAccessPostAggregator(null, "a1:count"))))).setContext(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { "ab", 1L, 325323L }));
}
use of org.apache.druid.query.aggregation.FilteredAggregatorFactory in project druid by druid-io.
the class CalciteQueryTest method testGroupByAggregatorDefaultValuesNonVectorized.
@Test
public void testGroupByAggregatorDefaultValuesNonVectorized() throws Exception {
cannotVectorize();
testQuery("SELECT\n" + " dim2,\n" + " ANY_VALUE(dim1, 1024) FILTER(WHERE dim1 = 'nonexistent'),\n" + " ANY_VALUE(l1) FILTER(WHERE dim1 = 'nonexistent'),\n" + " EARLIEST(dim1, 1024) FILTER(WHERE dim1 = 'nonexistent'),\n" + " EARLIEST(l1) FILTER(WHERE dim1 = 'nonexistent'),\n" + " LATEST(dim1, 1024) FILTER(WHERE dim1 = 'nonexistent'),\n" + " LATEST(l1) FILTER(WHERE dim1 = 'nonexistent'),\n" + " ARRAY_AGG(DISTINCT dim3) FILTER(WHERE dim1 = 'nonexistent'),\n" + " STRING_AGG(DISTINCT dim3, '|') FILTER(WHERE dim1 = 'nonexistent'),\n" + " BIT_AND(l1) FILTER(WHERE dim1 = 'nonexistent'),\n" + " BIT_OR(l1) FILTER(WHERE dim1 = 'nonexistent'),\n" + " BIT_XOR(l1) FILTER(WHERE dim1 = 'nonexistent')\n" + "FROM druid.numfoo WHERE dim2 = 'a' GROUP BY dim2", ImmutableList.of(GroupByQuery.builder().setDataSource(CalciteTests.DATASOURCE3).setInterval(querySegmentSpec(Filtration.eternity())).setDimFilter(selector("dim2", "a", null)).setGranularity(Granularities.ALL).setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)).setDimensions(new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING)).setAggregatorSpecs(aggregators(new FilteredAggregatorFactory(new StringAnyAggregatorFactory("a0", "dim1", 1024), selector("dim1", "nonexistent", null)), new FilteredAggregatorFactory(new LongAnyAggregatorFactory("a1", "l1"), selector("dim1", "nonexistent", null)), new FilteredAggregatorFactory(new StringFirstAggregatorFactory("a2", "dim1", null, 1024), selector("dim1", "nonexistent", null)), new FilteredAggregatorFactory(new LongFirstAggregatorFactory("a3", "l1", null), selector("dim1", "nonexistent", null)), new FilteredAggregatorFactory(new StringLastAggregatorFactory("a4", "dim1", null, 1024), selector("dim1", "nonexistent", null)), new FilteredAggregatorFactory(new LongLastAggregatorFactory("a5", "l1", null), selector("dim1", "nonexistent", null)), new FilteredAggregatorFactory(new ExpressionLambdaAggregatorFactory("a6", ImmutableSet.of("dim3"), "__acc", "ARRAY<STRING>[]", "ARRAY<STRING>[]", true, true, false, "array_set_add(\"__acc\", \"dim3\")", "array_set_add_all(\"__acc\", \"a6\")", null, null, ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE), selector("dim1", "nonexistent", null)), new FilteredAggregatorFactory(new ExpressionLambdaAggregatorFactory("a7", ImmutableSet.of("dim3"), "__acc", "[]", "[]", true, false, false, "array_set_add(\"__acc\", \"dim3\")", "array_set_add_all(\"__acc\", \"a7\")", null, "if(array_length(o) == 0, null, array_to_string(o, '|'))", ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE), and(not(selector("dim3", null, null)), selector("dim1", "nonexistent", null))), new FilteredAggregatorFactory(new ExpressionLambdaAggregatorFactory("a8", ImmutableSet.of("l1"), "__acc", "0", "0", NullHandling.sqlCompatible(), false, false, "bitwiseAnd(\"__acc\", \"l1\")", "bitwiseAnd(\"__acc\", \"a8\")", null, null, ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE), and(not(selector("l1", null, null)), selector("dim1", "nonexistent", null))), new FilteredAggregatorFactory(new ExpressionLambdaAggregatorFactory("a9", ImmutableSet.of("l1"), "__acc", "0", "0", NullHandling.sqlCompatible(), false, false, "bitwiseOr(\"__acc\", \"l1\")", "bitwiseOr(\"__acc\", \"a9\")", null, null, ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE), and(not(selector("l1", null, null)), selector("dim1", "nonexistent", null))), new FilteredAggregatorFactory(new ExpressionLambdaAggregatorFactory("a10", ImmutableSet.of("l1"), "__acc", "0", "0", NullHandling.sqlCompatible(), false, false, "bitwiseXor(\"__acc\", \"l1\")", "bitwiseXor(\"__acc\", \"a10\")", null, null, ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE), and(not(selector("l1", null, null)), selector("dim1", "nonexistent", null))))).setContext(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(useDefault ? new Object[] { "a", "", 0L, "", 0L, "", 0L, null, "", 0L, 0L, 0L } : new Object[] { "a", null, null, null, null, null, null, null, null, null, null, null }));
}
use of org.apache.druid.query.aggregation.FilteredAggregatorFactory in project druid by druid-io.
the class CalciteQueryTest method testStringAggMaxBytes.
@Test
public void testStringAggMaxBytes() throws Exception {
cannotVectorize();
testQuery("SELECT STRING_AGG(l1, ',', 128), STRING_AGG(DISTINCT l1, ',', 128) FROM numfoo", ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE3).intervals(querySegmentSpec(Filtration.eternity())).granularity(Granularities.ALL).aggregators(aggregators(new FilteredAggregatorFactory(new ExpressionLambdaAggregatorFactory("a0", ImmutableSet.of("l1"), "__acc", "[]", "[]", true, false, false, "array_append(\"__acc\", \"l1\")", "array_concat(\"__acc\", \"a0\")", null, "if(array_length(o) == 0, null, array_to_string(o, ','))", new HumanReadableBytes(128), TestExprMacroTable.INSTANCE), not(selector("l1", null, null))), new FilteredAggregatorFactory(new ExpressionLambdaAggregatorFactory("a1", ImmutableSet.of("l1"), "__acc", "[]", "[]", true, false, false, "array_set_add(\"__acc\", \"l1\")", "array_set_add_all(\"__acc\", \"a1\")", null, "if(array_length(o) == 0, null, array_to_string(o, ','))", new HumanReadableBytes(128), TestExprMacroTable.INSTANCE), not(selector("l1", null, null))))).context(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(useDefault ? new Object[] { "7,325323,0,0,0,0", "0,325323,7" } : new Object[] { "7,325323,0", "0,325323,7" }));
}
Aggregations