Use of org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory in project druid by druid-io.
The class ThetaSketchSqlAggregatorTest, method testThetaSketchPostAggs.
/**
 * Verifies planning and results for the theta sketch SQL post-aggregation functions: plain estimate,
 * estimate with error bounds, and the INTERSECT / UNION / NOT set operations (including an INTERSECT
 * with an explicit size argument and an INTERSECT nested inside another INTERSECT).
 */
@Test
public void testThetaSketchPostAggs() throws Exception {
  final String sql =
      "SELECT\n"
      + " SUM(cnt),\n"
      + " theta_sketch_estimate(DS_THETA(dim2)),\n"
      + " theta_sketch_estimate(DS_THETA(CONCAT(dim2, 'hello'))),\n"
      + " theta_sketch_estimate_with_error_bounds(DS_THETA(dim2), 10),\n"
      + " THETA_SKETCH_INTERSECT(DS_THETA(dim2), DS_THETA(dim1)),\n"
      + " THETA_SKETCH_UNION(DS_THETA(dim2), DS_THETA(dim1)),\n"
      + " THETA_SKETCH_NOT(DS_THETA(dim2), DS_THETA(dim1)),\n"
      + " THETA_SKETCH_INTERSECT(32768, DS_THETA(dim2), DS_THETA(dim1)),\n"
      + " theta_sketch_estimate(THETA_SKETCH_INTERSECT(THETA_SKETCH_INTERSECT(DS_THETA(dim2), DS_THETA(dim1)), DS_THETA(dim2)))\n"
      + "FROM druid.foo";

  // The expected row is selected by null-handling mode; the two rows below currently hold the same values.
  final List<Object[]> expectedResults = NullHandling.replaceWithDefault()
      ? ImmutableList.of(
          new Object[] {
              6L,
              2.0d,
              3.0d,
              "{\"estimate\":2.0,\"highBound\":2.0,\"lowBound\":2.0,\"numStdDev\":10}",
              "\"AQMDAAA6zJOQxkPsNomrZQ==\"",
              "\"AgMDAAAazJMGAAAAAACAP1XTBztMIcMJ+HOoBBne1zKQxkPsNomrZUeWbJt3n+VpF8EdUoUHAXvxsLkOSE0lfQ==\"",
              "\"AQMDAAA6zJMXwR1ShQcBew==\"",
              "\"AQMDAAA6zJOQxkPsNomrZQ==\"",
              1.0d
          }
      )
      : ImmutableList.of(
          new Object[] {
              6L,
              2.0d,
              3.0d,
              "{\"estimate\":2.0,\"highBound\":2.0,\"lowBound\":2.0,\"numStdDev\":10}",
              "\"AQMDAAA6zJOQxkPsNomrZQ==\"",
              "\"AgMDAAAazJMGAAAAAACAP1XTBztMIcMJ+HOoBBne1zKQxkPsNomrZUeWbJt3n+VpF8EdUoUHAXvxsLkOSE0lfQ==\"",
              "\"AQMDAAA6zJMXwR1ShQcBew==\"",
              "\"AQMDAAA6zJOQxkPsNomrZQ==\"",
              1.0d
          }
      );

  testQuery(
      sql,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                // v0 backs the DS_THETA(CONCAT(dim2, 'hello')) argument.
                .virtualColumns(
                    new ExpressionVirtualColumn(
                        "v0",
                        "concat(\"dim2\",'hello')",
                        ColumnType.STRING,
                        TestExprMacroTable.INSTANCE
                    )
                )
                .aggregators(
                    ImmutableList.of(
                        new LongSumAggregatorFactory("a0", "cnt"),
                        new SketchMergeAggregatorFactory("a1", "dim2", null, null, null, null),
                        new SketchMergeAggregatorFactory("a2", "v0", null, null, null, null),
                        new SketchMergeAggregatorFactory("a3", "dim1", null, null, null, null)
                    )
                )
                .postAggregators(
                    new SketchEstimatePostAggregator("p1", new FieldAccessPostAggregator("p0", "a1"), null),
                    new SketchEstimatePostAggregator("p3", new FieldAccessPostAggregator("p2", "a2"), null),
                    new SketchEstimatePostAggregator("p5", new FieldAccessPostAggregator("p4", "a1"), 10),
                    new SketchSetPostAggregator(
                        "p8",
                        "INTERSECT",
                        null,
                        ImmutableList.of(
                            new FieldAccessPostAggregator("p6", "a1"),
                            new FieldAccessPostAggregator("p7", "a3")
                        )
                    ),
                    new SketchSetPostAggregator(
                        "p11",
                        "UNION",
                        null,
                        ImmutableList.of(
                            new FieldAccessPostAggregator("p9", "a1"),
                            new FieldAccessPostAggregator("p10", "a3")
                        )
                    ),
                    new SketchSetPostAggregator(
                        "p14",
                        "NOT",
                        null,
                        ImmutableList.of(
                            new FieldAccessPostAggregator("p12", "a1"),
                            new FieldAccessPostAggregator("p13", "a3")
                        )
                    ),
                    new SketchSetPostAggregator(
                        "p17",
                        "INTERSECT",
                        32768,
                        ImmutableList.of(
                            new FieldAccessPostAggregator("p15", "a1"),
                            new FieldAccessPostAggregator("p16", "a3")
                        )
                    ),
                    new SketchEstimatePostAggregator(
                        "p23",
                        new SketchSetPostAggregator(
                            "p22",
                            "INTERSECT",
                            null,
                            ImmutableList.of(
                                new SketchSetPostAggregator(
                                    "p20",
                                    "INTERSECT",
                                    null,
                                    ImmutableList.of(
                                        new FieldAccessPostAggregator("p18", "a1"),
                                        new FieldAccessPostAggregator("p19", "a3")
                                    )
                                ),
                                new FieldAccessPostAggregator("p21", "a1")
                            )
                        ),
                        null
                    )
                )
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      expectedResults
  );
}
Use of org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory in project druid by druid-io.
The class ThetaSketchSqlAggregatorTest, method testAvgDailyCountDistinctThetaSketch.
/**
 * Verifies that AVG over a per-day APPROX_COUNT_DISTINCT_DS_THETA subquery plans as an outer groupBy
 * (sum divided by count) reading from an inner day-granularity timeseries query.
 */
@Test
public void testAvgDailyCountDistinctThetaSketch() throws Exception {
  // Vectorization is off: the outer query runs on an inlined data source, which cannot be vectorized.
  cannotVectorize();

  final String sql =
      "SELECT\n"
      + " AVG(u)\n"
      + "FROM (SELECT FLOOR(__time TO DAY), APPROX_COUNT_DISTINCT_DS_THETA(cnt) AS u FROM druid.foo GROUP BY 1)";

  final List<Object[]> expectedResults = ImmutableList.of(new Object[] { 1L });

  // Inner query: one finalized theta sketch estimate of "cnt" per day.
  final QueryDataSource dailyDistinctCounts = new QueryDataSource(
      Druids.newTimeseriesQueryBuilder()
            .dataSource(CalciteTests.DATASOURCE1)
            .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
            .granularity(new PeriodGranularity(Period.days(1), null, DateTimeZone.UTC))
            .aggregators(
                Collections.singletonList(
                    new SketchMergeAggregatorFactory("a0:a", "cnt", null, null, null, null)
                )
            )
            .postAggregators(ImmutableList.of(new FinalizingFieldAccessPostAggregator("a0", "a0:a")))
            .context(TIMESERIES_CONTEXT_BY_GRAN)
            .build()
            .withOverriddenContext(
                BaseCalciteQueryTest.getTimeseriesContextWithFloorTime(TIMESERIES_CONTEXT_BY_GRAN, "d0")
            )
  );

  testQuery(
      sql,
      ImmutableList.of(
          GroupByQuery.builder()
                      .setDataSource(dailyDistinctCounts)
                      .setInterval(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                      .setGranularity(Granularities.ALL)
                      // When nulls are not replaced with defaults, the count is filtered to rows
                      // where "a0" is not null.
                      .setAggregatorSpecs(
                          NullHandling.replaceWithDefault()
                          ? Arrays.asList(
                              new LongSumAggregatorFactory("_a0:sum", "a0"),
                              new CountAggregatorFactory("_a0:count")
                          )
                          : Arrays.asList(
                              new LongSumAggregatorFactory("_a0:sum", "a0"),
                              new FilteredAggregatorFactory(
                                  new CountAggregatorFactory("_a0:count"),
                                  BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("a0", null, null))
                              )
                          )
                      )
                      .setPostAggregatorSpecs(
                          ImmutableList.of(
                              new ArithmeticPostAggregator(
                                  "_a0",
                                  "quotient",
                                  ImmutableList.of(
                                      new FieldAccessPostAggregator(null, "_a0:sum"),
                                      new FieldAccessPostAggregator(null, "_a0:count")
                                  )
                              )
                          )
                      )
                      .setContext(QUERY_CONTEXT_DEFAULT)
                      .build()
      ),
      expectedResults
  );
}
Use of org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory in project druid by druid-io.
The class ThetaSketchSqlAggregatorTest, method testApproxCountDistinctThetaSketch.
/**
 * Verifies that APPROX_COUNT_DISTINCT_DS_THETA, APPROX_COUNT_DISTINCT and COUNT(DISTINCT ...) over plain
 * columns, filtered columns, expressions, and the {@code thetasketch_dim1} column all plan to
 * {@link SketchMergeAggregatorFactory} aggregators on a single timeseries query.
 */
@Test
public void testApproxCountDistinctThetaSketch() throws Exception {
  // SUBSTRING prevents vectorization of this query.
  cannotVectorize();

  final String query =
      "SELECT\n"
      + " SUM(cnt),\n"
      // unfiltered, directly on a column
      + " APPROX_COUNT_DISTINCT_DS_THETA(dim2),\n"
      // same column, with a FILTER clause
      + " APPROX_COUNT_DISTINCT_DS_THETA(dim2) FILTER(WHERE dim2 <> ''),\n"
      // on a SUBSTRING expression, using APPROX_COUNT_DISTINCT
      + " APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n"
      // on a concatenation expression, using COUNT DISTINCT
      + " COUNT(DISTINCT SUBSTRING(dim2, 1, 1) || 'x'),\n"
      // on the thetasketch_dim1 column, with an explicit size argument
      + " APPROX_COUNT_DISTINCT_DS_THETA(thetasketch_dim1, 32768),\n"
      // on the thetasketch_dim1 column, with no size argument
      + " APPROX_COUNT_DISTINCT_DS_THETA(thetasketch_dim1)\n"
      + "FROM druid.foo";

  // Only the fifth value (the COUNT DISTINCT on the concatenation) differs between the two modes.
  final List<Object[]> expectedResults = NullHandling.replaceWithDefault()
      ? ImmutableList.of(new Object[] { 6L, 2L, 2L, 1L, 2L, 5L, 5L })
      : ImmutableList.of(new Object[] { 6L, 2L, 2L, 1L, 1L, 5L, 5L });

  testQuery(
      query,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .virtualColumns(
                    new ExpressionVirtualColumn(
                        "v0",
                        "substring(\"dim2\", 0, 1)",
                        ColumnType.STRING,
                        TestExprMacroTable.INSTANCE
                    ),
                    new ExpressionVirtualColumn(
                        "v1",
                        "concat(substring(\"dim2\", 0, 1),'x')",
                        ColumnType.STRING,
                        TestExprMacroTable.INSTANCE
                    )
                )
                .aggregators(
                    ImmutableList.of(
                        new LongSumAggregatorFactory("a0", "cnt"),
                        new SketchMergeAggregatorFactory("a1", "dim2", null, null, null, null),
                        new FilteredAggregatorFactory(
                            new SketchMergeAggregatorFactory("a2", "dim2", null, null, null, null),
                            BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("dim2", "", null))
                        ),
                        new SketchMergeAggregatorFactory("a3", "v0", null, null, null, null),
                        new SketchMergeAggregatorFactory("a4", "v1", null, null, null, null),
                        new SketchMergeAggregatorFactory("a5", "thetasketch_dim1", 32768, null, null, null),
                        new SketchMergeAggregatorFactory("a6", "thetasketch_dim1", null, null, null, null)
                    )
                )
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      expectedResults
  );
}
Use of org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory in project druid by druid-io.
The class ThetaSketchBaseSqlAggregator, method toDruidAggregation.
/**
 * Translates a SQL theta sketch aggregate call into an {@link Aggregation} backed by a
 * {@link SketchMergeAggregatorFactory}.
 *
 * <p>The first call argument is the column or expression to sketch. An optional second argument is the
 * sketch size and must be a numeric literal; when it is absent,
 * {@link SketchAggregatorFactory#DEFAULT_MAX_SKETCH_SIZE} is used.
 *
 * @param plannerContext        planner context used to translate the argument expression
 * @param rowSignature          signature of the rows being aggregated
 * @param virtualColumnRegistry registry used to obtain a virtual column when the argument is an expression
 * @param rexBuilder            not used by this implementation
 * @param name                  output name of the aggregation
 * @param aggregateCall         the SQL aggregate call being translated
 * @param project               projection between the row signature and the aggregate call arguments
 * @param existingAggregations  not used by this implementation
 * @param finalizeAggregations  when true, the aggregator gets a prefixed name derived from {@code name}
 *
 * @return the aggregation, or null if the column argument cannot be translated to a Druid expression or
 * the sketch size argument is not a numeric literal
 *
 * @throws ISE if the SQL type of the argument has no corresponding Druid column type
 */
@Nullable
@Override
public Aggregation toDruidAggregation(PlannerContext plannerContext, RowSignature rowSignature, VirtualColumnRegistry virtualColumnRegistry, RexBuilder rexBuilder, String name, AggregateCall aggregateCall, Project project, List<Aggregation> existingAggregations, boolean finalizeAggregations) {
  // Don't use Aggregations.getArgumentsForSimpleAggregator, since it won't let us use direct column access
  // for string columns.
  final RexNode columnRexNode = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0));
  final DruidExpression columnArg = Expressions.toDruidExpression(plannerContext, rowSignature, columnRexNode);
  if (columnArg == null) {
    return null;
  }

  final int sketchSize;
  if (aggregateCall.getArgList().size() >= 2) {
    final RexNode sketchSizeArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
    if (!sketchSizeArg.isA(SqlKind.LITERAL)) {
      // The sketch size must be a literal in order to plan.
      return null;
    }
    final Object sketchSizeValue = RexLiteral.value(sketchSizeArg);
    if (!(sketchSizeValue instanceof Number)) {
      // A NULL or non-numeric literal cannot be used as a sketch size. Decline to plan instead of
      // failing with a NullPointerException or ClassCastException on the cast below.
      return null;
    }
    sketchSize = ((Number) sketchSizeValue).intValue();
  } else {
    sketchSize = SketchAggregatorFactory.DEFAULT_MAX_SKETCH_SIZE;
  }

  final AggregatorFactory aggregatorFactory;
  final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name;

  if (columnArg.isDirectColumnAccess() && rowSignature.getColumnType(columnArg.getDirectColumn()).map(type -> type.is(ValueType.COMPLEX)).orElse(false)) {
    // The argument is a direct reference to a COMPLEX-typed column: aggregate that column as-is.
    aggregatorFactory = new SketchMergeAggregatorFactory(aggregatorName, columnArg.getDirectColumn(), sketchSize, null, null, null);
  } else {
    final RelDataType dataType = columnRexNode.getType();
    final ColumnType inputType = Calcites.getColumnTypeForRelDataType(dataType);
    if (inputType == null) {
      throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", dataType.getSqlTypeName(), aggregatorName);
    }

    final DimensionSpec dimensionSpec;
    if (columnArg.isDirectColumnAccess()) {
      dimensionSpec = columnArg.getSimpleExtraction().toDimensionSpec(null, inputType);
    } else {
      // Not a plain column reference: aggregate over a virtual column that evaluates the expression.
      String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(columnArg, dataType);
      dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType);
    }

    aggregatorFactory = new SketchMergeAggregatorFactory(aggregatorName, dimensionSpec.getDimension(), sketchSize, null, null, null);
  }

  return toAggregation(name, finalizeAggregations, aggregatorFactory);
}
Use of org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory in project druid by druid-io.
The class ThetaSketchSqlAggregatorTest, method testThetaSketchPostAggsPostSort.
/**
 * Verifies planning of THETA_SKETCH_ESTIMATE applied to the output of a subquery that itself orders by
 * a theta sketch estimate and applies a LIMIT.
 */
@Test
public void testThetaSketchPostAggsPostSort() throws Exception {
  final String innerSql = "SELECT DS_THETA(dim2) as y FROM druid.foo ORDER BY THETA_SKETCH_ESTIMATE(DS_THETA(dim2)) DESC LIMIT 10";
  final String outerSql = StringUtils.format("SELECT THETA_SKETCH_ESTIMATE(y) from (%s)", innerSql);

  final List<Object[]> expectedResults = ImmutableList.of(new Object[] { 2.0d });

  testQuery(
      outerSql,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .aggregators(
                    ImmutableList.of(
                        new SketchMergeAggregatorFactory("a0", "dim2", null, null, null, null)
                    )
                )
                .postAggregators(
                    new FieldAccessPostAggregator("p0", "a0"),
                    new SketchEstimatePostAggregator("p2", new FieldAccessPostAggregator("p1", "a0"), null),
                    // "s1" estimates from "p0", the field-access post-aggregator declared above.
                    new SketchEstimatePostAggregator("s1", new FieldAccessPostAggregator("s0", "p0"), null)
                )
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      expectedResults
  );
}
Aggregations