use of org.apache.druid.query.aggregation.datasketches.hll.HllSketchMergeAggregatorFactory in project druid by druid-io.
the class HllSketchBaseSqlAggregator method toDruidAggregation.
@Nullable
@Override
public Aggregation toDruidAggregation(PlannerContext plannerContext, RowSignature rowSignature, VirtualColumnRegistry virtualColumnRegistry, RexBuilder rexBuilder, String name, AggregateCall aggregateCall, Project project, List<Aggregation> existingAggregations, boolean finalizeAggregations) {
// Don't use Aggregations.getArgumentsForSimpleAggregator, since it won't let us use direct column access
// for string columns.
final RexNode columnRexNode = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0));
final DruidExpression columnArg = Expressions.toDruidExpression(plannerContext, rowSignature, columnRexNode);
if (columnArg == null) {
return null;
}
final int logK;
if (aggregateCall.getArgList().size() >= 2) {
final RexNode logKarg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
if (!logKarg.isA(SqlKind.LITERAL)) {
// logK must be a literal in order to plan.
return null;
}
logK = ((Number) RexLiteral.value(logKarg)).intValue();
} else {
logK = HllSketchAggregatorFactory.DEFAULT_LG_K;
}
final String tgtHllType;
if (aggregateCall.getArgList().size() >= 3) {
final RexNode tgtHllTypeArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(2));
if (!tgtHllTypeArg.isA(SqlKind.LITERAL)) {
// tgtHllType must be a literal in order to plan.
return null;
}
tgtHllType = RexLiteral.stringValue(tgtHllTypeArg);
} else {
tgtHllType = HllSketchAggregatorFactory.DEFAULT_TGT_HLL_TYPE.name();
}
final AggregatorFactory aggregatorFactory;
final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name;
if (columnArg.isDirectColumnAccess() && rowSignature.getColumnType(columnArg.getDirectColumn()).map(type -> type.is(ValueType.COMPLEX)).orElse(false)) {
aggregatorFactory = new HllSketchMergeAggregatorFactory(aggregatorName, columnArg.getDirectColumn(), logK, tgtHllType, ROUND);
} else {
final RelDataType dataType = columnRexNode.getType();
final ColumnType inputType = Calcites.getColumnTypeForRelDataType(dataType);
if (inputType == null) {
throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", dataType.getSqlTypeName(), aggregatorName);
}
final DimensionSpec dimensionSpec;
if (columnArg.isDirectColumnAccess()) {
dimensionSpec = columnArg.getSimpleExtraction().toDimensionSpec(null, inputType);
} else {
String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(columnArg, dataType);
dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType);
}
aggregatorFactory = new HllSketchBuildAggregatorFactory(aggregatorName, dimensionSpec.getDimension(), logK, tgtHllType, ROUND);
}
return toAggregation(name, finalizeAggregations, aggregatorFactory);
}
use of org.apache.druid.query.aggregation.datasketches.hll.HllSketchMergeAggregatorFactory in project druid by druid-io.
the class HllSketchSqlAggregatorTest method testApproxCountDistinctHllSketch.
@Test
public void testApproxCountDistinctHllSketch() throws Exception {
// Can't vectorize due to SUBSTRING expression.
cannotVectorize();
final String sql = "SELECT\n" + " SUM(cnt),\n" + // uppercase
" APPROX_COUNT_DISTINCT_DS_HLL(dim2),\n" + // lowercase; also, filtered
" APPROX_COUNT_DISTINCT_DS_HLL(dim2) FILTER(WHERE dim2 <> ''),\n" + // on extractionFn, using generic A.C.D.
" APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n" + // on expression, using COUNT DISTINCT
" COUNT(DISTINCT SUBSTRING(dim2, 1, 1) || 'x'),\n" + // on native HllSketch column
" APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1, 21, 'HLL_8'),\n" + // on native HllSketch column
" APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1)\n" + "FROM druid.foo";
final List<Object[]> expectedResults;
if (NullHandling.replaceWithDefault()) {
expectedResults = ImmutableList.of(new Object[] { 6L, 2L, 2L, 1L, 2L, 5L, 5L });
} else {
expectedResults = ImmutableList.of(new Object[] { 6L, 2L, 2L, 1L, 1L, 5L, 5L });
}
testQuery(sql, ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).virtualColumns(new ExpressionVirtualColumn("v0", "substring(\"dim2\", 0, 1)", ColumnType.STRING, TestExprMacroTable.INSTANCE), new ExpressionVirtualColumn("v1", "concat(substring(\"dim2\", 0, 1),'x')", ColumnType.STRING, TestExprMacroTable.INSTANCE)).aggregators(ImmutableList.of(new LongSumAggregatorFactory("a0", "cnt"), new HllSketchBuildAggregatorFactory("a1", "dim2", null, null, ROUND), new FilteredAggregatorFactory(new HllSketchBuildAggregatorFactory("a2", "dim2", null, null, ROUND), BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("dim2", "", null))), new HllSketchBuildAggregatorFactory("a3", "v0", null, null, ROUND), new HllSketchBuildAggregatorFactory("a4", "v1", null, null, ROUND), new HllSketchMergeAggregatorFactory("a5", "hllsketch_dim1", 21, "HLL_8", ROUND), new HllSketchMergeAggregatorFactory("a6", "hllsketch_dim1", null, null, ROUND))).context(QUERY_CONTEXT_DEFAULT).build()), expectedResults);
}
Aggregations