Search in sources :

Example 1 with HllSketchMergeAggregatorFactory

use of org.apache.druid.query.aggregation.datasketches.hll.HllSketchMergeAggregatorFactory in project druid by druid-io.

In class HllSketchBaseSqlAggregator, method toDruidAggregation:

/**
 * Translates a SQL aggregate call into an HLL sketch {@link Aggregation}.
 *
 * <p>Argument 0 is the input column or expression. Optional argument 1 is logK and optional
 * argument 2 is the target HLL type; both must be literals. When the input is a direct reference
 * to a column whose type is {@code ValueType.COMPLEX}, a {@link HllSketchMergeAggregatorFactory}
 * is created; in every other case a {@link HllSketchBuildAggregatorFactory} is created, going
 * through a virtual column when the input is not a direct column access.
 *
 * <p>{@code rexBuilder} and {@code existingAggregations} are not consulted by this method.
 *
 * @param name                 output name; when {@code finalizeAggregations} is true the factory
 *                             itself is named {@code Calcites.makePrefixedName(name, "a")}
 * @param finalizeAggregations forwarded to {@code toAggregation}
 * @return the aggregation, or {@code null} when the input expression cannot be translated or when
 *         logK / tgtHllType is supplied but is not a literal
 * @throws ISE if the input's SQL type has no corresponding Druid column type
 */
@Nullable
@Override
public Aggregation toDruidAggregation(
        PlannerContext plannerContext,
        RowSignature rowSignature,
        VirtualColumnRegistry virtualColumnRegistry,
        RexBuilder rexBuilder,
        String name,
        AggregateCall aggregateCall,
        Project project,
        List<Aggregation> existingAggregations,
        boolean finalizeAggregations
) {
    final List<Integer> argList = aggregateCall.getArgList();

    // Don't use Aggregations.getArgumentsForSimpleAggregator, since it won't let us use direct column access
    // for string columns.
    final RexNode inputRexNode = Expressions.fromFieldAccess(rowSignature, project, argList.get(0));
    final DruidExpression inputExpression = Expressions.toDruidExpression(plannerContext, rowSignature, inputRexNode);
    if (inputExpression == null) {
        return null;
    }

    // Optional second argument: logK. Start from the default and override when supplied.
    int logK = HllSketchAggregatorFactory.DEFAULT_LG_K;
    if (argList.size() >= 2) {
        final RexNode logKNode = Expressions.fromFieldAccess(rowSignature, project, argList.get(1));
        if (!logKNode.isA(SqlKind.LITERAL)) {
            // logK must be a literal in order to plan.
            return null;
        }
        logK = ((Number) RexLiteral.value(logKNode)).intValue();
    }

    // Optional third argument: target HLL type. Same default-then-override approach.
    String tgtHllType = HllSketchAggregatorFactory.DEFAULT_TGT_HLL_TYPE.name();
    if (argList.size() >= 3) {
        final RexNode tgtHllTypeNode = Expressions.fromFieldAccess(rowSignature, project, argList.get(2));
        if (!tgtHllTypeNode.isA(SqlKind.LITERAL)) {
            // tgtHllType must be a literal in order to plan.
            return null;
        }
        tgtHllType = RexLiteral.stringValue(tgtHllTypeNode);
    }

    final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name;
    final boolean directColumn = inputExpression.isDirectColumnAccess();

    // A direct reference to a COMPLEX-typed column is merged rather than built.
    final boolean complexInput = directColumn
            && rowSignature.getColumnType(inputExpression.getDirectColumn())
                           .map(type -> type.is(ValueType.COMPLEX))
                           .orElse(false);
    if (complexInput) {
        return toAggregation(
                name,
                finalizeAggregations,
                new HllSketchMergeAggregatorFactory(aggregatorName, inputExpression.getDirectColumn(), logK, tgtHllType, ROUND)
        );
    }

    // Otherwise build a sketch from the input values; that requires a Druid type for the input.
    final RelDataType relDataType = inputRexNode.getType();
    final ColumnType druidType = Calcites.getColumnTypeForRelDataType(relDataType);
    if (druidType == null) {
        throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", relDataType.getSqlTypeName(), aggregatorName);
    }

    // Direct column accesses are read as-is; any other expression goes through a virtual column.
    final DimensionSpec inputDimension = directColumn
            ? inputExpression.getSimpleExtraction().toDimensionSpec(null, druidType)
            : new DefaultDimensionSpec(
                    virtualColumnRegistry.getOrCreateVirtualColumnForExpression(inputExpression, relDataType),
                    null,
                    druidType
            );

    return toAggregation(
            name,
            finalizeAggregations,
            new HllSketchBuildAggregatorFactory(aggregatorName, inputDimension.getDimension(), logK, tgtHllType, ROUND)
    );
}
Also used : DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnType(org.apache.druid.segment.column.ColumnType) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) HllSketchMergeAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchMergeAggregatorFactory) RelDataType(org.apache.calcite.rel.type.RelDataType) ISE(org.apache.druid.java.util.common.ISE) HllSketchAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) HllSketchBuildAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)

Example 2 with HllSketchMergeAggregatorFactory

use of org.apache.druid.query.aggregation.datasketches.hll.HllSketchMergeAggregatorFactory in project druid by druid-io.

In class HllSketchSqlAggregatorTest, method testApproxCountDistinctHllSketch:

/**
 * Plans a single SQL statement that mixes several distinct-count forms and checks both the
 * native timeseries query it is expected to produce and the expected result row.
 */
@Test
public void testApproxCountDistinctHllSketch() throws Exception {
    // Can't vectorize due to SUBSTRING expression.
    cannotVectorize();

    final String sql =
            "SELECT\n"
            + "  SUM(cnt),\n"
            // DS_HLL on column dim2
            + "  APPROX_COUNT_DISTINCT_DS_HLL(dim2),\n"
            // DS_HLL on column dim2, filtered
            + "  APPROX_COUNT_DISTINCT_DS_HLL(dim2) FILTER(WHERE dim2 <> ''),\n"
            // on an expression, using generic A.C.D.
            + "  APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n"
            // on an expression, using COUNT DISTINCT
            + "  COUNT(DISTINCT SUBSTRING(dim2, 1, 1) || 'x'),\n"
            // on native HllSketch column, explicit logK and target type
            + "  APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1, 21, 'HLL_8'),\n"
            // on native HllSketch column, defaults
            + "  APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1)\n"
            + "FROM druid.foo";

    // Only the COUNT(DISTINCT ... || 'x') column differs between the two null-handling modes.
    final long concatDistinctCount = NullHandling.replaceWithDefault() ? 2L : 1L;
    final List<Object[]> expectedResults = ImmutableList.of(
            new Object[] { 6L, 2L, 2L, 1L, concatDistinctCount, 5L, 5L }
    );

    // Virtual columns expected for the two SUBSTRING-based select items.
    final ExpressionVirtualColumn substringColumn = new ExpressionVirtualColumn(
            "v0",
            "substring(\"dim2\", 0, 1)",
            ColumnType.STRING,
            TestExprMacroTable.INSTANCE
    );
    final ExpressionVirtualColumn concatColumn = new ExpressionVirtualColumn(
            "v1",
            "concat(substring(\"dim2\", 0, 1),'x')",
            ColumnType.STRING,
            TestExprMacroTable.INSTANCE
    );

    testQuery(
            sql,
            ImmutableList.of(
                    Druids.newTimeseriesQueryBuilder()
                          .dataSource(CalciteTests.DATASOURCE1)
                          .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                          .granularity(Granularities.ALL)
                          .virtualColumns(substringColumn, concatColumn)
                          .aggregators(
                                  ImmutableList.of(
                                          new LongSumAggregatorFactory("a0", "cnt"),
                                          new HllSketchBuildAggregatorFactory("a1", "dim2", null, null, ROUND),
                                          new FilteredAggregatorFactory(
                                                  new HllSketchBuildAggregatorFactory("a2", "dim2", null, null, ROUND),
                                                  BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("dim2", "", null))
                                          ),
                                          new HllSketchBuildAggregatorFactory("a3", "v0", null, null, ROUND),
                                          new HllSketchBuildAggregatorFactory("a4", "v1", null, null, ROUND),
                                          new HllSketchMergeAggregatorFactory("a5", "hllsketch_dim1", 21, "HLL_8", ROUND),
                                          new HllSketchMergeAggregatorFactory("a6", "hllsketch_dim1", null, null, ROUND)
                                  )
                          )
                          .context(QUERY_CONTEXT_DEFAULT)
                          .build()
            ),
            expectedResults
    );
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) HllSketchMergeAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchMergeAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) HllSketchBuildAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Aggregations

HllSketchBuildAggregatorFactory (org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory)2 HllSketchMergeAggregatorFactory (org.apache.druid.query.aggregation.datasketches.hll.HllSketchMergeAggregatorFactory)2 Nullable (javax.annotation.Nullable)1 RelDataType (org.apache.calcite.rel.type.RelDataType)1 RexNode (org.apache.calcite.rex.RexNode)1 ISE (org.apache.druid.java.util.common.ISE)1 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)1 FilteredAggregatorFactory (org.apache.druid.query.aggregation.FilteredAggregatorFactory)1 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)1 HllSketchAggregatorFactory (org.apache.druid.query.aggregation.datasketches.hll.HllSketchAggregatorFactory)1 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)1 DimensionSpec (org.apache.druid.query.dimension.DimensionSpec)1 MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec)1 ColumnType (org.apache.druid.segment.column.ColumnType)1 ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn)1 BaseCalciteQueryTest (org.apache.druid.sql.calcite.BaseCalciteQueryTest)1 DruidExpression (org.apache.druid.sql.calcite.expression.DruidExpression)1 Test (org.junit.Test)1