Search in sources :

Example 1 with ApproximateHistogramAggregatorFactory

use of org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory in project druid by druid-io.

the class QuantileSqlAggregator method toDruidAggregation.

@Nullable
@Override
public Aggregation toDruidAggregation(final PlannerContext plannerContext, final RowSignature rowSignature, final VirtualColumnRegistry virtualColumnRegistry, final RexBuilder rexBuilder, final String name, final AggregateCall aggregateCall, final Project project, final List<Aggregation> existingAggregations, final boolean finalizeAggregations) {
    final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator(plannerContext, rowSignature, Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0)));
    if (input == null) {
        return null;
    }
    final AggregatorFactory aggregatorFactory;
    final String histogramName = StringUtils.format("%s:agg", name);
    final RexNode probabilityArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
    if (!probabilityArg.isA(SqlKind.LITERAL)) {
        // Probability must be a literal in order to plan.
        return null;
    }
    final float probability = ((Number) RexLiteral.value(probabilityArg)).floatValue();
    final int resolution;
    if (aggregateCall.getArgList().size() >= 3) {
        final RexNode resolutionArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(2));
        if (!resolutionArg.isA(SqlKind.LITERAL)) {
            // Resolution must be a literal in order to plan.
            return null;
        }
        resolution = ((Number) RexLiteral.value(resolutionArg)).intValue();
    } else {
        resolution = ApproximateHistogram.DEFAULT_HISTOGRAM_SIZE;
    }
    final int numBuckets = ApproximateHistogram.DEFAULT_BUCKET_SIZE;
    final float lowerLimit = Float.NEGATIVE_INFINITY;
    final float upperLimit = Float.POSITIVE_INFINITY;
    // Look for existing matching aggregatorFactory.
    for (final Aggregation existing : existingAggregations) {
        for (AggregatorFactory factory : existing.getAggregatorFactories()) {
            if (factory instanceof ApproximateHistogramAggregatorFactory) {
                final ApproximateHistogramAggregatorFactory theFactory = (ApproximateHistogramAggregatorFactory) factory;
                // Check input for equivalence.
                final boolean inputMatches;
                final DruidExpression virtualInput = virtualColumnRegistry.findVirtualColumnExpressions(theFactory.requiredFields()).stream().findFirst().orElse(null);
                if (virtualInput == null) {
                    inputMatches = input.isDirectColumnAccess() && input.getDirectColumn().equals(theFactory.getFieldName());
                } else {
                    inputMatches = virtualInput.equals(input);
                }
                final boolean matches = inputMatches && theFactory.getResolution() == resolution && theFactory.getNumBuckets() == numBuckets && theFactory.getLowerLimit() == lowerLimit && theFactory.getUpperLimit() == upperLimit;
                if (matches) {
                    // Found existing one. Use this.
                    return Aggregation.create(ImmutableList.of(), new QuantilePostAggregator(name, factory.getName(), probability));
                }
            }
        }
    }
    // No existing match found. Create a new one.
    if (input.isDirectColumnAccess()) {
        if (rowSignature.getColumnType(input.getDirectColumn()).map(type -> type.is(ValueType.COMPLEX)).orElse(false)) {
            aggregatorFactory = new ApproximateHistogramFoldingAggregatorFactory(histogramName, input.getDirectColumn(), resolution, numBuckets, lowerLimit, upperLimit, false);
        } else {
            aggregatorFactory = new ApproximateHistogramAggregatorFactory(histogramName, input.getDirectColumn(), resolution, numBuckets, lowerLimit, upperLimit, false);
        }
    } else {
        final String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, ColumnType.FLOAT);
        aggregatorFactory = new ApproximateHistogramAggregatorFactory(histogramName, virtualColumnName, resolution, numBuckets, lowerLimit, upperLimit, false);
    }
    return Aggregation.create(ImmutableList.of(aggregatorFactory), new QuantilePostAggregator(name, histogramName, probability));
}
Also used : Project(org.apache.calcite.rel.core.Project) SqlAggregator(org.apache.druid.sql.calcite.aggregation.SqlAggregator) ReturnTypes(org.apache.calcite.sql.type.ReturnTypes) QuantilePostAggregator(org.apache.druid.query.aggregation.histogram.QuantilePostAggregator) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) ApproximateHistogram(org.apache.druid.query.aggregation.histogram.ApproximateHistogram) ImmutableList(com.google.common.collect.ImmutableList) RexNode(org.apache.calcite.rex.RexNode) VirtualColumnRegistry(org.apache.druid.sql.calcite.rel.VirtualColumnRegistry) PlannerContext(org.apache.druid.sql.calcite.planner.PlannerContext) Nullable(javax.annotation.Nullable) SqlKind(org.apache.calcite.sql.SqlKind) SqlTypeFamily(org.apache.calcite.sql.type.SqlTypeFamily) SqlTypeName(org.apache.calcite.sql.type.SqlTypeName) RexBuilder(org.apache.calcite.rex.RexBuilder) RexLiteral(org.apache.calcite.rex.RexLiteral) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) SqlFunctionCategory(org.apache.calcite.sql.SqlFunctionCategory) ApproximateHistogramFoldingAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramFoldingAggregatorFactory) StringUtils(org.apache.druid.java.util.common.StringUtils) ValueType(org.apache.druid.segment.column.ValueType) Aggregation(org.apache.druid.sql.calcite.aggregation.Aggregation) ApproximateHistogramAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory) List(java.util.List) Aggregations(org.apache.druid.sql.calcite.aggregation.Aggregations) RowSignature(org.apache.druid.segment.column.RowSignature) OperandTypes(org.apache.calcite.sql.type.OperandTypes) ColumnType(org.apache.druid.segment.column.ColumnType) AggregateCall(org.apache.calcite.rel.core.AggregateCall) SqlAggFunction(org.apache.calcite.sql.SqlAggFunction) Expressions(org.apache.druid.sql.calcite.expression.Expressions) ApproximateHistogramFoldingAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramFoldingAggregatorFactory) QuantilePostAggregator(org.apache.druid.query.aggregation.histogram.QuantilePostAggregator) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) ApproximateHistogramFoldingAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramFoldingAggregatorFactory) ApproximateHistogramAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory) Aggregation(org.apache.druid.sql.calcite.aggregation.Aggregation) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) ApproximateHistogramAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)

Example 2 with ApproximateHistogramAggregatorFactory

use of org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory in project druid by druid-io.

the class QuantileSqlAggregatorTest method testQuantileOnCastedString.

@Test
public void testQuantileOnCastedString() throws Exception {
    cannotVectorize();
    final List<Object[]> expectedResults;
    if (NullHandling.replaceWithDefault()) {
        expectedResults = ImmutableList.of(new Object[] { "", 0.0d }, new Object[] { "a", 0.0d }, new Object[] { "b", 0.0d }, new Object[] { "c", 10.100000381469727d }, new Object[] { "d", 2.0d });
    } else {
        expectedResults = ImmutableList.of(new Object[] { null, Double.NaN }, new Object[] { "", 1.0d }, new Object[] { "a", Double.NaN }, new Object[] { "b", 10.100000381469727d }, new Object[] { "c", 10.100000381469727d }, new Object[] { "d", 2.0d });
    }
    testQuery("SELECT dim3, APPROX_QUANTILE(CAST(dim1 as DOUBLE), 0.5) from foo group by dim3", ImmutableList.of(GroupByQuery.builder().setDataSource(CalciteTests.DATASOURCE1).setInterval(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).setGranularity(Granularities.ALL).setVirtualColumns(new ExpressionVirtualColumn("v0", "CAST(\"dim1\", 'DOUBLE')", ColumnType.FLOAT, ExprMacroTable.nil())).setDimensions(new DefaultDimensionSpec("dim3", "d0")).setAggregatorSpecs(new ApproximateHistogramAggregatorFactory("a0:agg", "v0", 50, 7, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY, false)).setPostAggregatorSpecs(ImmutableList.of(new QuantilePostAggregator("a0", "a0:agg", 0.5f))).setContext(QUERY_CONTEXT_DEFAULT).build()), expectedResults);
}
Also used : ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) QuantilePostAggregator(org.apache.druid.query.aggregation.histogram.QuantilePostAggregator) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) ApproximateHistogramAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 3 with ApproximateHistogramAggregatorFactory

use of org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory in project druid by druid-io.

the class QuantileSqlAggregatorTest method createQuerySegmentWalker.

@Override
public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker() throws IOException {
    ApproximateHistogramDruidModule.registerSerde();
    final QueryableIndex index = IndexBuilder.create(CalciteTests.getJsonMapper()).tmpDir(temporaryFolder.newFolder()).segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()).schema(new IncrementalIndexSchema.Builder().withMetrics(new CountAggregatorFactory("cnt"), new DoubleSumAggregatorFactory("m1", "m1"), new ApproximateHistogramAggregatorFactory("hist_m1", "m1", null, null, null, null, false)).withRollup(false).build()).rows(CalciteTests.ROWS1).buildMMappedIndex();
    return new SpecificSegmentsQuerySegmentWalker(conglomerate).add(DataSegment.builder().dataSource(CalciteTests.DATASOURCE1).interval(index.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), index);
}
Also used : CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) SpecificSegmentsQuerySegmentWalker(org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker) QueryableIndex(org.apache.druid.segment.QueryableIndex) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) IndexBuilder(org.apache.druid.segment.IndexBuilder) ApproximateHistogramAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory)

Example 4 with ApproximateHistogramAggregatorFactory

use of org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory in project druid by druid-io.

the class QuantileSqlAggregatorTest method testQuantileOnInnerQuery.

@Test
public void testQuantileOnInnerQuery() throws Exception {
    final List<Object[]> expectedResults;
    if (NullHandling.replaceWithDefault()) {
        expectedResults = ImmutableList.of(new Object[] { 7.0, 8.26386833190918 });
    } else {
        expectedResults = ImmutableList.of(new Object[] { 5.25, 6.59091854095459 });
    }
    testQuery("SELECT AVG(x), APPROX_QUANTILE(x, 0.98)\n" + "FROM (SELECT dim2, SUM(m1) AS x FROM foo GROUP BY dim2)", ImmutableList.of(GroupByQuery.builder().setDataSource(new QueryDataSource(GroupByQuery.builder().setDataSource(CalciteTests.DATASOURCE1).setInterval(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).setGranularity(Granularities.ALL).setDimensions(new DefaultDimensionSpec("dim2", "d0")).setAggregatorSpecs(ImmutableList.of(new DoubleSumAggregatorFactory("a0", "m1"))).setContext(QUERY_CONTEXT_DEFAULT).build())).setInterval(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).setGranularity(Granularities.ALL).setAggregatorSpecs(new DoubleSumAggregatorFactory("_a0:sum", "a0"), new CountAggregatorFactory("_a0:count"), new ApproximateHistogramAggregatorFactory("_a1:agg", "a0", null, null, null, null, false)).setPostAggregatorSpecs(ImmutableList.of(new ArithmeticPostAggregator("_a0", "quotient", ImmutableList.of(new FieldAccessPostAggregator(null, "_a0:sum"), new FieldAccessPostAggregator(null, "_a0:count"))), new QuantilePostAggregator("_a1", "_a1:agg", 0.98f))).setContext(QUERY_CONTEXT_DEFAULT).build()), expectedResults);
}
Also used : ArithmeticPostAggregator(org.apache.druid.query.aggregation.post.ArithmeticPostAggregator) FieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) QueryDataSource(org.apache.druid.query.QueryDataSource) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) QuantilePostAggregator(org.apache.druid.query.aggregation.histogram.QuantilePostAggregator) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) ApproximateHistogramAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Aggregations

ApproximateHistogramAggregatorFactory (org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory)4 QuantilePostAggregator (org.apache.druid.query.aggregation.histogram.QuantilePostAggregator)3 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)2 DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory)2 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)2 MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec)2 BaseCalciteQueryTest (org.apache.druid.sql.calcite.BaseCalciteQueryTest)2 ImmutableList (com.google.common.collect.ImmutableList)1 List (java.util.List)1 Nullable (javax.annotation.Nullable)1 AggregateCall (org.apache.calcite.rel.core.AggregateCall)1 Project (org.apache.calcite.rel.core.Project)1 RexBuilder (org.apache.calcite.rex.RexBuilder)1 RexLiteral (org.apache.calcite.rex.RexLiteral)1 RexNode (org.apache.calcite.rex.RexNode)1 SqlAggFunction (org.apache.calcite.sql.SqlAggFunction)1 SqlFunctionCategory (org.apache.calcite.sql.SqlFunctionCategory)1 SqlKind (org.apache.calcite.sql.SqlKind)1 OperandTypes (org.apache.calcite.sql.type.OperandTypes)1 ReturnTypes (org.apache.calcite.sql.type.ReturnTypes)1