Search in sources:

Example 1 with DoublesSketchAggregatorFactory

use of org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory in project druid by druid-io.

In the class DoublesSketchApproxQuantileSqlAggregator, the method toDruidAggregation:

/**
 * Plans an approximate-quantile aggregate call as a {@link DoublesSketchAggregatorFactory} combined with a
 * {@link DoublesSketchToQuantilePostAggregator}.
 *
 * <p>Argument 0 of the call is the numeric input, argument 1 is the probability, and the optional
 * argument 2 is the sketch resolution {@code k} (defaulting to {@link DoublesSketchAggregatorFactory#DEFAULT_K}).
 * If one of {@code existingAggregations} already holds a sketch factory over the same input with the same
 * {@code k}, only a post-aggregator reading that factory is emitted; otherwise a new factory named
 * {@code "<name>:agg"} is created.
 *
 * @return the planned aggregation, or {@code null} when the input expression cannot be translated or when
 *         the probability or resolution argument is not a literal
 */
@Nullable
@Override
public Aggregation toDruidAggregation(final PlannerContext plannerContext, final RowSignature rowSignature, final VirtualColumnRegistry virtualColumnRegistry, final RexBuilder rexBuilder, final String name, final AggregateCall aggregateCall, final Project project, final List<Aggregation> existingAggregations, final boolean finalizeAggregations) {
    final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator(
        plannerContext,
        rowSignature,
        Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0))
    );
    if (input == null) {
        return null;
    }

    // The probability has to be a literal, otherwise the call cannot be planned.
    final RexNode probabilityArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
    if (!probabilityArg.isA(SqlKind.LITERAL)) {
        return null;
    }
    final float probability = ((Number) RexLiteral.value(probabilityArg)).floatValue();

    // The resolution (k) is optional; when given it has to be a literal as well.
    final int k;
    if (aggregateCall.getArgList().size() < 3) {
        k = DoublesSketchAggregatorFactory.DEFAULT_K;
    } else {
        final RexNode resolutionArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(2));
        if (!resolutionArg.isA(SqlKind.LITERAL)) {
            return null;
        }
        k = ((Number) RexLiteral.value(resolutionArg)).intValue();
    }

    // Try to reuse a sketch that an earlier aggregation already builds over the same input and k.
    for (final Aggregation existing : existingAggregations) {
        for (final AggregatorFactory candidate : existing.getAggregatorFactories()) {
            if (!(candidate instanceof DoublesSketchAggregatorFactory)) {
                continue;
            }
            final DoublesSketchAggregatorFactory sketchFactory = (DoublesSketchAggregatorFactory) candidate;

            // Compare against the virtual column expression if the factory reads one,
            // otherwise against the directly accessed column name.
            final DruidExpression virtualInput = virtualColumnRegistry
                .findVirtualColumnExpressions(sketchFactory.requiredFields())
                .stream()
                .findFirst()
                .orElse(null);
            final boolean inputMatches = virtualInput != null
                ? virtualInput.equals(input)
                : input.isDirectColumnAccess() && input.getDirectColumn().equals(sketchFactory.getFieldName());

            if (inputMatches && sketchFactory.getK() == k) {
                // Reuse: no new aggregator, just a post-aggregator on top of the existing sketch.
                final String existingName = candidate.getName();
                return Aggregation.create(
                    ImmutableList.of(),
                    new DoublesSketchToQuantilePostAggregator(name, new FieldAccessPostAggregator(existingName, existingName), probability)
                );
            }
        }
    }

    // Nothing reusable: build a fresh sketch aggregator, reading either the column itself
    // or a virtual column registered for the input expression.
    final String fieldName = input.isDirectColumnAccess()
        ? input.getDirectColumn()
        : virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, ColumnType.FLOAT);
    final String histogramName = StringUtils.format("%s:agg", name);
    final AggregatorFactory aggregatorFactory = new DoublesSketchAggregatorFactory(
        histogramName,
        fieldName,
        k,
        getMaxStreamLengthFromQueryContext(plannerContext.getQueryContext())
    );
    return Aggregation.create(
        ImmutableList.of(aggregatorFactory),
        new DoublesSketchToQuantilePostAggregator(name, new FieldAccessPostAggregator(histogramName, histogramName), probability)
    );
}
Also used : Aggregation(org.apache.druid.sql.calcite.aggregation.Aggregation) FieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) DoublesSketchToQuantilePostAggregator(org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchToQuantilePostAggregator) DoublesSketchAggregatorFactory(org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DoublesSketchAggregatorFactory(org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)

Example 2 with DoublesSketchAggregatorFactory

use of org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory in project druid by druid-io.

In the class DoublesSketchSqlAggregatorTest, the method testQuantileOnCastedString:

/**
 * Runs APPROX_QUANTILE_DS over {@code CAST(dim1 as DOUBLE)} with various probabilities, resolutions and
 * filters, and checks both the native timeseries query that is planned and the returned row.
 */
@Test
public void testQuantileOnCastedString() throws Exception {
    final String sql = "SELECT\n" + "APPROX_QUANTILE_DS(CAST(dim1 as DOUBLE), 0.01),\n" + "APPROX_QUANTILE_DS(CAST(dim1 as DOUBLE), 0.5, 64),\n" + "APPROX_QUANTILE_DS(CAST(dim1 as DOUBLE), 0.98, 256),\n" + "APPROX_QUANTILE_DS(CAST(dim1 as DOUBLE), 0.99),\n" + "APPROX_QUANTILE_DS(CAST(dim1 as DOUBLE) * 2, 0.97),\n" + "APPROX_QUANTILE_DS(CAST(dim1 as DOUBLE), 0.99) FILTER(WHERE dim2 = 'abc'),\n" + "APPROX_QUANTILE_DS(CAST(dim1 as DOUBLE), 0.999) FILTER(WHERE dim2 <> 'abc'),\n" + "APPROX_QUANTILE_DS(CAST(dim1 as DOUBLE), 0.999) FILTER(WHERE dim2 = 'abc')\n" + "FROM foo";

    // The expected row depends on the null-handling mode the test runs under.
    final List<Object[]> expectedResults = NullHandling.replaceWithDefault()
        ? ImmutableList.of(new Object[] { 0.0, 1.0, 10.1, 10.1, 20.2, 0.0, 10.1, 0.0 })
        : ImmutableList.of(new Object[] { 1.0, 2.0, 10.1, 10.1, 20.2, Double.NaN, 10.1, Double.NaN });

    testQuery(
        sql,
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .virtualColumns(
                    new ExpressionVirtualColumn("v0", "CAST(\"dim1\", 'DOUBLE')", ColumnType.FLOAT, TestExprMacroTable.INSTANCE),
                    new ExpressionVirtualColumn("v1", "(CAST(\"dim1\", 'DOUBLE') * 2)", ColumnType.FLOAT, TestExprMacroTable.INSTANCE)
                )
                .aggregators(
                    ImmutableList.of(
                        new DoublesSketchAggregatorFactory("a0:agg", "v0", 128),
                        new DoublesSketchAggregatorFactory("a1:agg", "v0", 64),
                        new DoublesSketchAggregatorFactory("a2:agg", "v0", 256),
                        new DoublesSketchAggregatorFactory("a4:agg", "v1", 128),
                        new FilteredAggregatorFactory(
                            new DoublesSketchAggregatorFactory("a5:agg", "v0", 128),
                            new SelectorDimFilter("dim2", "abc", null)
                        ),
                        new FilteredAggregatorFactory(
                            new DoublesSketchAggregatorFactory("a6:agg", "v0", 128),
                            new NotDimFilter(new SelectorDimFilter("dim2", "abc", null))
                        )
                    )
                )
                .postAggregators(
                    new DoublesSketchToQuantilePostAggregator("a0", makeFieldAccessPostAgg("a0:agg"), 0.01f),
                    new DoublesSketchToQuantilePostAggregator("a1", makeFieldAccessPostAgg("a1:agg"), 0.50f),
                    new DoublesSketchToQuantilePostAggregator("a2", makeFieldAccessPostAgg("a2:agg"), 0.98f),
                    // a3 has no aggregator of its own; it reads the "a0:agg" sketch.
                    new DoublesSketchToQuantilePostAggregator("a3", makeFieldAccessPostAgg("a0:agg"), 0.99f),
                    new DoublesSketchToQuantilePostAggregator("a4", makeFieldAccessPostAgg("a4:agg"), 0.97f),
                    new DoublesSketchToQuantilePostAggregator("a5", makeFieldAccessPostAgg("a5:agg"), 0.99f),
                    new DoublesSketchToQuantilePostAggregator("a6", makeFieldAccessPostAgg("a6:agg"), 0.999f),
                    // a7 likewise reads the filtered "a5:agg" sketch.
                    new DoublesSketchToQuantilePostAggregator("a7", makeFieldAccessPostAgg("a5:agg"), 0.999f)
                )
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
        ),
        expectedResults
    );
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) NotDimFilter(org.apache.druid.query.filter.NotDimFilter) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) DoublesSketchToQuantilePostAggregator(org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchToQuantilePostAggregator) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DoublesSketchAggregatorFactory(org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 3 with DoublesSketchAggregatorFactory

use of org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory in project druid by druid-io.

In the class DoublesSketchObjectSqlAggregator, the method toDruidAggregation:

/**
 * Plans a sketch-building aggregate call as a single {@link DoublesSketchAggregatorFactory} with no
 * post-aggregator.
 *
 * <p>Argument 0 of the call is the numeric input and the optional argument 1 is the sketch resolution
 * {@code k} (defaulting to {@link DoublesSketchAggregatorFactory#DEFAULT_K}). The factory is named
 * {@code "<name>:agg"}. This method does not consult {@code existingAggregations}; a new factory is
 * always created.
 *
 * @return the planned aggregation, or {@code null} when the input expression cannot be translated or when
 *         the resolution argument is not a literal
 */
@Nullable
@Override
public Aggregation toDruidAggregation(final PlannerContext plannerContext, final RowSignature rowSignature, final VirtualColumnRegistry virtualColumnRegistry, final RexBuilder rexBuilder, final String name, final AggregateCall aggregateCall, final Project project, final List<Aggregation> existingAggregations, final boolean finalizeAggregations) {
    final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator(
        plannerContext,
        rowSignature,
        Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0))
    );
    if (input == null) {
        return null;
    }

    // The resolution (k) is optional; when given it has to be a literal in order to plan.
    final int k;
    if (aggregateCall.getArgList().size() < 2) {
        k = DoublesSketchAggregatorFactory.DEFAULT_K;
    } else {
        final RexNode resolutionArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
        if (!resolutionArg.isA(SqlKind.LITERAL)) {
            return null;
        }
        k = ((Number) RexLiteral.value(resolutionArg)).intValue();
    }

    // Read either the column itself or a virtual column registered for the input expression.
    final String fieldName = input.isDirectColumnAccess()
        ? input.getDirectColumn()
        : virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, ColumnType.FLOAT);
    final String histogramName = StringUtils.format("%s:agg", name);
    final AggregatorFactory aggregatorFactory = new DoublesSketchAggregatorFactory(
        histogramName,
        fieldName,
        k,
        DoublesSketchApproxQuantileSqlAggregator.getMaxStreamLengthFromQueryContext(plannerContext.getQueryContext())
    );
    return Aggregation.create(ImmutableList.of(aggregatorFactory), null);
}
Also used : DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) DoublesSketchAggregatorFactory(org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DoublesSketchAggregatorFactory(org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)

Example 4 with DoublesSketchAggregatorFactory

use of org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory in project druid by druid-io.

In the class DoublesSketchSqlAggregatorTest, the method createQuerySegmentWalker:

/**
 * Builds the segment walker used by the tests in this class: one memory-mapped index over
 * {@code CalciteTests.ROWS1}, registered under {@code CalciteTests.DATASOURCE1}, whose metrics include a
 * quantiles sketch column {@code qsketch_m1} over {@code m1}.
 *
 * @throws IOException declared because creating the temporary folder for the index may fail
 */
@Override
public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker() throws IOException {
    // The sketch serde must be registered before an index with a sketch metric is built.
    DoublesSketchModule.registerSerde();

    final QueryableIndex index = IndexBuilder
        .create(CalciteTests.getJsonMapper())
        .tmpDir(temporaryFolder.newFolder())
        .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
        .schema(
            new IncrementalIndexSchema.Builder()
                .withMetrics(
                    new CountAggregatorFactory("cnt"),
                    new DoubleSumAggregatorFactory("m1", "m1"),
                    new DoublesSketchAggregatorFactory("qsketch_m1", "m1", 128)
                )
                .withRollup(false)
                .build()
        )
        .rows(CalciteTests.ROWS1)
        .buildMMappedIndex();

    final SpecificSegmentsQuerySegmentWalker walker = new SpecificSegmentsQuerySegmentWalker(conglomerate);
    final DataSegment descriptor = DataSegment.builder()
        .dataSource(CalciteTests.DATASOURCE1)
        .interval(index.getDataInterval())
        .version("1")
        .shardSpec(new LinearShardSpec(0))
        .size(0)
        .build();
    return walker.add(descriptor, index);
}
Also used : CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) SpecificSegmentsQuerySegmentWalker(org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker) QueryableIndex(org.apache.druid.segment.QueryableIndex) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) IndexBuilder(org.apache.druid.segment.IndexBuilder) DoublesSketchAggregatorFactory(org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory)

Example 5 with DoublesSketchAggregatorFactory

use of org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory in project druid by druid-io.

In the class DoublesSketchSqlAggregatorTest, the method testQuantileOnInnerQuery:

/**
 * Runs AVG and APPROX_QUANTILE_DS over the result of an inner GROUP BY query and checks both the nested
 * group-by query that is planned and the returned row.
 */
@Test
public void testQuantileOnInnerQuery() throws Exception {
    final String sql = "SELECT AVG(x), APPROX_QUANTILE_DS(x, 0.98)\n" + "FROM (SELECT dim2, SUM(m1) AS x FROM foo GROUP BY dim2)";

    // The expected row depends on the null-handling mode the test runs under.
    final List<Object[]> expectedResults = NullHandling.replaceWithDefault()
        ? ImmutableList.of(new Object[] { 7.0, 11.0 })
        : ImmutableList.of(new Object[] { 5.25, 8.0 });

    // Inner query: SUM(m1) grouped by dim2.
    final GroupByQuery innerQuery = GroupByQuery.builder()
        .setDataSource(CalciteTests.DATASOURCE1)
        .setInterval(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
        .setGranularity(Granularities.ALL)
        .setDimensions(new DefaultDimensionSpec("dim2", "d0"))
        .setAggregatorSpecs(ImmutableList.of(new DoubleSumAggregatorFactory("a0", "m1")))
        .setContext(QUERY_CONTEXT_DEFAULT)
        .build();

    testQuery(
        sql,
        Collections.singletonList(
            GroupByQuery.builder()
                .setDataSource(new QueryDataSource(innerQuery))
                .setInterval(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .setGranularity(Granularities.ALL)
                .setAggregatorSpecs(
                    new DoubleSumAggregatorFactory("_a0:sum", "a0"),
                    new CountAggregatorFactory("_a0:count"),
                    new DoublesSketchAggregatorFactory("_a1:agg", "a0", null)
                )
                .setPostAggregatorSpecs(
                    ImmutableList.of(
                        // AVG is expressed as the quotient of the sum and count aggregators.
                        new ArithmeticPostAggregator(
                            "_a0",
                            "quotient",
                            ImmutableList.of(
                                new FieldAccessPostAggregator(null, "_a0:sum"),
                                new FieldAccessPostAggregator(null, "_a0:count")
                            )
                        ),
                        new DoublesSketchToQuantilePostAggregator("_a1", makeFieldAccessPostAgg("_a1:agg"), 0.98f)
                    )
                )
                .setContext(QUERY_CONTEXT_DEFAULT)
                .build()
        ),
        expectedResults
    );
}
Also used : ArithmeticPostAggregator(org.apache.druid.query.aggregation.post.ArithmeticPostAggregator) FieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) QueryDataSource(org.apache.druid.query.QueryDataSource) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DoublesSketchToQuantilePostAggregator(org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchToQuantilePostAggregator) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DoublesSketchAggregatorFactory(org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Aggregations

DoublesSketchAggregatorFactory (org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory): 7
DoublesSketchToQuantilePostAggregator (org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchToQuantilePostAggregator): 5
MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 4
BaseCalciteQueryTest (org.apache.druid.sql.calcite.BaseCalciteQueryTest): 4
Test (org.junit.Test): 4
Nullable (javax.annotation.Nullable): 2
RexNode (org.apache.calcite.rex.RexNode): 2
QueryDataSource (org.apache.druid.query.QueryDataSource): 2
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 2
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 2
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory): 2
FieldAccessPostAggregator (org.apache.druid.query.aggregation.post.FieldAccessPostAggregator): 2
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 2
DruidExpression (org.apache.druid.sql.calcite.expression.DruidExpression): 2
ImmutableList (com.google.common.collect.ImmutableList): 1
HashMap (java.util.HashMap): 1
FilteredAggregatorFactory (org.apache.druid.query.aggregation.FilteredAggregatorFactory): 1
ArithmeticPostAggregator (org.apache.druid.query.aggregation.post.ArithmeticPostAggregator): 1
NotDimFilter (org.apache.druid.query.filter.NotDimFilter): 1
SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter): 1