use of org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory in project druid by druid-io.
the class QuantileSqlAggregator method toDruidAggregation.
@Nullable
@Override
public Aggregation toDruidAggregation(final PlannerContext plannerContext, final RowSignature rowSignature, final VirtualColumnRegistry virtualColumnRegistry, final RexBuilder rexBuilder, final String name, final AggregateCall aggregateCall, final Project project, final List<Aggregation> existingAggregations, final boolean finalizeAggregations) {
final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator(plannerContext, rowSignature, Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0)));
if (input == null) {
return null;
}
final AggregatorFactory aggregatorFactory;
final String histogramName = StringUtils.format("%s:agg", name);
final RexNode probabilityArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
if (!probabilityArg.isA(SqlKind.LITERAL)) {
// Probability must be a literal in order to plan.
return null;
}
final float probability = ((Number) RexLiteral.value(probabilityArg)).floatValue();
final int resolution;
if (aggregateCall.getArgList().size() >= 3) {
final RexNode resolutionArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(2));
if (!resolutionArg.isA(SqlKind.LITERAL)) {
// Resolution must be a literal in order to plan.
return null;
}
resolution = ((Number) RexLiteral.value(resolutionArg)).intValue();
} else {
resolution = ApproximateHistogram.DEFAULT_HISTOGRAM_SIZE;
}
final int numBuckets = ApproximateHistogram.DEFAULT_BUCKET_SIZE;
final float lowerLimit = Float.NEGATIVE_INFINITY;
final float upperLimit = Float.POSITIVE_INFINITY;
// Look for existing matching aggregatorFactory.
for (final Aggregation existing : existingAggregations) {
for (AggregatorFactory factory : existing.getAggregatorFactories()) {
if (factory instanceof ApproximateHistogramAggregatorFactory) {
final ApproximateHistogramAggregatorFactory theFactory = (ApproximateHistogramAggregatorFactory) factory;
// Check input for equivalence.
final boolean inputMatches;
final DruidExpression virtualInput = virtualColumnRegistry.findVirtualColumnExpressions(theFactory.requiredFields()).stream().findFirst().orElse(null);
if (virtualInput == null) {
inputMatches = input.isDirectColumnAccess() && input.getDirectColumn().equals(theFactory.getFieldName());
} else {
inputMatches = virtualInput.equals(input);
}
final boolean matches = inputMatches && theFactory.getResolution() == resolution && theFactory.getNumBuckets() == numBuckets && theFactory.getLowerLimit() == lowerLimit && theFactory.getUpperLimit() == upperLimit;
if (matches) {
// Found existing one. Use this.
return Aggregation.create(ImmutableList.of(), new QuantilePostAggregator(name, factory.getName(), probability));
}
}
}
}
// No existing match found. Create a new one.
if (input.isDirectColumnAccess()) {
if (rowSignature.getColumnType(input.getDirectColumn()).map(type -> type.is(ValueType.COMPLEX)).orElse(false)) {
aggregatorFactory = new ApproximateHistogramFoldingAggregatorFactory(histogramName, input.getDirectColumn(), resolution, numBuckets, lowerLimit, upperLimit, false);
} else {
aggregatorFactory = new ApproximateHistogramAggregatorFactory(histogramName, input.getDirectColumn(), resolution, numBuckets, lowerLimit, upperLimit, false);
}
} else {
final String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, ColumnType.FLOAT);
aggregatorFactory = new ApproximateHistogramAggregatorFactory(histogramName, virtualColumnName, resolution, numBuckets, lowerLimit, upperLimit, false);
}
return Aggregation.create(ImmutableList.of(aggregatorFactory), new QuantilePostAggregator(name, histogramName, probability));
}
use of org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory in project druid by druid-io.
the class QuantileSqlAggregatorTest method testQuantileOnCastedString.
@Test
public void testQuantileOnCastedString() throws Exception {
cannotVectorize();
final List<Object[]> expectedResults;
if (NullHandling.replaceWithDefault()) {
expectedResults = ImmutableList.of(new Object[] { "", 0.0d }, new Object[] { "a", 0.0d }, new Object[] { "b", 0.0d }, new Object[] { "c", 10.100000381469727d }, new Object[] { "d", 2.0d });
} else {
expectedResults = ImmutableList.of(new Object[] { null, Double.NaN }, new Object[] { "", 1.0d }, new Object[] { "a", Double.NaN }, new Object[] { "b", 10.100000381469727d }, new Object[] { "c", 10.100000381469727d }, new Object[] { "d", 2.0d });
}
testQuery("SELECT dim3, APPROX_QUANTILE(CAST(dim1 as DOUBLE), 0.5) from foo group by dim3", ImmutableList.of(GroupByQuery.builder().setDataSource(CalciteTests.DATASOURCE1).setInterval(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).setGranularity(Granularities.ALL).setVirtualColumns(new ExpressionVirtualColumn("v0", "CAST(\"dim1\", 'DOUBLE')", ColumnType.FLOAT, ExprMacroTable.nil())).setDimensions(new DefaultDimensionSpec("dim3", "d0")).setAggregatorSpecs(new ApproximateHistogramAggregatorFactory("a0:agg", "v0", 50, 7, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY, false)).setPostAggregatorSpecs(ImmutableList.of(new QuantilePostAggregator("a0", "a0:agg", 0.5f))).setContext(QUERY_CONTEXT_DEFAULT).build()), expectedResults);
}
use of org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory in project druid by druid-io.
the class QuantileSqlAggregatorTest method createQuerySegmentWalker.
@Override
public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker() throws IOException {
ApproximateHistogramDruidModule.registerSerde();
final QueryableIndex index = IndexBuilder.create(CalciteTests.getJsonMapper()).tmpDir(temporaryFolder.newFolder()).segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()).schema(new IncrementalIndexSchema.Builder().withMetrics(new CountAggregatorFactory("cnt"), new DoubleSumAggregatorFactory("m1", "m1"), new ApproximateHistogramAggregatorFactory("hist_m1", "m1", null, null, null, null, false)).withRollup(false).build()).rows(CalciteTests.ROWS1).buildMMappedIndex();
return new SpecificSegmentsQuerySegmentWalker(conglomerate).add(DataSegment.builder().dataSource(CalciteTests.DATASOURCE1).interval(index.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), index);
}
use of org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory in project druid by druid-io.
the class QuantileSqlAggregatorTest method testQuantileOnInnerQuery.
@Test
public void testQuantileOnInnerQuery() throws Exception {
final List<Object[]> expectedResults;
if (NullHandling.replaceWithDefault()) {
expectedResults = ImmutableList.of(new Object[] { 7.0, 8.26386833190918 });
} else {
expectedResults = ImmutableList.of(new Object[] { 5.25, 6.59091854095459 });
}
testQuery("SELECT AVG(x), APPROX_QUANTILE(x, 0.98)\n" + "FROM (SELECT dim2, SUM(m1) AS x FROM foo GROUP BY dim2)", ImmutableList.of(GroupByQuery.builder().setDataSource(new QueryDataSource(GroupByQuery.builder().setDataSource(CalciteTests.DATASOURCE1).setInterval(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).setGranularity(Granularities.ALL).setDimensions(new DefaultDimensionSpec("dim2", "d0")).setAggregatorSpecs(ImmutableList.of(new DoubleSumAggregatorFactory("a0", "m1"))).setContext(QUERY_CONTEXT_DEFAULT).build())).setInterval(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).setGranularity(Granularities.ALL).setAggregatorSpecs(new DoubleSumAggregatorFactory("_a0:sum", "a0"), new CountAggregatorFactory("_a0:count"), new ApproximateHistogramAggregatorFactory("_a1:agg", "a0", null, null, null, null, false)).setPostAggregatorSpecs(ImmutableList.of(new ArithmeticPostAggregator("_a0", "quotient", ImmutableList.of(new FieldAccessPostAggregator(null, "_a0:sum"), new FieldAccessPostAggregator(null, "_a0:count"))), new QuantilePostAggregator("_a1", "_a1:agg", 0.98f))).setContext(QUERY_CONTEXT_DEFAULT).build()), expectedResults);
}
Aggregations