Use of org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory in project druid by druid-io.
From class GroupByQueryRunnerTest, method testGroupByCardinalityAggWithExtractionFn.
/**
 * Groups by "market" at day granularity and counts distinct values of the quality dimension
 * after passing it through a JavaScript extraction function that maps every input to
 * {@code 'hello'}. Each of the six expected rows therefore carries the same cardinality
 * estimate (just above 1).
 */
@Test
public void testGroupByCardinalityAggWithExtractionFn() {
  // Cannot vectorize due to extraction dimension spec.
  cannotVectorize();

  // Extraction function that ignores its argument and always yields the same string.
  ExtractionFn constantHelloFn = new JavaScriptExtractionFn(
      "function(str) { return 'hello' }",
      false,
      JavaScriptConfig.getEnabledInstance()
  );

  // Cardinality is computed over the *extracted* quality values.
  ExtractionDimensionSpec extractedQuality = new ExtractionDimensionSpec(
      QueryRunnerTestHelper.QUALITY_DIMENSION,
      QueryRunnerTestHelper.QUALITY_DIMENSION,
      constantHelloFn
  );
  CardinalityAggregatorFactory numValsAgg =
      new CardinalityAggregatorFactory("numVals", ImmutableList.of(extractedQuality), false);

  GroupByQuery query = makeQueryBuilder()
      .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
      .setDimensions(new DefaultDimensionSpec("market", "alias"))
      .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, numValsAgg)
      .setGranularity(QueryRunnerTestHelper.DAY_GRAN)
      .build();

  // The estimate expected in every row of the result.
  final double singleValueEstimate = 1.0002442201269182d;

  List<ResultRow> expectedResults = Arrays.asList(
      makeRow(query, "2011-04-01", "alias", "spot", "rows", 9L, "numVals", singleValueEstimate),
      makeRow(query, "2011-04-01", "alias", "total_market", "rows", 2L, "numVals", singleValueEstimate),
      makeRow(query, "2011-04-01", "alias", "upfront", "rows", 2L, "numVals", singleValueEstimate),
      makeRow(query, "2011-04-02", "alias", "spot", "rows", 9L, "numVals", singleValueEstimate),
      makeRow(query, "2011-04-02", "alias", "total_market", "rows", 2L, "numVals", singleValueEstimate),
      makeRow(query, "2011-04-02", "alias", "upfront", "rows", 2L, "numVals", singleValueEstimate)
  );

  Iterable<ResultRow> actualResults = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
  TestHelper.assertExpectedObjects(expectedResults, actualResults, "cardinality-agg");
}
Use of org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory in project druid by druid-io.
From class AggregatorFactoryTest, method testResultArraySignature.
/**
 * Builds a timeseries query that carries one aggregator of each kind listed below (count,
 * JavaScript, long, double, float, string, sketch and delegate aggregators) plus a
 * {@code FinalizingFieldAccessPostAggregator} named {@code "<agg>-finalize"} for every one of
 * them, then asserts that {@code TimeseriesQueryQueryToolChest.resultArraySignature(query)}
 * equals the expected {@code RowSignature}.
 *
 * In the expected signature the time column comes first, followed by the aggregators in
 * declaration order and then the post-aggregators in declaration order. Several aggregators
 * (doubleMean, stringFirst, stringLast, cardinality, hyperUnique, histogram, filtered,
 * suppressed) are expected with a {@code null} type, while their "-finalize" counterparts are
 * expected with a concrete type.
 */
@Test
public void testResultArraySignature() {
// Query under test: the order of aggregators and post-aggregators here must match the order of
// the add(...) calls in the expected signature below.
final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource("dummy").intervals("2000/3000").granularity(Granularities.HOUR).aggregators(new CountAggregatorFactory("count"), new JavaScriptAggregatorFactory("js", ImmutableList.of("col"), "function(a,b) { return a + b; }", "function() { return 0; }", "function(a,b) { return a + b }", new JavaScriptConfig(true)), // long aggs
new LongSumAggregatorFactory("longSum", "long-col"), new LongMinAggregatorFactory("longMin", "long-col"), new LongMaxAggregatorFactory("longMax", "long-col"), new LongFirstAggregatorFactory("longFirst", "long-col", null), new LongLastAggregatorFactory("longLast", "long-col", null), new LongAnyAggregatorFactory("longAny", "long-col"), // double aggs
new DoubleSumAggregatorFactory("doubleSum", "double-col"), new DoubleMinAggregatorFactory("doubleMin", "double-col"), new DoubleMaxAggregatorFactory("doubleMax", "double-col"), new DoubleFirstAggregatorFactory("doubleFirst", "double-col", null), new DoubleLastAggregatorFactory("doubleLast", "double-col", null), new DoubleAnyAggregatorFactory("doubleAny", "double-col"), new DoubleMeanAggregatorFactory("doubleMean", "double-col"), // float aggs
new FloatSumAggregatorFactory("floatSum", "float-col"), new FloatMinAggregatorFactory("floatMin", "float-col"), new FloatMaxAggregatorFactory("floatMax", "float-col"), new FloatFirstAggregatorFactory("floatFirst", "float-col", null), new FloatLastAggregatorFactory("floatLast", "float-col", null), new FloatAnyAggregatorFactory("floatAny", "float-col"), // string aggregators
new StringFirstAggregatorFactory("stringFirst", "col", null, 1024), new StringLastAggregatorFactory("stringLast", "col", null, 1024), new StringAnyAggregatorFactory("stringAny", "col", 1024), // sketch aggs
new CardinalityAggregatorFactory("cardinality", ImmutableList.of(DefaultDimensionSpec.of("some-col")), false), new HyperUniquesAggregatorFactory("hyperUnique", "hyperunique"), new HistogramAggregatorFactory("histogram", "histogram", ImmutableList.of(0.25f, 0.5f, 0.75f)), // delegate aggs
new FilteredAggregatorFactory(new HyperUniquesAggregatorFactory("filtered", "hyperunique"), new SelectorDimFilter("col", "hello", null)), new SuppressedAggregatorFactory(new HyperUniquesAggregatorFactory("suppressed", "hyperunique"))).postAggregators(new FinalizingFieldAccessPostAggregator("count-finalize", "count"), new FinalizingFieldAccessPostAggregator("js-finalize", "js"), // long aggs
new FinalizingFieldAccessPostAggregator("longSum-finalize", "longSum"), new FinalizingFieldAccessPostAggregator("longMin-finalize", "longMin"), new FinalizingFieldAccessPostAggregator("longMax-finalize", "longMax"), new FinalizingFieldAccessPostAggregator("longFirst-finalize", "longFirst"), new FinalizingFieldAccessPostAggregator("longLast-finalize", "longLast"), new FinalizingFieldAccessPostAggregator("longAny-finalize", "longAny"), // double
new FinalizingFieldAccessPostAggregator("doubleSum-finalize", "doubleSum"), new FinalizingFieldAccessPostAggregator("doubleMin-finalize", "doubleMin"), new FinalizingFieldAccessPostAggregator("doubleMax-finalize", "doubleMax"), new FinalizingFieldAccessPostAggregator("doubleFirst-finalize", "doubleFirst"), new FinalizingFieldAccessPostAggregator("doubleLast-finalize", "doubleLast"), new FinalizingFieldAccessPostAggregator("doubleAny-finalize", "doubleAny"), new FinalizingFieldAccessPostAggregator("doubleMean-finalize", "doubleMean"), // finalized floats
new FinalizingFieldAccessPostAggregator("floatSum-finalize", "floatSum"), new FinalizingFieldAccessPostAggregator("floatMin-finalize", "floatMin"), new FinalizingFieldAccessPostAggregator("floatMax-finalize", "floatMax"), new FinalizingFieldAccessPostAggregator("floatFirst-finalize", "floatFirst"), new FinalizingFieldAccessPostAggregator("floatLast-finalize", "floatLast"), new FinalizingFieldAccessPostAggregator("floatAny-finalize", "floatAny"), // finalized strings
new FinalizingFieldAccessPostAggregator("stringFirst-finalize", "stringFirst"), new FinalizingFieldAccessPostAggregator("stringLast-finalize", "stringLast"), new FinalizingFieldAccessPostAggregator("stringAny-finalize", "stringAny"), // finalized sketch
new FinalizingFieldAccessPostAggregator("cardinality-finalize", "cardinality"), new FinalizingFieldAccessPostAggregator("hyperUnique-finalize", "hyperUnique"), new FinalizingFieldAccessPostAggregator("histogram-finalize", "histogram"), // finalized delegate
new FinalizingFieldAccessPostAggregator("filtered-finalize", "filtered"), new FinalizingFieldAccessPostAggregator("suppressed-finalize", "suppressed")).build();
// Expected signature: time column, then each aggregator, then each "-finalize" post-aggregator.
// A null type is expected for the aggregators listed in the method comment; their finalized
// forms are expected as DOUBLE, STRING, or (for the histogram) HistogramAggregatorFactory.TYPE_VISUAL.
Assert.assertEquals(RowSignature.builder().addTimeColumn().add("count", ColumnType.LONG).add("js", ColumnType.FLOAT).add("longSum", ColumnType.LONG).add("longMin", ColumnType.LONG).add("longMax", ColumnType.LONG).add("longFirst", ColumnType.LONG).add("longLast", ColumnType.LONG).add("longAny", ColumnType.LONG).add("doubleSum", ColumnType.DOUBLE).add("doubleMin", ColumnType.DOUBLE).add("doubleMax", ColumnType.DOUBLE).add("doubleFirst", ColumnType.DOUBLE).add("doubleLast", ColumnType.DOUBLE).add("doubleAny", ColumnType.DOUBLE).add("doubleMean", null).add("floatSum", ColumnType.FLOAT).add("floatMin", ColumnType.FLOAT).add("floatMax", ColumnType.FLOAT).add("floatFirst", ColumnType.FLOAT).add("floatLast", ColumnType.FLOAT).add("floatAny", ColumnType.FLOAT).add("stringFirst", null).add("stringLast", null).add("stringAny", ColumnType.STRING).add("cardinality", null).add("hyperUnique", null).add("histogram", null).add("filtered", null).add("suppressed", null).add("count-finalize", ColumnType.LONG).add("js-finalize", ColumnType.FLOAT).add("longSum-finalize", ColumnType.LONG).add("longMin-finalize", ColumnType.LONG).add("longMax-finalize", ColumnType.LONG).add("longFirst-finalize", ColumnType.LONG).add("longLast-finalize", ColumnType.LONG).add("longAny-finalize", ColumnType.LONG).add("doubleSum-finalize", ColumnType.DOUBLE).add("doubleMin-finalize", ColumnType.DOUBLE).add("doubleMax-finalize", ColumnType.DOUBLE).add("doubleFirst-finalize", ColumnType.DOUBLE).add("doubleLast-finalize", ColumnType.DOUBLE).add("doubleAny-finalize", ColumnType.DOUBLE).add("doubleMean-finalize", ColumnType.DOUBLE).add("floatSum-finalize", ColumnType.FLOAT).add("floatMin-finalize", ColumnType.FLOAT).add("floatMax-finalize", ColumnType.FLOAT).add("floatFirst-finalize", ColumnType.FLOAT).add("floatLast-finalize", ColumnType.FLOAT).add("floatAny-finalize", ColumnType.FLOAT).add("stringFirst-finalize", ColumnType.STRING).add("stringLast-finalize", ColumnType.STRING).add("stringAny-finalize", 
ColumnType.STRING).add("cardinality-finalize", ColumnType.DOUBLE).add("hyperUnique-finalize", ColumnType.DOUBLE).add("histogram-finalize", HistogramAggregatorFactory.TYPE_VISUAL).add("filtered-finalize", ColumnType.DOUBLE).add("suppressed-finalize", ColumnType.DOUBLE).build(), new TimeseriesQueryQueryToolChest().resultArraySignature(query));
}
Use of org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory in project druid by druid-io.
From class BuiltinApproxCountDistinctSqlAggregator, method toDruidAggregation.
/**
 * Translates a single-argument aggregate call into a Druid {@link Aggregation}.
 *
 * <p>If the argument is a direct reference to a column whose type is {@code COMPLEX}, a
 * {@code HyperUniquesAggregatorFactory} over that column is used. Otherwise a
 * {@code CardinalityAggregatorFactory} is built over a dimension spec derived from the argument:
 * a simple extraction is converted directly, and any other expression is registered as a
 * virtual column first.
 *
 * <p>When {@code finalizeAggregations} is true, the aggregator is given a prefixed internal name
 * and a {@code HyperUniqueFinalizingPostAggregator} named {@code name} is attached; otherwise the
 * aggregator itself is named {@code name} and no post-aggregator is attached.
 *
 * <p>{@code rexBuilder} and {@code existingAggregations} are not read by this method.
 *
 * @return the aggregation, or {@code null} if the argument cannot be converted to a
 *         {@code DruidExpression}
 * @throws ISE if the argument's SQL type has no corresponding Druid column type (only checked on
 *             the cardinality-aggregator path)
 */
@Nullable
@Override
public Aggregation toDruidAggregation(
    final PlannerContext plannerContext,
    final RowSignature rowSignature,
    final VirtualColumnRegistry virtualColumnRegistry,
    final RexBuilder rexBuilder,
    final String name,
    final AggregateCall aggregateCall,
    final Project project,
    final List<Aggregation> existingAggregations,
    final boolean finalizeAggregations
) {
  // Don't use Aggregations.getArgumentsForSimpleAggregator, since it won't let us use direct column access
  // for string columns.
  final RexNode argNode = Expressions.fromFieldAccess(
      rowSignature,
      project,
      Iterables.getOnlyElement(aggregateCall.getArgList())
  );
  final DruidExpression argExpression = Expressions.toDruidExpression(plannerContext, rowSignature, argNode);
  if (argExpression == null) {
    return null;
  }

  // With finalization, the public name goes to the post-aggregator and the factory gets a prefixed one.
  final String factoryName;
  if (finalizeAggregations) {
    factoryName = Calcites.makePrefixedName(name, "a");
  } else {
    factoryName = name;
  }

  final boolean directComplexColumn =
      argExpression.isDirectColumnAccess()
      && rowSignature.getColumnType(argExpression.getDirectColumn())
                     .map(type -> type.is(ValueType.COMPLEX))
                     .orElse(false);

  final AggregatorFactory distinctCountFactory;
  if (!directComplexColumn) {
    final RelDataType sqlType = argNode.getType();
    final ColumnType druidType = Calcites.getColumnTypeForRelDataType(sqlType);
    if (druidType == null) {
      throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", sqlType.getSqlTypeName(), factoryName);
    }

    // The virtual column is only registered when the argument is not a simple extraction.
    final DimensionSpec countedDimension = argExpression.isSimpleExtraction()
        ? argExpression.getSimpleExtraction().toDimensionSpec(null, druidType)
        : new DefaultDimensionSpec(
            virtualColumnRegistry.getOrCreateVirtualColumnForExpression(argExpression, sqlType),
            null,
            druidType
        );

    distinctCountFactory = new CardinalityAggregatorFactory(
        factoryName,
        null,
        ImmutableList.of(countedDimension),
        false,
        true
    );
  } else {
    distinctCountFactory = new HyperUniquesAggregatorFactory(
        factoryName,
        argExpression.getDirectColumn(),
        false,
        true
    );
  }

  return Aggregation.create(
      Collections.singletonList(distinctCountFactory),
      finalizeAggregations ? new HyperUniqueFinalizingPostAggregator(name, distinctCountFactory.getName()) : null
  );
}
Use of org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory in project druid by druid-io.
From class GroupByQueryRunnerTest, method testSubqueryWithOuterCardinalityAggregator.
/**
 * Runs a cardinality aggregator in an outer groupBy whose data source is an inner groupBy.
 * The inner query groups by "market" and "quality"; the outer query has no dimensions and
 * counts distinct "quality" values, which is expected to equal
 * {@code QueryRunnerTestHelper.UNIQUES_9}.
 */
@Test
public void testSubqueryWithOuterCardinalityAggregator() {
  // Inner query: one row per (market, quality) pair over the full interval.
  final GroupByQuery innerQuery = makeQueryBuilder()
      .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .setQuerySegmentSpec(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
      .setDimensions(
          new DefaultDimensionSpec("market", "market"),
          new DefaultDimensionSpec("quality", "quality")
      )
      .setAggregatorSpecs(
          QueryRunnerTestHelper.ROWS_COUNT,
          new LongSumAggregatorFactory("index", "index")
      )
      .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
      .build();

  // Outer query: distinct count of "quality" over the inner query's output.
  final CardinalityAggregatorFactory qualityCardinality = new CardinalityAggregatorFactory(
      "car",
      ImmutableList.of(new DefaultDimensionSpec("quality", "quality")),
      false
  );
  final GroupByQuery query = makeQueryBuilder()
      .setDataSource(innerQuery)
      .setQuerySegmentSpec(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
      .setDimensions(Collections.emptyList())
      .setAggregatorSpecs(qualityCardinality)
      .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
      .build();

  final ResultRow onlyRow = makeRow(query, "1970-01-01", "car", QueryRunnerTestHelper.UNIQUES_9);
  List<ResultRow> expectedResults = Collections.singletonList(onlyRow);

  Iterable<ResultRow> actualResults = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
  TestHelper.assertExpectedObjects(expectedResults, actualResults, "subquery-cardinality");
}
Use of org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory in project druid by druid-io.
From class GroupByQueryRunnerTest, method testGroupByWithExpressionAggregatorWithComplex.
/**
 * Runs a dimensionless, all-granularity groupBy over the full interval with two aggregators on
 * the "quality" column: a {@code CardinalityAggregatorFactory} ("car") and an
 * {@code ExpressionLambdaAggregatorFactory} ("carExpr") built from the {@code hyper_unique*}
 * expressions. Both are expected to produce {@code QueryRunnerTestHelper.UNIQUES_9}.
 */
@Test
public void testGroupByWithExpressionAggregatorWithComplex() {
  cannotVectorize();

  final CardinalityAggregatorFactory builtinCardinality = new CardinalityAggregatorFactory(
      "car",
      ImmutableList.of(new DefaultDimensionSpec("quality", "quality")),
      false
  );

  // NOTE(review): the string arguments look like the initial-value, fold, combine and finalize
  // expressions, in that order — confirm against the ExpressionLambdaAggregatorFactory constructor.
  final ExpressionLambdaAggregatorFactory expressionCardinality = new ExpressionLambdaAggregatorFactory(
      "carExpr",
      ImmutableSet.of("quality"),
      null,
      "hyper_unique()",
      null,
      null,
      false,
      false,
      "hyper_unique_add(quality, __acc)",
      "hyper_unique_add(carExpr, __acc)",
      null,
      "hyper_unique_estimate(o)",
      null,
      TestExprMacroTable.INSTANCE
  );

  final GroupByQuery query = makeQueryBuilder()
      .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .setQuerySegmentSpec(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
      .setDimensions(Collections.emptyList())
      .setAggregatorSpecs(builtinCardinality, expressionCardinality)
      .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
      .build();

  List<ResultRow> expectedResults = Collections.singletonList(
      makeRow(
          query,
          "1970-01-01",
          "car", QueryRunnerTestHelper.UNIQUES_9,
          "carExpr", QueryRunnerTestHelper.UNIQUES_9
      )
  );

  Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
  // Distinct failure label: the original reused "subquery-cardinality" from the subquery test,
  // although this query has no subquery.
  TestHelper.assertExpectedObjects(expectedResults, results, "expression-agg-complex");
}
Aggregations