use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class GroupByRules method translateAggregateCall.
/**
* Translate an AggregateCall to Druid equivalents.
*
* @return translated aggregation, or null if translation failed.
*/
private static Aggregation translateAggregateCall(final PlannerContext plannerContext, final RowSignature sourceRowSignature, final Project project, final AggregateCall call, final DruidOperatorTable operatorTable, final List<Aggregation> existingAggregations, final int aggNumber, final boolean approximateCountDistinct) {
final List<DimFilter> filters = Lists.newArrayList();
final List<String> rowOrder = sourceRowSignature.getRowOrder();
final String name = aggOutputName(aggNumber);
final SqlKind kind = call.getAggregation().getKind();
final SqlTypeName outputType = call.getType().getSqlTypeName();
if (call.filterArg >= 0) {
// AGG(xxx) FILTER(WHERE yyy)
if (project == null) {
// We need some kind of projection to support filtered aggregations.
return null;
}
final RexNode expression = project.getChildExps().get(call.filterArg);
final DimFilter filter = Expressions.toFilter(operatorTable, plannerContext, sourceRowSignature, expression);
if (filter == null) {
return null;
}
filters.add(filter);
}
if (kind == SqlKind.COUNT && call.getArgList().isEmpty()) {
// COUNT(*)
return Aggregation.create(new CountAggregatorFactory(name)).filter(makeFilter(filters, sourceRowSignature));
} else if (kind == SqlKind.COUNT && call.isDistinct()) {
// COUNT(DISTINCT x)
return approximateCountDistinct ? APPROX_COUNT_DISTINCT.toDruidAggregation(name, sourceRowSignature, operatorTable, plannerContext, existingAggregations, project, call, makeFilter(filters, sourceRowSignature)) : null;
} else if (kind == SqlKind.COUNT || kind == SqlKind.SUM || kind == SqlKind.SUM0 || kind == SqlKind.MIN || kind == SqlKind.MAX || kind == SqlKind.AVG) {
// Built-in agg, not distinct, not COUNT(*)
boolean forceCount = false;
final FieldOrExpression input;
final int inputField = Iterables.getOnlyElement(call.getArgList());
final RexNode rexNode = Expressions.fromFieldAccess(sourceRowSignature, project, inputField);
final FieldOrExpression foe = FieldOrExpression.fromRexNode(operatorTable, plannerContext, rowOrder, rexNode);
if (foe != null) {
input = foe;
} else if (rexNode.getKind() == SqlKind.CASE && ((RexCall) rexNode).getOperands().size() == 3) {
// Possibly a CASE-style filtered aggregation. Styles supported:
// A: SUM(CASE WHEN x = 'foo' THEN cnt END) => operands (x = 'foo', cnt, null)
// B: SUM(CASE WHEN x = 'foo' THEN 1 ELSE 0 END) => operands (x = 'foo', 1, 0)
// C: COUNT(CASE WHEN x = 'foo' THEN 'dummy' END) => operands (x = 'foo', 'dummy', null)
// If the null and non-null args are switched, "flip" is set, which negates the filter.
final RexCall caseCall = (RexCall) rexNode;
final boolean flip = RexLiteral.isNullLiteral(caseCall.getOperands().get(1)) && !RexLiteral.isNullLiteral(caseCall.getOperands().get(2));
final RexNode arg1 = caseCall.getOperands().get(flip ? 2 : 1);
final RexNode arg2 = caseCall.getOperands().get(flip ? 1 : 2);
// Operand 1: Filter
final DimFilter filter = Expressions.toFilter(operatorTable, plannerContext, sourceRowSignature, caseCall.getOperands().get(0));
if (filter == null) {
return null;
} else {
filters.add(flip ? new NotDimFilter(filter) : filter);
}
if (call.getAggregation().getKind() == SqlKind.COUNT && arg1 instanceof RexLiteral && !RexLiteral.isNullLiteral(arg1) && RexLiteral.isNullLiteral(arg2)) {
// Case C
forceCount = true;
input = null;
} else if (call.getAggregation().getKind() == SqlKind.SUM && arg1 instanceof RexLiteral && ((Number) RexLiteral.value(arg1)).intValue() == 1 && arg2 instanceof RexLiteral && ((Number) RexLiteral.value(arg2)).intValue() == 0) {
// Case B
forceCount = true;
input = null;
} else if (RexLiteral.isNullLiteral(arg2)) {
// Maybe case A
input = FieldOrExpression.fromRexNode(operatorTable, plannerContext, rowOrder, arg1);
if (input == null) {
return null;
}
} else {
// Can't translate CASE into a filter.
return null;
}
} else {
// Can't translate operand.
return null;
}
if (!forceCount) {
Preconditions.checkNotNull(input, "WTF?! input was null for non-COUNT aggregation");
}
if (forceCount || kind == SqlKind.COUNT) {
// COUNT(x)
return Aggregation.create(new CountAggregatorFactory(name)).filter(makeFilter(filters, sourceRowSignature));
} else {
// Built-in aggregator that is not COUNT.
final Aggregation retVal;
final String fieldName = input.getFieldName();
final String expression = input.getExpression();
final boolean isLong = SqlTypeName.INT_TYPES.contains(outputType) || SqlTypeName.TIMESTAMP == outputType || SqlTypeName.DATE == outputType;
if (kind == SqlKind.SUM || kind == SqlKind.SUM0) {
retVal = isLong ? Aggregation.create(new LongSumAggregatorFactory(name, fieldName, expression)) : Aggregation.create(new DoubleSumAggregatorFactory(name, fieldName, expression));
} else if (kind == SqlKind.MIN) {
retVal = isLong ? Aggregation.create(new LongMinAggregatorFactory(name, fieldName, expression)) : Aggregation.create(new DoubleMinAggregatorFactory(name, fieldName, expression));
} else if (kind == SqlKind.MAX) {
retVal = isLong ? Aggregation.create(new LongMaxAggregatorFactory(name, fieldName, expression)) : Aggregation.create(new DoubleMaxAggregatorFactory(name, fieldName, expression));
} else if (kind == SqlKind.AVG) {
final String sumName = aggInternalName(aggNumber, "sum");
final String countName = aggInternalName(aggNumber, "count");
final AggregatorFactory sum = isLong ? new LongSumAggregatorFactory(sumName, fieldName, expression) : new DoubleSumAggregatorFactory(sumName, fieldName, expression);
final AggregatorFactory count = new CountAggregatorFactory(countName);
retVal = Aggregation.create(ImmutableList.of(sum, count), new ArithmeticPostAggregator(name, "quotient", ImmutableList.<PostAggregator>of(new FieldAccessPostAggregator(null, sumName), new FieldAccessPostAggregator(null, countName))));
} else {
// Not reached.
throw new ISE("WTF?! Kind[%s] got into the built-in aggregator path somehow?!", kind);
}
return retVal.filter(makeFilter(filters, sourceRowSignature));
}
} else {
// Not a built-in aggregator, check operator table.
final SqlAggregator sqlAggregator = operatorTable.lookupAggregator(call.getAggregation().getName());
return sqlAggregator != null ? sqlAggregator.toDruidAggregation(name, sourceRowSignature, operatorTable, plannerContext, existingAggregations, project, call, makeFilter(filters, sourceRowSignature)) : null;
}
}
use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class ApproxCountDistinctSqlAggregator method toDruidAggregation.
@Override
public Aggregation toDruidAggregation(final String name, final RowSignature rowSignature, final DruidOperatorTable operatorTable, final PlannerContext plannerContext, final List<Aggregation> existingAggregations, final Project project, final AggregateCall aggregateCall, final DimFilter filter) {
final RexNode rexNode = Expressions.fromFieldAccess(rowSignature, project, Iterables.getOnlyElement(aggregateCall.getArgList()));
final RowExtraction rex = Expressions.toRowExtraction(operatorTable, plannerContext, rowSignature.getRowOrder(), rexNode);
if (rex == null) {
return null;
}
final AggregatorFactory aggregatorFactory;
if (rowSignature.getColumnType(rex.getColumn()) == ValueType.COMPLEX) {
aggregatorFactory = new HyperUniquesAggregatorFactory(name, rex.getColumn());
} else {
final SqlTypeName sqlTypeName = rexNode.getType().getSqlTypeName();
final ValueType outputType = Calcites.getValueTypeForSqlTypeName(sqlTypeName);
if (outputType == null) {
throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", sqlTypeName, name);
}
final DimensionSpec dimensionSpec = rex.toDimensionSpec(rowSignature, null, ValueType.STRING);
if (dimensionSpec == null) {
return null;
}
aggregatorFactory = new CardinalityAggregatorFactory(name, ImmutableList.of(dimensionSpec), false);
}
return Aggregation.createFinalizable(ImmutableList.<AggregatorFactory>of(aggregatorFactory), null, new PostAggregatorFactory() {
@Override
public PostAggregator factorize(String outputName) {
return new HyperUniqueFinalizingPostAggregator(outputName, name);
}
}).filter(filter);
}
use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class DruidParquetReadSupport method getPartialReadSchema.
private MessageType getPartialReadSchema(InitContext context) {
MessageType fullSchema = context.getFileSchema();
String name = fullSchema.getName();
HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
String tsField = config.getParser().getParseSpec().getTimestampSpec().getTimestampColumn();
List<DimensionSchema> dimensionSchema = config.getParser().getParseSpec().getDimensionsSpec().getDimensions();
Set<String> dimensions = Sets.newHashSet();
for (DimensionSchema dim : dimensionSchema) {
dimensions.add(dim.getName());
}
Set<String> metricsFields = Sets.newHashSet();
for (AggregatorFactory agg : config.getSchema().getDataSchema().getAggregators()) {
metricsFields.addAll(agg.requiredFields());
}
List<Type> partialFields = Lists.newArrayList();
for (Type type : fullSchema.getFields()) {
if (tsField.equals(type.getName()) || metricsFields.contains(type.getName()) || dimensions.size() > 0 && dimensions.contains(type.getName()) || dimensions.size() == 0) {
partialFields.add(type);
}
}
return new MessageType(name, partialFields);
}
use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class IncrementalIndexStorageAdapterTest method testSingleValueTopN.
@Test
public void testSingleValueTopN() throws IOException {
IncrementalIndex index = indexCreator.createIndex();
DateTime t = DateTime.now();
index.add(new MapBasedInputRow(t.minus(1).getMillis(), Lists.newArrayList("sally"), ImmutableMap.<String, Object>of("sally", "bo")));
TopNQueryEngine engine = new TopNQueryEngine(new StupidPool<ByteBuffer>("TopNQueryEngine-bufferPool", new Supplier<ByteBuffer>() {
@Override
public ByteBuffer get() {
return ByteBuffer.allocate(50000);
}
}));
final Iterable<Result<TopNResultValue>> results = Sequences.toList(engine.query(new TopNQueryBuilder().dataSource("test").granularity(Granularities.ALL).intervals(Lists.newArrayList(new Interval(0, new DateTime().getMillis()))).dimension("sally").metric("cnt").threshold(10).aggregators(Lists.<AggregatorFactory>newArrayList(new LongSumAggregatorFactory("cnt", "cnt"))).build(), new IncrementalIndexStorageAdapter(index)), Lists.<Result<TopNResultValue>>newLinkedList());
Assert.assertEquals(1, Iterables.size(results));
Assert.assertEquals(1, results.iterator().next().getValue().getValue().size());
}
use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class SpatialFilterTest method testSpatialQueryWithOtherSpatialDim.
@Test
public void testSpatialQueryWithOtherSpatialDim() {
TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource("test").granularity(Granularities.ALL).intervals(Arrays.asList(new Interval("2013-01-01/2013-01-07"))).filters(new SpatialDimFilter("spatialIsRad", new RadiusBound(new float[] { 0.0f, 0.0f }, 5))).aggregators(Arrays.<AggregatorFactory>asList(new CountAggregatorFactory("rows"), new LongSumAggregatorFactory("val", "val"))).build();
List<Result<TimeseriesResultValue>> expectedResults = Arrays.asList(new Result<TimeseriesResultValue>(new DateTime("2013-01-01T00:00:00.000Z"), new TimeseriesResultValue(ImmutableMap.<String, Object>builder().put("rows", 1L).put("val", 13L).build())));
try {
TimeseriesQueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()), new TimeseriesQueryEngine(), QueryRunnerTestHelper.NOOP_QUERYWATCHER);
QueryRunner runner = new FinalizeResultsQueryRunner(factory.createRunner(segment), factory.getToolchest());
TestHelper.assertExpectedResults(expectedResults, runner.run(query, Maps.newHashMap()));
} catch (Exception e) {
throw Throwables.propagate(e);
}
}
Aggregations