Search in sources :

Example 6 with DoubleSumAggregatorFactory

use of io.druid.query.aggregation.DoubleSumAggregatorFactory in project druid by druid-io.

the class GroupByRules method translateAggregateCall.

/**
   * Translate an AggregateCall to Druid equivalents.
   *
   * <p>Handles, in order: an optional {@code FILTER (WHERE ...)} clause, {@code COUNT(*)}, {@code COUNT(DISTINCT x)},
   * the built-in aggregators (COUNT, SUM, SUM0, MIN, MAX, AVG) including CASE-style filtered aggregations, and finally
   * any aggregator registered in the operator table.
   *
   * @param plannerContext           planner context, passed through to expression, filter and aggregator translation
   * @param sourceRowSignature       signature of the rows being aggregated
   * @param project                  projection sitting below the aggregate, or null; required for FILTER clauses
   * @param call                     the aggregate call to translate
   * @param operatorTable            operator table used for expression translation and custom aggregator lookup
   * @param existingAggregations     aggregations translated so far, passed through to SqlAggregator implementations
   * @param aggNumber                index of this aggregation, used to derive its output and internal names
   * @param approximateCountDistinct whether COUNT(DISTINCT x) may be translated using the approximate aggregator
   *
   * @return translated aggregation, or null if translation failed.
   */
private static Aggregation translateAggregateCall(
    final PlannerContext plannerContext,
    final RowSignature sourceRowSignature,
    final Project project,
    final AggregateCall call,
    final DruidOperatorTable operatorTable,
    final List<Aggregation> existingAggregations,
    final int aggNumber,
    final boolean approximateCountDistinct
) {
    final List<DimFilter> filters = Lists.newArrayList();
    final List<String> rowOrder = sourceRowSignature.getRowOrder();
    final String name = aggOutputName(aggNumber);
    final SqlKind kind = call.getAggregation().getKind();
    final SqlTypeName outputType = call.getType().getSqlTypeName();
    if (call.filterArg >= 0) {
        // AGG(xxx) FILTER(WHERE yyy)
        if (project == null) {
            // We need some kind of projection to support filtered aggregations.
            return null;
        }
        final RexNode expression = project.getChildExps().get(call.filterArg);
        final DimFilter filter = Expressions.toFilter(operatorTable, plannerContext, sourceRowSignature, expression);
        if (filter == null) {
            return null;
        }
        filters.add(filter);
    }
    if (kind == SqlKind.COUNT && call.getArgList().isEmpty()) {
        // COUNT(*)
        return Aggregation.create(new CountAggregatorFactory(name)).filter(makeFilter(filters, sourceRowSignature));
    } else if (kind == SqlKind.COUNT && call.isDistinct()) {
        // COUNT(DISTINCT x): only translatable when approximate results are allowed.
        return approximateCountDistinct ? APPROX_COUNT_DISTINCT.toDruidAggregation(name, sourceRowSignature, operatorTable, plannerContext, existingAggregations, project, call, makeFilter(filters, sourceRowSignature)) : null;
    } else if (kind == SqlKind.COUNT || kind == SqlKind.SUM || kind == SqlKind.SUM0 || kind == SqlKind.MIN || kind == SqlKind.MAX || kind == SqlKind.AVG) {
        // Built-in agg, not distinct, not COUNT(*)
        boolean forceCount = false;
        final FieldOrExpression input;
        final int inputField = Iterables.getOnlyElement(call.getArgList());
        final RexNode rexNode = Expressions.fromFieldAccess(sourceRowSignature, project, inputField);
        final FieldOrExpression foe = FieldOrExpression.fromRexNode(operatorTable, plannerContext, rowOrder, rexNode);
        if (foe != null) {
            input = foe;
        } else if (rexNode.getKind() == SqlKind.CASE && ((RexCall) rexNode).getOperands().size() == 3) {
            // Possibly a CASE-style filtered aggregation. Styles supported:
            // A: SUM(CASE WHEN x = 'foo' THEN cnt END) => operands (x = 'foo', cnt, null)
            // B: SUM(CASE WHEN x = 'foo' THEN 1 ELSE 0 END) => operands (x = 'foo', 1, 0)
            // C: COUNT(CASE WHEN x = 'foo' THEN 'dummy' END) => operands (x = 'foo', 'dummy', null)
            // If the null and non-null args are switched, "flip" is set, which negates the filter.
            final RexCall caseCall = (RexCall) rexNode;
            final boolean flip = RexLiteral.isNullLiteral(caseCall.getOperands().get(1)) && !RexLiteral.isNullLiteral(caseCall.getOperands().get(2));
            final RexNode arg1 = caseCall.getOperands().get(flip ? 2 : 1);
            final RexNode arg2 = caseCall.getOperands().get(flip ? 1 : 2);
            // Operand 1: Filter
            final DimFilter filter = Expressions.toFilter(operatorTable, plannerContext, sourceRowSignature, caseCall.getOperands().get(0));
            if (filter == null) {
                return null;
            } else {
                filters.add(flip ? new NotDimFilter(filter) : filter);
            }
            if (kind == SqlKind.COUNT && arg1 instanceof RexLiteral && !RexLiteral.isNullLiteral(arg1) && RexLiteral.isNullLiteral(arg2)) {
                // Case C
                forceCount = true;
                input = null;
            } else if (kind == SqlKind.SUM && isNumericLiteralEqualTo(arg1, 1) && isNumericLiteralEqualTo(arg2, 0)) {
                // Case B. The literals must be exactly 1 and 0: a truncating comparison would also accept
                // e.g. THEN 1.5 ELSE 0 and silently turn that SUM into a COUNT.
                forceCount = true;
                input = null;
            } else if (RexLiteral.isNullLiteral(arg2)) {
                // Maybe case A
                input = FieldOrExpression.fromRexNode(operatorTable, plannerContext, rowOrder, arg1);
                if (input == null) {
                    return null;
                }
            } else {
                // Can't translate CASE into a filter.
                return null;
            }
        } else {
            // Can't translate operand.
            return null;
        }
        if (!forceCount) {
            Preconditions.checkNotNull(input, "WTF?! input was null for non-COUNT aggregation");
        }
        if (forceCount || kind == SqlKind.COUNT) {
            // COUNT(x)
            return Aggregation.create(new CountAggregatorFactory(name)).filter(makeFilter(filters, sourceRowSignature));
        } else {
            // Built-in aggregator that is not COUNT.
            final Aggregation retVal;
            final String fieldName = input.getFieldName();
            final String expression = input.getExpression();
            // Integer-like and time-like output types use the long aggregators; everything else uses double.
            final boolean isLong = SqlTypeName.INT_TYPES.contains(outputType) || SqlTypeName.TIMESTAMP == outputType || SqlTypeName.DATE == outputType;
            if (kind == SqlKind.SUM || kind == SqlKind.SUM0) {
                retVal = isLong ? Aggregation.create(new LongSumAggregatorFactory(name, fieldName, expression)) : Aggregation.create(new DoubleSumAggregatorFactory(name, fieldName, expression));
            } else if (kind == SqlKind.MIN) {
                retVal = isLong ? Aggregation.create(new LongMinAggregatorFactory(name, fieldName, expression)) : Aggregation.create(new DoubleMinAggregatorFactory(name, fieldName, expression));
            } else if (kind == SqlKind.MAX) {
                retVal = isLong ? Aggregation.create(new LongMaxAggregatorFactory(name, fieldName, expression)) : Aggregation.create(new DoubleMaxAggregatorFactory(name, fieldName, expression));
            } else if (kind == SqlKind.AVG) {
                // AVG is built from an internal sum and an internal count, combined by a "quotient" post-aggregator.
                final String sumName = aggInternalName(aggNumber, "sum");
                final String countName = aggInternalName(aggNumber, "count");
                final AggregatorFactory sum = isLong ? new LongSumAggregatorFactory(sumName, fieldName, expression) : new DoubleSumAggregatorFactory(sumName, fieldName, expression);
                final AggregatorFactory count = new CountAggregatorFactory(countName);
                retVal = Aggregation.create(ImmutableList.of(sum, count), new ArithmeticPostAggregator(name, "quotient", ImmutableList.<PostAggregator>of(new FieldAccessPostAggregator(null, sumName), new FieldAccessPostAggregator(null, countName))));
            } else {
                // Not reached.
                throw new ISE("WTF?! Kind[%s] got into the built-in aggregator path somehow?!", kind);
            }
            return retVal.filter(makeFilter(filters, sourceRowSignature));
        }
    } else {
        // Not a built-in aggregator, check operator table.
        final SqlAggregator sqlAggregator = operatorTable.lookupAggregator(call.getAggregation().getName());
        return sqlAggregator != null ? sqlAggregator.toDruidAggregation(name, sourceRowSignature, operatorTable, plannerContext, existingAggregations, project, call, makeFilter(filters, sourceRowSignature)) : null;
    }
}

/**
   * Checks whether a node is a numeric literal whose value is exactly equal to the given integer.
   *
   * <p>The comparison does not truncate, so a literal such as 1.5 is not considered equal to 1. Non-literal nodes
   * and literals whose value is null or non-numeric yield false rather than an exception.
   *
   * @param node     node to inspect
   * @param expected integer the literal must equal
   *
   * @return true if node is a numeric literal exactly equal to expected
   */
private static boolean isNumericLiteralEqualTo(final RexNode node, final int expected) {
    if (!(node instanceof RexLiteral)) {
        return false;
    }
    final Object value = RexLiteral.value(node);
    if (value instanceof java.math.BigDecimal) {
        // compareTo ignores scale, so 1, 1.0 and 1.00 all match 1.
        return ((java.math.BigDecimal) value).compareTo(java.math.BigDecimal.valueOf(expected)) == 0;
    }
    return value instanceof Number && ((Number) value).doubleValue() == expected;
}
Also used : RexLiteral(org.apache.calcite.rex.RexLiteral) ArithmeticPostAggregator(io.druid.query.aggregation.post.ArithmeticPostAggregator) DoubleMaxAggregatorFactory(io.druid.query.aggregation.DoubleMaxAggregatorFactory) SqlTypeName(org.apache.calcite.sql.type.SqlTypeName) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) DoubleMinAggregatorFactory(io.druid.query.aggregation.DoubleMinAggregatorFactory) LongMinAggregatorFactory(io.druid.query.aggregation.LongMinAggregatorFactory) RexCall(org.apache.calcite.rex.RexCall) Aggregation(io.druid.sql.calcite.aggregation.Aggregation) ISE(io.druid.java.util.common.ISE) LongMaxAggregatorFactory(io.druid.query.aggregation.LongMaxAggregatorFactory) NotDimFilter(io.druid.query.filter.NotDimFilter) FieldAccessPostAggregator(io.druid.query.aggregation.post.FieldAccessPostAggregator) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) PostAggregator(io.druid.query.aggregation.PostAggregator) FieldAccessPostAggregator(io.druid.query.aggregation.post.FieldAccessPostAggregator) ArithmeticPostAggregator(io.druid.query.aggregation.post.ArithmeticPostAggregator) SqlKind(org.apache.calcite.sql.SqlKind) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) DoubleMaxAggregatorFactory(io.druid.query.aggregation.DoubleMaxAggregatorFactory) LongMaxAggregatorFactory(io.druid.query.aggregation.LongMaxAggregatorFactory) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) LongMinAggregatorFactory(io.druid.query.aggregation.LongMinAggregatorFactory) PostAggregatorFactory(io.druid.sql.calcite.aggregation.PostAggregatorFactory) DoubleMinAggregatorFactory(io.druid.query.aggregation.DoubleMinAggregatorFactory) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) 
SqlAggregator(io.druid.sql.calcite.aggregation.SqlAggregator) ApproxCountDistinctSqlAggregator(io.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator) DimFilter(io.druid.query.filter.DimFilter) NotDimFilter(io.druid.query.filter.NotDimFilter) AndDimFilter(io.druid.query.filter.AndDimFilter) RexNode(org.apache.calcite.rex.RexNode)

Example 7 with DoubleSumAggregatorFactory

use of io.druid.query.aggregation.DoubleSumAggregatorFactory in project druid by druid-io.

the class TaskSerdeTest method testIndexTaskSerde.

@Test
public void testIndexTaskSerde() throws Exception {
    // Build the ingestion spec step by step, in the same order the nested constructor calls would evaluate.
    final DataSchema dataSchema = new DataSchema(
        "foo",
        null,
        new AggregatorFactory[] { new DoubleSumAggregatorFactory("met", "met") },
        new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(new Interval("2010-01-01/P2D"))),
        jsonMapper
    );
    final IndexTask.IndexIOConfig ioConfig = new IndexTask.IndexIOConfig(
        new LocalFirehoseFactory(new File("lol"), "rofl", null),
        true,
        true
    );
    final IndexTask.IndexTuningConfig tuningConfig = new IndexTask.IndexTuningConfig(10000, 10, 9999, null, indexSpec, 3, true, true, true);
    final IndexTask original = new IndexTask(null, null, new IndexTask.IndexIngestionSpec(dataSchema, ioConfig, tuningConfig), null, jsonMapper);

    final String serialized = jsonMapper.writeValueAsString(original);
    // Let some wall-clock time pass before deserializing, so the task id is shown not to change.
    Thread.sleep(100);
    final IndexTask roundTripped = (IndexTask) jsonMapper.readValue(serialized, Task.class);

    // Identity fields.
    Assert.assertEquals("foo", original.getDataSource());
    Assert.assertEquals(original.getId(), roundTripped.getId());
    Assert.assertEquals(original.getGroupId(), roundTripped.getGroupId());
    Assert.assertEquals(original.getDataSource(), roundTripped.getDataSource());

    // IO config.
    final IndexTask.IndexIOConfig expectedIo = original.getIngestionSchema().getIOConfig();
    final IndexTask.IndexIOConfig actualIo = roundTripped.getIngestionSchema().getIOConfig();
    Assert.assertTrue(expectedIo.getFirehoseFactory() instanceof LocalFirehoseFactory);
    Assert.assertTrue(actualIo.getFirehoseFactory() instanceof LocalFirehoseFactory);
    Assert.assertEquals(expectedIo.isAppendToExisting(), actualIo.isAppendToExisting());
    Assert.assertEquals(expectedIo.isSkipFirehoseCaching(), actualIo.isSkipFirehoseCaching());

    // Tuning config.
    final IndexTask.IndexTuningConfig expectedTuning = original.getIngestionSchema().getTuningConfig();
    final IndexTask.IndexTuningConfig actualTuning = roundTripped.getIngestionSchema().getTuningConfig();
    Assert.assertEquals(expectedTuning.getBasePersistDirectory(), actualTuning.getBasePersistDirectory());
    Assert.assertEquals(expectedTuning.getIndexSpec(), actualTuning.getIndexSpec());
    Assert.assertEquals(expectedTuning.getIntermediatePersistPeriod(), actualTuning.getIntermediatePersistPeriod());
    Assert.assertEquals(expectedTuning.getMaxPendingPersists(), actualTuning.getMaxPendingPersists());
    Assert.assertEquals(expectedTuning.getMaxRowsInMemory(), actualTuning.getMaxRowsInMemory());
    Assert.assertEquals(expectedTuning.getNumShards(), actualTuning.getNumShards());
    Assert.assertEquals(expectedTuning.getTargetPartitionSize(), actualTuning.getTargetPartitionSize());
    Assert.assertEquals(expectedTuning.isBuildV9Directly(), actualTuning.isBuildV9Directly());
    Assert.assertEquals(expectedTuning.isForceExtendableShardSpecs(), actualTuning.isForceExtendableShardSpecs());
    Assert.assertEquals(expectedTuning.isReportParseExceptions(), actualTuning.isReportParseExceptions());
}
Also used : DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) LocalFirehoseFactory(io.druid.segment.realtime.firehose.LocalFirehoseFactory) DataSchema(io.druid.segment.indexing.DataSchema) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) File(java.io.File) Interval(org.joda.time.Interval) Test(org.junit.Test)

Example 8 with DoubleSumAggregatorFactory

use of io.druid.query.aggregation.DoubleSumAggregatorFactory in project druid by druid-io.

the class TaskSerdeTest method testIndexTaskwithResourceSerde.

@Test
public void testIndexTaskwithResourceSerde() throws Exception {
    // Build the ingestion spec step by step, in the same order the nested constructor calls would evaluate.
    final TaskResource resource = new TaskResource("rofl", 2);
    final DataSchema dataSchema = new DataSchema(
        "foo",
        null,
        new AggregatorFactory[] { new DoubleSumAggregatorFactory("met", "met") },
        new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(new Interval("2010-01-01/P2D"))),
        jsonMapper
    );
    final IndexTask.IndexIOConfig ioConfig = new IndexTask.IndexIOConfig(
        new LocalFirehoseFactory(new File("lol"), "rofl", null),
        true,
        null
    );
    final IndexTask.IndexTuningConfig tuningConfig = new IndexTask.IndexTuningConfig(10000, 10, null, null, indexSpec, 3, true, true, true);
    final IndexTask original = new IndexTask(null, resource, new IndexTask.IndexIngestionSpec(dataSchema, ioConfig, tuningConfig), null, jsonMapper);

    // Register the firehose Jackson modules on the mapper before serializing.
    for (final Module firehoseJacksonModule : new FirehoseModule().getJacksonModules()) {
        jsonMapper.registerModule(firehoseJacksonModule);
    }

    final String serialized = jsonMapper.writeValueAsString(original);
    // Let some wall-clock time pass before deserializing, so the task id is shown not to change.
    Thread.sleep(100);
    final IndexTask roundTripped = (IndexTask) jsonMapper.readValue(serialized, Task.class);

    Assert.assertEquals("foo", original.getDataSource());
    Assert.assertEquals(original.getId(), roundTripped.getId());

    // Task resource: check the configured values, then that they survive the round trip.
    Assert.assertEquals(2, original.getTaskResource().getRequiredCapacity());
    Assert.assertEquals("rofl", original.getTaskResource().getAvailabilityGroup());
    Assert.assertEquals(original.getTaskResource().getRequiredCapacity(), roundTripped.getTaskResource().getRequiredCapacity());
    Assert.assertEquals(original.getTaskResource().getAvailabilityGroup(), roundTripped.getTaskResource().getAvailabilityGroup());

    Assert.assertEquals(original.getGroupId(), roundTripped.getGroupId());
    Assert.assertEquals(original.getDataSource(), roundTripped.getDataSource());
    Assert.assertTrue(original.getIngestionSchema().getIOConfig().getFirehoseFactory() instanceof LocalFirehoseFactory);
    Assert.assertTrue(roundTripped.getIngestionSchema().getIOConfig().getFirehoseFactory() instanceof LocalFirehoseFactory);
}
Also used : DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) LocalFirehoseFactory(io.druid.segment.realtime.firehose.LocalFirehoseFactory) DataSchema(io.druid.segment.indexing.DataSchema) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) FirehoseModule(io.druid.guice.FirehoseModule) Module(com.fasterxml.jackson.databind.Module) FirehoseModule(io.druid.guice.FirehoseModule) File(java.io.File) Interval(org.joda.time.Interval) Test(org.junit.Test)

Example 9 with DoubleSumAggregatorFactory

use of io.druid.query.aggregation.DoubleSumAggregatorFactory in project druid by druid-io.

the class TaskLifecycleTest method testIndexTask.

@Test
public void testIndexTask() throws Exception {
    // Build the ingestion spec step by step, in the same order the nested constructor calls would evaluate.
    final DataSchema dataSchema = new DataSchema(
        "foo",
        null,
        new AggregatorFactory[] { new DoubleSumAggregatorFactory("met", "met") },
        new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(new Interval("2010-01-01/P2D"))),
        mapper
    );
    final IndexTask.IndexIOConfig ioConfig = new IndexTask.IndexIOConfig(new MockFirehoseFactory(false), false, null);
    final IndexTask.IndexTuningConfig tuningConfig = new IndexTask.IndexTuningConfig(10000, 10, null, null, indexSpec, 3, true, true, true);
    final Task indexTask = new IndexTask(null, null, new IndexTask.IndexIngestionSpec(dataSchema, ioConfig, tuningConfig), null, MAPPER);
    final String taskId = indexTask.getId();

    // No status may exist for the task before it has been run.
    final Optional<TaskStatus> statusBeforeRun = tsqa.getStatus(taskId);
    Assert.assertFalse("pre run task status not present", statusBeforeRun.isPresent());

    final TaskStatus mergedStatus = runTask(indexTask);
    final TaskStatus storedStatus = taskStorage.getStatus(indexTask.getId()).get();
    final List<DataSegment> published = byIntervalOrdering.sortedCopy(mdc.getPublished());
    final List<DataSegment> logged = byIntervalOrdering.sortedCopy(tsqa.getInsertedSegments(indexTask.getId()));

    Assert.assertEquals("statusCode", TaskStatus.Status.SUCCESS, storedStatus.getStatusCode());
    Assert.assertEquals("merged statusCode", TaskStatus.Status.SUCCESS, mergedStatus.getStatusCode());
    Assert.assertEquals("segments logged vs published", logged, published);
    Assert.assertEquals("num segments published", 2, mdc.getPublished().size());
    Assert.assertEquals("num segments nuked", 0, mdc.getNuked().size());

    // First published segment.
    final DataSegment firstSegment = published.get(0);
    Assert.assertEquals("segment1 datasource", "foo", firstSegment.getDataSource());
    Assert.assertEquals("segment1 interval", new Interval("2010-01-01/P1D"), firstSegment.getInterval());
    Assert.assertEquals("segment1 dimensions", ImmutableList.of("dim1", "dim2"), firstSegment.getDimensions());
    Assert.assertEquals("segment1 metrics", ImmutableList.of("met"), firstSegment.getMetrics());

    // Second published segment.
    final DataSegment secondSegment = published.get(1);
    Assert.assertEquals("segment2 datasource", "foo", secondSegment.getDataSource());
    Assert.assertEquals("segment2 interval", new Interval("2010-01-02/P1D"), secondSegment.getInterval());
    Assert.assertEquals("segment2 dimensions", ImmutableList.of("dim1", "dim2"), secondSegment.getDimensions());
    Assert.assertEquals("segment2 metrics", ImmutableList.of("met"), secondSegment.getMetrics());
}
Also used : IndexTask(io.druid.indexing.common.task.IndexTask) RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) Task(io.druid.indexing.common.task.Task) AbstractFixedIntervalTask(io.druid.indexing.common.task.AbstractFixedIntervalTask) KillTask(io.druid.indexing.common.task.KillTask) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) IndexTask(io.druid.indexing.common.task.IndexTask) RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) TaskStatus(io.druid.indexing.common.TaskStatus) DataSegment(io.druid.timeline.DataSegment) DataSchema(io.druid.segment.indexing.DataSchema) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) Interval(org.joda.time.Interval) FireDepartmentTest(io.druid.segment.realtime.FireDepartmentTest) Test(org.junit.Test)

Example 10 with DoubleSumAggregatorFactory

use of io.druid.query.aggregation.DoubleSumAggregatorFactory in project druid by druid-io.

the class TaskLifecycleTest method testIndexTaskFailure.

@Test
public void testIndexTaskFailure() throws Exception {
    // Same shape of task as the successful case, but backed by MockExceptionalFirehoseFactory.
    final DataSchema dataSchema = new DataSchema(
        "foo",
        null,
        new AggregatorFactory[] { new DoubleSumAggregatorFactory("met", "met") },
        new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(new Interval("2010-01-01/P1D"))),
        mapper
    );
    final IndexTask.IndexIOConfig ioConfig = new IndexTask.IndexIOConfig(new MockExceptionalFirehoseFactory(), false, null);
    final IndexTask.IndexTuningConfig tuningConfig = new IndexTask.IndexTuningConfig(10000, 10, null, null, indexSpec, 3, true, true, true);
    final Task failingTask = new IndexTask(null, null, new IndexTask.IndexIngestionSpec(dataSchema, ioConfig, tuningConfig), null, MAPPER);

    final TaskStatus outcome = runTask(failingTask);

    // The task must fail without publishing or nuking any segments.
    Assert.assertEquals("statusCode", TaskStatus.Status.FAILED, outcome.getStatusCode());
    Assert.assertEquals("num segments published", 0, mdc.getPublished().size());
    Assert.assertEquals("num segments nuked", 0, mdc.getNuked().size());
}
Also used : DataSchema(io.druid.segment.indexing.DataSchema) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) IndexTask(io.druid.indexing.common.task.IndexTask) RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) Task(io.druid.indexing.common.task.Task) AbstractFixedIntervalTask(io.druid.indexing.common.task.AbstractFixedIntervalTask) KillTask(io.druid.indexing.common.task.KillTask) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) IndexTask(io.druid.indexing.common.task.IndexTask) RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) TaskStatus(io.druid.indexing.common.TaskStatus) Interval(org.joda.time.Interval) FireDepartmentTest(io.druid.segment.realtime.FireDepartmentTest) Test(org.junit.Test)

Aggregations

DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory): 40; Test (org.junit.Test): 30; LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 21; AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 15; CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 13; Interval (org.joda.time.Interval): 12; DimensionsSpec (io.druid.data.input.impl.DimensionsSpec): 8; TimestampSpec (io.druid.data.input.impl.TimestampSpec): 8; HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory): 8; ArrayList (java.util.ArrayList): 8; DataSchema (io.druid.segment.indexing.DataSchema): 7; UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec): 7; JSONParseSpec (io.druid.data.input.impl.JSONParseSpec): 6; PostAggregator (io.druid.query.aggregation.PostAggregator): 6; ArithmeticPostAggregator (io.druid.query.aggregation.post.ArithmeticPostAggregator): 6; FieldAccessPostAggregator (io.druid.query.aggregation.post.FieldAccessPostAggregator): 6; TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery): 6; StringInputRowParser (io.druid.data.input.impl.StringInputRowParser): 5; ConstantPostAggregator (io.druid.query.aggregation.post.ConstantPostAggregator): 5; TimeseriesQueryRunnerFactory (io.druid.query.timeseries.TimeseriesQueryRunnerFactory): 5