Search in sources:

Example 1 with VarianceAggregatorFactory

use of org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory in project druid by druid-io.

the class VarianceSqlAggregatorTest method testStdDevWithVirtualColumns.

/**
 * Checks that {@code STDDEV} over the expressions {@code d1*7}, {@code f1*7} and {@code l1*7} is
 * planned as a timeseries query with one expression virtual column, one "sample" variance
 * aggregator and one standard-deviation post-aggregator per input, and that the query results
 * match values computed locally from {@code CalciteTests.ROWS1_WITH_NUMERIC_DIMS}.
 */
@Test
public void testStdDevWithVirtualColumns() throws Exception {
    // One collector per input column: d1 (double), f1 (float), l1 (long).
    final VarianceAggregatorCollector doubleCollector = new VarianceAggregatorCollector();
    final VarianceAggregatorCollector floatCollector = new VarianceAggregatorCollector();
    final VarianceAggregatorCollector longCollector = new VarianceAggregatorCollector();

    for (InputRow inputRow : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
        final Object rawDouble = inputRow.getRaw("d1");
        final Object rawFloat = inputRow.getRaw("f1");
        final Object rawLong = inputRow.getRaw("l1");
        // NOTE(review): the trailing 7 presumably mirrors the "*7" in the SQL below; the
        // addToHolder helper is defined elsewhere - confirm its semantics there.
        addToHolder(doubleCollector, rawDouble, 7);
        addToHolder(floatCollector, rawFloat, 7);
        addToHolder(longCollector, rawLong, 7);
    }

    // getVariance(false) is paired with the "sample" estimator used in the expected native query.
    final double expectedDouble = Math.sqrt(doubleCollector.getVariance(false));
    final float expectedFloat = (float) Math.sqrt(floatCollector.getVariance(false));
    final long expectedLong = (long) Math.sqrt(longCollector.getVariance(false));
    final List<Object[]> expectedResults = ImmutableList.of(
        new Object[] { expectedDouble, expectedFloat, expectedLong });

    final String sql = "SELECT\n"
        + "STDDEV(d1*7),\n"
        + "STDDEV(f1*7),\n"
        + "STDDEV(l1*7)\n"
        + "FROM numfoo";

    testQuery(
        sql,
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .virtualColumns(
                    BaseCalciteQueryTest.expressionVirtualColumn("v0", "(\"d1\" * 7)", ColumnType.DOUBLE),
                    BaseCalciteQueryTest.expressionVirtualColumn("v1", "(\"f1\" * 7)", ColumnType.FLOAT),
                    BaseCalciteQueryTest.expressionVirtualColumn("v2", "(\"l1\" * 7)", ColumnType.LONG))
                .aggregators(ImmutableList.of(
                    new VarianceAggregatorFactory("a0:agg", "v0", "sample", "double"),
                    new VarianceAggregatorFactory("a1:agg", "v1", "sample", "float"),
                    new VarianceAggregatorFactory("a2:agg", "v2", "sample", "long")))
                .postAggregators(
                    new StandardDeviationPostAggregator("a0", "a0:agg", "sample"),
                    new StandardDeviationPostAggregator("a1", "a1:agg", "sample"),
                    new StandardDeviationPostAggregator("a2", "a2:agg", "sample"))
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()),
        expectedResults);
}
Also used : VarianceAggregatorCollector(org.apache.druid.query.aggregation.variance.VarianceAggregatorCollector) StandardDeviationPostAggregator(org.apache.druid.query.aggregation.variance.StandardDeviationPostAggregator) InputRow(org.apache.druid.data.input.InputRow) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) VarianceAggregatorFactory(org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 2 with VarianceAggregatorFactory

use of org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory in project druid by druid-io.

the class VarianceSqlAggregatorTest method testVarSamp.

/**
 * Checks that {@code VAR_SAMP} over {@code d1}, {@code f1} and {@code l1} is planned as a
 * timeseries query with one "sample" variance aggregator per column, and that the query results
 * match variances computed locally from {@code CalciteTests.ROWS1_WITH_NUMERIC_DIMS}.
 */
@Test
public void testVarSamp() throws Exception {
    // One collector per input column: d1 (double), f1 (float), l1 (long).
    final VarianceAggregatorCollector doubleCollector = new VarianceAggregatorCollector();
    final VarianceAggregatorCollector floatCollector = new VarianceAggregatorCollector();
    final VarianceAggregatorCollector longCollector = new VarianceAggregatorCollector();

    for (InputRow inputRow : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
        final Object rawDouble = inputRow.getRaw("d1");
        final Object rawFloat = inputRow.getRaw("f1");
        final Object rawLong = inputRow.getRaw("l1");
        addToHolder(doubleCollector, rawDouble);
        addToHolder(floatCollector, rawFloat);
        addToHolder(longCollector, rawLong);
    }

    // getVariance(false) is paired with the "sample" estimator used in the expected native query;
    // the float and long columns are narrowed to match the SQL result types.
    final List<Object[]> expectedResults = ImmutableList.of(
        new Object[] {
            doubleCollector.getVariance(false),
            floatCollector.getVariance(false).floatValue(),
            longCollector.getVariance(false).longValue()
        });

    final String sql = "SELECT\n"
        + "VAR_SAMP(d1),\n"
        + "VAR_SAMP(f1),\n"
        + "VAR_SAMP(l1)\n"
        + "FROM numfoo";

    testQuery(
        sql,
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .aggregators(ImmutableList.of(
                    new VarianceAggregatorFactory("a0:agg", "d1", "sample", "double"),
                    new VarianceAggregatorFactory("a1:agg", "f1", "sample", "float"),
                    new VarianceAggregatorFactory("a2:agg", "l1", "sample", "long")))
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()),
        expectedResults);
}
Also used : VarianceAggregatorCollector(org.apache.druid.query.aggregation.variance.VarianceAggregatorCollector) InputRow(org.apache.druid.data.input.InputRow) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) VarianceAggregatorFactory(org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 3 with VarianceAggregatorFactory

use of org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory in project druid by druid-io.

the class VarianceSqlAggregatorTest method testVarPop.

/**
 * Checks that {@code VAR_POP} over {@code d1}, {@code f1} and {@code l1} is planned as a
 * timeseries query with one "population" variance aggregator per column, and that the query
 * results match variances computed locally from {@code CalciteTests.ROWS1_WITH_NUMERIC_DIMS}.
 */
@Test
public void testVarPop() throws Exception {
    // One collector per input column: d1 (double), f1 (float), l1 (long).
    final VarianceAggregatorCollector doubleCollector = new VarianceAggregatorCollector();
    final VarianceAggregatorCollector floatCollector = new VarianceAggregatorCollector();
    final VarianceAggregatorCollector longCollector = new VarianceAggregatorCollector();

    for (InputRow inputRow : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
        final Object rawDouble = inputRow.getRaw("d1");
        final Object rawFloat = inputRow.getRaw("f1");
        final Object rawLong = inputRow.getRaw("l1");
        addToHolder(doubleCollector, rawDouble);
        addToHolder(floatCollector, rawFloat);
        addToHolder(longCollector, rawLong);
    }

    // getVariance(true) is paired with the "population" estimator used in the expected native
    // query; the float and long columns are narrowed to match the SQL result types.
    final List<Object[]> expectedResults = ImmutableList.of(
        new Object[] {
            doubleCollector.getVariance(true),
            floatCollector.getVariance(true).floatValue(),
            longCollector.getVariance(true).longValue()
        });

    final String sql = "SELECT\n"
        + "VAR_POP(d1),\n"
        + "VAR_POP(f1),\n"
        + "VAR_POP(l1)\n"
        + "FROM numfoo";

    testQuery(
        sql,
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .aggregators(ImmutableList.of(
                    new VarianceAggregatorFactory("a0:agg", "d1", "population", "double"),
                    new VarianceAggregatorFactory("a1:agg", "f1", "population", "float"),
                    new VarianceAggregatorFactory("a2:agg", "l1", "population", "long")))
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()),
        expectedResults);
}
Also used : VarianceAggregatorCollector(org.apache.druid.query.aggregation.variance.VarianceAggregatorCollector) InputRow(org.apache.druid.data.input.InputRow) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) VarianceAggregatorFactory(org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 4 with VarianceAggregatorFactory

use of org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory in project druid by druid-io.

the class VarianceSqlAggregatorTest method testStdDevPop.

/**
 * Checks that {@code STDDEV_POP} over {@code d1}, {@code f1} and {@code l1} is planned as a
 * timeseries query with one "population" variance aggregator plus one standard-deviation
 * post-aggregator per column, and that the query results match values computed locally from
 * {@code CalciteTests.ROWS1_WITH_NUMERIC_DIMS}.
 */
@Test
public void testStdDevPop() throws Exception {
    // One collector per input column: d1 (double), f1 (float), l1 (long).
    final VarianceAggregatorCollector doubleCollector = new VarianceAggregatorCollector();
    final VarianceAggregatorCollector floatCollector = new VarianceAggregatorCollector();
    final VarianceAggregatorCollector longCollector = new VarianceAggregatorCollector();

    for (InputRow inputRow : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
        final Object rawDouble = inputRow.getRaw("d1");
        final Object rawFloat = inputRow.getRaw("f1");
        final Object rawLong = inputRow.getRaw("l1");
        addToHolder(doubleCollector, rawDouble);
        addToHolder(floatCollector, rawFloat);
        addToHolder(longCollector, rawLong);
    }

    // getVariance(true) is paired with the "population" estimator used in the expected native query.
    final double expectedDouble = Math.sqrt(doubleCollector.getVariance(true));
    final float expectedFloat = (float) Math.sqrt(floatCollector.getVariance(true));
    final long expectedLong = (long) Math.sqrt(longCollector.getVariance(true));
    final List<Object[]> expectedResults = ImmutableList.of(
        new Object[] { expectedDouble, expectedFloat, expectedLong });

    final String sql = "SELECT\n"
        + "STDDEV_POP(d1),\n"
        + "STDDEV_POP(f1),\n"
        + "STDDEV_POP(l1)\n"
        + "FROM numfoo";

    testQuery(
        sql,
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .aggregators(ImmutableList.of(
                    new VarianceAggregatorFactory("a0:agg", "d1", "population", "double"),
                    new VarianceAggregatorFactory("a1:agg", "f1", "population", "float"),
                    new VarianceAggregatorFactory("a2:agg", "l1", "population", "long")))
                .postAggregators(ImmutableList.of(
                    new StandardDeviationPostAggregator("a0", "a0:agg", "population"),
                    new StandardDeviationPostAggregator("a1", "a1:agg", "population"),
                    new StandardDeviationPostAggregator("a2", "a2:agg", "population")))
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()),
        expectedResults);
}
Also used : VarianceAggregatorCollector(org.apache.druid.query.aggregation.variance.VarianceAggregatorCollector) StandardDeviationPostAggregator(org.apache.druid.query.aggregation.variance.StandardDeviationPostAggregator) InputRow(org.apache.druid.data.input.InputRow) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) VarianceAggregatorFactory(org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 5 with VarianceAggregatorFactory

use of org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory in project druid by druid-io.

the class BaseVarianceSqlAggregator method toDruidAggregation.

/**
 * Translates a Calcite variance / standard-deviation aggregate call into a Druid
 * {@link Aggregation} backed by a {@link VarianceAggregatorFactory}.
 *
 * <p>The aggregator is named {@code "<name>:agg"}. The "population" estimator is used for
 * {@code VAR_POP} and {@code STDDEV_POP}; every other function uses "sample". For the
 * {@code STDDEV*} functions a {@link StandardDeviationPostAggregator} named {@code name} is
 * attached on top of the variance aggregator.
 *
 * <p>The input type is validated before it is used to build the dimension spec, so that an
 * unsupported input is rejected with a descriptive {@link IAE} and no virtual column is
 * registered for it.
 *
 * @param plannerContext        planner context used to translate the input expression
 * @param rowSignature          signature of the rows feeding the aggregation
 * @param virtualColumnRegistry registry used when the input is not a simple column extraction
 * @param rexBuilder            not used by this implementation
 * @param name                  output name; also the prefix of the aggregator name
 * @param aggregateCall         the Calcite aggregate call; only its first argument is read
 * @param project               projection used to resolve the argument's field access
 * @param existingAggregations  not used by this implementation
 * @param finalizeAggregations  not used by this implementation
 * @return the Druid aggregation, or {@code null} if the input cannot be translated to a Druid
 *         expression
 * @throws IAE if the input's column type cannot be determined or is not numeric
 */
@Nullable
@Override
public Aggregation toDruidAggregation(PlannerContext plannerContext, RowSignature rowSignature, VirtualColumnRegistry virtualColumnRegistry, RexBuilder rexBuilder, String name, AggregateCall aggregateCall, Project project, List<Aggregation> existingAggregations, boolean finalizeAggregations) {
    final RexNode inputOperand = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0));
    final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator(plannerContext, rowSignature, inputOperand);
    if (input == null) {
        return null;
    }

    final SqlAggFunction func = calciteFunction();
    final RelDataType dataType = inputOperand.getType();
    final ColumnType inputType = Calcites.getColumnTypeForRelDataType(dataType);

    // Validate the type up front: it is handed to the dimension spec below, and a virtual
    // column should not be registered for an input that is going to be rejected anyway.
    if (inputType == null) {
        throw new IAE("VarianceSqlAggregator[%s] has invalid inputType", func);
    }
    if (!inputType.isNumeric()) {
        throw new IAE("VarianceSqlAggregator[%s] has invalid inputType[%s]", func, inputType.asTypeString());
    }
    final String inputTypeName = StringUtils.toLowerCase(inputType.getType().name());

    // A simple extraction is referenced directly; any other expression goes through a virtual column.
    final DimensionSpec dimensionSpec;
    if (input.isSimpleExtraction()) {
        dimensionSpec = input.getSimpleExtraction().toDimensionSpec(null, inputType);
    } else {
        String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, dataType);
        dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType);
    }

    final String estimator;
    if (func == SqlStdOperatorTable.VAR_POP || func == SqlStdOperatorTable.STDDEV_POP) {
        estimator = "population";
    } else {
        estimator = "sample";
    }

    final String aggName = StringUtils.format("%s:agg", name);
    final AggregatorFactory aggregatorFactory = new VarianceAggregatorFactory(aggName, dimensionSpec.getDimension(), estimator, inputTypeName);

    // Only the STDDEV* functions need a post-aggregator; variance functions pass null.
    final PostAggregator postAggregator;
    if (func == SqlStdOperatorTable.STDDEV_POP || func == SqlStdOperatorTable.STDDEV_SAMP || func == SqlStdOperatorTable.STDDEV) {
        postAggregator = new StandardDeviationPostAggregator(name, aggregatorFactory.getName(), estimator);
    } else {
        postAggregator = null;
    }
    return Aggregation.create(ImmutableList.of(aggregatorFactory), postAggregator);
}
Also used : DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnType(org.apache.druid.segment.column.ColumnType) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) StandardDeviationPostAggregator(org.apache.druid.query.aggregation.variance.StandardDeviationPostAggregator) StandardDeviationPostAggregator(org.apache.druid.query.aggregation.variance.StandardDeviationPostAggregator) RelDataType(org.apache.calcite.rel.type.RelDataType) SqlAggFunction(org.apache.calcite.sql.SqlAggFunction) VarianceAggregatorFactory(org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) IAE(org.apache.druid.java.util.common.IAE) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) VarianceAggregatorFactory(org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)

Aggregations

VarianceAggregatorFactory (org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory): 7
MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 6
BaseCalciteQueryTest (org.apache.druid.sql.calcite.BaseCalciteQueryTest): 6
Test (org.junit.Test): 6
InputRow (org.apache.druid.data.input.InputRow): 5
VarianceAggregatorCollector (org.apache.druid.query.aggregation.variance.VarianceAggregatorCollector): 5
StandardDeviationPostAggregator (org.apache.druid.query.aggregation.variance.StandardDeviationPostAggregator): 4
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 2
Nullable (javax.annotation.Nullable): 1
RelDataType (org.apache.calcite.rel.type.RelDataType): 1
RexNode (org.apache.calcite.rex.RexNode): 1
SqlAggFunction (org.apache.calcite.sql.SqlAggFunction): 1
IAE (org.apache.druid.java.util.common.IAE): 1
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 1
PostAggregator (org.apache.druid.query.aggregation.PostAggregator): 1
DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 1
OrderByColumnSpec (org.apache.druid.query.groupby.orderby.OrderByColumnSpec): 1
ColumnType (org.apache.druid.segment.column.ColumnType): 1
DruidExpression (org.apache.druid.sql.calcite.expression.DruidExpression): 1