Search in sources :

Example 81 with AggregatorFactory

use of org.apache.druid.query.aggregation.AggregatorFactory in project hive by apache.

the class DruidStorageHandlerUtils method getDimensionsAndAggregates.

public static Pair<List<DimensionSchema>, AggregatorFactory[]> getDimensionsAndAggregates(List<String> columnNames, List<TypeInfo> columnTypes) {
    // Default, all columns that are not metrics or timestamp, are treated as dimensions
    final List<DimensionSchema> dimensions = new ArrayList<>();
    ImmutableList.Builder<AggregatorFactory> aggregatorFactoryBuilder = ImmutableList.builder();
    for (int i = 0; i < columnTypes.size(); i++) {
        final PrimitiveObjectInspector.PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) columnTypes.get(i)).getPrimitiveCategory();
        AggregatorFactory af;
        switch(primitiveCategory) {
            case BYTE:
            case SHORT:
            case INT:
            case LONG:
                af = new LongSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
                break;
            case FLOAT:
                af = new FloatSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
                break;
            case DOUBLE:
                af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
                break;
            case DECIMAL:
                throw new UnsupportedOperationException(String.format("Druid does not support decimal column type cast column " + "[%s] to double", columnNames.get(i)));
            case TIMESTAMP:
                // Granularity column
                String tColumnName = columnNames.get(i);
                if (!tColumnName.equals(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME) && !tColumnName.equals(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)) {
                    throw new IllegalArgumentException("Dimension " + tColumnName + " does not have STRING type: " + primitiveCategory);
                }
                continue;
            case TIMESTAMPLOCALTZ:
                // Druid timestamp column
                String tLocalTZColumnName = columnNames.get(i);
                if (!tLocalTZColumnName.equals(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)) {
                    throw new IllegalArgumentException("Dimension " + tLocalTZColumnName + " does not have STRING type: " + primitiveCategory);
                }
                continue;
            default:
                // Dimension
                String dColumnName = columnNames.get(i);
                if (PrimitiveObjectInspectorUtils.getPrimitiveGrouping(primitiveCategory) != PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP && primitiveCategory != PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN) {
                    throw new IllegalArgumentException("Dimension " + dColumnName + " does not have STRING type: " + primitiveCategory);
                }
                dimensions.add(new StringDimensionSchema(dColumnName));
                continue;
        }
        aggregatorFactoryBuilder.add(af);
    }
    ImmutableList<AggregatorFactory> aggregatorFactories = aggregatorFactoryBuilder.build();
    return Pair.of(dimensions, aggregatorFactories.toArray(new AggregatorFactory[0]));
}
Also used : DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) FloatSumAggregatorFactory(org.apache.druid.query.aggregation.FloatSumAggregatorFactory) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) FloatSumAggregatorFactory(org.apache.druid.query.aggregation.FloatSumAggregatorFactory) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)

Example 82 with AggregatorFactory

use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class MovingAverageIterableTest method testWithFilteredAggregation.

@Test
public void testWithFilteredAggregation() {
    Map<String, Object> event1 = new HashMap<>();
    Map<String, Object> event2 = new HashMap<>();
    List<DimensionSpec> ds = new ArrayList<>();
    ds.add(new DefaultDimensionSpec("gender", "gender"));
    event1.put("gender", "m");
    event1.put("pageViews", 10L);
    Row row1 = new MapBasedRow(JAN_1, event1);
    event2.put("gender", "m");
    event2.put("pageViews", 20L);
    Row row2 = new MapBasedRow(JAN_4, event2);
    Sequence<RowBucket> seq = Sequences.simple(Arrays.asList(new RowBucket(JAN_1, Collections.singletonList(row1)), new RowBucket(JAN_2, Collections.emptyList()), new RowBucket(JAN_3, Collections.emptyList()), new RowBucket(JAN_4, Collections.singletonList(row2))));
    AveragerFactory averagerfactory = new LongMeanAveragerFactory("movingAvgPageViews", 4, 1, "pageViews");
    AggregatorFactory aggregatorFactory = new LongSumAggregatorFactory("pageViews", "pageViews");
    DimFilter filter = new SelectorDimFilter("gender", "m", null);
    FilteredAggregatorFactory filteredAggregatorFactory = new FilteredAggregatorFactory(aggregatorFactory, filter);
    Iterator<Row> iter = new MovingAverageIterable(seq, ds, Collections.singletonList(averagerfactory), Collections.emptyList(), Collections.singletonList(filteredAggregatorFactory)).iterator();
    Assert.assertTrue(iter.hasNext());
    Row result = iter.next();
    Assert.assertEquals("m", (result.getDimension("gender")).get(0));
    Assert.assertEquals(2.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
    Assert.assertTrue(iter.hasNext());
    result = iter.next();
    Assert.assertEquals("m", (result.getDimension("gender")).get(0));
    Assert.assertEquals(2.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
    Assert.assertTrue(iter.hasNext());
    result = iter.next();
    Assert.assertEquals("m", (result.getDimension("gender")).get(0));
    Assert.assertEquals(2.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
    Assert.assertTrue(iter.hasNext());
    result = iter.next();
    Assert.assertEquals("m", (result.getDimension("gender")).get(0));
    Assert.assertEquals(7.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
    Assert.assertFalse(iter.hasNext());
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ConstantAveragerFactory(org.apache.druid.query.movingaverage.averagers.ConstantAveragerFactory) AveragerFactory(org.apache.druid.query.movingaverage.averagers.AveragerFactory) LongMeanAveragerFactory(org.apache.druid.query.movingaverage.averagers.LongMeanAveragerFactory) MapBasedRow(org.apache.druid.data.input.MapBasedRow) LongMeanAveragerFactory(org.apache.druid.query.movingaverage.averagers.LongMeanAveragerFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) MapBasedRow(org.apache.druid.data.input.MapBasedRow) Row(org.apache.druid.data.input.Row) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 83 with AggregatorFactory

use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class HllSketchBaseSqlAggregator method toDruidAggregation.

@Nullable
@Override
public Aggregation toDruidAggregation(PlannerContext plannerContext, RowSignature rowSignature, VirtualColumnRegistry virtualColumnRegistry, RexBuilder rexBuilder, String name, AggregateCall aggregateCall, Project project, List<Aggregation> existingAggregations, boolean finalizeAggregations) {
    // Don't use Aggregations.getArgumentsForSimpleAggregator, since it won't let us use direct column access
    // for string columns.
    final RexNode columnRexNode = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0));
    final DruidExpression columnArg = Expressions.toDruidExpression(plannerContext, rowSignature, columnRexNode);
    if (columnArg == null) {
        return null;
    }
    final int logK;
    if (aggregateCall.getArgList().size() >= 2) {
        final RexNode logKarg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
        if (!logKarg.isA(SqlKind.LITERAL)) {
            // logK must be a literal in order to plan.
            return null;
        }
        logK = ((Number) RexLiteral.value(logKarg)).intValue();
    } else {
        logK = HllSketchAggregatorFactory.DEFAULT_LG_K;
    }
    final String tgtHllType;
    if (aggregateCall.getArgList().size() >= 3) {
        final RexNode tgtHllTypeArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(2));
        if (!tgtHllTypeArg.isA(SqlKind.LITERAL)) {
            // tgtHllType must be a literal in order to plan.
            return null;
        }
        tgtHllType = RexLiteral.stringValue(tgtHllTypeArg);
    } else {
        tgtHllType = HllSketchAggregatorFactory.DEFAULT_TGT_HLL_TYPE.name();
    }
    final AggregatorFactory aggregatorFactory;
    final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name;
    if (columnArg.isDirectColumnAccess() && rowSignature.getColumnType(columnArg.getDirectColumn()).map(type -> type.is(ValueType.COMPLEX)).orElse(false)) {
        aggregatorFactory = new HllSketchMergeAggregatorFactory(aggregatorName, columnArg.getDirectColumn(), logK, tgtHllType, ROUND);
    } else {
        final RelDataType dataType = columnRexNode.getType();
        final ColumnType inputType = Calcites.getColumnTypeForRelDataType(dataType);
        if (inputType == null) {
            throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", dataType.getSqlTypeName(), aggregatorName);
        }
        final DimensionSpec dimensionSpec;
        if (columnArg.isDirectColumnAccess()) {
            dimensionSpec = columnArg.getSimpleExtraction().toDimensionSpec(null, inputType);
        } else {
            String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(columnArg, dataType);
            dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType);
        }
        aggregatorFactory = new HllSketchBuildAggregatorFactory(aggregatorName, dimensionSpec.getDimension(), logK, tgtHllType, ROUND);
    }
    return toAggregation(name, finalizeAggregations, aggregatorFactory);
}
Also used : DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnType(org.apache.druid.segment.column.ColumnType) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) HllSketchMergeAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchMergeAggregatorFactory) RelDataType(org.apache.calcite.rel.type.RelDataType) ISE(org.apache.druid.java.util.common.ISE) HllSketchMergeAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchMergeAggregatorFactory) HllSketchAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) HllSketchBuildAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory) HllSketchBuildAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)

Example 84 with AggregatorFactory

use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class ThetaSketchBaseSqlAggregator method toDruidAggregation.

@Nullable
@Override
public Aggregation toDruidAggregation(PlannerContext plannerContext, RowSignature rowSignature, VirtualColumnRegistry virtualColumnRegistry, RexBuilder rexBuilder, String name, AggregateCall aggregateCall, Project project, List<Aggregation> existingAggregations, boolean finalizeAggregations) {
    // Don't use Aggregations.getArgumentsForSimpleAggregator, since it won't let us use direct column access
    // for string columns.
    final RexNode columnRexNode = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0));
    final DruidExpression columnArg = Expressions.toDruidExpression(plannerContext, rowSignature, columnRexNode);
    if (columnArg == null) {
        return null;
    }
    final int sketchSize;
    if (aggregateCall.getArgList().size() >= 2) {
        final RexNode sketchSizeArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
        if (!sketchSizeArg.isA(SqlKind.LITERAL)) {
            // logK must be a literal in order to plan.
            return null;
        }
        sketchSize = ((Number) RexLiteral.value(sketchSizeArg)).intValue();
    } else {
        sketchSize = SketchAggregatorFactory.DEFAULT_MAX_SKETCH_SIZE;
    }
    final AggregatorFactory aggregatorFactory;
    final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name;
    if (columnArg.isDirectColumnAccess() && rowSignature.getColumnType(columnArg.getDirectColumn()).map(type -> type.is(ValueType.COMPLEX)).orElse(false)) {
        aggregatorFactory = new SketchMergeAggregatorFactory(aggregatorName, columnArg.getDirectColumn(), sketchSize, null, null, null);
    } else {
        final RelDataType dataType = columnRexNode.getType();
        final ColumnType inputType = Calcites.getColumnTypeForRelDataType(dataType);
        if (inputType == null) {
            throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", dataType.getSqlTypeName(), aggregatorName);
        }
        final DimensionSpec dimensionSpec;
        if (columnArg.isDirectColumnAccess()) {
            dimensionSpec = columnArg.getSimpleExtraction().toDimensionSpec(null, inputType);
        } else {
            String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(columnArg, dataType);
            dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType);
        }
        aggregatorFactory = new SketchMergeAggregatorFactory(aggregatorName, dimensionSpec.getDimension(), sketchSize, null, null, null);
    }
    return toAggregation(name, finalizeAggregations, aggregatorFactory);
}
Also used : SketchMergeAggregatorFactory(org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnType(org.apache.druid.segment.column.ColumnType) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) RelDataType(org.apache.calcite.rel.type.RelDataType) ISE(org.apache.druid.java.util.common.ISE) SketchMergeAggregatorFactory(org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) SketchAggregatorFactory(org.apache.druid.query.aggregation.datasketches.theta.SketchAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)

Example 85 with AggregatorFactory

use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class DoublesSketchObjectSqlAggregator method toDruidAggregation.

@Nullable
@Override
public Aggregation toDruidAggregation(final PlannerContext plannerContext, final RowSignature rowSignature, final VirtualColumnRegistry virtualColumnRegistry, final RexBuilder rexBuilder, final String name, final AggregateCall aggregateCall, final Project project, final List<Aggregation> existingAggregations, final boolean finalizeAggregations) {
    final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator(plannerContext, rowSignature, Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0)));
    if (input == null) {
        return null;
    }
    final AggregatorFactory aggregatorFactory;
    final String histogramName = StringUtils.format("%s:agg", name);
    final int k;
    if (aggregateCall.getArgList().size() >= 2) {
        final RexNode resolutionArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
        if (!resolutionArg.isA(SqlKind.LITERAL)) {
            // Resolution must be a literal in order to plan.
            return null;
        }
        k = ((Number) RexLiteral.value(resolutionArg)).intValue();
    } else {
        k = DoublesSketchAggregatorFactory.DEFAULT_K;
    }
    // No existing match found. Create a new one.
    if (input.isDirectColumnAccess()) {
        aggregatorFactory = new DoublesSketchAggregatorFactory(histogramName, input.getDirectColumn(), k, DoublesSketchApproxQuantileSqlAggregator.getMaxStreamLengthFromQueryContext(plannerContext.getQueryContext()));
    } else {
        String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, ColumnType.FLOAT);
        aggregatorFactory = new DoublesSketchAggregatorFactory(histogramName, virtualColumnName, k, DoublesSketchApproxQuantileSqlAggregator.getMaxStreamLengthFromQueryContext(plannerContext.getQueryContext()));
    }
    return Aggregation.create(ImmutableList.of(aggregatorFactory), null);
}
Also used : DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) DoublesSketchAggregatorFactory(org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DoublesSketchAggregatorFactory(org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)

Aggregations

AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)163 Test (org.junit.Test)85 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)56 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)48 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)42 ArrayList (java.util.ArrayList)33 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)33 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)28 DataSchema (org.apache.druid.segment.indexing.DataSchema)25 Nullable (javax.annotation.Nullable)22 DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory)22 PostAggregator (org.apache.druid.query.aggregation.PostAggregator)22 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)22 HashMap (java.util.HashMap)20 List (java.util.List)20 DoubleMaxAggregatorFactory (org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory)18 LongMaxAggregatorFactory (org.apache.druid.query.aggregation.LongMaxAggregatorFactory)18 DimensionSpec (org.apache.druid.query.dimension.DimensionSpec)18 Map (java.util.Map)17 FloatFirstAggregatorFactory (org.apache.druid.query.aggregation.first.FloatFirstAggregatorFactory)15