use of org.apache.druid.query.aggregation.AggregatorFactory in project hive by apache.
the class DruidStorageHandlerUtils method getDimensionsAndAggregates.
public static Pair<List<DimensionSchema>, AggregatorFactory[]> getDimensionsAndAggregates(List<String> columnNames, List<TypeInfo> columnTypes) {
// Default, all columns that are not metrics or timestamp, are treated as dimensions
final List<DimensionSchema> dimensions = new ArrayList<>();
ImmutableList.Builder<AggregatorFactory> aggregatorFactoryBuilder = ImmutableList.builder();
for (int i = 0; i < columnTypes.size(); i++) {
final PrimitiveObjectInspector.PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) columnTypes.get(i)).getPrimitiveCategory();
AggregatorFactory af;
switch(primitiveCategory) {
case BYTE:
case SHORT:
case INT:
case LONG:
af = new LongSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
break;
case FLOAT:
af = new FloatSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
break;
case DOUBLE:
af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
break;
case DECIMAL:
throw new UnsupportedOperationException(String.format("Druid does not support decimal column type cast column " + "[%s] to double", columnNames.get(i)));
case TIMESTAMP:
// Granularity column
String tColumnName = columnNames.get(i);
if (!tColumnName.equals(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME) && !tColumnName.equals(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)) {
throw new IllegalArgumentException("Dimension " + tColumnName + " does not have STRING type: " + primitiveCategory);
}
continue;
case TIMESTAMPLOCALTZ:
// Druid timestamp column
String tLocalTZColumnName = columnNames.get(i);
if (!tLocalTZColumnName.equals(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)) {
throw new IllegalArgumentException("Dimension " + tLocalTZColumnName + " does not have STRING type: " + primitiveCategory);
}
continue;
default:
// Dimension
String dColumnName = columnNames.get(i);
if (PrimitiveObjectInspectorUtils.getPrimitiveGrouping(primitiveCategory) != PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP && primitiveCategory != PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN) {
throw new IllegalArgumentException("Dimension " + dColumnName + " does not have STRING type: " + primitiveCategory);
}
dimensions.add(new StringDimensionSchema(dColumnName));
continue;
}
aggregatorFactoryBuilder.add(af);
}
ImmutableList<AggregatorFactory> aggregatorFactories = aggregatorFactoryBuilder.build();
return Pair.of(dimensions, aggregatorFactories.toArray(new AggregatorFactory[0]));
}
use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class MovingAverageIterableTest method testWithFilteredAggregation.
@Test
public void testWithFilteredAggregation() {
Map<String, Object> event1 = new HashMap<>();
Map<String, Object> event2 = new HashMap<>();
List<DimensionSpec> ds = new ArrayList<>();
ds.add(new DefaultDimensionSpec("gender", "gender"));
event1.put("gender", "m");
event1.put("pageViews", 10L);
Row row1 = new MapBasedRow(JAN_1, event1);
event2.put("gender", "m");
event2.put("pageViews", 20L);
Row row2 = new MapBasedRow(JAN_4, event2);
Sequence<RowBucket> seq = Sequences.simple(Arrays.asList(new RowBucket(JAN_1, Collections.singletonList(row1)), new RowBucket(JAN_2, Collections.emptyList()), new RowBucket(JAN_3, Collections.emptyList()), new RowBucket(JAN_4, Collections.singletonList(row2))));
AveragerFactory averagerfactory = new LongMeanAveragerFactory("movingAvgPageViews", 4, 1, "pageViews");
AggregatorFactory aggregatorFactory = new LongSumAggregatorFactory("pageViews", "pageViews");
DimFilter filter = new SelectorDimFilter("gender", "m", null);
FilteredAggregatorFactory filteredAggregatorFactory = new FilteredAggregatorFactory(aggregatorFactory, filter);
Iterator<Row> iter = new MovingAverageIterable(seq, ds, Collections.singletonList(averagerfactory), Collections.emptyList(), Collections.singletonList(filteredAggregatorFactory)).iterator();
Assert.assertTrue(iter.hasNext());
Row result = iter.next();
Assert.assertEquals("m", (result.getDimension("gender")).get(0));
Assert.assertEquals(2.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
Assert.assertTrue(iter.hasNext());
result = iter.next();
Assert.assertEquals("m", (result.getDimension("gender")).get(0));
Assert.assertEquals(2.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
Assert.assertTrue(iter.hasNext());
result = iter.next();
Assert.assertEquals("m", (result.getDimension("gender")).get(0));
Assert.assertEquals(2.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
Assert.assertTrue(iter.hasNext());
result = iter.next();
Assert.assertEquals("m", (result.getDimension("gender")).get(0));
Assert.assertEquals(7.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
Assert.assertFalse(iter.hasNext());
}
use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class HllSketchBaseSqlAggregator method toDruidAggregation.
@Nullable
@Override
public Aggregation toDruidAggregation(PlannerContext plannerContext, RowSignature rowSignature, VirtualColumnRegistry virtualColumnRegistry, RexBuilder rexBuilder, String name, AggregateCall aggregateCall, Project project, List<Aggregation> existingAggregations, boolean finalizeAggregations) {
// Don't use Aggregations.getArgumentsForSimpleAggregator, since it won't let us use direct column access
// for string columns.
final RexNode columnRexNode = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0));
final DruidExpression columnArg = Expressions.toDruidExpression(plannerContext, rowSignature, columnRexNode);
if (columnArg == null) {
return null;
}
final int logK;
if (aggregateCall.getArgList().size() >= 2) {
final RexNode logKarg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
if (!logKarg.isA(SqlKind.LITERAL)) {
// logK must be a literal in order to plan.
return null;
}
logK = ((Number) RexLiteral.value(logKarg)).intValue();
} else {
logK = HllSketchAggregatorFactory.DEFAULT_LG_K;
}
final String tgtHllType;
if (aggregateCall.getArgList().size() >= 3) {
final RexNode tgtHllTypeArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(2));
if (!tgtHllTypeArg.isA(SqlKind.LITERAL)) {
// tgtHllType must be a literal in order to plan.
return null;
}
tgtHllType = RexLiteral.stringValue(tgtHllTypeArg);
} else {
tgtHllType = HllSketchAggregatorFactory.DEFAULT_TGT_HLL_TYPE.name();
}
final AggregatorFactory aggregatorFactory;
final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name;
if (columnArg.isDirectColumnAccess() && rowSignature.getColumnType(columnArg.getDirectColumn()).map(type -> type.is(ValueType.COMPLEX)).orElse(false)) {
aggregatorFactory = new HllSketchMergeAggregatorFactory(aggregatorName, columnArg.getDirectColumn(), logK, tgtHllType, ROUND);
} else {
final RelDataType dataType = columnRexNode.getType();
final ColumnType inputType = Calcites.getColumnTypeForRelDataType(dataType);
if (inputType == null) {
throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", dataType.getSqlTypeName(), aggregatorName);
}
final DimensionSpec dimensionSpec;
if (columnArg.isDirectColumnAccess()) {
dimensionSpec = columnArg.getSimpleExtraction().toDimensionSpec(null, inputType);
} else {
String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(columnArg, dataType);
dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType);
}
aggregatorFactory = new HllSketchBuildAggregatorFactory(aggregatorName, dimensionSpec.getDimension(), logK, tgtHllType, ROUND);
}
return toAggregation(name, finalizeAggregations, aggregatorFactory);
}
use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class ThetaSketchBaseSqlAggregator method toDruidAggregation.
@Nullable
@Override
public Aggregation toDruidAggregation(PlannerContext plannerContext, RowSignature rowSignature, VirtualColumnRegistry virtualColumnRegistry, RexBuilder rexBuilder, String name, AggregateCall aggregateCall, Project project, List<Aggregation> existingAggregations, boolean finalizeAggregations) {
// Don't use Aggregations.getArgumentsForSimpleAggregator, since it won't let us use direct column access
// for string columns.
final RexNode columnRexNode = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0));
final DruidExpression columnArg = Expressions.toDruidExpression(plannerContext, rowSignature, columnRexNode);
if (columnArg == null) {
return null;
}
final int sketchSize;
if (aggregateCall.getArgList().size() >= 2) {
final RexNode sketchSizeArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
if (!sketchSizeArg.isA(SqlKind.LITERAL)) {
// logK must be a literal in order to plan.
return null;
}
sketchSize = ((Number) RexLiteral.value(sketchSizeArg)).intValue();
} else {
sketchSize = SketchAggregatorFactory.DEFAULT_MAX_SKETCH_SIZE;
}
final AggregatorFactory aggregatorFactory;
final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name;
if (columnArg.isDirectColumnAccess() && rowSignature.getColumnType(columnArg.getDirectColumn()).map(type -> type.is(ValueType.COMPLEX)).orElse(false)) {
aggregatorFactory = new SketchMergeAggregatorFactory(aggregatorName, columnArg.getDirectColumn(), sketchSize, null, null, null);
} else {
final RelDataType dataType = columnRexNode.getType();
final ColumnType inputType = Calcites.getColumnTypeForRelDataType(dataType);
if (inputType == null) {
throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", dataType.getSqlTypeName(), aggregatorName);
}
final DimensionSpec dimensionSpec;
if (columnArg.isDirectColumnAccess()) {
dimensionSpec = columnArg.getSimpleExtraction().toDimensionSpec(null, inputType);
} else {
String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(columnArg, dataType);
dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType);
}
aggregatorFactory = new SketchMergeAggregatorFactory(aggregatorName, dimensionSpec.getDimension(), sketchSize, null, null, null);
}
return toAggregation(name, finalizeAggregations, aggregatorFactory);
}
use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class DoublesSketchObjectSqlAggregator method toDruidAggregation.
@Nullable
@Override
public Aggregation toDruidAggregation(final PlannerContext plannerContext, final RowSignature rowSignature, final VirtualColumnRegistry virtualColumnRegistry, final RexBuilder rexBuilder, final String name, final AggregateCall aggregateCall, final Project project, final List<Aggregation> existingAggregations, final boolean finalizeAggregations) {
final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator(plannerContext, rowSignature, Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0)));
if (input == null) {
return null;
}
final AggregatorFactory aggregatorFactory;
final String histogramName = StringUtils.format("%s:agg", name);
final int k;
if (aggregateCall.getArgList().size() >= 2) {
final RexNode resolutionArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
if (!resolutionArg.isA(SqlKind.LITERAL)) {
// Resolution must be a literal in order to plan.
return null;
}
k = ((Number) RexLiteral.value(resolutionArg)).intValue();
} else {
k = DoublesSketchAggregatorFactory.DEFAULT_K;
}
// No existing match found. Create a new one.
if (input.isDirectColumnAccess()) {
aggregatorFactory = new DoublesSketchAggregatorFactory(histogramName, input.getDirectColumn(), k, DoublesSketchApproxQuantileSqlAggregator.getMaxStreamLengthFromQueryContext(plannerContext.getQueryContext()));
} else {
String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, ColumnType.FLOAT);
aggregatorFactory = new DoublesSketchAggregatorFactory(histogramName, virtualColumnName, k, DoublesSketchApproxQuantileSqlAggregator.getMaxStreamLengthFromQueryContext(plannerContext.getQueryContext()));
}
return Aggregation.create(ImmutableList.of(aggregatorFactory), null);
}
Aggregations