Search in sources :

Example 26 with ColumnType

use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.

the class BloomFilterSqlAggregator method toDruidAggregation.

/**
 * Translates a bloom-filter SQL aggregate call into a Druid {@link Aggregation}.
 *
 * <p>The first call argument is the value to add to the filter; the second is the maximum number of
 * entries and must be a literal. If {@code existingAggregations} already contains a
 * {@link BloomFilterAggregatorFactory} over an equivalent input with the same max-entries value, that
 * factory is reused. Otherwise a new factory named {@code "<name>:agg"} is created.
 *
 * @return the planned aggregation, or {@code null} when the input cannot be converted to a Druid
 *         expression or the max-entries argument is not a literal
 */
@Nullable
@Override
public Aggregation toDruidAggregation(PlannerContext plannerContext, RowSignature rowSignature, VirtualColumnRegistry virtualColumnRegistry, RexBuilder rexBuilder, String name, AggregateCall aggregateCall, Project project, List<Aggregation> existingAggregations, boolean finalizeAggregations) {
    final RexNode inputOperand = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0));
    final DruidExpression input = Expressions.toDruidExpression(plannerContext, rowSignature, inputOperand);
    if (input == null) {
        return null;
    }
    final String aggName = StringUtils.format("%s:agg", name);
    final RexNode maxNumEntriesOperand = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
    if (!maxNumEntriesOperand.isA(SqlKind.LITERAL)) {
        // maxNumEntriesOperand must be a literal in order to plan.
        return null;
    }
    final int maxNumEntries = ((Number) RexLiteral.value(maxNumEntriesOperand)).intValue();
    // Look for existing matching aggregatorFactory.
    for (final Aggregation existing : existingAggregations) {
        for (AggregatorFactory factory : existing.getAggregatorFactories()) {
            if (factory instanceof BloomFilterAggregatorFactory) {
                final BloomFilterAggregatorFactory theFactory = (BloomFilterAggregatorFactory) factory;
                // Check input for equivalence.
                final boolean inputMatches;
                final DruidExpression virtualInput = virtualColumnRegistry.findVirtualColumnExpressions(theFactory.requiredFields()).stream().findFirst().orElse(null);
                if (virtualInput == null) {
                    if (input.isDirectColumnAccess()) {
                        inputMatches = input.getDirectColumn().equals(theFactory.getField().getDimension());
                    } else {
                        // Only a simple extraction can be compared against the factory's dimension spec.
                        // An arbitrary expression has no simple extraction to dereference, so it is
                        // treated as "no match" here rather than failing on the missing extraction.
                        inputMatches = input.isSimpleExtraction()
                                && input.getSimpleExtraction().getColumn().equals(theFactory.getField().getDimension())
                                && input.getSimpleExtraction().getExtractionFn().equals(theFactory.getField().getExtractionFn());
                    }
                } else {
                    inputMatches = virtualInput.equals(input);
                }
                final boolean matches = inputMatches && theFactory.getMaxNumEntries() == maxNumEntries;
                if (matches) {
                    // Found existing one. Use this.
                    return Aggregation.create(theFactory);
                }
            }
        }
    }
    // No existing match found. Create a new one.
    ColumnType valueType = Calcites.getColumnTypeForRelDataType(inputOperand.getType());
    final DimensionSpec spec;
    if (input.isDirectColumnAccess()) {
        spec = new DefaultDimensionSpec(input.getSimpleExtraction().getColumn(), StringUtils.format("%s:%s", name, input.getSimpleExtraction().getColumn()), valueType);
    } else if (input.isSimpleExtraction()) {
        spec = new ExtractionDimensionSpec(input.getSimpleExtraction().getColumn(), StringUtils.format("%s:%s", name, input.getSimpleExtraction().getColumn()), valueType, input.getSimpleExtraction().getExtractionFn());
    } else {
        // Arbitrary expressions are read through a virtual column registered for this query.
        String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, inputOperand.getType());
        spec = new DefaultDimensionSpec(virtualColumnName, StringUtils.format("%s:%s", name, virtualColumnName));
    }
    final AggregatorFactory aggregatorFactory = new BloomFilterAggregatorFactory(aggName, spec, maxNumEntries);
    return Aggregation.create(aggregatorFactory);
}
Also used : DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnType(org.apache.druid.segment.column.ColumnType) BloomFilterAggregatorFactory(org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory) BloomFilterAggregatorFactory(org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Aggregation(org.apache.druid.sql.calcite.aggregation.Aggregation) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) RexNode(org.apache.calcite.rex.RexNode) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) Nullable(javax.annotation.Nullable)

Example 27 with ColumnType

use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.

the class InputRowSerde method toBytes.

/**
 * Serializes an input row plus the intermediate values of the given aggregators into a byte array.
 *
 * <p>Layout written, in order: the row timestamp as a long; a vint dimension count followed by each
 * dimension's name and serialized raw value; a vint aggregator count followed by each aggregator's
 * name, a null-marker byte, and (when not null) the aggregated value encoded according to the
 * factory's intermediate type.
 *
 * <p>{@link ParseException}s raised while serializing a dimension or while aggregating are not
 * propagated; their messages are collected and returned in the result.
 *
 * @param typeHelperMap per-dimension serialization helpers; dimensions without an entry use {@code STRING_HELPER}
 * @param row           the row to serialize
 * @param aggs          aggregator factories whose values are computed from {@code row} and appended
 * @return the serialized bytes together with any collected parse-exception messages
 * @throws IAE              if an aggregator's intermediate type is not FLOAT, LONG, DOUBLE or COMPLEX
 * @throws RuntimeException wrapping any {@link IOException} raised while writing
 */
public static SerializeResult toBytes(final Map<String, IndexSerdeTypeHelper> typeHelperMap, final InputRow row, AggregatorFactory[] aggs) {
    try {
        final List<String> parseErrors = new ArrayList<>();
        final ByteArrayDataOutput output = ByteStreams.newDataOutput();

        // Timestamp comes first.
        output.writeLong(row.getTimestampFromEpoch());

        // Dimension section: count, then (name, value) pairs.
        final List<String> dimensionNames = row.getDimensions();
        WritableUtils.writeVInt(output, dimensionNames.size());
        for (String dimensionName : dimensionNames) {
            final IndexSerdeTypeHelper registered = typeHelperMap.get(dimensionName);
            final IndexSerdeTypeHelper helper = registered != null ? registered : STRING_HELPER;
            writeString(dimensionName, output);
            try {
                helper.serialize(output, row.getRaw(dimensionName));
            } catch (ParseException parseFailure) {
                parseErrors.add(parseFailure.getMessage());
            }
        }

        // Metric section: count, then (name, null marker, value) per aggregator.
        final Supplier<InputRow> rowSupplier = () -> row;
        WritableUtils.writeVInt(output, aggs.length);
        for (AggregatorFactory factory : aggs) {
            final String metricName = factory.getName();
            writeString(metricName, output);
            try (Aggregator aggregator = factory.factorize(IncrementalIndex.makeColumnSelectorFactory(VirtualColumns.EMPTY, factory, rowSupplier, true))) {
                try {
                    aggregator.aggregate();
                } catch (ParseException parseFailure) {
                    // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
                    log.debug(parseFailure, "Encountered parse error, skipping aggregator[%s].", metricName);
                    parseErrors.add(parseFailure.getMessage());
                }
                final ColumnType intermediateType = factory.getIntermediateType();
                if (aggregator.isNull()) {
                    // Null aggregate: marker byte only, no payload.
                    output.writeByte(NullHandling.IS_NULL_BYTE);
                    continue;
                }
                output.writeByte(NullHandling.IS_NOT_NULL_BYTE);
                if (intermediateType.is(ValueType.FLOAT)) {
                    output.writeFloat(aggregator.getFloat());
                } else if (intermediateType.is(ValueType.LONG)) {
                    WritableUtils.writeVLong(output, aggregator.getLong());
                } else if (intermediateType.is(ValueType.DOUBLE)) {
                    output.writeDouble(aggregator.getDouble());
                } else if (intermediateType.is(ValueType.COMPLEX)) {
                    final Object complexValue = aggregator.get();
                    final ComplexMetricSerde complexSerde = getComplexMetricSerde(intermediateType.getComplexTypeName());
                    writeBytes(complexSerde.toBytes(complexValue), output);
                } else {
                    throw new IAE("Unable to serialize type[%s]", intermediateType.asTypeString());
                }
            }
        }
        return new SerializeResult(output.toByteArray(), parseErrors);
    } catch (IOException ioFailure) {
        throw new RuntimeException(ioFailure);
    }
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType) ComplexMetricSerde(org.apache.druid.segment.serde.ComplexMetricSerde) ArrayList(java.util.ArrayList) Aggregator(org.apache.druid.query.aggregation.Aggregator) IOException(java.io.IOException) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) IAE(org.apache.druid.java.util.common.IAE) ByteArrayDataOutput(com.google.common.io.ByteArrayDataOutput) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) ParseException(org.apache.druid.java.util.common.parsers.ParseException)

Example 28 with ColumnType

use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.

the class BaseVarianceSqlAggregator method toDruidAggregation.

/**
 * Translates a variance / standard-deviation SQL aggregate call into a Druid {@link Aggregation}.
 *
 * <p>A {@link VarianceAggregatorFactory} named {@code "<name>:agg"} is always created. The estimator is
 * {@code "population"} for {@code VAR_POP} and {@code STDDEV_POP}, and {@code "sample"} otherwise. For
 * the standard-deviation functions a {@link StandardDeviationPostAggregator} named {@code name} is
 * attached on top of the variance aggregator.
 *
 * @return the planned aggregation, or {@code null} when the input cannot be converted to a Druid
 *         expression usable by a numeric aggregator
 * @throws IAE if the input's column type cannot be determined or is not numeric
 */
@Nullable
@Override
public Aggregation toDruidAggregation(PlannerContext plannerContext, RowSignature rowSignature, VirtualColumnRegistry virtualColumnRegistry, RexBuilder rexBuilder, String name, AggregateCall aggregateCall, Project project, List<Aggregation> existingAggregations, boolean finalizeAggregations) {
    final RexNode inputOperand = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0));
    final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator(plannerContext, rowSignature, inputOperand);
    if (input == null) {
        return null;
    }
    final RelDataType dataType = inputOperand.getType();
    final ColumnType inputType = Calcites.getColumnTypeForRelDataType(dataType);
    final String aggName = StringUtils.format("%s:agg", name);
    final SqlAggFunction func = calciteFunction();

    // Validate the input type up front, before it is handed to any dimension spec and before a
    // virtual column can be registered for an aggregation that is going to be rejected anyway.
    if (inputType == null) {
        throw new IAE("VarianceSqlAggregator[%s] has invalid inputType", func);
    }
    if (!inputType.isNumeric()) {
        throw new IAE("VarianceSqlAggregator[%s] has invalid inputType[%s]", func, inputType.asTypeString());
    }
    final String inputTypeName = StringUtils.toLowerCase(inputType.getType().name());

    final DimensionSpec dimensionSpec;
    if (input.isSimpleExtraction()) {
        dimensionSpec = input.getSimpleExtraction().toDimensionSpec(null, inputType);
    } else {
        // Anything that is not a simple extraction is read through a virtual column.
        String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, dataType);
        dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType);
    }

    final String estimator;
    if (func == SqlStdOperatorTable.VAR_POP || func == SqlStdOperatorTable.STDDEV_POP) {
        estimator = "population";
    } else {
        estimator = "sample";
    }

    final AggregatorFactory aggregatorFactory = new VarianceAggregatorFactory(aggName, dimensionSpec.getDimension(), estimator, inputTypeName);

    // Standard deviation is derived from the variance aggregator via a post-aggregator; plain
    // variance functions need none, so the post-aggregator stays null for them.
    PostAggregator postAggregator = null;
    if (func == SqlStdOperatorTable.STDDEV_POP || func == SqlStdOperatorTable.STDDEV_SAMP || func == SqlStdOperatorTable.STDDEV) {
        postAggregator = new StandardDeviationPostAggregator(name, aggregatorFactory.getName(), estimator);
    }
    return Aggregation.create(ImmutableList.of(aggregatorFactory), postAggregator);
}
Also used : DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnType(org.apache.druid.segment.column.ColumnType) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) StandardDeviationPostAggregator(org.apache.druid.query.aggregation.variance.StandardDeviationPostAggregator) StandardDeviationPostAggregator(org.apache.druid.query.aggregation.variance.StandardDeviationPostAggregator) RelDataType(org.apache.calcite.rel.type.RelDataType) SqlAggFunction(org.apache.calcite.sql.SqlAggFunction) VarianceAggregatorFactory(org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) IAE(org.apache.druid.java.util.common.IAE) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) VarianceAggregatorFactory(org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)

Example 29 with ColumnType

use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.

the class RowBasedGrouperHelper method makeValueConvertFunctions.

/**
 * Builds one conversion function per entry of {@code valueTypes}; each function converts its input
 * to the corresponding column type via {@code DimensionHandlerUtils.convertObjectToType}.
 *
 * @param valueTypes target types, positionally aligned with the returned array; entries may be null
 * @return an array of converters with the same length as {@code valueTypes}
 */
@SuppressWarnings("unchecked")
private static Function<Comparable, Comparable>[] makeValueConvertFunctions(final List<ColumnType> valueTypes) {
    final int count = valueTypes.size();
    final Function<Comparable, Comparable>[] converters = new Function[count];
    for (int position = 0; position < count; position++) {
        final ColumnType declared = valueTypes.get(position);
        // Subquery post-aggs aren't added to the rowSignature (see rowSignatureFor() in GroupByQueryHelper) because
        // their types aren't known, so default to String handling.
        final ColumnType effective = declared != null ? declared : ColumnType.STRING;
        converters[position] = value -> DimensionHandlerUtils.convertObjectToType(value, effective);
    }
    return converters;
}
Also used : ToLongFunction(java.util.function.ToLongFunction) Function(java.util.function.Function) ColumnType(org.apache.druid.segment.column.ColumnType)

Example 30 with ColumnType

use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.

the class GroupByQueryEngineV2 method convertRowTypesToOutputTypes.

/**
 * Converts, in place, the dimension values of {@code resultRow} to the output types declared by
 * the corresponding dimension specs.
 *
 * @param dimensionSpecs          dimension specs, in the same order as the dimension values in the row
 * @param resultRow               the row whose dimension positions are overwritten with converted values
 * @param resultRowDimensionStart index in {@code resultRow} of the first dimension value
 */
public static void convertRowTypesToOutputTypes(final List<DimensionSpec> dimensionSpecs, final ResultRow resultRow, final int resultRowDimensionStart) {
    int rowPosition = resultRowDimensionStart;
    for (final DimensionSpec spec : dimensionSpecs) {
        final ColumnType targetType = spec.getOutputType();
        resultRow.set(rowPosition, DimensionHandlerUtils.convertObjectToType(resultRow.get(rowPosition), targetType));
        rowPosition++;
    }
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnType(org.apache.druid.segment.column.ColumnType)

Aggregations

ColumnType (org.apache.druid.segment.column.ColumnType): 43; Nullable (javax.annotation.Nullable): 16; ISE (org.apache.druid.java.util.common.ISE): 15; RowSignature (org.apache.druid.segment.column.RowSignature): 14; AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 13; RexNode (org.apache.calcite.rex.RexNode): 12; DruidExpression (org.apache.druid.sql.calcite.expression.DruidExpression): 12; List (java.util.List): 11; IAE (org.apache.druid.java.util.common.IAE): 11; RelDataType (org.apache.calcite.rel.type.RelDataType): 9; DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 9; Collectors (java.util.stream.Collectors): 8; ArrayList (java.util.ArrayList): 7; SqlAggFunction (org.apache.calcite.sql.SqlAggFunction): 5; DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 5; StringComparator (org.apache.druid.query.ordering.StringComparator): 5; Aggregation (org.apache.druid.sql.calcite.aggregation.Aggregation): 5; JsonCreator (com.fasterxml.jackson.annotation.JsonCreator): 4; Preconditions (com.google.common.base.Preconditions): 4; Collections (java.util.Collections): 4