Use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.
The class BloomFilterSqlAggregator, method toDruidAggregation.
@Nullable
@Override
public Aggregation toDruidAggregation(
    PlannerContext plannerContext,
    RowSignature rowSignature,
    VirtualColumnRegistry virtualColumnRegistry,
    RexBuilder rexBuilder,
    String name,
    AggregateCall aggregateCall,
    Project project,
    List<Aggregation> existingAggregations,
    boolean finalizeAggregations
)
{
  final RexNode inputOperand = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0));
  final DruidExpression input = Expressions.toDruidExpression(plannerContext, rowSignature, inputOperand);
  if (input == null) {
    return null;
  }

  final AggregatorFactory aggregatorFactory;
  final String aggName = StringUtils.format("%s:agg", name);
  final RexNode maxNumEntriesOperand = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));

  if (!maxNumEntriesOperand.isA(SqlKind.LITERAL)) {
    // maxNumEntriesOperand must be a literal in order to plan.
    return null;
  }

  final int maxNumEntries = ((Number) RexLiteral.value(maxNumEntriesOperand)).intValue();

  // Look for existing matching aggregatorFactory.
  for (final Aggregation existing : existingAggregations) {
    for (AggregatorFactory factory : existing.getAggregatorFactories()) {
      if (factory instanceof BloomFilterAggregatorFactory) {
        final BloomFilterAggregatorFactory theFactory = (BloomFilterAggregatorFactory) factory;

        // Check input for equivalence.
        final boolean inputMatches;
        final DruidExpression virtualInput =
            virtualColumnRegistry.findVirtualColumnExpressions(theFactory.requiredFields())
                                 .stream()
                                 .findFirst()
                                 .orElse(null);

        if (virtualInput == null) {
          if (input.isDirectColumnAccess()) {
            inputMatches = input.getDirectColumn().equals(theFactory.getField().getDimension());
          } else {
            inputMatches = input.getSimpleExtraction().getColumn().equals(theFactory.getField().getDimension())
                           && input.getSimpleExtraction().getExtractionFn().equals(theFactory.getField().getExtractionFn());
          }
        } else {
          inputMatches = virtualInput.equals(input);
        }

        final boolean matches = inputMatches && theFactory.getMaxNumEntries() == maxNumEntries;
        if (matches) {
          // Found existing one. Use this.
          return Aggregation.create(theFactory);
        }
      }
    }
  }

  // No existing match found. Create a new one.
  ColumnType valueType = Calcites.getColumnTypeForRelDataType(inputOperand.getType());
  final DimensionSpec spec;
  if (input.isDirectColumnAccess()) {
    spec = new DefaultDimensionSpec(
        input.getSimpleExtraction().getColumn(),
        StringUtils.format("%s:%s", name, input.getSimpleExtraction().getColumn()),
        valueType
    );
  } else if (input.isSimpleExtraction()) {
    spec = new ExtractionDimensionSpec(
        input.getSimpleExtraction().getColumn(),
        StringUtils.format("%s:%s", name, input.getSimpleExtraction().getColumn()),
        valueType,
        input.getSimpleExtraction().getExtractionFn()
    );
  } else {
    String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, inputOperand.getType());
    spec = new DefaultDimensionSpec(virtualColumnName, StringUtils.format("%s:%s", name, virtualColumnName));
  }

  aggregatorFactory = new BloomFilterAggregatorFactory(aggName, spec, maxNumEntries);
  return Aggregation.create(aggregatorFactory);
}
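The interesting part of this method is the reuse scan: before building a new BloomFilterAggregatorFactory, it walks existingAggregations looking for a factory with an equivalent input and the same maxNumEntries, so the same filter is not computed twice. A minimal, self-contained sketch of that pattern follows; the BloomSpec record and findExisting helper are hypothetical stand-ins, not Druid API.

import java.util.List;
import java.util.Objects;
import java.util.Optional;

public class BloomReuseSketch
{
  // Hypothetical stand-in for the (input, maxNumEntries) pair the real factory carries.
  record BloomSpec(String inputColumn, int maxNumEntries) {}

  // Scan existing specs for one that matches on both fields, mirroring the reuse check above.
  static Optional<BloomSpec> findExisting(List<BloomSpec> existing, String inputColumn, int maxNumEntries)
  {
    return existing.stream()
                   .filter(s -> Objects.equals(s.inputColumn(), inputColumn) && s.maxNumEntries() == maxNumEntries)
                   .findFirst();
  }

  public static void main(String[] args)
  {
    List<BloomSpec> existing = List.of(new BloomSpec("dim1", 1000));
    System.out.println(findExisting(existing, "dim1", 1000).isPresent()); // true: reuse it
    System.out.println(findExisting(existing, "dim1", 2000).isPresent()); // false: create a new one
  }
}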
Use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.
The class InputRowSerde, method toBytes.
public static SerializeResult toBytes(
    final Map<String, IndexSerdeTypeHelper> typeHelperMap,
    final InputRow row,
    AggregatorFactory[] aggs
)
{
  try {
    List<String> parseExceptionMessages = new ArrayList<>();
    ByteArrayDataOutput out = ByteStreams.newDataOutput();

    // Write the timestamp.
    out.writeLong(row.getTimestampFromEpoch());

    // Write all dimensions.
    List<String> dimList = row.getDimensions();
    WritableUtils.writeVInt(out, dimList.size());
    for (String dim : dimList) {
      IndexSerdeTypeHelper typeHelper = typeHelperMap.get(dim);
      if (typeHelper == null) {
        typeHelper = STRING_HELPER;
      }
      writeString(dim, out);
      try {
        typeHelper.serialize(out, row.getRaw(dim));
      }
      catch (ParseException pe) {
        parseExceptionMessages.add(pe.getMessage());
      }
    }

    // Write all metrics.
    Supplier<InputRow> supplier = () -> row;
    WritableUtils.writeVInt(out, aggs.length);
    for (AggregatorFactory aggFactory : aggs) {
      String k = aggFactory.getName();
      writeString(k, out);

      try (Aggregator agg = aggFactory.factorize(
          IncrementalIndex.makeColumnSelectorFactory(VirtualColumns.EMPTY, aggFactory, supplier, true)
      )) {
        try {
          agg.aggregate();
        }
        catch (ParseException e) {
          // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
          log.debug(e, "Encountered parse error, skipping aggregator[%s].", k);
          parseExceptionMessages.add(e.getMessage());
        }

        final ColumnType type = aggFactory.getIntermediateType();
        if (agg.isNull()) {
          out.writeByte(NullHandling.IS_NULL_BYTE);
        } else {
          out.writeByte(NullHandling.IS_NOT_NULL_BYTE);
          if (type.is(ValueType.FLOAT)) {
            out.writeFloat(agg.getFloat());
          } else if (type.is(ValueType.LONG)) {
            WritableUtils.writeVLong(out, agg.getLong());
          } else if (type.is(ValueType.DOUBLE)) {
            out.writeDouble(agg.getDouble());
          } else if (type.is(ValueType.COMPLEX)) {
            Object val = agg.get();
            ComplexMetricSerde serde = getComplexMetricSerde(type.getComplexTypeName());
            writeBytes(serde.toBytes(val), out);
          } else {
            throw new IAE("Unable to serialize type[%s]", type.asTypeString());
          }
        }
      }
    }

    return new SerializeResult(out.toByteArray(), parseExceptionMessages);
  }
  catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
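The wire layout here is: timestamp, then a varint-prefixed list of dimensions, then a varint-prefixed list of metrics, where each metric value is framed by a marker byte so the reader knows whether a payload follows. A minimal sketch of that null-marker framing using plain java.io streams; the 1/0 marker values are assumptions standing in for NullHandling.IS_NULL_BYTE and IS_NOT_NULL_BYTE, and writeLong is fixed-width where the real code uses writeVLong.

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class NullableLongSerde
{
  static final byte IS_NULL = 0x01;     // assumed marker values, for illustration only
  static final byte IS_NOT_NULL = 0x00;

  static void writeNullableLong(DataOutputStream out, Long value) throws IOException
  {
    if (value == null) {
      out.writeByte(IS_NULL);      // reader sees the marker and skips the payload
    } else {
      out.writeByte(IS_NOT_NULL);  // marker, then the payload
      out.writeLong(value);
    }
  }

  public static void main(String[] args) throws IOException
  {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    writeNullableLong(out, 42L);
    writeNullableLong(out, null);
    System.out.println(bytes.size()); // 9 bytes for 42L (marker + long), 1 byte for null => 10
  }
}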
Use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.
The class BaseVarianceSqlAggregator, method toDruidAggregation.
@Nullable
@Override
public Aggregation toDruidAggregation(
    PlannerContext plannerContext,
    RowSignature rowSignature,
    VirtualColumnRegistry virtualColumnRegistry,
    RexBuilder rexBuilder,
    String name,
    AggregateCall aggregateCall,
    Project project,
    List<Aggregation> existingAggregations,
    boolean finalizeAggregations
)
{
  final RexNode inputOperand = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0));
  final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator(plannerContext, rowSignature, inputOperand);
  if (input == null) {
    return null;
  }

  final AggregatorFactory aggregatorFactory;
  final RelDataType dataType = inputOperand.getType();
  final ColumnType inputType = Calcites.getColumnTypeForRelDataType(dataType);
  final DimensionSpec dimensionSpec;
  final String aggName = StringUtils.format("%s:agg", name);
  final SqlAggFunction func = calciteFunction();
  final String estimator;
  final String inputTypeName;
  PostAggregator postAggregator = null;

  if (input.isSimpleExtraction()) {
    dimensionSpec = input.getSimpleExtraction().toDimensionSpec(null, inputType);
  } else {
    String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, dataType);
    dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType);
  }

  if (inputType == null) {
    throw new IAE("VarianceSqlAggregator[%s] has invalid inputType", func);
  }

  if (inputType.isNumeric()) {
    inputTypeName = StringUtils.toLowerCase(inputType.getType().name());
  } else {
    throw new IAE("VarianceSqlAggregator[%s] has invalid inputType[%s]", func, inputType.asTypeString());
  }

  if (func == SqlStdOperatorTable.VAR_POP || func == SqlStdOperatorTable.STDDEV_POP) {
    estimator = "population";
  } else {
    estimator = "sample";
  }

  aggregatorFactory = new VarianceAggregatorFactory(aggName, dimensionSpec.getDimension(), estimator, inputTypeName);

  if (func == SqlStdOperatorTable.STDDEV_POP
      || func == SqlStdOperatorTable.STDDEV_SAMP
      || func == SqlStdOperatorTable.STDDEV) {
    postAggregator = new StandardDeviationPostAggregator(name, aggregatorFactory.getName(), estimator);
  }

  return Aggregation.create(ImmutableList.of(aggregatorFactory), postAggregator);
}
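The estimator string is the only thing distinguishing the POP and SAMP variants: "population" divides by n, "sample" by n - 1. As a reminder of what that choice means numerically, here is a minimal, Druid-independent sketch; the variance helper below is hypothetical illustration, not Druid's implementation.

public class VarianceEstimators
{
  static double variance(double[] xs, boolean population)
  {
    double mean = 0;
    for (double x : xs) {
      mean += x;
    }
    mean /= xs.length;

    double sumSq = 0;
    for (double x : xs) {
      sumSq += (x - mean) * (x - mean);
    }

    // Population (VAR_POP/STDDEV_POP) divides by n; sample (VAR_SAMP/STDDEV_SAMP,
    // and the STDDEV/VARIANCE defaults) divides by n - 1.
    return sumSq / (population ? xs.length : xs.length - 1);
  }

  public static void main(String[] args)
  {
    double[] xs = {1, 2, 3, 4};
    System.out.println(variance(xs, true));             // population variance: 1.25
    System.out.println(Math.sqrt(variance(xs, false))); // sample stddev: ~1.29
  }
}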
Use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.
The class RowBasedGrouperHelper, method makeValueConvertFunctions.
@SuppressWarnings("unchecked")
private static Function<Comparable, Comparable>[] makeValueConvertFunctions(final List<ColumnType> valueTypes)
{
  final Function<Comparable, Comparable>[] functions = new Function[valueTypes.size()];
  for (int i = 0; i < functions.length; i++) {
    // Subquery post-aggs aren't added to the rowSignature (see rowSignatureFor() in GroupByQueryHelper)
    // because their types aren't known, so default to String handling.
    final ColumnType type = valueTypes.get(i) == null ? ColumnType.STRING : valueTypes.get(i);
    functions[i] = input -> DimensionHandlerUtils.convertObjectToType(input, type);
  }
  return functions;
}
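The same default-to-String pattern can be sketched without Druid types. In the sketch below, ColType and the convert helper are hypothetical stand-ins for ColumnType and DimensionHandlerUtils.convertObjectToType; the point is the per-column converter array with a String fallback for unknown types.

import java.util.Arrays;
import java.util.List;
import java.util.function.Function;

public class ValueConverters
{
  enum ColType { STRING, LONG, DOUBLE } // hypothetical stand-in for ColumnType

  @SuppressWarnings("unchecked")
  static Function<Object, Object>[] makeConverters(List<ColType> types)
  {
    final Function<Object, Object>[] fns = new Function[types.size()];
    for (int i = 0; i < fns.length; i++) {
      // Unknown (null) types fall back to String, mirroring the subquery post-agg case above.
      final ColType type = types.get(i) == null ? ColType.STRING : types.get(i);
      fns[i] = input -> convert(input, type);
    }
    return fns;
  }

  static Object convert(Object input, ColType type)
  {
    switch (type) {
      case LONG:   return Long.parseLong(String.valueOf(input));
      case DOUBLE: return Double.parseDouble(String.valueOf(input));
      default:     return String.valueOf(input);
    }
  }

  public static void main(String[] args)
  {
    Function<Object, Object>[] fns = makeConverters(Arrays.asList(ColType.LONG, null));
    System.out.println(fns[0].apply("42")); // 42 as a Long
    System.out.println(fns[1].apply(42));   // "42" as a String (null type defaulted)
  }
}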
Use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.
The class GroupByQueryEngineV2, method convertRowTypesToOutputTypes.
public static void convertRowTypesToOutputTypes(
    final List<DimensionSpec> dimensionSpecs,
    final ResultRow resultRow,
    final int resultRowDimensionStart
)
{
  for (int i = 0; i < dimensionSpecs.size(); i++) {
    DimensionSpec dimSpec = dimensionSpecs.get(i);
    final int resultRowIndex = resultRowDimensionStart + i;
    final ColumnType outputType = dimSpec.getOutputType();
    resultRow.set(resultRowIndex, DimensionHandlerUtils.convertObjectToType(resultRow.get(resultRowIndex), outputType));
  }
}
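The coercion is done in place: each dimension cell from resultRowDimensionStart onward is rewritten to its declared output type. A minimal sketch with a plain Object[] standing in for ResultRow; OutputType and the coerce helper are hypothetical stand-ins for ColumnType and DimensionHandlerUtils.convertObjectToType.

import java.util.Arrays;
import java.util.List;

public class RowCoercion
{
  enum OutputType { STRING, LONG } // hypothetical stand-in for the dimension's output type

  // Rewrite each dimension cell in place, starting at dimensionStart, mirroring the method above.
  static void convertRowTypes(List<OutputType> outputTypes, Object[] row, int dimensionStart)
  {
    for (int i = 0; i < outputTypes.size(); i++) {
      final int idx = dimensionStart + i;
      row[idx] = coerce(row[idx], outputTypes.get(i));
    }
  }

  static Object coerce(Object value, OutputType type)
  {
    return type == OutputType.LONG ? Long.parseLong(String.valueOf(value)) : String.valueOf(value);
  }

  public static void main(String[] args)
  {
    // Row layout: [timestamp, dim0, dim1]; dimensions start at index 1.
    Object[] row = {1609459200000L, "7", 7};
    convertRowTypes(Arrays.asList(OutputType.LONG, OutputType.STRING), row, 1);
    System.out.println(Arrays.toString(row)); // dim0 is now a Long, dim1 a String
  }
}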