Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
The class AvgSqlAggregator, method toDruidAggregation:
@Nullable
@Override
public Aggregation toDruidAggregation(
    final PlannerContext plannerContext,
    final RowSignature rowSignature,
    final VirtualColumnRegistry virtualColumnRegistry,
    final RexBuilder rexBuilder,
    final String name,
    final AggregateCall aggregateCall,
    final Project project,
    final List<Aggregation> existingAggregations,
    final boolean finalizeAggregations
)
{
  final List<DruidExpression> arguments =
      Aggregations.getArgumentsForSimpleAggregator(plannerContext, rowSignature, aggregateCall, project);
  if (arguments == null) {
    return null;
  }

  final String countName = Calcites.makePrefixedName(name, "count");
  final AggregatorFactory count = CountSqlAggregator.createCountAggregatorFactory(
      countName, plannerContext, rowSignature, virtualColumnRegistry, rexBuilder, aggregateCall, project
  );

  final String fieldName;
  final String expression;
  final DruidExpression arg = Iterables.getOnlyElement(arguments);
  final ExprMacroTable macroTable = plannerContext.getExprMacroTable();

  final ValueType sumType;
  // Use 64-bit sum regardless of the type of the AVG aggregator.
  if (SqlTypeName.INT_TYPES.contains(aggregateCall.getType().getSqlTypeName())) {
    sumType = ValueType.LONG;
  } else {
    sumType = ValueType.DOUBLE;
  }

  if (arg.isDirectColumnAccess()) {
    fieldName = arg.getDirectColumn();
    expression = null;
  } else {
    // if the filter or anywhere else defined a virtual column for us, re-use it
    final RexNode resolutionArg =
        Expressions.fromFieldAccess(rowSignature, project, Iterables.getOnlyElement(aggregateCall.getArgList()));
    String vc = virtualColumnRegistry.getVirtualColumnByExpression(arg, resolutionArg.getType());
    fieldName = vc != null ? vc : null;
    expression = vc != null ? null : arg.getExpression();
  }

  final String sumName = Calcites.makePrefixedName(name, "sum");
  final AggregatorFactory sum = SumSqlAggregator.createSumAggregatorFactory(sumType, sumName, fieldName, expression, macroTable);

  return Aggregation.create(
      ImmutableList.of(sum, count),
      new ArithmeticPostAggregator(
          name,
          "quotient",
          ImmutableList.of(new FieldAccessPostAggregator(null, sumName), new FieldAccessPostAggregator(null, countName))
      )
  );
}
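For context, toDruidAggregation expands SQL AVG into two native aggregators plus a "quotient" arithmetic post-aggregator. The hand-built sketch below shows that shape with concrete factories; the column name "m" and the output names "a0", "a0:sum", "a0:count" are purely illustrative, and the real count produced by CountSqlAggregator may additionally be filtered for SQL null handling.

import java.util.HashMap;
import java.util.Map;
import com.google.common.collect.ImmutableList;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;

public class AvgDecompositionSketch
{
  public static void main(String[] args)
  {
    // The two aggregators that do the work at query time.
    AggregatorFactory sum = new LongSumAggregatorFactory("a0:sum", "m");
    AggregatorFactory count = new CountAggregatorFactory("a0:count");

    // The average itself is computed afterwards as sum / count.
    ArithmeticPostAggregator avg = new ArithmeticPostAggregator(
        "a0",
        "quotient",
        ImmutableList.of(new FieldAccessPostAggregator(null, "a0:sum"), new FieldAccessPostAggregator(null, "a0:count"))
    );

    // Feed it some already-aggregated results to see the quotient.
    Map<String, Object> aggregated = new HashMap<>();
    aggregated.put("a0:sum", 10L);
    aggregated.put("a0:count", 4L);
    System.out.println(avg.compute(aggregated)); // 2.5
  }
}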
Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
The class IndexMergerV9, method makeIndexFiles:
private File makeIndexFiles(
    final List<IndexableAdapter> adapters,
    @Nullable final AggregatorFactory[] metricAggs,
    final File outDir,
    final ProgressIndicator progress,
    final List<String> mergedDimensions,
    final List<String> mergedMetrics,
    final Function<List<TransformableRowIterator>, TimeAndDimsIterator> rowMergerFn,
    final boolean fillRowNumConversions,
    final IndexSpec indexSpec,
    @Nullable final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory
) throws IOException
{
  progress.start();
  progress.progress();

  List<Metadata> metadataList = Lists.transform(adapters, IndexableAdapter::getMetadata);
  final Metadata segmentMetadata;
  if (metricAggs != null) {
    AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
    for (int i = 0; i < metricAggs.length; i++) {
      combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
    }
    segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
  } else {
    segmentMetadata = Metadata.merge(metadataList, null);
  }

  Closer closer = Closer.create();
  try {
    final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
    FileUtils.mkdirp(outDir);

    SegmentWriteOutMediumFactory omf =
        segmentWriteOutMediumFactory != null ? segmentWriteOutMediumFactory : defaultSegmentWriteOutMediumFactory;
    log.debug("Using SegmentWriteOutMediumFactory[%s]", omf.getClass().getSimpleName());
    SegmentWriteOutMedium segmentWriteOutMedium = omf.makeSegmentWriteOutMedium(outDir);
    closer.register(segmentWriteOutMedium);

    long startTime = System.currentTimeMillis();
    Files.asByteSink(new File(outDir, "version.bin")).write(Ints.toByteArray(IndexIO.V9_VERSION));
    log.debug("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);

    progress.progress();
    startTime = System.currentTimeMillis();
    try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
      SegmentizerFactory customSegmentLoader = indexSpec.getSegmentLoader();
      if (customSegmentLoader != null) {
        mapper.writeValue(fos, customSegmentLoader);
      } else {
        mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
      }
    }
    log.debug("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);

    progress.progress();
    final Map<String, ValueType> metricsValueTypes = new TreeMap<>(Comparators.naturalNullsFirst());
    final Map<String, String> metricTypeNames = new TreeMap<>(Comparators.naturalNullsFirst());
    final List<ColumnCapabilities> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
    mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);

    final Map<String, DimensionHandler> handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
    final List<DimensionMergerV9> mergers = new ArrayList<>();
    for (int i = 0; i < mergedDimensions.size(); i++) {
      DimensionHandler handler = handlers.get(mergedDimensions.get(i));
      mergers.add(handler.makeMerger(indexSpec, segmentWriteOutMedium, dimCapabilities.get(i), progress, closer));
    }

    /************* Setup Dim Conversions *************/
    progress.progress();
    startTime = System.currentTimeMillis();
    writeDimValuesAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
    log.debug("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);

    /************* Walk through data sets, merge them, and write merged columns *************/
    progress.progress();
    final TimeAndDimsIterator timeAndDimsIterator =
        makeMergedTimeAndDimsIterator(adapters, mergedDimensions, mergedMetrics, rowMergerFn, handlers, mergers);
    closer.register(timeAndDimsIterator);
    final GenericColumnSerializer timeWriter = setupTimeWriter(segmentWriteOutMedium, indexSpec);
    final ArrayList<GenericColumnSerializer> metricWriters =
        setupMetricsWriters(segmentWriteOutMedium, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
    List<IntBuffer> rowNumConversions =
        mergeIndexesAndWriteColumns(adapters, progress, timeAndDimsIterator, timeWriter, metricWriters, mergers, fillRowNumConversions);

    /************* Create Inverted Indexes and Finalize Build Columns *************/
    final String section = "build inverted index and columns";
    progress.startSection(section);
    makeTimeColumn(v9Smoosher, progress, timeWriter, indexSpec);
    makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metricWriters, indexSpec);
    for (int i = 0; i < mergedDimensions.size(); i++) {
      DimensionMergerV9 merger = mergers.get(i);
      merger.writeIndexes(rowNumConversions);
      if (merger.canSkip()) {
        continue;
      }
      ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
      makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
    }
    progress.stopSection(section);

    /************* Make index.drd & metadata.drd files *************/
    progress.progress();
    makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
    makeMetadataBinary(v9Smoosher, progress, segmentMetadata);

    v9Smoosher.close();
    progress.stop();
    return outDir;
  }
  catch (Throwable t) {
    throw closer.rethrow(t);
  }
  finally {
    closer.close();
  }
}
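One detail worth calling out: the segment metadata is merged using each aggregator's getCombiningFactory() rather than the original factory, because a persisted segment already holds aggregated values stored under the metric's own name. A small sketch of the difference, using metric names borrowed from the test fixture further below purely for illustration:

import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;

public class CombiningFactorySketch
{
  public static void main(String[] args)
  {
    // At ingestion time the sum reads the raw input column...
    AggregatorFactory atIngest = new LongSumAggregatorFactory("visited_sum", "visited_num");
    // ...but when combining already-aggregated data, the factory re-reads the summed metric column itself.
    AggregatorFactory atMerge = atIngest.getCombiningFactory();

    System.out.println(atIngest.requiredFields()); // [visited_num]
    System.out.println(atMerge.requiredFields());  // [visited_sum]
  }
}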
Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
The class IndexMergerV9, method merge:
private File merge(
    List<IndexableAdapter> indexes,
    final boolean rollup,
    final AggregatorFactory[] metricAggs,
    @Nullable DimensionsSpec dimensionsSpec,
    File outDir,
    IndexSpec indexSpec,
    ProgressIndicator progress,
    @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory
) throws IOException
{
  final List<String> mergedDimensions = IndexMerger.getMergedDimensions(indexes, dimensionsSpec);
  final List<String> mergedMetrics = IndexMerger.mergeIndexed(
      indexes.stream().map(IndexableAdapter::getMetricNames).collect(Collectors.toList())
  );

  final AggregatorFactory[] sortedMetricAggs = new AggregatorFactory[mergedMetrics.size()];
  for (AggregatorFactory metricAgg : metricAggs) {
    int metricIndex = mergedMetrics.indexOf(metricAgg.getName());
    /*
      If metricIndex is negative, one of the metricAggs was not present in the union of metrics from the indices
      we are merging
     */
    if (metricIndex > -1) {
      sortedMetricAggs[metricIndex] = metricAgg;
    }
  }

  /*
    If there is nothing at sortedMetricAggs[i], then we did not have a metricAgg whose name matched the name
    of the ith element of mergedMetrics. I.e. There was a metric in the indices to merge that we did not ask for.
   */
  for (int i = 0; i < sortedMetricAggs.length; i++) {
    if (sortedMetricAggs[i] == null) {
      throw new IAE("Indices to merge contained metric[%s], but requested metrics did not", mergedMetrics.get(i));
    }
  }

  for (int i = 0; i < mergedMetrics.size(); i++) {
    if (!sortedMetricAggs[i].getName().equals(mergedMetrics.get(i))) {
      throw new IAE("Metric mismatch, index[%d] [%s] != [%s]", i, sortedMetricAggs[i].getName(), mergedMetrics.get(i));
    }
  }

  Function<List<TransformableRowIterator>, TimeAndDimsIterator> rowMergerFn;
  if (rollup) {
    rowMergerFn = rowIterators -> new RowCombiningTimeAndDimsIterator(rowIterators, sortedMetricAggs, mergedMetrics);
  } else {
    rowMergerFn = MergingRowIterator::new;
  }

  return makeIndexFiles(
      indexes,
      sortedMetricAggs,
      outDir,
      progress,
      mergedDimensions,
      mergedMetrics,
      rowMergerFn,
      true,
      indexSpec,
      segmentWriteOutMediumFactory
  );
}
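The reordering and validation above can be read in isolation: the caller's aggregators are slotted into the order of mergedMetrics, and any metric found in the indexes but not requested is rejected. A standalone illustration with plain strings (the metric names are made up):

import java.util.Arrays;
import java.util.List;

public class MetricAlignmentSketch
{
  public static void main(String[] args)
  {
    List<String> mergedMetrics = Arrays.asList("clicks", "visited_sum"); // order discovered in the indexes
    List<String> requested = Arrays.asList("visited_sum", "clicks");     // order supplied by the caller

    String[] sorted = new String[mergedMetrics.size()];
    for (String name : requested) {
      int idx = mergedMetrics.indexOf(name);
      if (idx > -1) {
        sorted[idx] = name; // slot the requested metric into the merged order
      }
    }
    for (int i = 0; i < sorted.length; i++) {
      if (sorted[i] == null) {
        throw new IllegalArgumentException("Indices to merge contained metric[" + mergedMetrics.get(i) + "], but requested metrics did not");
      }
    }
    System.out.println(Arrays.toString(sorted)); // [clicks, visited_sum]
  }
}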
Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
The class InputRowSerde, method fromBytes:
public static InputRow fromBytes(final Map<String, IndexSerdeTypeHelper> typeHelperMap, byte[] data, AggregatorFactory[] aggs)
{
  try {
    ByteArrayDataInput in = ByteStreams.newDataInput(data);

    // Read timestamp
    long timestamp = in.readLong();

    Map<String, Object> event = new HashMap<>();

    // Read dimensions
    List<String> dimensions = new ArrayList<>();
    int dimNum = WritableUtils.readVInt(in);
    for (int i = 0; i < dimNum; i++) {
      String dimension = readString(in);
      dimensions.add(dimension);

      IndexSerdeTypeHelper typeHelper = typeHelperMap.get(dimension);
      if (typeHelper == null) {
        typeHelper = STRING_HELPER;
      }
      Object dimValues = typeHelper.deserialize(in);
      if (dimValues == null) {
        continue;
      }

      if (typeHelper.getType() == ValueType.STRING) {
        List<String> dimensionValues = (List<String>) dimValues;
        if (dimensionValues.size() == 1) {
          event.put(dimension, dimensionValues.get(0));
        } else {
          event.put(dimension, dimensionValues);
        }
      } else {
        event.put(dimension, dimValues);
      }
    }

    // Read metrics
    int metricSize = WritableUtils.readVInt(in);
    for (int i = 0; i < metricSize; i++) {
      final String metric = readString(in);
      final AggregatorFactory agg = getAggregator(metric, aggs, i);
      final ColumnType type = agg.getIntermediateType();
      final byte metricNullability = in.readByte();
      if (metricNullability == NullHandling.IS_NULL_BYTE) {
        // metric value is null.
        continue;
      }
      if (type.is(ValueType.FLOAT)) {
        event.put(metric, in.readFloat());
      } else if (type.is(ValueType.LONG)) {
        event.put(metric, WritableUtils.readVLong(in));
      } else if (type.is(ValueType.DOUBLE)) {
        event.put(metric, in.readDouble());
      } else {
        ComplexMetricSerde serde = getComplexMetricSerde(agg.getIntermediateType().getComplexTypeName());
        byte[] value = readBytes(in);
        event.put(metric, serde.fromBytes(value, 0, value.length));
      }
    }

    return new MapBasedInputRow(timestamp, dimensions, event);
  }
  catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
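A hedged round-trip sketch of how fromBytes is typically fed. It assumes the companion InputRowSerde.toBytes(typeHelperMap, row, aggs) and InputRowSerde.getTypeHelperMap(DimensionsSpec) helpers and the SerializeResult.getSerializedRow() accessor; treat those signatures as assumptions rather than a documented contract.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;

public class InputRowSerdeRoundTripSketch
{
  public static void main(String[] args)
  {
    AggregatorFactory[] aggs = {new LongSumAggregatorFactory("visited_sum", "visited_num")};
    DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")));

    InputRow row = new MapBasedInputRow(
        1000L,
        ImmutableList.of("host"),
        ImmutableMap.<String, Object>of("host", "a.example.com", "visited_num", 12L)
    );

    // toBytes() aggregates the metrics and writes the timestamp/dimension/metric framing that fromBytes reads above.
    byte[] bytes = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), row, aggs).getSerializedRow();

    InputRow back = InputRowSerde.fromBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), bytes, aggs);
    System.out.println(back.getDimension("host") + " " + back.getRaw("visited_sum")); // [a.example.com] 12
  }
}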
Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
The class BatchDeltaIngestionTest, method makeHadoopDruidIndexerConfig:
private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig(
    Map<String, Object> inputSpec,
    File tmpDir,
    AggregatorFactory[] aggregators
) throws Exception
{
  HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
      new HadoopIngestionSpec(
          new DataSchema(
              "website",
              MAPPER.convertValue(
                  new StringInputRowParser(
                      new CSVParseSpec(
                          new TimestampSpec("timestamp", "yyyyMMddHH", null),
                          new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host"))),
                          null,
                          ImmutableList.of("timestamp", "host", "host2", "visited_num"),
                          false,
                          0
                      ),
                      null
                  ),
                  Map.class
              ),
              aggregators != null ? aggregators : new AggregatorFactory[] {
                  new LongSumAggregatorFactory("visited_sum", "visited_num"),
                  new HyperUniquesAggregatorFactory("unique_hosts", "host2")
              },
              new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
              null,
              MAPPER
          ),
          new HadoopIOConfig(inputSpec, null, tmpDir.getCanonicalPath()),
          new HadoopTuningConfig(
              tmpDir.getCanonicalPath(),
              null, null, null, null, null, null, null, null,
              false, false, false, false,
              null,
              false, false,
              null, null,
              false, false,
              null, null, null, null, null
          )
      )
  );
  config.setShardSpecs(
      ImmutableMap.of(
          INTERVAL_FULL.getStartMillis(),
          ImmutableList.of(
              new HadoopyShardSpec(
                  new HashBasedNumberedShardSpec(0, 1, 0, 1, null, HashPartitionFunction.MURMUR3_32_ABS, HadoopDruidIndexerConfig.JSON_MAPPER),
                  0
              )
          )
      )
  );
  config = HadoopDruidIndexerConfig.fromSpec(config.getSchema());
  return config;
}
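When the test passes aggregators == null, the fallback metric set above is used. Restated on its own for readability (same classes as in the config, nothing new): a long sum of visited_num and an approximate-distinct (hyperUnique) sketch over the host2 column.

import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;

public class DefaultTestMetrics
{
  // Default metrics when the test does not supply its own aggregators.
  static final AggregatorFactory[] DEFAULT_AGGREGATORS = new AggregatorFactory[]{
      new LongSumAggregatorFactory("visited_sum", "visited_num"),
      new HyperUniquesAggregatorFactory("unique_hosts", "host2")
  };
}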