Use of org.apache.datasketches.hll.Union in project druid by druid-io.
From the class HllSketchMergeVectorAggregator, method aggregate:
@Override
public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) {
  final Object[] vector = objectSupplier.get();
  // Wrap this aggregator's slice of the shared buffer as writable off-heap memory.
  final WritableMemory mem = WritableMemory.writableWrap(buf, ByteOrder.LITTLE_ENDIAN)
                                           .writableRegion(position, helper.getSize());
  // Re-wrap the Union image that was written into this region when the position was initialized.
  final Union union = Union.writableWrap(mem);
  for (int i = startRow; i < endRow; i++) {
    union.update((HllSketch) vector[i]);
  }
}
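Union.writableWrap only re-wraps an image that already exists in the target memory, so the pattern above depends on the region having been initialized beforehand. A minimal, self-contained sketch of that lifecycle, assuming lgK = 12 and the Union(lgK, WritableMemory) constructor for laying down the initial image (both are assumptions, not taken from the snippet above):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import org.apache.datasketches.hll.HllSketch;
import org.apache.datasketches.hll.TgtHllType;
import org.apache.datasketches.hll.Union;
import org.apache.datasketches.memory.WritableMemory;

public class OffHeapUnionExample {
  public static void main(String[] args) {
    final int lgK = 12;  // assumed sketch size parameter
    // Size the buffer for the largest updatable union image at this lgK.
    final int unionBytes = HllSketch.getMaxUpdatableSerializationBytes(lgK, TgtHllType.HLL_8);
    final ByteBuffer buf = ByteBuffer.allocateDirect(unionBytes);
    final WritableMemory mem = WritableMemory.writableWrap(buf, ByteOrder.LITTLE_ENDIAN);

    // "init" step: write an empty Union image into the off-heap region.
    new Union(lgK, mem);

    // "aggregate" step: re-wrap the image and fold a sketch into it.
    final HllSketch sketch = new HllSketch(lgK);
    sketch.update("some-dimension-value");
    final Union union = Union.writableWrap(mem);
    union.update(sketch);

    System.out.println(union.getEstimate());  // ~1.0
  }
}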
Use of org.apache.datasketches.hll.Union in project druid by druid-io.
From the class HllSketchUnionPostAggregator, method compute:
@Override
public HllSketch compute(final Map<String, Object> combinedAggregators) {
  final Union union = new Union(lgK);
  for (final PostAggregator field : fields) {
    // Each field post-aggregator yields an HllSketch; fold it into the union.
    final HllSketch sketch = (HllSketch) field.compute(combinedAggregators);
    union.update(sketch);
  }
  return union.getResult(tgtHllType);
}
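For this fully on-heap path, a Union is built directly from sketches and reduced back to a compact HllSketch with getResult. A minimal sketch, assuming lgK = 12 and a target type of HLL_4 (both assumptions for illustration):

import org.apache.datasketches.hll.HllSketch;
import org.apache.datasketches.hll.TgtHllType;
import org.apache.datasketches.hll.Union;

public class OnHeapUnionExample {
  public static void main(String[] args) {
    final int lgK = 12;  // assumed sketch size parameter
    final HllSketch a = new HllSketch(lgK);
    final HllSketch b = new HllSketch(lgK);
    for (int i = 0; i < 1000; i++) {
      a.update("user-" + i);
      b.update("user-" + (i + 500));  // roughly 50% overlap with a
    }

    final Union union = new Union(lgK);
    union.update(a);
    union.update(b);

    // Reduce the union to a compact sketch of the requested target type.
    final HllSketch merged = union.getResult(TgtHllType.HLL_4);
    System.out.println(merged.getEstimate());  // ~1500
  }
}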
Use of org.apache.datasketches.hll.Union in project druid by druid-io.
From the class ParallelIndexSupervisorTask, method determineNumShardsFromCardinalityReport:
@VisibleForTesting
public static Map<Interval, Integer> determineNumShardsFromCardinalityReport(
    Collection<DimensionCardinalityReport> reports,
    int maxRowsPerSegment
) {
  // aggregate all the sub-reports
  Map<Interval, Union> finalCollectors = mergeCardinalityReports(reports);
  return CollectionUtils.mapValues(finalCollectors, union -> {
    final double estimatedCardinality = union.getEstimate();
    // determine numShards based on maxRowsPerSegment and the cardinality
    final long estimatedNumShards = Math.round(estimatedCardinality / maxRowsPerSegment);
    try {
      return Math.max(Math.toIntExact(estimatedNumShards), 1);
    } catch (ArithmeticException ae) {
      throw new ISE("Estimated numShards [%s] exceeds integer bounds.", estimatedNumShards);
    }
  });
}
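A worked example of the shard arithmetic above, using hypothetical numbers in place of the union estimate:

// Hypothetical values; union.getEstimate() would supply the cardinality in practice.
final double estimatedCardinality = 12_500_000.0;
final int maxRowsPerSegment = 5_000_000;

final long estimatedNumShards = Math.round(estimatedCardinality / maxRowsPerSegment);  // 12.5M / 5M = 2.5, rounds to 3
final int numShards = Math.max(Math.toIntExact(estimatedNumShards), 1);                // clamp to at least 1 shard
// Math.toIntExact throws ArithmeticException only when the long overflows an int,
// which is the case the ISE above guards against.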
Use of org.apache.datasketches.hll.Union in project druid by druid-io.
From the class ParallelIndexSupervisorTask, method mergeCardinalityReports:
private static Map<Interval, Union> mergeCardinalityReports(Collection<DimensionCardinalityReport> reports) {
  Map<Interval, Union> finalCollectors = new HashMap<>();
  reports.forEach(report -> {
    Map<Interval, byte[]> intervalToCardinality = report.getIntervalToCardinalities();
    for (Map.Entry<Interval, byte[]> entry : intervalToCardinality.entrySet()) {
      // Deserialize the per-interval sketch reported by a sub-task and union it
      // into the collector for that interval.
      HllSketch entryHll = HllSketch.wrap(Memory.wrap(entry.getValue()));
      finalCollectors
          .computeIfAbsent(entry.getKey(), k -> new Union(DimensionCardinalityReport.HLL_SKETCH_LOG_K))
          .update(entryHll);
    }
  });
  return finalCollectors;
}
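Each sub-task report carries its per-interval sketches as serialized byte arrays, so the merge wraps them read-only before unioning. A minimal sketch of that round trip; the lgK value of 11 is an assumption standing in for DimensionCardinalityReport.HLL_SKETCH_LOG_K, which is not shown above:

import org.apache.datasketches.hll.HllSketch;
import org.apache.datasketches.hll.Union;
import org.apache.datasketches.memory.Memory;

public class MergeFromBytesExample {
  public static void main(String[] args) {
    final int lgK = 11;  // assumed log2(K)

    // Serialize two sketches the way sub-task reports would carry them.
    final HllSketch s1 = new HllSketch(lgK);
    final HllSketch s2 = new HllSketch(lgK);
    s1.update("row-a");
    s2.update("row-b");
    final byte[] bytes1 = s1.toCompactByteArray();
    final byte[] bytes2 = s2.toCompactByteArray();

    // Wrap the serialized images read-only and fold them into one Union.
    final Union union = new Union(lgK);
    union.update(HllSketch.wrap(Memory.wrap(bytes1)));
    union.update(HllSketch.wrap(Memory.wrap(bytes2)));

    System.out.println(union.getEstimate());  // ~2.0
  }
}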