Search in sources :

Example 6 with Union

use of org.apache.datasketches.hll.Union in project druid by druid-io.

the class HllSketchMergeVectorAggregator method aggregate.

@Override
public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) {
    final Object[] vector = objectSupplier.get();
    final WritableMemory mem = WritableMemory.writableWrap(buf, ByteOrder.LITTLE_ENDIAN).writableRegion(position, helper.getSize());
    final Union union = Union.writableWrap(mem);
    for (int i = startRow; i < endRow; i++) {
        union.update((HllSketch) vector[i]);
    }
}
Also used : WritableMemory(org.apache.datasketches.memory.WritableMemory) Union(org.apache.datasketches.hll.Union)

Example 7 with Union

use of org.apache.datasketches.hll.Union in project druid by druid-io.

the class HllSketchUnionPostAggregator method compute.

@Override
public HllSketch compute(final Map<String, Object> combinedAggregators) {
    final Union union = new Union(lgK);
    for (final PostAggregator field : fields) {
        final HllSketch sketch = (HllSketch) field.compute(combinedAggregators);
        union.update(sketch);
    }
    return union.getResult(tgtHllType);
}
Also used : HllSketch(org.apache.datasketches.hll.HllSketch) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) Union(org.apache.datasketches.hll.Union)

Example 8 with Union

use of org.apache.datasketches.hll.Union in project druid by druid-io.

the class ParallelIndexSupervisorTask method determineNumShardsFromCardinalityReport.

@VisibleForTesting
public static Map<Interval, Integer> determineNumShardsFromCardinalityReport(Collection<DimensionCardinalityReport> reports, int maxRowsPerSegment) {
    // aggregate all the sub-reports
    Map<Interval, Union> finalCollectors = mergeCardinalityReports(reports);
    return CollectionUtils.mapValues(finalCollectors, union -> {
        final double estimatedCardinality = union.getEstimate();
        // determine numShards based on maxRowsPerSegment and the cardinality
        final long estimatedNumShards = Math.round(estimatedCardinality / maxRowsPerSegment);
        try {
            return Math.max(Math.toIntExact(estimatedNumShards), 1);
        } catch (ArithmeticException ae) {
            throw new ISE("Estimated numShards [%s] exceeds integer bounds.", estimatedNumShards);
        }
    });
}
Also used : ISE(org.apache.druid.java.util.common.ISE) Union(org.apache.datasketches.hll.Union) Interval(org.joda.time.Interval) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 9 with Union

use of org.apache.datasketches.hll.Union in project druid by druid-io.

the class ParallelIndexSupervisorTask method mergeCardinalityReports.

private static Map<Interval, Union> mergeCardinalityReports(Collection<DimensionCardinalityReport> reports) {
    Map<Interval, Union> finalCollectors = new HashMap<>();
    reports.forEach(report -> {
        Map<Interval, byte[]> intervalToCardinality = report.getIntervalToCardinalities();
        for (Map.Entry<Interval, byte[]> entry : intervalToCardinality.entrySet()) {
            HllSketch entryHll = HllSketch.wrap(Memory.wrap(entry.getValue()));
            finalCollectors.computeIfAbsent(entry.getKey(), k -> new Union(DimensionCardinalityReport.HLL_SKETCH_LOG_K)).update(entryHll);
        }
    });
    return finalCollectors;
}
Also used : HllSketch(org.apache.datasketches.hll.HllSketch) ArrayListMultimap(com.google.common.collect.ArrayListMultimap) TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) PartitionBoundaries(org.apache.druid.timeline.partition.PartitionBoundaries) Produces(javax.ws.rs.Produces) IngestionState(org.apache.druid.indexer.IngestionState) Pair(org.apache.druid.java.util.common.Pair) MediaType(javax.ws.rs.core.MediaType) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) SegmentTransactionalInsertAction(org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction) FiniteFirehoseFactory(org.apache.druid.data.input.FiniteFirehoseFactory) Map(java.util.Map) StringDistribution(org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution) AbstractBatchIndexTask(org.apache.druid.indexing.common.task.AbstractBatchIndexTask) InputFormat(org.apache.druid.data.input.InputFormat) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) TaskState(org.apache.druid.indexer.TaskState) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) IndexTaskUtils(org.apache.druid.indexing.common.task.IndexTaskUtils) Granularity(org.apache.druid.java.util.common.granularity.Granularity) GET(javax.ws.rs.GET) Tasks(org.apache.druid.indexing.common.task.Tasks) TaskStatus(org.apache.druid.indexer.TaskStatus) ArrayList(java.util.ArrayList) IndexTask(org.apache.druid.indexing.common.task.IndexTask) Interval(org.joda.time.Interval) HttpServletRequest(javax.servlet.http.HttpServletRequest) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) StringSketchMerger(org.apache.druid.indexing.common.task.batch.parallel.distribution.StringSketchMerger) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) BuildingShardSpec(org.apache.druid.timeline.partition.BuildingShardSpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) Throwables(com.google.common.base.Throwables) StringDistributionMerger(org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistributionMerger) IOException(java.io.IOException) TreeMap(java.util.TreeMap) ChatHandlers(org.apache.druid.segment.realtime.firehose.ChatHandlers) Preconditions(com.google.common.base.Preconditions) ArbitraryGranularitySpec(org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec) SubTaskSpecStatus(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTaskRunner.SubTaskSpecStatus) HllSketch(org.apache.datasketches.hll.HllSketch) AuthorizerMapper(org.apache.druid.server.security.AuthorizerMapper) Path(javax.ws.rs.Path) Memory(org.apache.datasketches.memory.Memory) TaskResource(org.apache.druid.indexing.common.task.TaskResource) MonotonicNonNull(org.checkerframework.checker.nullness.qual.MonotonicNonNull) ChatHandler(org.apache.druid.segment.realtime.firehose.ChatHandler) QueryParam(javax.ws.rs.QueryParam) Consumes(javax.ws.rs.Consumes) Union(org.apache.datasketches.hll.Union) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Task(org.apache.druid.indexing.common.task.Task) SmileMediaTypes(com.fasterxml.jackson.jaxrs.smile.SmileMediaTypes) Context(javax.ws.rs.core.Context) ImmutableMap(com.google.common.collect.ImmutableMap) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) StringUtils(org.apache.druid.java.util.common.StringUtils) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) Action(org.apache.druid.server.security.Action) Collectors(java.util.stream.Collectors) MaxAllowedLocksExceededException(org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException) Objects(java.util.Objects) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) List(java.util.List) Response(javax.ws.rs.core.Response) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) CurrentSubTaskHolder(org.apache.druid.indexing.common.task.CurrentSubTaskHolder) Logger(org.apache.druid.java.util.common.logger.Logger) PathParam(javax.ws.rs.PathParam) CollectionUtils(org.apache.druid.utils.CollectionUtils) HashMap(java.util.HashMap) Multimap(com.google.common.collect.Multimap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) Function(java.util.function.Function) TuningConfig(org.apache.druid.segment.indexing.TuningConfig) HashSet(java.util.HashSet) InputSource(org.apache.druid.data.input.InputSource) RowIngestionMetersTotals(org.apache.druid.segment.incremental.RowIngestionMetersTotals) Status(javax.ws.rs.core.Response.Status) DimensionRangePartitionsSpec(org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec) ParseExceptionReport(org.apache.druid.segment.incremental.ParseExceptionReport) POST(javax.ws.rs.POST) TransactionalSegmentPublisher(org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) DateTime(org.joda.time.DateTime) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) IngestionStatsAndErrorsTaskReport(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) IntermediaryDataManager(org.apache.druid.indexing.worker.shuffle.IntermediaryDataManager) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) Collections(java.util.Collections) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Union(org.apache.datasketches.hll.Union) Interval(org.joda.time.Interval)

Aggregations

Union (org.apache.datasketches.hll.Union)9 HllSketch (org.apache.datasketches.hll.HllSketch)5 WritableMemory (org.apache.datasketches.memory.WritableMemory)4 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 ISE (org.apache.druid.java.util.common.ISE)2 Interval (org.joda.time.Interval)2 JsonCreator (com.fasterxml.jackson.annotation.JsonCreator)1 JsonProperty (com.fasterxml.jackson.annotation.JsonProperty)1 SmileMediaTypes (com.fasterxml.jackson.jaxrs.smile.SmileMediaTypes)1 Preconditions (com.google.common.base.Preconditions)1 Throwables (com.google.common.base.Throwables)1 ArrayListMultimap (com.google.common.collect.ArrayListMultimap)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 Multimap (com.google.common.collect.Multimap)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 Comparator (java.util.Comparator)1 HashMap (java.util.HashMap)1