
Example 1 with LinearPartitionAnalysis

Use of org.apache.druid.indexing.common.task.batch.partition.LinearPartitionAnalysis in project druid by druid-io.

In class IndexTask, the method createLinearPartitionAnalysis:

private static LinearPartitionAnalysis createLinearPartitionAnalysis(GranularitySpec granularitySpec, @Nonnull DynamicPartitionsSpec partitionsSpec) {
    final Iterable<Interval> intervals = granularitySpec.sortedBucketIntervals();
    // Dynamic (linear) partitioning always assigns exactly one bucket per interval.
    final int numBucketsPerInterval = 1;
    final LinearPartitionAnalysis partitionAnalysis = new LinearPartitionAnalysis(partitionsSpec);
    intervals.forEach(interval -> partitionAnalysis.updateBucket(interval, numBucketsPerInterval));
    return partitionAnalysis;
}
Also used: LinearPartitionAnalysis (org.apache.druid.indexing.common.task.batch.partition.LinearPartitionAnalysis), Interval (org.joda.time.Interval)
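
For orientation, here is a minimal stand-alone sketch of what the method above produces: under dynamic (linear) partitioning, every bucket interval gets exactly one bucket, regardless of row counts. LinearSketch and its string-keyed interval map are illustrative stand-ins, not Druid classes.

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Illustrative stand-in for LinearPartitionAnalysis; not the Druid class.
class LinearSketch {
    // Intervals are represented by their ISO string form to keep the sketch dependency-free.
    private final Map<String, Integer> bucketsPerInterval = new LinkedHashMap<>();

    void updateBucket(String interval, int numBuckets) {
        bucketsPerInterval.merge(interval, numBuckets, Integer::sum);
    }

    Map<String, Integer> buckets() {
        return bucketsPerInterval;
    }

    public static void main(String[] args) {
        // Mirrors createLinearPartitionAnalysis: one bucket per sorted bucket interval.
        List<String> sortedBucketIntervals =
                List.of("2021-01-01/2021-01-02", "2021-01-02/2021-01-03");
        LinearSketch analysis = new LinearSketch();
        sortedBucketIntervals.forEach(interval -> analysis.updateBucket(interval, 1));
        System.out.println(analysis.buckets());
        // {2021-01-01/2021-01-02=1, 2021-01-02/2021-01-03=1}
    }
}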

Example 2 with LinearPartitionAnalysis

Use of org.apache.druid.indexing.common.task.batch.partition.LinearPartitionAnalysis in project druid by druid-io.

In class IndexTask, the method createShardSpecsFromInput:

private PartitionAnalysis createShardSpecsFromInput(ObjectMapper jsonMapper, IndexIngestionSpec ingestionSchema, InputSource inputSource, File tmpDir, GranularitySpec granularitySpec, @Nonnull PartitionsSpec partitionsSpec, boolean determineIntervals) throws IOException {
    assert partitionsSpec.getType() != SecondaryPartitionType.RANGE;
    long determineShardSpecsStartMillis = System.currentTimeMillis();
    final Map<Interval, Optional<HyperLogLogCollector>> hllCollectors = collectIntervalsAndShardSpecs(jsonMapper, ingestionSchema, inputSource, tmpDir, granularitySpec, partitionsSpec, determineIntervals);
    final PartitionAnalysis<Integer, ?> partitionAnalysis;
    // Choose the analysis type from the secondary partitioning scheme; RANGE was ruled out by the assert above.
    if (partitionsSpec.getType() == SecondaryPartitionType.LINEAR) {
        partitionAnalysis = new LinearPartitionAnalysis((DynamicPartitionsSpec) partitionsSpec);
    } else if (partitionsSpec.getType() == SecondaryPartitionType.HASH) {
        partitionAnalysis = new HashPartitionAnalysis((HashedPartitionsSpec) partitionsSpec);
    } else {
        throw new UOE("%s", partitionsSpec.getClass().getName());
    }
    for (final Map.Entry<Interval, Optional<HyperLogLogCollector>> entry : hllCollectors.entrySet()) {
        final Interval interval = entry.getKey();
        final int numBucketsPerInterval;
        if (partitionsSpec.getType() == SecondaryPartitionType.HASH) {
            final HashedPartitionsSpec hashedPartitionsSpec = (HashedPartitionsSpec) partitionsSpec;
            final HyperLogLogCollector collector = entry.getValue().orNull();
            if (partitionsSpec.needsDeterminePartitions(false)) {
                // No explicit numShards: estimate the row count from the HLL collector and size buckets to maxRowsPerSegment.
                final long numRows = Preconditions.checkNotNull(collector, "HLL collector").estimateCardinalityRound();
                final int nonNullMaxRowsPerSegment = partitionsSpec.getMaxRowsPerSegment() == null ? PartitionsSpec.DEFAULT_MAX_ROWS_PER_SEGMENT : partitionsSpec.getMaxRowsPerSegment();
                numBucketsPerInterval = (int) Math.ceil((double) numRows / nonNullMaxRowsPerSegment);
                log.info("Estimated [%,d] rows of data for interval [%s], creating [%,d] shards", numRows, interval, numBucketsPerInterval);
            } else {
                numBucketsPerInterval = hashedPartitionsSpec.getNumShards() == null ? 1 : hashedPartitionsSpec.getNumShards();
                log.info("Creating [%,d] buckets for interval [%s]", numBucketsPerInterval, interval);
            }
        } else {
            // Linear (dynamic) partitioning uses a single bucket per interval.
            numBucketsPerInterval = 1;
        }
        partitionAnalysis.updateBucket(interval, numBucketsPerInterval);
    }
    log.info("Found intervals and shardSpecs in %,dms", System.currentTimeMillis() - determineShardSpecsStartMillis);
    return partitionAnalysis;
}
Also used: HashedPartitionsSpec (org.apache.druid.indexer.partitions.HashedPartitionsSpec), Optional (com.google.common.base.Optional), HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector), UOE (org.apache.druid.java.util.common.UOE), LinearPartitionAnalysis (org.apache.druid.indexing.common.task.batch.partition.LinearPartitionAnalysis), HashPartitionAnalysis (org.apache.druid.indexing.common.task.batch.partition.HashPartitionAnalysis), DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec), Map (java.util.Map), TreeMap (java.util.TreeMap), HashMap (java.util.HashMap), Interval (org.joda.time.Interval)
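
The only non-trivial part of the HASH branch above is the bucket-count arithmetic. The sketch below isolates it; bucketsForInterval is a hypothetical helper, not Druid API, and the 5,000,000-row constant is assumed to mirror PartitionsSpec.DEFAULT_MAX_ROWS_PER_SEGMENT. The real method reads these values from HashedPartitionsSpec and the interval's HyperLogLogCollector.

// Stand-alone sketch of the bucket-count arithmetic in the HASH branch above.
public class HashBucketCountSketch {

    // Assumed to mirror PartitionsSpec.DEFAULT_MAX_ROWS_PER_SEGMENT.
    static final int DEFAULT_MAX_ROWS_PER_SEGMENT = 5_000_000;

    static int bucketsForInterval(long estimatedRows, Integer maxRowsPerSegment, Integer numShards) {
        if (numShards != null) {
            // An explicit numShards skips the cardinality-based estimate entirely.
            return numShards;
        }
        int targetRows = maxRowsPerSegment == null ? DEFAULT_MAX_ROWS_PER_SEGMENT : maxRowsPerSegment;
        // ceil(numRows / maxRowsPerSegment), as in the method above.
        return (int) Math.ceil((double) estimatedRows / targetRows);
    }

    public static void main(String[] args) {
        // 12M estimated rows at the 5M-row default: ceil(12 / 5) = 3 buckets.
        System.out.println(bucketsForInterval(12_000_000L, null, null)); // 3
        // An explicit numShards wins regardless of the estimate.
        System.out.println(bucketsForInterval(12_000_000L, null, 8));   // 8
    }
}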

Aggregations

LinearPartitionAnalysis (org.apache.druid.indexing.common.task.batch.partition.LinearPartitionAnalysis): 2 usages
Interval (org.joda.time.Interval): 2 usages
Optional (com.google.common.base.Optional): 1 usage
HashMap (java.util.HashMap): 1 usage
Map (java.util.Map): 1 usage
TreeMap (java.util.TreeMap): 1 usage
HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector): 1 usage
DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec): 1 usage
HashedPartitionsSpec (org.apache.druid.indexer.partitions.HashedPartitionsSpec): 1 usage
HashPartitionAnalysis (org.apache.druid.indexing.common.task.batch.partition.HashPartitionAnalysis): 1 usage
UOE (org.apache.druid.java.util.common.UOE): 1 usage