Example 1 with OverwriteShardSpec

Use of org.apache.druid.timeline.partition.OverwriteShardSpec in project druid by druid-io.

The annotateShardSpec method of class SegmentPublisherHelper:

/**
 * This method fills missing information in the shard spec if necessary when publishing segments.
 *
 * - When time chunk lock is used, the non-appending task should set the proper size of the core partitions for
 *   dynamically-partitioned segments. See {@link #annotateCorePartitionSetSizeFn}.
 * - When segment lock is used, the overwriting task should set the proper size of the atomic update group.
 *   See {@link #annotateAtomicUpdateGroupFn}.
 */
static Set<DataSegment> annotateShardSpec(Set<DataSegment> segments) {
    final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
    // Group the segments by their time interval.
    segments.forEach(segment ->
        intervalToSegments.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment)
    );
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final Interval interval = entry.getKey();
        final List<DataSegment> segmentsPerInterval = entry.getValue();
        final ShardSpec firstShardSpec = segmentsPerInterval.get(0).getShardSpec();
        // All segments in the same interval must share the same ShardSpec type.
        final boolean anyMismatch = segmentsPerInterval.stream()
            .anyMatch(segment -> segment.getShardSpec().getClass() != firstShardSpec.getClass());
        if (anyMismatch) {
            throw new ISE("Mismatched shardSpecs in interval[%s] for segments[%s]", interval, segmentsPerInterval);
        }
        final Function<DataSegment, DataSegment> annotateFn;
        if (firstShardSpec instanceof OverwriteShardSpec) {
            annotateFn = annotateAtomicUpdateGroupFn(segmentsPerInterval.size());
        } else if (firstShardSpec instanceof BuildingShardSpec) {
            // sanity check
            // BuildingShardSpec is used in non-appending mode. In this mode,
            // the segments in each interval should have contiguous partitionIds,
            // so that they can be queryable (see PartitionHolder.isComplete()).
            int expectedCorePartitionSetSize = segmentsPerInterval.size();
            // Since partitionIds are unique per interval, counting the ids below the expected
            // size tells us whether the ids form the contiguous set {0, ..., n-1}.
            int actualCorePartitionSetSize = Math.toIntExact(
                segmentsPerInterval.stream()
                    .filter(segment -> segment.getShardSpec().getPartitionNum() < expectedCorePartitionSetSize)
                    .count()
            );
            if (expectedCorePartitionSetSize != actualCorePartitionSetSize) {
                LOG.errorSegments(segmentsPerInterval, "Cannot publish segments due to incomplete time chunk");
                throw new ISE("Cannot publish segments due to incomplete time chunk for interval[%s]. " + "Expected [%s] segments in the core partition, but only [%] segments are found. " + "See task logs for more details about these segments.", interval, expectedCorePartitionSetSize, actualCorePartitionSetSize);
            }
            annotateFn = annotateCorePartitionSetSizeFn(expectedCorePartitionSetSize);
        } else if (firstShardSpec instanceof BucketNumberedShardSpec) {
            throw new ISE("Cannot publish segments with shardSpec[%s]", firstShardSpec);
        } else {
            annotateFn = null;
        }
        if (annotateFn != null) {
            intervalToSegments.put(
                interval,
                segmentsPerInterval.stream().map(annotateFn).collect(Collectors.toList())
            );
        }
    }
    return intervalToSegments.values().stream().flatMap(Collection::stream).collect(Collectors.toSet());
}
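
For the OverwriteShardSpec branch, annotateAtomicUpdateGroupFn (not shown on this page) stamps each segment's shard spec with the size of its atomic update group, i.e. the number of new segments that must all become available before any of them overshadows the old ones. A hedged sketch of what such a function could look like, assuming OverwriteShardSpec exposes a withAtomicUpdateGroupSize copy method and DataSegment exposes withShardSpec (assumptions inferred from this usage, not verified against the Druid source):

// Sketch only: assumes OverwriteShardSpec.withAtomicUpdateGroupSize(short)
// and DataSegment.withShardSpec(ShardSpec) exist with these signatures.
private static Function<DataSegment, DataSegment> annotateAtomicUpdateGroupFn(int atomicUpdateGroupSize) {
    return segment -> {
        final OverwriteShardSpec shardSpec = (OverwriteShardSpec) segment.getShardSpec();
        return segment.withShardSpec(shardSpec.withAtomicUpdateGroupSize((short) atomicUpdateGroupSize));
    };
}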
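
The BuildingShardSpec branch relies on a small counting trick: partitionIds within an interval are unique, so the number of ids strictly below n can equal n only when the ids are exactly the contiguous set {0, ..., n-1}, i.e. when the core partition set is complete. A minimal, self-contained sketch of that check (class and method names here are illustrative, not Druid APIs):

import java.util.List;

public class CorePartitionCheckSketch {
    // Returns true iff partitionIds is exactly {0, ..., n-1} in any order,
    // where n = partitionIds.size(). Because the ids are unique, the count of
    // ids below n reaches n only when every id in 0..n-1 is present.
    static boolean isCorePartitionSetComplete(List<Integer> partitionIds) {
        final int expected = partitionIds.size();
        final long actual = partitionIds.stream().filter(id -> id < expected).count();
        return actual == expected;
    }

    public static void main(String[] args) {
        System.out.println(isCorePartitionSetComplete(List.of(2, 0, 1))); // true
        System.out.println(isCorePartitionSetComplete(List.of(0, 1, 3))); // false: id 2 is missing
    }
}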
Also used : Logger(org.apache.druid.java.util.common.logger.Logger) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) BuildingShardSpec(org.apache.druid.timeline.partition.BuildingShardSpec) Collection(java.util.Collection) OverwriteShardSpec(org.apache.druid.timeline.partition.OverwriteShardSpec) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) HashMap(java.util.HashMap) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) Interval(org.joda.time.Interval) List(java.util.List) Map(java.util.Map) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) BucketNumberedShardSpec(org.apache.druid.timeline.partition.BucketNumberedShardSpec)

Aggregations

ArrayList (java.util.ArrayList) 1
Collection (java.util.Collection) 1
HashMap (java.util.HashMap) 1
List (java.util.List) 1
Map (java.util.Map) 1
Entry (java.util.Map.Entry) 1
Set (java.util.Set) 1
Function (java.util.function.Function) 1
Collectors (java.util.stream.Collectors) 1
ISE (org.apache.druid.java.util.common.ISE) 1
Logger (org.apache.druid.java.util.common.logger.Logger) 1
DataSegment (org.apache.druid.timeline.DataSegment) 1
BucketNumberedShardSpec (org.apache.druid.timeline.partition.BucketNumberedShardSpec) 1
BuildingShardSpec (org.apache.druid.timeline.partition.BuildingShardSpec) 1
OverwriteShardSpec (org.apache.druid.timeline.partition.OverwriteShardSpec) 1
ShardSpec (org.apache.druid.timeline.partition.ShardSpec) 1
Interval (org.joda.time.Interval) 1