Use of org.apache.druid.timeline.partition.BuildingShardSpec in project druid by druid-io:
the class SegmentPublisherHelper, method annotateShardSpec.
/**
 * This method fills in missing information in the shard spec if necessary when publishing segments.
 *
 * - When time chunk lock is used, the non-appending task should set the proper size of the core partitions for
 *   dynamically-partitioned segments. See {@link #annotateCorePartitionSetSizeFn}.
 * - When segment lock is used, the overwriting task should set the proper size of the atomic update group.
 *   See {@link #annotateAtomicUpdateGroupFn}.
 */
static Set<DataSegment> annotateShardSpec(Set<DataSegment> segments)
{
  final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
  segments.forEach(
      segment -> intervalToSegments.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment)
  );

  for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
    final Interval interval = entry.getKey();
    final List<DataSegment> segmentsPerInterval = entry.getValue();
    final ShardSpec firstShardSpec = segmentsPerInterval.get(0).getShardSpec();
    final boolean anyMismatch = segmentsPerInterval.stream().anyMatch(
        segment -> segment.getShardSpec().getClass() != firstShardSpec.getClass()
    );
    if (anyMismatch) {
      throw new ISE("Mismatched shardSpecs in interval[%s] for segments[%s]", interval, segmentsPerInterval);
    }

    final Function<DataSegment, DataSegment> annotateFn;
    if (firstShardSpec instanceof OverwriteShardSpec) {
      annotateFn = annotateAtomicUpdateGroupFn(segmentsPerInterval.size());
    } else if (firstShardSpec instanceof BuildingShardSpec) {
      // Sanity check. BuildingShardSpec is used in non-appending mode. In this mode,
      // the segments in each interval should have contiguous partitionIds so that
      // they can be queryable (see PartitionHolder.isComplete()).
      final int expectedCorePartitionSetSize = segmentsPerInterval.size();
      final int actualCorePartitionSetSize = Math.toIntExact(
          segmentsPerInterval
              .stream()
              .filter(segment -> segment.getShardSpec().getPartitionNum() < expectedCorePartitionSetSize)
              .count()
      );
      if (expectedCorePartitionSetSize != actualCorePartitionSetSize) {
        LOG.errorSegments(segmentsPerInterval, "Cannot publish segments due to incomplete time chunk");
        throw new ISE(
            "Cannot publish segments due to incomplete time chunk for interval[%s]. "
            + "Expected [%s] segments in the core partition, but only [%s] segments are found. "
            + "See task logs for more details about these segments.",
            interval,
            expectedCorePartitionSetSize,
            actualCorePartitionSetSize
        );
      }
      annotateFn = annotateCorePartitionSetSizeFn(expectedCorePartitionSetSize);
    } else if (firstShardSpec instanceof BucketNumberedShardSpec) {
      throw new ISE("Cannot publish segments with shardSpec[%s]", firstShardSpec);
    } else {
      annotateFn = null;
    }

    if (annotateFn != null) {
      intervalToSegments.put(interval, segmentsPerInterval.stream().map(annotateFn).collect(Collectors.toList()));
    }
  }

  return intervalToSegments.values().stream().flatMap(Collection::stream).collect(Collectors.toSet());
}
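To make the core-partition contiguity check concrete: the filter above counts segments whose partitionId falls in [0, n), where n is the number of segments in the interval. Since partitionIds are distinct, that count equals n exactly when the ids are contiguous from 0. A minimal standalone sketch of the same check, outside Druid (the class and method names here are illustrative, not Druid APIs):

import java.util.List;

class CorePartitionCheck
{
  // `partitionIds` stands in for the segments' shardSpec.getPartitionNum() values.
  static boolean isCompleteCorePartitionSet(List<Integer> partitionIds)
  {
    final int expected = partitionIds.size();
    // Mirrors the filter in annotateShardSpec: every id must fall in [0, expected),
    // which, the ids being distinct, is equivalent to the ids being contiguous from 0.
    final long actual = partitionIds.stream().filter(id -> id < expected).count();
    return actual == expected;
  }

  public static void main(String[] args)
  {
    System.out.println(isCompleteCorePartitionSet(List.of(0, 1, 2)));  // true: contiguous
    System.out.println(isCompleteCorePartitionSet(List.of(0, 1, 3)));  // false: id 2 is missing
  }
}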
Use of org.apache.druid.timeline.partition.BuildingShardSpec in project druid by druid-io:
the class ParallelIndexSupervisorTask, method getPartitionToLocations.
/**
 * Creates a map from partition (interval + bucketId) to the corresponding
 * PartitionLocations. Note that the bucketId may be different from the final
 * partitionId (refer to {@link BuildingShardSpec} for more details).
 */
static Map<Partition, List<PartitionLocation>> getPartitionToLocations(
    Map<String, GeneratedPartitionsReport> subTaskIdToReport
)
{
  // Create a map from partition to list of reports (PartitionStat and subTaskId).
  // Sort by (interval, bucketId) to maintain the order of partitionIds within each interval.
  final Map<Partition, List<PartitionReport>> partitionToReports = new TreeMap<>(
      Comparator.comparingLong((Partition partition) -> partition.getInterval().getStartMillis())
                .thenComparingLong(partition -> partition.getInterval().getEndMillis())
                .thenComparingInt(Partition::getBucketId)
  );
  subTaskIdToReport.forEach(
      (subTaskId, report) -> report.getPartitionStats().forEach(
          partitionStat -> partitionToReports
              .computeIfAbsent(Partition.fromStat(partitionStat), p -> new ArrayList<>())
              .add(new PartitionReport(subTaskId, partitionStat))
      )
  );

  final Map<Partition, List<PartitionLocation>> partitionToLocations = new HashMap<>();
  Interval prevInterval = null;
  final AtomicInteger partitionId = new AtomicInteger(0);
  for (Entry<Partition, List<PartitionReport>> entry : partitionToReports.entrySet()) {
    final Partition partition = entry.getKey();
    // Reset the partitionId if this is a new interval
    final Interval interval = partition.getInterval();
    if (!interval.equals(prevInterval)) {
      partitionId.set(0);
      prevInterval = interval;
    }

    // Use any PartitionStat of this partition to create a shard spec
    final List<PartitionReport> reportsOfPartition = entry.getValue();
    final BuildingShardSpec<?> shardSpec = reportsOfPartition
        .get(0)
        .getPartitionStat()
        .getSecondaryPartition()
        .convert(partitionId.getAndIncrement());

    // Create a PartitionLocation for each PartitionStat
    final List<PartitionLocation> locationsOfPartition = reportsOfPartition
        .stream()
        .map(report -> report.getPartitionStat().toPartitionLocation(report.getSubTaskId(), shardSpec))
        .collect(Collectors.toList());
    partitionToLocations.put(partition, locationsOfPartition);
  }

  return partitionToLocations;
}
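The id assignment above boils down to a simple pattern: iterate buckets sorted by (interval, bucketId) and restart a counter whenever the interval changes, so each time chunk gets partitionIds 0..n-1 regardless of the original bucketIds. A standalone sketch of that pattern, assuming a simplified Bucket record in place of Druid's Partition (all names here are hypothetical):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

class PartitionIdAssignment
{
  // Hypothetical stand-in for Druid's Partition: an interval key plus a bucketId.
  record Bucket(long intervalStart, int bucketId) { }

  static Map<Bucket, Integer> assignPartitionIds(List<Bucket> buckets)
  {
    final List<Bucket> sorted = new ArrayList<>(buckets);
    sorted.sort(Comparator.comparingLong(Bucket::intervalStart).thenComparingInt(Bucket::bucketId));

    final Map<Bucket, Integer> ids = new LinkedHashMap<>();
    long prevInterval = Long.MIN_VALUE;
    int partitionId = 0;
    for (Bucket bucket : sorted) {
      if (bucket.intervalStart() != prevInterval) {
        partitionId = 0;  // restart numbering for each new time chunk
        prevInterval = bucket.intervalStart();
      }
      ids.put(bucket, partitionId++);
    }
    return ids;
  }

  public static void main(String[] args)
  {
    // bucketIds 5 and 7 in the same interval become partitionIds 0 and 1.
    System.out.println(assignPartitionIds(List.of(
        new Bucket(0L, 7), new Bucket(0L, 5), new Bucket(1_000L, 3)
    )));
  }
}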
Use of org.apache.druid.timeline.partition.BuildingShardSpec in project druid by druid-io:
the class PartialGenericSegmentMergeTask, method createIntervalAndIntegerToShardSpec.
private static Table<Interval, Integer, BuildingShardSpec<?>> createIntervalAndIntegerToShardSpec(
    List<PartitionLocation> partitionLocations
)
{
  final Table<Interval, Integer, BuildingShardSpec<?>> intervalAndIntegerToShardSpec = HashBasedTable.create();
  partitionLocations.forEach(p -> {
    final ShardSpec currShardSpec = intervalAndIntegerToShardSpec.get(p.getInterval(), p.getBucketId());
    if (currShardSpec == null) {
      intervalAndIntegerToShardSpec.put(p.getInterval(), p.getBucketId(), p.getShardSpec());
    } else if (!p.getShardSpec().equals(currShardSpec)) {
      throw new ISE(
          "interval %s, bucketId %s mismatched shard specs: %s and %s",
          p.getInterval(),
          p.getBucketId(),
          currShardSpec,
          p.getShardSpec()
      );
    }
  });
  return intervalAndIntegerToShardSpec;
}
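The method is essentially a "first write wins, later writes must match" reduction into a Guava Table keyed by (interval, bucketId). A standalone sketch of the same pattern, with String stand-ins for Interval and BuildingShardSpec (putOrVerify is a hypothetical helper, and IllegalStateException replaces Druid's ISE):

import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;

class DedupTableExample
{
  // Insert a value at (row, col); if one is already present it must be equal,
  // mirroring the mismatch check in createIntervalAndIntegerToShardSpec.
  static <R, C, V> void putOrVerify(Table<R, C, V> table, R row, C col, V value)
  {
    final V current = table.get(row, col);
    if (current == null) {
      table.put(row, col, value);
    } else if (!current.equals(value)) {
      throw new IllegalStateException(
          "row " + row + ", col " + col + " mismatched values: " + current + " and " + value
      );
    }
  }

  public static void main(String[] args)
  {
    final Table<String, Integer, String> table = HashBasedTable.create();
    putOrVerify(table, "2024-01-01/2024-01-02", 0, "shardSpecA");
    putOrVerify(table, "2024-01-01/2024-01-02", 0, "shardSpecA");  // duplicate but identical: ok
    putOrVerify(table, "2024-01-01/2024-01-02", 0, "shardSpecB");  // mismatch: throws
  }
}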