use of org.apache.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
the class CompactSegments method run.
/**
 * Runs one auto-compaction pass of the coordinator.
 *
 * <p>When {@code maxCompactionTaskSlots} is positive and at least one datasource compaction
 * config exists, this method:
 * <ol>
 *   <li>walks the active compaction tasks, cancelling any whose segmentGranularity differs from
 *       the configured one, and records the interval and estimated task slots of every task
 *       that is kept running;</li>
 *   <li>adds the intervals returned by {@code getLockedIntervalsToSkip} to the skip list;</li>
 *   <li>derives the number of available compaction task slots from the total worker capacity;</li>
 *   <li>delegates to {@code doRun} when slots are available, otherwise to {@code makeStats}.</li>
 * </ol>
 * In every other case the per-datasource auto-compaction snapshot is reset to an empty map.
 *
 * @param params runtime params of the current coordinator run
 * @return params rebuilt from {@code params} with the stats collected by this run
 * @throws ISE if no payload is returned for an active task, or if a task returned by
 *             {@code filterNonCompactionTasks} is not a compaction task
 */
@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
  LOG.info("Compact segments");
  final CoordinatorCompactionConfig dynamicConfig = params.getCoordinatorCompactionConfig();
  final CoordinatorStats stats = new CoordinatorStats();
  List<DataSourceCompactionConfig> compactionConfigList = dynamicConfig.getCompactionConfigs();
  if (dynamicConfig.getMaxCompactionTaskSlots() > 0) {
    Map<String, VersionedIntervalTimeline<String, DataSegment>> dataSources = params.getUsedSegmentsTimelinesPerDataSource();
    if (compactionConfigList != null && !compactionConfigList.isEmpty()) {
      // dataSource -> compaction config, for quick lookup while inspecting active tasks
      Map<String, DataSourceCompactionConfig> compactionConfigs = compactionConfigList.stream().collect(Collectors.toMap(DataSourceCompactionConfig::getDataSource, Function.identity()));
      final List<TaskStatusPlus> compactionTasks = filterNonCompactionTasks(indexingServiceClient.getActiveTasks());
      // dataSource -> list of intervals for which compaction will be skipped in this run
      final Map<String, List<Interval>> intervalsToSkipCompaction = new HashMap<>();
      int numEstimatedNonCompleteCompactionTasks = 0;
      for (TaskStatusPlus status : compactionTasks) {
        final TaskPayloadResponse response = indexingServiceClient.getTaskPayload(status.getId());
        if (response == null) {
          throw new ISE("Got a null payload from overlord for task[%s]", status.getId());
        }
        if (COMPACTION_TASK_TYPE.equals(response.getPayload().getType())) {
          final ClientCompactionTaskQuery compactionTaskQuery = (ClientCompactionTaskQuery) response.getPayload();
          DataSourceCompactionConfig dataSourceCompactionConfig = compactionConfigs.get(status.getDataSource());
          if (dataSourceCompactionConfig != null && dataSourceCompactionConfig.getGranularitySpec() != null) {
            Granularity configuredSegmentGranularity = dataSourceCompactionConfig.getGranularitySpec().getSegmentGranularity();
            if (configuredSegmentGranularity != null && compactionTaskQuery.getGranularitySpec() != null && !configuredSegmentGranularity.equals(compactionTaskQuery.getGranularitySpec().getSegmentGranularity())) {
              // We will cancel active compaction task if segmentGranularity changes and we will need to
              // re-compact the interval
              LOG.info("Canceled task[%s] as task segmentGranularity is [%s] but compaction config " + "segmentGranularity is [%s]", status.getId(), compactionTaskQuery.getGranularitySpec().getSegmentGranularity(), configuredSegmentGranularity);
              indexingServiceClient.cancelTask(status.getId());
              // A cancelled task contributes neither a skipped interval nor used task slots
              continue;
            }
          }
          // Skip interval as the current active compaction task is good
          final Interval interval = compactionTaskQuery.getIoConfig().getInputSpec().getInterval();
          intervalsToSkipCompaction.computeIfAbsent(status.getDataSource(), k -> new ArrayList<>()).add(interval);
          // Since we keep the current active compaction task running, we count the active task slots
          numEstimatedNonCompleteCompactionTasks += findMaxNumTaskSlotsUsedByOneCompactionTask(compactionTaskQuery.getTuningConfig());
        } else {
          throw new ISE("task[%s] is not a compactionTask", status.getId());
        }
      }
      // Skip all the intervals locked by higher priority tasks for each datasource
      // This must be done after the invalid compaction tasks are cancelled
      // in the loop above so that their intervals are not considered locked
      getLockedIntervalsToSkip(compactionConfigList).forEach((dataSource, intervals) -> intervalsToSkipCompaction.computeIfAbsent(dataSource, ds -> new ArrayList<>()).addAll(intervals));
      final CompactionSegmentIterator iterator = policy.reset(compactionConfigs, dataSources, intervalsToSkipCompaction);
      int totalCapacity;
      if (dynamicConfig.isUseAutoScaleSlots()) {
        try {
          totalCapacity = indexingServiceClient.getTotalWorkerCapacityWithAutoScale();
        } catch (Exception e) {
          // Keep the cause in the log so that the reason for the fallback can be diagnosed
          LOG.warn(e, "Failed to get total worker capacity with auto scale slots. Falling back to current capacity count");
          totalCapacity = indexingServiceClient.getTotalWorkerCapacity();
        }
      } else {
        totalCapacity = indexingServiceClient.getTotalWorkerCapacity();
      }
      // Capacity is the smaller of (total capacity * slot ratio) and the configured maximum
      final int compactionTaskCapacity = (int) Math.min(totalCapacity * dynamicConfig.getCompactionTaskSlotRatio(), dynamicConfig.getMaxCompactionTaskSlots());
      final int numAvailableCompactionTaskSlots;
      if (numEstimatedNonCompleteCompactionTasks > 0) {
        numAvailableCompactionTaskSlots = Math.max(0, compactionTaskCapacity - numEstimatedNonCompleteCompactionTasks);
      } else {
        // compactionTaskCapacity might be 0 if totalWorkerCapacity is low.
        // This guarantees that at least one slot is available if
        // compaction is enabled and numEstimatedNonCompleteCompactionTasks is 0.
        numAvailableCompactionTaskSlots = Math.max(1, compactionTaskCapacity);
      }
      LOG.info("Found [%d] available task slots for compaction out of [%d] max compaction task capacity", numAvailableCompactionTaskSlots, compactionTaskCapacity);
      stats.addToGlobalStat(AVAILABLE_COMPACTION_TASK_SLOT, numAvailableCompactionTaskSlots);
      stats.addToGlobalStat(MAX_COMPACTION_TASK_SLOT, compactionTaskCapacity);
      final Map<String, AutoCompactionSnapshot.Builder> currentRunAutoCompactionSnapshotBuilders = new HashMap<>();
      if (numAvailableCompactionTaskSlots > 0) {
        stats.accumulate(doRun(compactionConfigs, currentRunAutoCompactionSnapshotBuilders, numAvailableCompactionTaskSlots, iterator));
      } else {
        stats.accumulate(makeStats(currentRunAutoCompactionSnapshotBuilders, 0, iterator));
      }
    } else {
      LOG.info("compactionConfig is empty. Skip.");
      autoCompactionSnapshotPerDataSource.set(new HashMap<>());
    }
  } else {
    LOG.info("maxCompactionTaskSlots was set to 0. Skip compaction");
    autoCompactionSnapshotPerDataSource.set(new HashMap<>());
  }
  return params.buildFromExisting().withCoordinatorStats(stats).build();
}
use of org.apache.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
the class MarkAsUnusedOvershadowedSegments method run.
/**
 * Marks as unused every used segment that is overshadowed by segments found on the servers of
 * the cluster (historicals of every tier, then brokers).
 *
 * @param params runtime params of the current coordinator run
 * @return {@code params} itself when the coordinator has not been leading long enough; otherwise
 *         params rebuilt from {@code params} with the stats of this run, where
 *         {@code overShadowedCount} is incremented once per segment marked as unused
 */
@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
  // Mark as unused overshadowed segments only if we've had enough time to make sure we aren't flapping with old data.
  final boolean leadingLongEnough = params.coordinatorIsLeadingEnoughTimeToMarkAsUnusedOvershadowedSegements();
  if (!leadingLongEnough) {
    return params;
  }

  final CoordinatorStats runStats = new CoordinatorStats();
  final DruidCluster druidCluster = params.getDruidCluster();

  // dataSource -> timeline populated by addSegmentsFromServer for each server
  final Map<String, VersionedIntervalTimeline<String, DataSegment>> servedTimelines = new HashMap<>();
  for (SortedSet<ServerHolder> tier : druidCluster.getSortedHistoricalsByTier()) {
    tier.forEach(historical -> addSegmentsFromServer(historical, servedTimelines));
  }
  for (ServerHolder broker : druidCluster.getBrokers()) {
    addSegmentsFromServer(broker, servedTimelines);
  }

  // Mark all segments as unused in db that are overshadowed by served segments
  for (DataSegment candidate : params.getUsedSegments()) {
    final VersionedIntervalTimeline<String, DataSegment> servedTimeline = servedTimelines.get(candidate.getDataSource());
    if (servedTimeline == null) {
      // No timeline was built for this datasource, so nothing can overshadow the segment
      continue;
    }
    final boolean overshadowed = servedTimeline.isOvershadowed(candidate.getInterval(), candidate.getVersion(), candidate);
    if (overshadowed) {
      coordinator.markSegmentAsUnused(candidate);
      runStats.addToGlobalStat("overShadowedCount", 1);
    }
  }

  return params.buildFromExisting().withCoordinatorStats(runStats).build();
}
use of org.apache.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
the class NewestSegmentFirstPolicyTest method createTimeline.
/**
 * Builds a timeline of generated segments for the given specs.
 *
 * <p>Specs are processed in reverse order of {@code Comparators.intervalsByStartThenEnd()} applied
 * to their total interval. Each spec's total interval is consumed from its end: every step takes
 * an interval of at most {@code segmentPeriod} ending at the end of the remaining interval and
 * creates {@code numSegmentsPerShard} numbered-shard segments for it.
 *
 * @param specs segment generation specs; the array passed by the caller is not modified
 * @return a timeline containing all generated segments
 */
private static VersionedIntervalTimeline<String, DataSegment> createTimeline(SegmentGenerateSpec... specs) {
  List<DataSegment> segments = new ArrayList<>();
  // Default version for specs that do not carry their own
  final String version = DateTimes.nowUtc().toString();
  // Arrays.asList is a write-through view of the varargs array; sort a copy so that the
  // caller's array is not reordered as a side effect
  final List<SegmentGenerateSpec> orderedSpecs = new ArrayList<>(Arrays.asList(specs));
  orderedSpecs.sort(Comparator.comparing(s -> s.totalInterval, Comparators.intervalsByStartThenEnd().reversed()));
  for (SegmentGenerateSpec spec : orderedSpecs) {
    Interval remainingInterval = spec.totalInterval;
    while (!Intervals.isEmpty(remainingInterval)) {
      final Interval segmentInterval;
      if (remainingInterval.toDuration().isLongerThan(spec.segmentPeriod.toStandardDuration())) {
        // Take one segmentPeriod ending at the end of what is left
        segmentInterval = new Interval(spec.segmentPeriod, remainingInterval.getEnd());
      } else {
        // What is left fits in a single segment interval
        segmentInterval = remainingInterval;
      }
      for (int i = 0; i < spec.numSegmentsPerShard; i++) {
        final ShardSpec shardSpec = new NumberedShardSpec(i, spec.numSegmentsPerShard);
        final DataSegment segment = new DataSegment(DATA_SOURCE, segmentInterval, spec.version == null ? version : spec.version, null, ImmutableList.of(), ImmutableList.of(), shardSpec, spec.lastCompactionState, 0, spec.segmentSize);
        segments.add(segment);
      }
      remainingInterval = SegmentCompactionUtil.removeIntervalFromEnd(remainingInterval, segmentInterval);
    }
  }
  return VersionedIntervalTimeline.forSegments(segments);
}
use of org.apache.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
the class TimeBoundaryQueryRunnerTest method getCustomRunner.
/**
 * Creates a query runner over a timeline holding two incremental-index segments, both at
 * version "v1": one loaded from the {@code V_0112} rows and one from the {@code V_0113} rows.
 * The created segments are also stored in the {@code segment0} and {@code segment1} fields.
 *
 * @return a filtering query runner built by {@code QueryRunnerTestHelper} over that timeline
 * @throws IOException declared by this helper; presumably raised while loading the rows
 */
private QueryRunner getCustomRunner() throws IOException {
  final String version = "v1";

  final CharSource firstRows = CharSource.wrap(StringUtils.join(V_0112, "\n"));
  final CharSource secondRows = CharSource.wrap(StringUtils.join(V_0113, "\n"));

  final IncrementalIndex firstIndex = TestIndex.loadIncrementalIndex(newIndex("2011-01-12T00:00:00.000Z"), firstRows);
  final IncrementalIndex secondIndex = TestIndex.loadIncrementalIndex(newIndex("2011-01-14T00:00:00.000Z"), secondRows);

  segment0 = new IncrementalIndexSegment(firstIndex, makeIdentifier(firstIndex, version));
  segment1 = new IncrementalIndexSegment(secondIndex, makeIdentifier(secondIndex, version));

  final VersionedIntervalTimeline<String, ReferenceCountingSegment> segmentTimeline =
      new VersionedIntervalTimeline<>(StringComparators.LEXICOGRAPHIC);

  // Each index contributes a single-chunk entry covering its own interval
  final ReferenceCountingSegment firstWrapped = ReferenceCountingSegment.wrapRootGenerationSegment(segment0);
  segmentTimeline.add(firstIndex.getInterval(), version, new SingleElementPartitionChunk<>(firstWrapped));
  final ReferenceCountingSegment secondWrapped = ReferenceCountingSegment.wrapRootGenerationSegment(segment1);
  segmentTimeline.add(secondIndex.getInterval(), version, new SingleElementPartitionChunk<>(secondWrapped));

  return QueryRunnerTestHelper.makeFilteringQueryRunner(segmentTimeline, FACTORY);
}
use of org.apache.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
the class CachingClusteredClientPerfTest method testGetQueryRunnerForSegments_singleIntervalLargeSegments.
/**
 * Builds a timeline with 30,000 segments in one interval, all assigned to a single server, and
 * checks that the runner returned by {@code getQueryRunnerForSegments} yields exactly the
 * requested segment descriptors. The test must finish within the 10 second timeout.
 */
@Test(timeout = 10_000)
public void testGetQueryRunnerForSegments_singleIntervalLargeSegments() {
  final int numSegments = 30_000;
  final Interval day = Intervals.of("2021-02-13/2021-02-14");
  final DruidServer historical = new DruidServer("server", "localhost:9000", null, Long.MAX_VALUE, ServerType.HISTORICAL, DruidServer.DEFAULT_TIER, DruidServer.DEFAULT_PRIORITY);

  // One descriptor and one data segment per partition number, all in the same interval/version
  final List<SegmentDescriptor> expectedDescriptors = new ArrayList<>(numSegments);
  final List<DataSegment> segments = new ArrayList<>(numSegments);
  for (int partition = 0; partition < numSegments; partition++) {
    expectedDescriptors.add(new SegmentDescriptor(day, "1", partition));
    segments.add(makeDataSegment("test", day, "1", partition));
  }

  final VersionedIntervalTimeline<String, ServerSelector> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
  timeline.addAll(Iterators.transform(segments.iterator(), dataSegment -> {
    final ServerSelector selector = new ServerSelector(dataSegment, new HighestPriorityTierSelectorStrategy(new RandomServerSelectorStrategy()));
    selector.addServerAndUpdateSegment(new QueryableDruidServer(historical, new MockQueryRunner()), dataSegment);
    return new VersionedIntervalTimeline.PartitionChunkEntry<>(dataSegment.getInterval(), dataSegment.getVersion(), dataSegment.getShardSpec().createChunk(selector));
  }));

  // mock scheduler to return same sequence as argument
  final QueryScheduler scheduler = Mockito.mock(QueryScheduler.class);
  Mockito.when(scheduler.run(any(), any())).thenAnswer(invocation -> invocation.getArgument(1));
  Mockito.when(scheduler.prioritizeAndLaneQuery(any(), any())).thenAnswer(invocation -> ((QueryPlus) invocation.getArgument(0)).getQuery());

  // server view always hands out the timeline built above and a mock runner
  final TimelineServerView view = Mockito.mock(TimelineServerView.class);
  Mockito.doReturn(Optional.of(timeline)).when(view).getTimeline(any());
  Mockito.doReturn(new MockQueryRunner()).when(view).getQueryRunner(any());

  final CachingClusteredClient client = new CachingClusteredClient(
      new MockQueryToolChestWareHouse(),
      view,
      MapCache.create(1024),
      TestHelper.makeJsonMapper(),
      Mockito.mock(CachePopulator.class),
      new CacheConfig(),
      Mockito.mock(DruidHttpClientConfig.class),
      Mockito.mock(DruidProcessingConfig.class),
      ForkJoinPool.commonPool(),
      scheduler,
      NoopJoinableFactory.INSTANCE,
      new NoopServiceEmitter()
  );

  final Query<SegmentDescriptor> query = makeFakeQuery(day);
  final QueryRunner<SegmentDescriptor> runner = client.getQueryRunnerForSegments(query, expectedDescriptors);
  final Sequence<SegmentDescriptor> results = runner.run(QueryPlus.wrap(query));
  Assert.assertEquals(expectedDescriptors, results.toList());
}
Aggregations