use of org.apache.druid.client.indexing.ClientCompactionTaskQuery in project druid by druid-io.
the class CompactSegments method run.
@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
LOG.info("Compact segments");
final CoordinatorCompactionConfig dynamicConfig = params.getCoordinatorCompactionConfig();
final CoordinatorStats stats = new CoordinatorStats();
List<DataSourceCompactionConfig> compactionConfigList = dynamicConfig.getCompactionConfigs();
if (dynamicConfig.getMaxCompactionTaskSlots() > 0) {
Map<String, VersionedIntervalTimeline<String, DataSegment>> dataSources = params.getUsedSegmentsTimelinesPerDataSource();
if (compactionConfigList != null && !compactionConfigList.isEmpty()) {
Map<String, DataSourceCompactionConfig> compactionConfigs = compactionConfigList.stream().collect(Collectors.toMap(DataSourceCompactionConfig::getDataSource, Function.identity()));
final List<TaskStatusPlus> compactionTasks = filterNonCompactionTasks(indexingServiceClient.getActiveTasks());
// dataSource -> list of intervals for which compaction will be skipped in this run
final Map<String, List<Interval>> intervalsToSkipCompaction = new HashMap<>();
int numEstimatedNonCompleteCompactionTasks = 0;
for (TaskStatusPlus status : compactionTasks) {
final TaskPayloadResponse response = indexingServiceClient.getTaskPayload(status.getId());
if (response == null) {
throw new ISE("Got a null paylord from overlord for task[%s]", status.getId());
}
if (COMPACTION_TASK_TYPE.equals(response.getPayload().getType())) {
final ClientCompactionTaskQuery compactionTaskQuery = (ClientCompactionTaskQuery) response.getPayload();
DataSourceCompactionConfig dataSourceCompactionConfig = compactionConfigs.get(status.getDataSource());
if (dataSourceCompactionConfig != null && dataSourceCompactionConfig.getGranularitySpec() != null) {
Granularity configuredSegmentGranularity = dataSourceCompactionConfig.getGranularitySpec().getSegmentGranularity();
if (configuredSegmentGranularity != null && compactionTaskQuery.getGranularitySpec() != null && !configuredSegmentGranularity.equals(compactionTaskQuery.getGranularitySpec().getSegmentGranularity())) {
// We will cancel active compaction task if segmentGranularity changes and we will need to
// re-compact the interval
LOG.info("Canceled task[%s] as task segmentGranularity is [%s] but compaction config " + "segmentGranularity is [%s]", status.getId(), compactionTaskQuery.getGranularitySpec().getSegmentGranularity(), configuredSegmentGranularity);
indexingServiceClient.cancelTask(status.getId());
continue;
}
}
// Skip interval as the current active compaction task is good
final Interval interval = compactionTaskQuery.getIoConfig().getInputSpec().getInterval();
intervalsToSkipCompaction.computeIfAbsent(status.getDataSource(), k -> new ArrayList<>()).add(interval);
// Since we keep the current active compaction task running, we count the active task slots
numEstimatedNonCompleteCompactionTasks += findMaxNumTaskSlotsUsedByOneCompactionTask(compactionTaskQuery.getTuningConfig());
} else {
throw new ISE("task[%s] is not a compactionTask", status.getId());
}
}
// Skip all the intervals locked by higher priority tasks for each datasource
// This must be done after the invalid compaction tasks are cancelled
// in the loop above so that their intervals are not considered locked
getLockedIntervalsToSkip(compactionConfigList).forEach((dataSource, intervals) -> intervalsToSkipCompaction.computeIfAbsent(dataSource, ds -> new ArrayList<>()).addAll(intervals));
final CompactionSegmentIterator iterator = policy.reset(compactionConfigs, dataSources, intervalsToSkipCompaction);
int totalCapacity;
if (dynamicConfig.isUseAutoScaleSlots()) {
try {
totalCapacity = indexingServiceClient.getTotalWorkerCapacityWithAutoScale();
} catch (Exception e) {
LOG.warn("Failed to get total worker capacity with auto scale slots. Falling back to current capacity count");
totalCapacity = indexingServiceClient.getTotalWorkerCapacity();
}
} else {
totalCapacity = indexingServiceClient.getTotalWorkerCapacity();
}
final int compactionTaskCapacity = (int) Math.min(totalCapacity * dynamicConfig.getCompactionTaskSlotRatio(), dynamicConfig.getMaxCompactionTaskSlots());
final int numAvailableCompactionTaskSlots;
if (numEstimatedNonCompleteCompactionTasks > 0) {
numAvailableCompactionTaskSlots = Math.max(0, compactionTaskCapacity - numEstimatedNonCompleteCompactionTasks);
} else {
// compactionTaskCapacity might be 0 if totalWorkerCapacity is low.
// This guarantees that at least one slot is available if
// compaction is enabled and numEstimatedNonCompleteCompactionTasks is 0.
numAvailableCompactionTaskSlots = Math.max(1, compactionTaskCapacity);
}
LOG.info("Found [%d] available task slots for compaction out of [%d] max compaction task capacity", numAvailableCompactionTaskSlots, compactionTaskCapacity);
stats.addToGlobalStat(AVAILABLE_COMPACTION_TASK_SLOT, numAvailableCompactionTaskSlots);
stats.addToGlobalStat(MAX_COMPACTION_TASK_SLOT, compactionTaskCapacity);
final Map<String, AutoCompactionSnapshot.Builder> currentRunAutoCompactionSnapshotBuilders = new HashMap<>();
if (numAvailableCompactionTaskSlots > 0) {
stats.accumulate(doRun(compactionConfigs, currentRunAutoCompactionSnapshotBuilders, numAvailableCompactionTaskSlots, iterator));
} else {
stats.accumulate(makeStats(currentRunAutoCompactionSnapshotBuilders, 0, iterator));
}
} else {
LOG.info("compactionConfig is empty. Skip.");
autoCompactionSnapshotPerDataSource.set(new HashMap<>());
}
} else {
LOG.info("maxCompactionTaskSlots was set to 0. Skip compaction");
autoCompactionSnapshotPerDataSource.set(new HashMap<>());
}
return params.buildFromExisting().withCoordinatorStats(stats).build();
}
use of org.apache.druid.client.indexing.ClientCompactionTaskQuery in project druid by druid-io.
the class ClientCompactionTaskQuerySerdeTest method testCompactionTaskToClientCompactionTaskQuery.
@Test
public void testCompactionTaskToClientCompactionTaskQuery() throws IOException {
final ObjectMapper mapper = setupInjectablesInObjectMapper(new DefaultObjectMapper());
final CompactionTask.Builder builder = new CompactionTask.Builder("datasource", new SegmentCacheManagerFactory(mapper), new RetryPolicyFactory(new RetryPolicyConfig()));
final CompactionTask task = builder.inputSpec(new CompactionIntervalSpec(Intervals.of("2019/2020"), "testSha256OfSortedSegmentIds"), true).tuningConfig(new ParallelIndexTuningConfig(null, null, null, 40000, 2000L, null, null, null, new SegmentsSplitHintSpec(new HumanReadableBytes(100000L), 10), new DynamicPartitionsSpec(100, 30000L), new IndexSpec(new DefaultBitmapSerdeFactory(), CompressionStrategy.LZ4, CompressionStrategy.LZF, LongEncodingStrategy.LONGS), new IndexSpec(new DefaultBitmapSerdeFactory(), CompressionStrategy.LZ4, CompressionStrategy.UNCOMPRESSED, LongEncodingStrategy.AUTO), 2, null, null, 1000L, TmpFileSegmentWriteOutMediumFactory.instance(), null, 100, 5, 1000L, new Duration(3000L), 7, 1000, 100, null, null, null, null, null, null)).granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.DAY, Granularities.HOUR, true)).dimensionsSpec(DimensionsSpec.builder().setDimensions(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))).setDimensionExclusions(ImmutableList.of("__time", "val")).build()).metricsSpec(new AggregatorFactory[] { new CountAggregatorFactory("cnt") }).transformSpec(new ClientCompactionTaskTransformSpec(new SelectorDimFilter("dim1", "foo", null))).build();
final ClientCompactionTaskQuery expected = new ClientCompactionTaskQuery(task.getId(), "datasource", new ClientCompactionIOConfig(new ClientCompactionIntervalSpec(Intervals.of("2019/2020"), "testSha256OfSortedSegmentIds"), true), new ClientCompactionTaskQueryTuningConfig(100, 40000, 2000L, 30000L, new SegmentsSplitHintSpec(new HumanReadableBytes(100000L), 10), new DynamicPartitionsSpec(100, 30000L), new IndexSpec(new DefaultBitmapSerdeFactory(), CompressionStrategy.LZ4, CompressionStrategy.LZF, LongEncodingStrategy.LONGS), new IndexSpec(new DefaultBitmapSerdeFactory(), CompressionStrategy.LZ4, CompressionStrategy.UNCOMPRESSED, LongEncodingStrategy.AUTO), 2, 1000L, TmpFileSegmentWriteOutMediumFactory.instance(), 100, 5, 1000L, new Duration(3000L), 7, 1000, 100), new ClientCompactionTaskGranularitySpec(Granularities.DAY, Granularities.HOUR, true), new ClientCompactionTaskDimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))), new AggregatorFactory[] { new CountAggregatorFactory("cnt") }, new ClientCompactionTaskTransformSpec(new SelectorDimFilter("dim1", "foo", null)), new HashMap<>());
final byte[] json = mapper.writeValueAsBytes(task);
final ClientCompactionTaskQuery actual = (ClientCompactionTaskQuery) mapper.readValue(json, ClientTaskQuery.class);
Assert.assertEquals(expected, actual);
}
use of org.apache.druid.client.indexing.ClientCompactionTaskQuery in project druid by druid-io.
the class ClientCompactionTaskQuerySerdeTest method testClientCompactionTaskQueryToCompactionTask.
@Test
public void testClientCompactionTaskQueryToCompactionTask() throws IOException {
final ObjectMapper mapper = setupInjectablesInObjectMapper(new DefaultObjectMapper());
final ClientCompactionTaskQuery query = new ClientCompactionTaskQuery("id", "datasource", new ClientCompactionIOConfig(new ClientCompactionIntervalSpec(Intervals.of("2019/2020"), "testSha256OfSortedSegmentIds"), true), new ClientCompactionTaskQueryTuningConfig(null, 40000, 2000L, null, new SegmentsSplitHintSpec(new HumanReadableBytes(100000L), 10), new DynamicPartitionsSpec(100, 30000L), new IndexSpec(new DefaultBitmapSerdeFactory(), CompressionStrategy.LZ4, CompressionStrategy.LZF, LongEncodingStrategy.LONGS), new IndexSpec(new DefaultBitmapSerdeFactory(), CompressionStrategy.LZ4, CompressionStrategy.UNCOMPRESSED, LongEncodingStrategy.AUTO), 2, 1000L, TmpFileSegmentWriteOutMediumFactory.instance(), 100, 5, 1000L, new Duration(3000L), 7, 1000, 100), new ClientCompactionTaskGranularitySpec(Granularities.DAY, Granularities.HOUR, true), new ClientCompactionTaskDimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))), new AggregatorFactory[] { new CountAggregatorFactory("cnt") }, new ClientCompactionTaskTransformSpec(new SelectorDimFilter("dim1", "foo", null)), ImmutableMap.of("key", "value"));
final byte[] json = mapper.writeValueAsBytes(query);
final CompactionTask task = (CompactionTask) mapper.readValue(json, Task.class);
Assert.assertEquals(query.getId(), task.getId());
Assert.assertEquals(query.getDataSource(), task.getDataSource());
Assert.assertTrue(task.getIoConfig().getInputSpec() instanceof CompactionIntervalSpec);
Assert.assertEquals(query.getIoConfig().getInputSpec().getInterval(), ((CompactionIntervalSpec) task.getIoConfig().getInputSpec()).getInterval());
Assert.assertEquals(query.getIoConfig().getInputSpec().getSha256OfSortedSegmentIds(), ((CompactionIntervalSpec) task.getIoConfig().getInputSpec()).getSha256OfSortedSegmentIds());
Assert.assertEquals(query.getTuningConfig().getMaxRowsInMemory().intValue(), task.getTuningConfig().getMaxRowsInMemory());
Assert.assertEquals(query.getTuningConfig().getMaxBytesInMemory().longValue(), task.getTuningConfig().getMaxBytesInMemory());
Assert.assertEquals(query.getTuningConfig().getSplitHintSpec(), task.getTuningConfig().getSplitHintSpec());
Assert.assertEquals(query.getTuningConfig().getPartitionsSpec(), task.getTuningConfig().getPartitionsSpec());
Assert.assertEquals(query.getTuningConfig().getIndexSpec(), task.getTuningConfig().getIndexSpec());
Assert.assertEquals(query.getTuningConfig().getIndexSpecForIntermediatePersists(), task.getTuningConfig().getIndexSpecForIntermediatePersists());
Assert.assertEquals(query.getTuningConfig().getPushTimeout().longValue(), task.getTuningConfig().getPushTimeout());
Assert.assertEquals(query.getTuningConfig().getSegmentWriteOutMediumFactory(), task.getTuningConfig().getSegmentWriteOutMediumFactory());
Assert.assertEquals(query.getTuningConfig().getMaxNumConcurrentSubTasks().intValue(), task.getTuningConfig().getMaxNumConcurrentSubTasks());
Assert.assertEquals(query.getTuningConfig().getMaxRetry().intValue(), task.getTuningConfig().getMaxRetry());
Assert.assertEquals(query.getTuningConfig().getTaskStatusCheckPeriodMs().longValue(), task.getTuningConfig().getTaskStatusCheckPeriodMs());
Assert.assertEquals(query.getTuningConfig().getChatHandlerTimeout(), task.getTuningConfig().getChatHandlerTimeout());
Assert.assertEquals(query.getTuningConfig().getMaxNumSegmentsToMerge().intValue(), task.getTuningConfig().getMaxNumSegmentsToMerge());
Assert.assertEquals(query.getTuningConfig().getTotalNumMergeTasks().intValue(), task.getTuningConfig().getTotalNumMergeTasks());
Assert.assertEquals(query.getGranularitySpec(), task.getGranularitySpec());
Assert.assertEquals(query.getGranularitySpec().getQueryGranularity(), task.getGranularitySpec().getQueryGranularity());
Assert.assertEquals(query.getGranularitySpec().getSegmentGranularity(), task.getGranularitySpec().getSegmentGranularity());
Assert.assertEquals(query.getGranularitySpec().isRollup(), task.getGranularitySpec().isRollup());
Assert.assertEquals(query.getIoConfig().isDropExisting(), task.getIoConfig().isDropExisting());
Assert.assertEquals(query.getContext(), task.getContext());
Assert.assertEquals(query.getDimensionsSpec().getDimensions(), task.getDimensionsSpec().getDimensions());
Assert.assertEquals(query.getTransformSpec().getFilter(), task.getTransformSpec().getFilter());
Assert.assertArrayEquals(query.getMetricsSpec(), task.getMetricsSpec());
}
use of org.apache.druid.client.indexing.ClientCompactionTaskQuery in project druid by druid-io.
the class CompactSegmentsTest method testRunWithLockedIntervals.
@Test
public void testRunWithLockedIntervals() {
final TestDruidLeaderClient leaderClient = new TestDruidLeaderClient(JSON_MAPPER);
leaderClient.start();
HttpIndexingServiceClient indexingServiceClient = new HttpIndexingServiceClient(JSON_MAPPER, leaderClient);
// Lock all intervals for dataSource_1 and dataSource_2
final String datasource1 = DATA_SOURCE_PREFIX + 1;
leaderClient.lockedIntervals.computeIfAbsent(datasource1, k -> new ArrayList<>()).add(Intervals.of("2017/2018"));
final String datasource2 = DATA_SOURCE_PREFIX + 2;
leaderClient.lockedIntervals.computeIfAbsent(datasource2, k -> new ArrayList<>()).add(Intervals.of("2017/2018"));
// Lock all intervals but one for dataSource_0
final String datasource0 = DATA_SOURCE_PREFIX + 0;
leaderClient.lockedIntervals.computeIfAbsent(datasource0, k -> new ArrayList<>()).add(Intervals.of("2017-01-01T13:00:00Z/2017-02-01"));
// Verify that locked intervals are skipped and only one compaction task
// is submitted for dataSource_0
CompactSegments compactSegments = new CompactSegments(COORDINATOR_CONFIG, JSON_MAPPER, indexingServiceClient);
final CoordinatorStats stats = doCompactSegments(compactSegments, createCompactionConfigs(2), 4);
Assert.assertEquals(1, stats.getGlobalStat(CompactSegments.COMPACTION_TASK_COUNT));
Assert.assertEquals(1, leaderClient.submittedCompactionTasks.size());
final ClientCompactionTaskQuery compactionTask = leaderClient.submittedCompactionTasks.get(0);
Assert.assertEquals(datasource0, compactionTask.getDataSource());
Assert.assertEquals(Intervals.of("2017-01-01T00:00:00/2017-01-01T12:00:00"), compactionTask.getIoConfig().getInputSpec().getInterval());
}
use of org.apache.druid.client.indexing.ClientCompactionTaskQuery in project druid by druid-io.
the class CompactSegmentsTest method testCompactWithGranularitySpecConflictWithActiveCompactionTask.
@Test
public void testCompactWithGranularitySpecConflictWithActiveCompactionTask() {
final String dataSource = DATA_SOURCE_PREFIX + 0;
final String conflictTaskId = "taskIdDummy";
final HttpIndexingServiceClient mockIndexingServiceClient = Mockito.mock(HttpIndexingServiceClient.class);
TaskStatusPlus runningConflictCompactionTask = new TaskStatusPlus(conflictTaskId, "groupId", "compact", DateTimes.EPOCH, DateTimes.EPOCH, TaskState.RUNNING, RunnerTaskState.RUNNING, -1L, TaskLocation.unknown(), dataSource, null);
TaskPayloadResponse runningConflictCompactionTaskPayload = new TaskPayloadResponse(conflictTaskId, new ClientCompactionTaskQuery(conflictTaskId, dataSource, new ClientCompactionIOConfig(new ClientCompactionIntervalSpec(Intervals.of("2000/2099"), "testSha256OfSortedSegmentIds"), null), null, new ClientCompactionTaskGranularitySpec(Granularities.DAY, null, null), null, null, null, null));
Mockito.when(mockIndexingServiceClient.getActiveTasks()).thenReturn(ImmutableList.of(runningConflictCompactionTask));
Mockito.when(mockIndexingServiceClient.getTaskPayload(ArgumentMatchers.eq(conflictTaskId))).thenReturn(runningConflictCompactionTaskPayload);
final CompactSegments compactSegments = new CompactSegments(COORDINATOR_CONFIG, JSON_MAPPER, mockIndexingServiceClient);
final List<DataSourceCompactionConfig> compactionConfigs = new ArrayList<>();
compactionConfigs.add(new DataSourceCompactionConfig(dataSource, 0, 500L, null, // smaller than segment interval
new Period("PT0H"), new UserCompactionTaskQueryTuningConfig(null, null, null, null, partitionsSpec, null, null, null, null, null, 3, null, null, null, null, null, null), new UserCompactionTaskGranularityConfig(Granularities.YEAR, null, null), null, null, null, null, null));
doCompactSegments(compactSegments, compactionConfigs);
// Verify that conflict task was canceled
Mockito.verify(mockIndexingServiceClient).cancelTask(conflictTaskId);
// The active conflict task has interval of 2000/2099
// Make sure that we do not skip interval of conflict task.
// Since we cancel the task and will have to compact those intervals with the new segmentGranulartity
ArgumentCaptor<List<DataSegment>> segmentsCaptor = ArgumentCaptor.forClass(List.class);
ArgumentCaptor<ClientCompactionTaskGranularitySpec> granularitySpecArgumentCaptor = ArgumentCaptor.forClass(ClientCompactionTaskGranularitySpec.class);
Mockito.verify(mockIndexingServiceClient).compactSegments(ArgumentMatchers.anyString(), segmentsCaptor.capture(), ArgumentMatchers.anyInt(), ArgumentMatchers.any(), granularitySpecArgumentCaptor.capture(), ArgumentMatchers.any(), ArgumentMatchers.any(), ArgumentMatchers.any(), ArgumentMatchers.any(), ArgumentMatchers.any());
// All segments is compact at the same time since we changed the segment granularity to YEAR and all segment
// are within the same year
Assert.assertEquals(datasourceToSegments.get(dataSource).size(), segmentsCaptor.getValue().size());
ClientCompactionTaskGranularitySpec actual = granularitySpecArgumentCaptor.getValue();
Assert.assertNotNull(actual);
ClientCompactionTaskGranularitySpec expected = new ClientCompactionTaskGranularitySpec(Granularities.YEAR, null, null);
Assert.assertEquals(expected, actual);
}
Aggregations