Use of org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams in project druid by druid-io.
The class CompactSegments, method run.
@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
  LOG.info("Compact segments");
  final CoordinatorCompactionConfig dynamicConfig = params.getCoordinatorCompactionConfig();
  final CoordinatorStats stats = new CoordinatorStats();
  List<DataSourceCompactionConfig> compactionConfigList = dynamicConfig.getCompactionConfigs();
  if (dynamicConfig.getMaxCompactionTaskSlots() > 0) {
    Map<String, VersionedIntervalTimeline<String, DataSegment>> dataSources = params.getUsedSegmentsTimelinesPerDataSource();
    if (compactionConfigList != null && !compactionConfigList.isEmpty()) {
      Map<String, DataSourceCompactionConfig> compactionConfigs = compactionConfigList.stream().collect(Collectors.toMap(DataSourceCompactionConfig::getDataSource, Function.identity()));
      final List<TaskStatusPlus> compactionTasks = filterNonCompactionTasks(indexingServiceClient.getActiveTasks());
      // dataSource -> list of intervals for which compaction will be skipped in this run
      final Map<String, List<Interval>> intervalsToSkipCompaction = new HashMap<>();
      int numEstimatedNonCompleteCompactionTasks = 0;
      for (TaskStatusPlus status : compactionTasks) {
        final TaskPayloadResponse response = indexingServiceClient.getTaskPayload(status.getId());
        if (response == null) {
          throw new ISE("Got a null payload from overlord for task[%s]", status.getId());
        }
        if (COMPACTION_TASK_TYPE.equals(response.getPayload().getType())) {
          final ClientCompactionTaskQuery compactionTaskQuery = (ClientCompactionTaskQuery) response.getPayload();
          DataSourceCompactionConfig dataSourceCompactionConfig = compactionConfigs.get(status.getDataSource());
          if (dataSourceCompactionConfig != null && dataSourceCompactionConfig.getGranularitySpec() != null) {
            Granularity configuredSegmentGranularity = dataSourceCompactionConfig.getGranularitySpec().getSegmentGranularity();
            if (configuredSegmentGranularity != null && compactionTaskQuery.getGranularitySpec() != null && !configuredSegmentGranularity.equals(compactionTaskQuery.getGranularitySpec().getSegmentGranularity())) {
              // Cancel the active compaction task if segmentGranularity has changed;
              // the interval will need to be re-compacted
              LOG.info("Canceled task[%s] as task segmentGranularity is [%s] but compaction config " + "segmentGranularity is [%s]", status.getId(), compactionTaskQuery.getGranularitySpec().getSegmentGranularity(), configuredSegmentGranularity);
              indexingServiceClient.cancelTask(status.getId());
              continue;
            }
          }
          // Skip this interval: the current active compaction task is still valid
          final Interval interval = compactionTaskQuery.getIoConfig().getInputSpec().getInterval();
          intervalsToSkipCompaction.computeIfAbsent(status.getDataSource(), k -> new ArrayList<>()).add(interval);
          // Since we keep the current active compaction task running, count the task slots it uses
          numEstimatedNonCompleteCompactionTasks += findMaxNumTaskSlotsUsedByOneCompactionTask(compactionTaskQuery.getTuningConfig());
        } else {
          throw new ISE("task[%s] is not a compactionTask", status.getId());
        }
      }
      // Skip all the intervals locked by higher-priority tasks for each datasource.
      // This must be done after the invalid compaction tasks are cancelled
      // in the loop above so that their intervals are not considered locked.
      getLockedIntervalsToSkip(compactionConfigList).forEach((dataSource, intervals) -> intervalsToSkipCompaction.computeIfAbsent(dataSource, ds -> new ArrayList<>()).addAll(intervals));
      final CompactionSegmentIterator iterator = policy.reset(compactionConfigs, dataSources, intervalsToSkipCompaction);
      int totalCapacity;
      if (dynamicConfig.isUseAutoScaleSlots()) {
        try {
          totalCapacity = indexingServiceClient.getTotalWorkerCapacityWithAutoScale();
        } catch (Exception e) {
          LOG.warn("Failed to get total worker capacity with auto scale slots. Falling back to current capacity count");
          totalCapacity = indexingServiceClient.getTotalWorkerCapacity();
        }
      } else {
        totalCapacity = indexingServiceClient.getTotalWorkerCapacity();
      }
      final int compactionTaskCapacity = (int) Math.min(totalCapacity * dynamicConfig.getCompactionTaskSlotRatio(), dynamicConfig.getMaxCompactionTaskSlots());
      final int numAvailableCompactionTaskSlots;
      if (numEstimatedNonCompleteCompactionTasks > 0) {
        numAvailableCompactionTaskSlots = Math.max(0, compactionTaskCapacity - numEstimatedNonCompleteCompactionTasks);
      } else {
        // compactionTaskCapacity might be 0 if totalWorkerCapacity is low.
        // This guarantees that at least one slot is available if
        // compaction is enabled and numEstimatedNonCompleteCompactionTasks is 0.
        numAvailableCompactionTaskSlots = Math.max(1, compactionTaskCapacity);
      }
      LOG.info("Found [%d] available task slots for compaction out of [%d] max compaction task capacity", numAvailableCompactionTaskSlots, compactionTaskCapacity);
      stats.addToGlobalStat(AVAILABLE_COMPACTION_TASK_SLOT, numAvailableCompactionTaskSlots);
      stats.addToGlobalStat(MAX_COMPACTION_TASK_SLOT, compactionTaskCapacity);
      final Map<String, AutoCompactionSnapshot.Builder> currentRunAutoCompactionSnapshotBuilders = new HashMap<>();
      if (numAvailableCompactionTaskSlots > 0) {
        stats.accumulate(doRun(compactionConfigs, currentRunAutoCompactionSnapshotBuilders, numAvailableCompactionTaskSlots, iterator));
      } else {
        stats.accumulate(makeStats(currentRunAutoCompactionSnapshotBuilders, 0, iterator));
      }
    } else {
      LOG.info("compactionConfig is empty. Skip.");
      autoCompactionSnapshotPerDataSource.set(new HashMap<>());
    }
  } else {
    LOG.info("maxCompactionTaskSlots was set to 0. Skip compaction");
    autoCompactionSnapshotPerDataSource.set(new HashMap<>());
  }
  return params.buildFromExisting().withCoordinatorStats(stats).build();
}
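The task-slot arithmetic in the middle of this method is easy to miss. A minimal standalone sketch of it, with hypothetical values standing in for the dynamic-config getters and the Overlord call:

// Hypothetical inputs; in CompactSegments these come from dynamicConfig and the indexing service client.
int totalCapacity = 10;                          // indexingServiceClient.getTotalWorkerCapacity()
double compactionTaskSlotRatio = 0.1;            // dynamicConfig.getCompactionTaskSlotRatio()
int maxCompactionTaskSlots = 3;                  // dynamicConfig.getMaxCompactionTaskSlots()
int numEstimatedNonCompleteCompactionTasks = 0;  // no compaction tasks already running

int compactionTaskCapacity = (int) Math.min(totalCapacity * compactionTaskSlotRatio, maxCompactionTaskSlots);
int numAvailableCompactionTaskSlots = numEstimatedNonCompleteCompactionTasks > 0
    ? Math.max(0, compactionTaskCapacity - numEstimatedNonCompleteCompactionTasks)
    : Math.max(1, compactionTaskCapacity);       // at least one slot once compaction is enabled
// Here: compactionTaskCapacity = (int) min(10 * 0.1, 3) = 1, so one compaction task slot is available.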
Use of org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams in project druid by druid-io.
The class BroadcastDistributionRule, method run.
@Override
public CoordinatorStats run(DruidCoordinator coordinator, DruidCoordinatorRuntimeParams params, DataSegment segment) {
  final Set<ServerHolder> dropServerHolders = new HashSet<>();
  // Find servers where we need to load the broadcast segments
  final Set<ServerHolder> loadServerHolders = params.getDruidCluster().getAllServers().stream().filter((serverHolder) -> {
    ServerType serverType = serverHolder.getServer().getType();
    if (!serverType.isSegmentBroadcastTarget()) {
      return false;
    }
    final boolean isServingSegment = serverHolder.isServingSegment(segment);
    if (serverHolder.isDecommissioning()) {
      if (isServingSegment && !serverHolder.isDroppingSegment(segment)) {
        dropServerHolders.add(serverHolder);
      }
      return false;
    }
    return !isServingSegment && !serverHolder.isLoadingSegment(segment);
  }).collect(Collectors.toSet());
  final CoordinatorStats stats = new CoordinatorStats();
  return stats.accumulate(assign(loadServerHolders, segment)).accumulate(drop(dropServerHolders, segment));
}
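The filter above doubles as a partition: decommissioning servers that still serve the segment are diverted into dropServerHolders as a side effect of the predicate. A minimal self-contained sketch of that pattern on plain integers (the Demo class is hypothetical, not Druid code):

import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class Demo {
  public static void main(String[] args) {
    Set<Integer> drops = new HashSet<>();
    Set<Integer> loads = IntStream.rangeClosed(1, 10).boxed().filter(n -> {
      if (n % 4 == 0) {    // stand-in for a decommissioning server still serving the segment
        drops.add(n);      // side effect, mirroring dropServerHolders.add(serverHolder)
        return false;
      }
      return n % 2 == 0;   // stand-in for a broadcast target that still needs the segment
    }).collect(Collectors.toSet());
    System.out.println("loads=" + loads + ", drops=" + drops);  // loads contains 2, 6, 10; drops contains 4, 8
  }
}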
Use of org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams in project druid by druid-io.
The class MarkAsUnusedOvershadowedSegmentsTest, method testRun.
@Test
@Parameters({ "historical", "broker" })
public void testRun(String serverTypeString) {
  ServerType serverType = ServerType.fromString(serverTypeString);
  markAsUnusedOvershadowedSegments = new MarkAsUnusedOvershadowedSegments(coordinator);
  usedSegments = ImmutableList.of(segmentV1, segmentV0, segmentV2);
  // Dummy values for comparisons in TreeSet
  EasyMock.expect(mockPeon.getLoadQueueSize()).andReturn(0L).anyTimes();
  EasyMock.expect(druidServer.getMaxSize()).andReturn(0L).anyTimes();
  EasyMock.expect(druidServer.getCurrSize()).andReturn(0L).anyTimes();
  EasyMock.expect(druidServer.getName()).andReturn("").anyTimes();
  EasyMock.expect(druidServer.getHost()).andReturn("").anyTimes();
  EasyMock.expect(druidServer.getTier()).andReturn("").anyTimes();
  EasyMock.expect(druidServer.getType()).andReturn(serverType).anyTimes();
  EasyMock.expect(druidServer.getDataSources()).andReturn(ImmutableList.of(druidDataSource)).anyTimes();
  EasyMock.expect(druidDataSource.getSegments()).andReturn(ImmutableSet.of(segmentV1, segmentV2)).anyTimes();
  EasyMock.expect(druidDataSource.getName()).andReturn("test").anyTimes();
  coordinator.markSegmentAsUnused(segmentV1);
  coordinator.markSegmentAsUnused(segmentV0);
  EasyMock.expectLastCall();
  EasyMock.replay(mockPeon, coordinator, druidServer, druidDataSource);
  druidCluster = DruidClusterBuilder.newBuilder().addTier("normal", new ServerHolder(druidServer, mockPeon)).build();
  DruidCoordinatorRuntimeParams params = CoordinatorRuntimeParamsTestHelpers
      .newBuilder()
      .withUsedSegmentsInTest(usedSegments)
      .withCoordinatorStats(new CoordinatorStats())
      .withDruidCluster(druidCluster)
      .withDynamicConfigs(RunRulesTest.COORDINATOR_CONFIG_WITH_ZERO_LEADING_TIME_BEFORE_CAN_MARK_AS_UNUSED_OVERSHADOWED_SEGMENTS)
      .build();
  markAsUnusedOvershadowedSegments.run(params);
  EasyMock.verify(coordinator, druidDataSource, druidServer);
}
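The test follows EasyMock's record/replay/verify lifecycle: expectations are recorded on the mocks, replay() switches them into replay mode, the code under test runs, and verify() fails if any recorded expectation went unmet. A minimal sketch of that lifecycle with a hypothetical Greeter interface (not part of Druid):

interface Greeter {
  String greet(String name);
}

@Test
public void testGreeterLifecycle() {
  Greeter greeter = EasyMock.createMock(Greeter.class);     // record phase
  EasyMock.expect(greeter.greet("druid")).andReturn("hi");
  EasyMock.replay(greeter);                                 // switch to replay phase
  Assert.assertEquals("hi", greeter.greet("druid"));        // exercise the mock
  EasyMock.verify(greeter);                                 // all recorded expectations were met
}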
Use of org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams in project druid by druid-io.
The class BalanceSegments, method balanceServers.
private Pair<Integer, Integer> balanceServers(DruidCoordinatorRuntimeParams params, List<ServerHolder> toMoveFrom, List<ServerHolder> toMoveTo, int maxSegmentsToMove) {
  if (maxSegmentsToMove <= 0) {
    log.debug("maxSegmentsToMove is 0; no balancing work can be performed.");
    return new Pair<>(0, 0);
  } else if (toMoveFrom.isEmpty()) {
    log.debug("toMoveFrom is empty; no balancing work can be performed.");
    return new Pair<>(0, 0);
  } else if (toMoveTo.isEmpty()) {
    log.debug("toMoveTo is empty; no balancing work can be performed.");
    return new Pair<>(0, 0);
  }
  final BalancerStrategy strategy = params.getBalancerStrategy();
  final int maxIterations = 2 * maxSegmentsToMove;
  final int maxToLoad = params.getCoordinatorDynamicConfig().getMaxSegmentsInNodeLoadingQueue();
  int moved = 0, unmoved = 0;
  Iterator<BalancerSegmentHolder> segmentsToMove;
  // The pick method depends on whether the operator has enabled batched segment sampling in the Coordinator dynamic config.
  if (params.getCoordinatorDynamicConfig().useBatchedSegmentSampler()) {
    segmentsToMove = strategy.pickSegmentsToMove(toMoveFrom, params.getBroadcastDatasources(), maxSegmentsToMove);
  } else {
    segmentsToMove = strategy.pickSegmentsToMove(toMoveFrom, params.getBroadcastDatasources(), params.getCoordinatorDynamicConfig().getPercentOfSegmentsToConsiderPerMove());
  }
  // noinspection ForLoopThatDoesntUseLoopVariable
  for (int iter = 0; (moved + unmoved) < maxSegmentsToMove; ++iter) {
    if (!segmentsToMove.hasNext()) {
      log.info("All servers to move segments from are empty, ending run.");
      break;
    }
    final BalancerSegmentHolder segmentToMoveHolder = segmentsToMove.next();
    // DruidCoordinatorRuntimeParams.getUsedSegments originates from SegmentsMetadataManager, i.e. it is the set of segments
    // that *should* be loaded. segmentToMoveHolder.getSegment originates from ServerInventoryView, i.e. it may be
    // any segment that happens to be loaded on some server, even if it is not used. (The Coordinator closes such
    // discrepancies eventually via UnloadUnusedSegments.) Therefore the picked segmentToMoveHolder's segment may not
    // need to be balanced.
    boolean needToBalancePickedSegment = params.getUsedSegments().contains(segmentToMoveHolder.getSegment());
    if (needToBalancePickedSegment) {
      final DataSegment segmentToMove = segmentToMoveHolder.getSegment();
      final ImmutableDruidServer fromServer = segmentToMoveHolder.getFromServer();
      // We want to leave the server the segment is currently on in the list,
      // but filter out replicas that are already serving the segment, and servers with a full load queue
      final List<ServerHolder> toMoveToWithLoadQueueCapacityAndNotServingSegment = toMoveTo.stream().filter(s -> s.getServer().equals(fromServer) || (!s.isServingSegment(segmentToMove) && (maxToLoad <= 0 || s.getNumberOfSegmentsInQueue() < maxToLoad))).collect(Collectors.toList());
      if (toMoveToWithLoadQueueCapacityAndNotServingSegment.size() > 0) {
        final ServerHolder destinationHolder = strategy.findNewSegmentHomeBalancer(segmentToMove, toMoveToWithLoadQueueCapacityAndNotServingSegment);
        if (destinationHolder != null && !destinationHolder.getServer().equals(fromServer)) {
          if (moveSegment(segmentToMoveHolder, destinationHolder.getServer(), params)) {
            moved++;
          } else {
            unmoved++;
          }
        } else {
          log.debug("Segment [%s] is 'optimally' placed.", segmentToMove.getId());
          unmoved++;
        }
      } else {
        log.debug("No valid movement destinations for segment [%s].", segmentToMove.getId());
        unmoved++;
      }
    }
    if (iter >= maxIterations) {
      log.info("Unable to select %d remaining candidate segments out of %d total to balance " + "after %d iterations, ending run.", (maxSegmentsToMove - moved - unmoved), maxSegmentsToMove, iter);
      break;
    }
  }
  return new Pair<>(moved, unmoved);
}
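Note how the loop budget works: each pick either increments moved/unmoved or, when the picked segment is no longer in the used set, consumes only an iteration, so the iter >= maxIterations check (with maxIterations = 2 * maxSegmentsToMove) bounds the run even when many picks are unusable. A stripped-down sketch of that accounting (the outcome() helper is hypothetical, standing in for the per-pick result):

int maxSegmentsToMove = 5;
int maxIterations = 2 * maxSegmentsToMove;
int moved = 0, unmoved = 0;
for (int iter = 0; (moved + unmoved) < maxSegmentsToMove; ++iter) {
  int result = outcome(iter);  // hypothetical: 0 = moved, 1 = unmoved, 2 = pick not in used segments
  if (result == 0) {
    moved++;                   // stand-in for a successful moveSegment(...)
  } else if (result == 1) {
    unmoved++;                 // stand-in for an optimally placed or undeliverable pick
  }                            // result == 2 consumes the iteration without progress
  if (iter >= maxIterations) {
    break;                     // guarantees termination after roughly twice the move budget
  }
}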
Use of org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams in project druid by druid-io.
The class EmitClusterStatsAndMetrics, method run.
@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
  DruidCluster cluster = params.getDruidCluster();
  CoordinatorStats stats = params.getCoordinatorStats();
  ServiceEmitter emitter = params.getEmitter();
  stats.forEachTieredStat("assignedCount", (final String tier, final long count) -> {
    log.info("[%s] : Assigned %s segments among %,d servers", tier, count, cluster.getHistoricalsByTier(tier).size());
    emitTieredStat(emitter, "segment/assigned/count", tier, count);
  });
  stats.forEachTieredStat("droppedCount", (final String tier, final long count) -> {
    log.info("[%s] : Dropped %s segments among %,d servers", tier, count, cluster.getHistoricalsByTier(tier).size());
    emitTieredStat(emitter, "segment/dropped/count", tier, count);
  });
  emitTieredStats(emitter, "segment/cost/raw", stats, "initialCost");
  emitTieredStats(emitter, "segment/cost/normalization", stats, "normalization");
  emitTieredStats(emitter, "segment/moved/count", stats, "movedCount");
  emitTieredStats(emitter, "segment/deleted/count", stats, "deletedCount");
  stats.forEachTieredStat("normalizedInitialCostTimesOneThousand", (final String tier, final long count) -> {
    emitTieredStat(emitter, "segment/cost/normalized", tier, count / 1000d);
  });
  stats.forEachTieredStat("unneededCount", (final String tier, final long count) -> {
    log.info("[%s] : Removed %s unneeded segments among %,d servers", tier, count, cluster.getHistoricalsByTier(tier).size());
    emitTieredStat(emitter, "segment/unneeded/count", tier, count);
  });
  emitter.emit(new ServiceMetricEvent.Builder().build("segment/overShadowed/count", stats.getGlobalStat("overShadowedCount")));
  stats.forEachTieredStat("movedCount", (final String tier, final long count) -> {
    log.info("[%s] : Moved %,d segment(s)", tier, count);
  });
  stats.forEachTieredStat("unmovedCount", (final String tier, final long count) -> {
    log.info("[%s] : Let alone %,d segment(s)", tier, count);
  });
  log.info("Load Queues:");
  for (Iterable<ServerHolder> serverHolders : cluster.getSortedHistoricalsByTier()) {
    for (ServerHolder serverHolder : serverHolders) {
      ImmutableDruidServer server = serverHolder.getServer();
      LoadQueuePeon queuePeon = serverHolder.getPeon();
      log.info("Server[%s, %s, %s] has %,d left to load, %,d left to drop, %,d bytes queued, %,d bytes served.", server.getName(), server.getType().toString(), server.getTier(), queuePeon.getSegmentsToLoad().size(), queuePeon.getSegmentsToDrop().size(), queuePeon.getLoadQueueSize(), server.getCurrSize());
      if (log.isDebugEnabled()) {
        for (DataSegment segment : queuePeon.getSegmentsToLoad()) {
          log.debug("Segment to load[%s]", segment);
        }
        for (DataSegment segment : queuePeon.getSegmentsToDrop()) {
          log.debug("Segment to drop[%s]", segment);
        }
      }
      stats.addToTieredStat(TOTAL_CAPACITY, server.getTier(), server.getMaxSize());
      stats.addToTieredStat(TOTAL_HISTORICAL_COUNT, server.getTier(), 1);
    }
  }
  params.getDatabaseRuleManager().getAllRules().values().forEach(rules -> rules.forEach(rule -> {
    if (rule instanceof LoadRule) {
      ((LoadRule) rule).getTieredReplicants().forEach((tier, replica) -> stats.accumulateMaxTieredStat(MAX_REPLICATION_FACTOR, tier, replica));
    }
  }));
  emitTieredStats(emitter, "tier/required/capacity", stats, LoadRule.REQUIRED_CAPACITY);
  emitTieredStats(emitter, "tier/total/capacity", stats, TOTAL_CAPACITY);
  emitTieredStats(emitter, "tier/replication/factor", stats, MAX_REPLICATION_FACTOR);
  emitTieredStats(emitter, "tier/historical/count", stats, TOTAL_HISTORICAL_COUNT);
  // Emit coordinator metrics
  params.getLoadManagementPeons().forEach((final String serverName, final LoadQueuePeon queuePeon) -> {
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, serverName).build("segment/loadQueue/size", queuePeon.getLoadQueueSize()));
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, serverName).build("segment/loadQueue/failed", queuePeon.getAndResetFailedAssignCount()));
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, serverName).build("segment/loadQueue/count", queuePeon.getSegmentsToLoad().size()));
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, serverName).build("segment/dropQueue/count", queuePeon.getSegmentsToDrop().size()));
  });
  coordinator.computeNumsUnavailableUsedSegmentsPerDataSource().object2IntEntrySet().forEach((final Object2IntMap.Entry<String> entry) -> {
    final String dataSource = entry.getKey();
    final int numUnavailableUsedSegmentsInDataSource = entry.getIntValue();
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/unavailable/count", numUnavailableUsedSegmentsInDataSource));
  });
  coordinator.computeUnderReplicationCountsPerDataSourcePerTier().forEach((final String tier, final Object2LongMap<String> underReplicationCountsPerDataSource) -> {
    for (final Object2LongMap.Entry<String> entry : underReplicationCountsPerDataSource.object2LongEntrySet()) {
      final String dataSource = entry.getKey();
      final long underReplicationCount = entry.getLongValue();
      emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.TIER, tier).setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/underReplicated/count", underReplicationCount));
    }
  });
  emitter.emit(new ServiceMetricEvent.Builder().build("compact/task/count", stats.getGlobalStat(CompactSegments.COMPACTION_TASK_COUNT)));
  emitter.emit(new ServiceMetricEvent.Builder().build("compactTask/maxSlot/count", stats.getGlobalStat(CompactSegments.MAX_COMPACTION_TASK_SLOT)));
  emitter.emit(new ServiceMetricEvent.Builder().build("compactTask/availableSlot/count", stats.getGlobalStat(CompactSegments.AVAILABLE_COMPACTION_TASK_SLOT)));
  stats.forEachDataSourceStat(CompactSegments.TOTAL_SIZE_OF_SEGMENTS_AWAITING, (final String dataSource, final long count) -> {
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/waitCompact/bytes", count));
  });
  stats.forEachDataSourceStat(CompactSegments.TOTAL_COUNT_OF_SEGMENTS_AWAITING, (final String dataSource, final long count) -> {
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/waitCompact/count", count));
  });
  stats.forEachDataSourceStat(CompactSegments.TOTAL_INTERVAL_OF_SEGMENTS_AWAITING, (final String dataSource, final long count) -> {
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("interval/waitCompact/count", count));
  });
  stats.forEachDataSourceStat(CompactSegments.TOTAL_SIZE_OF_SEGMENTS_SKIPPED, (final String dataSource, final long count) -> {
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/skipCompact/bytes", count));
  });
  stats.forEachDataSourceStat(CompactSegments.TOTAL_COUNT_OF_SEGMENTS_SKIPPED, (final String dataSource, final long count) -> {
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/skipCompact/count", count));
  });
  stats.forEachDataSourceStat(CompactSegments.TOTAL_INTERVAL_OF_SEGMENTS_SKIPPED, (final String dataSource, final long count) -> {
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("interval/skipCompact/count", count));
  });
  stats.forEachDataSourceStat(CompactSegments.TOTAL_SIZE_OF_SEGMENTS_COMPACTED, (final String dataSource, final long count) -> {
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/compacted/bytes", count));
  });
  stats.forEachDataSourceStat(CompactSegments.TOTAL_COUNT_OF_SEGMENTS_COMPACTED, (final String dataSource, final long count) -> {
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/compacted/count", count));
  });
  stats.forEachDataSourceStat(CompactSegments.TOTAL_INTERVAL_OF_SEGMENTS_COMPACTED, (final String dataSource, final long count) -> {
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("interval/compacted/count", count));
  });
  // Emit segment metrics
  params.getUsedSegmentsTimelinesPerDataSource().forEach((String dataSource, VersionedIntervalTimeline<String, DataSegment> dataSourceWithUsedSegments) -> {
    long totalSizeOfUsedSegments = dataSourceWithUsedSegments.iterateAllObjects().stream().mapToLong(DataSegment::getSize).sum();
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/size", totalSizeOfUsedSegments));
    emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/count", dataSourceWithUsedSegments.getNumObjects()));
  });
  // Emit coordinator runtime stats
  emitDutyStats(emitter, "coordinator/time", stats, "runtime");
  return params;
}
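Every emission above follows the same builder shape: construct a ServiceMetricEvent.Builder, set zero or more dimensions, then build(metricName, value) and hand the event to the emitter. A minimal sketch of the pattern, with a hypothetical metric name and dimension value:

emitter.emit(
    new ServiceMetricEvent.Builder()
        .setDimension(DruidMetrics.DATASOURCE, "wikipedia")  // hypothetical datasource name
        .build("segment/example/count", 42)                  // hypothetical metric name and value
);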