Use of io.druid.server.coordinator.CoordinatorStats in the project druid by druid-io.
The method run of the class DruidCoordinatorBalancer.
/**
 * Runs one balancing pass over every tier in the cluster.
 *
 * <p>A tier is skipped when moves from an earlier pass are still tracked in
 * {@code currentlyMovingSegments} (their lifetimes are reduced instead), when it has one or fewer
 * servers, or when its servers hold no segments. Otherwise the balancer strategy is asked up to
 * {@code maxSegmentsToMove} times for a segment to move and for a new home for that segment.
 *
 * @param params runtime parameters supplying the cluster, balancer strategy, dynamic config and
 *               available segments
 * @return parameters built from {@code params} with this pass's "movedCount" / "unmovedCount"
 *         tier stats attached
 */
@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
  final CoordinatorStats stats = new CoordinatorStats();
  final BalancerStrategy strategy = params.getBalancerStrategy();
  final int maxSegmentsToMove = params.getCoordinatorDynamicConfig().getMaxSegmentsToMove();

  for (Map.Entry<String, MinMaxPriorityQueue<ServerHolder>> entry : params.getDruidCluster().getCluster().entrySet()) {
    String tier = entry.getKey();

    // Make sure the tier has a tracking map before it is queried below.
    if (currentlyMovingSegments.get(tier) == null) {
      currentlyMovingSegments.put(tier, new ConcurrentHashMap<String, BalancerSegmentHolder>());
    }

    if (!currentlyMovingSegments.get(tier).isEmpty()) {
      reduceLifetimes(tier);
      // Report the in-flight segments of THIS tier; the outer map's size() is the number of tiers.
      log.info("[%s]: Still waiting on %,d segments to be moved", tier, currentlyMovingSegments.get(tier).size());
      continue;
    }

    final List<ServerHolder> serverHolderList = Lists.newArrayList(entry.getValue());
    if (serverHolderList.size() <= 1) {
      log.info("[%s]: One or fewer servers found. Cannot balance.", tier);
      continue;
    }

    int numSegments = 0;
    for (ServerHolder server : serverHolderList) {
      numSegments += server.getServer().getSegments().size();
    }
    if (numSegments == 0) {
      log.info("No segments found. Cannot balance.");
      continue;
    }

    // Counts picks for which the strategy could not find a new home.
    long unmoved = 0L;
    for (int iter = 0; iter < maxSegmentsToMove; iter++) {
      final BalancerSegmentHolder segmentToMove = strategy.pickSegmentToMove(serverHolderList);
      // Only segments that are still in the available set are considered for a move.
      if (segmentToMove != null && params.getAvailableSegments().contains(segmentToMove.getSegment())) {
        final ServerHolder holder = strategy.findNewSegmentHomeBalancer(segmentToMove.getSegment(), serverHolderList);
        if (holder != null) {
          moveSegment(segmentToMove, holder.getServer(), params);
        } else {
          ++unmoved;
        }
      }
    }

    if (unmoved == maxSegmentsToMove) {
      // Cluster should be alive and constantly adjusting
      log.info("No good moves found in tier [%s]", tier);
    }

    stats.addToTieredStat("unmovedCount", tier, unmoved);
    stats.addToTieredStat("movedCount", tier, currentlyMovingSegments.get(tier).size());
    if (params.getCoordinatorDynamicConfig().emitBalancingStats()) {
      strategy.emitStats(tier, stats, serverHolderList);
    }
    log.info("[%s]: Segments Moved: [%d] Segments Let Alone: [%d]", tier, currentlyMovingSegments.get(tier).size(), unmoved);
  }

  return params.buildFromExisting().withCoordinatorStats(stats).build();
}
Use of io.druid.server.coordinator.CoordinatorStats in the project druid by druid-io.
The method run of the class DruidCoordinatorCleanupOvershadowed.
/**
 * Removes available segments that are overshadowed by segments the cluster is serving.
 *
 * <p>Nothing is removed unless {@code params.hasDeletionWaitTimeElapsed()} is true. When it is,
 * one timeline per datasource is built from the segments on every server, and each available
 * segment that such a timeline reports as overshadowed is removed through the coordinator and
 * counted in the global "overShadowedCount" stat.
 *
 * @param params runtime parameters supplying the cluster and the available segments
 * @return parameters built from {@code params} with the collected stats attached
 */
@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
  final CoordinatorStats overshadowStats = new CoordinatorStats();

  // Unservice old partitions if we've had enough time to make sure we aren't flapping with old data
  if (params.hasDeletionWaitTimeElapsed()) {
    final Map<String, VersionedIntervalTimeline<String, DataSegment>> servedTimelines = Maps.newHashMap();

    // Build one timeline per datasource out of everything the servers currently hold.
    for (MinMaxPriorityQueue<ServerHolder> tierServers : params.getDruidCluster().getSortedServersByTier()) {
      for (ServerHolder holder : tierServers) {
        for (ImmutableDruidDataSource served : holder.getServer().getDataSources()) {
          final String servedName = served.getName();
          VersionedIntervalTimeline<String, DataSegment> servedTimeline = servedTimelines.get(servedName);
          if (servedTimeline == null) {
            servedTimeline = new VersionedIntervalTimeline<>(Comparators.comparable());
            servedTimelines.put(servedName, servedTimeline);
          }
          for (DataSegment servedSegment : served.getSegments()) {
            servedTimeline.add(
                servedSegment.getInterval(),
                servedSegment.getVersion(),
                servedSegment.getShardSpec().createChunk(servedSegment)
            );
          }
        }
      }
    }

    // Remove all segments in db that are overshadowed by served segments
    for (DataSegment candidate : params.getAvailableSegments()) {
      final VersionedIntervalTimeline<String, DataSegment> servedTimeline = servedTimelines.get(candidate.getDataSource());
      if (servedTimeline == null) {
        continue;
      }
      if (!servedTimeline.isOvershadowed(candidate.getInterval(), candidate.getVersion())) {
        continue;
      }
      coordinator.removeSegment(candidate);
      overshadowStats.addToGlobalStat("overShadowedCount", 1);
    }
  }

  return params.buildFromExisting().withCoordinatorStats(overshadowStats).build();
}
Use of io.druid.server.coordinator.CoordinatorStats in the project druid by druid-io.
The method run of the class DruidCoordinatorLogger.
/**
 * Logs and emits the statistics carried by {@code params}, plus load-queue, availability,
 * replication and per-datasource segment metrics.
 *
 * <p>Per-tier stats are read from {@code params.getCoordinatorStats()}; tier stats that are absent
 * are not logged. The parameters are returned as received.
 *
 * @param params runtime parameters supplying the cluster, stats, emitter, peons and datasources
 * @return the same {@code params} instance
 */
@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
  final DruidCluster druidCluster = params.getDruidCluster();
  final CoordinatorStats coordinatorStats = params.getCoordinatorStats();
  final ServiceEmitter metrics = params.getEmitter();

  // Assigned segments, per tier.
  final Map<String, AtomicLong> assignedByTier = coordinatorStats.getPerTierStats().get("assignedCount");
  if (assignedByTier != null) {
    for (Map.Entry<String, AtomicLong> tierStat : assignedByTier.entrySet()) {
      final String tierName = tierStat.getKey();
      log.info("[%s] : Assigned %s segments among %,d servers", tierName, tierStat.getValue().get(), druidCluster.get(tierName).size());
    }
  }
  emitTieredStats(metrics, "segment/assigned/count", assignedByTier);

  // Dropped segments, per tier.
  final Map<String, AtomicLong> droppedByTier = coordinatorStats.getPerTierStats().get("droppedCount");
  if (droppedByTier != null) {
    for (Map.Entry<String, AtomicLong> tierStat : droppedByTier.entrySet()) {
      final String tierName = tierStat.getKey();
      log.info("[%s] : Dropped %s segments among %,d servers", tierName, tierStat.getValue().get(), druidCluster.get(tierName).size());
    }
  }
  emitTieredStats(metrics, "segment/dropped/count", droppedByTier);

  // Cost, move and delete stats are emitted without logging here.
  final Map<String, AtomicLong> rawCostByTier = coordinatorStats.getPerTierStats().get("initialCost");
  emitTieredStats(metrics, "segment/cost/raw", rawCostByTier);
  final Map<String, AtomicLong> normalizationByTier = coordinatorStats.getPerTierStats().get("normalization");
  emitTieredStats(metrics, "segment/cost/normalization", normalizationByTier);
  final Map<String, AtomicLong> movedByTier = coordinatorStats.getPerTierStats().get("movedCount");
  emitTieredStats(metrics, "segment/moved/count", movedByTier);
  final Map<String, AtomicLong> deletedByTier = coordinatorStats.getPerTierStats().get("deletedCount");
  emitTieredStats(metrics, "segment/deleted/count", deletedByTier);

  // The stored value is the normalized cost times one thousand; scale it back before emitting.
  final Map<String, AtomicLong> scaledNormalizedCost = coordinatorStats.getPerTierStats().get("normalizedInitialCostTimesOneThousand");
  if (scaledNormalizedCost != null) {
    final Maps.EntryTransformer<String, AtomicLong, Number> unscale = new Maps.EntryTransformer<String, AtomicLong, Number>() {
      @Override
      public Number transformEntry(String key, AtomicLong value) {
        return value.doubleValue() / 1000d;
      }
    };
    emitTieredStats(metrics, "segment/cost/normalized", Maps.transformEntries(scaledNormalizedCost, unscale));
  }

  // Unneeded segments, per tier.
  final Map<String, AtomicLong> unneededByTier = coordinatorStats.getPerTierStats().get("unneededCount");
  if (unneededByTier != null) {
    for (Map.Entry<String, AtomicLong> tierStat : unneededByTier.entrySet()) {
      final String tierName = tierStat.getKey();
      log.info("[%s] : Removed %s unneeded segments among %,d servers", tierName, tierStat.getValue().get(), druidCluster.get(tierName).size());
    }
  }
  emitTieredStats(metrics, "segment/unneeded/count", unneededByTier);

  metrics.emit(
      new ServiceMetricEvent.Builder().build("segment/overShadowed/count", coordinatorStats.getGlobalStats().get("overShadowedCount"))
  );

  // Balancer outcome, per tier.
  if (movedByTier != null) {
    for (Map.Entry<String, AtomicLong> tierStat : movedByTier.entrySet()) {
      log.info("[%s] : Moved %,d segment(s)", tierStat.getKey(), tierStat.getValue().get());
    }
  }
  final Map<String, AtomicLong> unmovedByTier = coordinatorStats.getPerTierStats().get("unmovedCount");
  if (unmovedByTier != null) {
    for (Map.Entry<String, AtomicLong> tierStat : unmovedByTier.entrySet()) {
      log.info("[%s] : Let alone %,d segment(s)", tierStat.getKey(), tierStat.getValue().get());
    }
  }

  log.info("Load Queues:");
  for (MinMaxPriorityQueue<ServerHolder> tierServers : druidCluster.getSortedServersByTier()) {
    for (ServerHolder holder : tierServers) {
      final ImmutableDruidServer druidServer = holder.getServer();
      final LoadQueuePeon peon = holder.getPeon();
      log.info(
          "Server[%s, %s, %s] has %,d left to load, %,d left to drop, %,d bytes queued, %,d bytes served.",
          druidServer.getName(),
          druidServer.getType(),
          druidServer.getTier(),
          peon.getSegmentsToLoad().size(),
          peon.getSegmentsToDrop().size(),
          peon.getLoadQueueSize(),
          druidServer.getCurrSize()
      );
      if (log.isDebugEnabled()) {
        for (DataSegment toLoad : peon.getSegmentsToLoad()) {
          log.debug("Segment to load[%s]", toLoad);
        }
        for (DataSegment toDrop : peon.getSegmentsToDrop()) {
          log.debug("Segment to drop[%s]", toDrop);
        }
      }
    }
  }

  // Emit coordinator metrics
  for (Map.Entry<String, LoadQueuePeon> peonEntry : params.getLoadManagementPeons().entrySet()) {
    final String peonServer = peonEntry.getKey();
    final LoadQueuePeon peon = peonEntry.getValue();
    metrics.emit(
        new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, peonServer)
                                        .build("segment/loadQueue/size", peon.getLoadQueueSize())
    );
    metrics.emit(
        new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, peonServer)
                                        .build("segment/loadQueue/failed", peon.getAndResetFailedAssignCount())
    );
    metrics.emit(
        new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, peonServer)
                                        .build("segment/loadQueue/count", peon.getSegmentsToLoad().size())
    );
    metrics.emit(
        new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, peonServer)
                                        .build("segment/dropQueue/count", peon.getSegmentsToDrop().size())
    );
  }

  for (Map.Entry<String, AtomicLong> unavailable : coordinator.getSegmentAvailability().entrySet()) {
    final String dataSourceName = unavailable.getKey();
    final Long unavailableCount = unavailable.getValue().get();
    metrics.emit(
        new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSourceName)
                                        .build("segment/unavailable/count", unavailableCount)
    );
  }

  for (Map.Entry<String, CountingMap<String>> tierReplication : coordinator.getReplicationStatus().entrySet()) {
    final String tierName = tierReplication.getKey();
    for (Map.Entry<String, AtomicLong> underReplicated : tierReplication.getValue().entrySet()) {
      final String dataSourceName = underReplicated.getKey();
      final Long underReplicatedCount = underReplicated.getValue().get();
      metrics.emit(
          new ServiceMetricEvent.Builder().setDimension(DruidMetrics.TIER, tierName)
                                          .setDimension(DruidMetrics.DATASOURCE, dataSourceName)
                                          .build("segment/underReplicated/count", underReplicatedCount)
      );
    }
  }

  // Emit segment metrics
  final CountingMap<String> bytesByDataSource = new CountingMap<String>();
  final CountingMap<String> countByDataSource = new CountingMap<String>();
  for (DruidDataSource source : params.getDataSources()) {
    for (DataSegment held : source.getSegments()) {
      bytesByDataSource.add(source.getName(), held.getSize());
      countByDataSource.add(source.getName(), 1L);
    }
  }
  for (Map.Entry<String, Long> sizeEntry : bytesByDataSource.snapshot().entrySet()) {
    metrics.emit(
        new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, sizeEntry.getKey())
                                        .build("segment/size", sizeEntry.getValue())
    );
  }
  for (Map.Entry<String, Long> countEntry : countByDataSource.snapshot().entrySet()) {
    metrics.emit(
        new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, countEntry.getKey())
                                        .build("segment/count", countEntry.getValue())
    );
  }

  return params;
}
Use of io.druid.server.coordinator.CoordinatorStats in the project druid by druid-io.
The method run of the class DruidCoordinatorSegmentMerger.
/**
 * Builds a timeline per datasource from the available segments and issues merge requests for
 * runs of timeline objects that fit within the configured merge limits.
 *
 * <p>When a whitelist is set, only datasources it contains are considered. The total returned by
 * {@code mergeSegments} is accumulated in the global "mergedCount" stat, which is then logged and
 * emitted as "coordinator/merge/count".
 *
 * @param params runtime parameters supplying available segments, dynamic config and the emitter
 * @return parameters built from {@code params} with the collected stats attached
 */
@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
  final DatasourceWhitelist allowed = whiteListRef.get();
  final CoordinatorStats mergeStats = new CoordinatorStats();
  final Map<String, VersionedIntervalTimeline<String, DataSegment>> timelineByDataSource = Maps.newHashMap();

  // Find serviced segments by using a timeline
  for (DataSegment available : params.getAvailableSegments()) {
    final String dataSourceName = available.getDataSource();
    if (allowed != null && !allowed.contains(dataSourceName)) {
      continue;
    }
    VersionedIntervalTimeline<String, DataSegment> dataSourceTimeline = timelineByDataSource.get(dataSourceName);
    if (dataSourceTimeline == null) {
      dataSourceTimeline = new VersionedIntervalTimeline<String, DataSegment>(Ordering.<String>natural());
      timelineByDataSource.put(dataSourceName, dataSourceTimeline);
    }
    dataSourceTimeline.add(available.getInterval(), available.getVersion(), available.getShardSpec().createChunk(available));
  }

  // Find segments to merge
  for (final Map.Entry<String, VersionedIntervalTimeline<String, DataSegment>> perDataSource : timelineByDataSource.entrySet()) {
    final String dataSourceName = perDataSource.getKey();

    // Get serviced segments from the timeline
    final List<TimelineObjectHolder<String, DataSegment>> holders =
        perDataSource.getValue().lookup(new Interval(new DateTime(0), new DateTime("3000-01-01")));

    // Accumulate timelineObjects greedily until we reach our limits, then backtrack to the maximum complete set
    SegmentsToMerge pending = new SegmentsToMerge();
    int i = 0;
    while (i < holders.size()) {
      final boolean rejected = !pending.add(holders.get(i));
      if (rejected
          || pending.getByteCount() > params.getCoordinatorDynamicConfig().getMergeBytesLimit()
          || pending.getSegmentCount() >= params.getCoordinatorDynamicConfig().getMergeSegmentsLimit()) {
        // Rewind the index by however many holders backtrack() reports.
        i -= pending.backtrack(params.getCoordinatorDynamicConfig().getMergeBytesLimit());

        if (pending.getSegmentCount() > 1) {
          mergeStats.addToGlobalStat("mergedCount", mergeSegments(pending, dataSourceName));
        }
        if (pending.getSegmentCount() == 0) {
          // Backtracked all the way to zero. Increment by one so we continue to make progress.
          i++;
        }
        pending = new SegmentsToMerge();
      }
      i++;
    }

    // Finish any timelineObjects to merge that may have not hit threshold
    pending.backtrack(params.getCoordinatorDynamicConfig().getMergeBytesLimit());
    if (pending.getSegmentCount() > 1) {
      mergeStats.addToGlobalStat("mergedCount", mergeSegments(pending, dataSourceName));
    }
  }

  log.info("Issued merge requests for %s segments", mergeStats.getGlobalStats().get("mergedCount").get());
  params.getEmitter().emit(
      new ServiceMetricEvent.Builder().build("coordinator/merge/count", mergeStats.getGlobalStats().get("mergedCount"))
  );

  return params.buildFromExisting().withCoordinatorStats(mergeStats).build();
}
Use of io.druid.server.coordinator.CoordinatorStats in the project druid by druid-io.
The method run of the class LoadRule.
/**
 * Applies this load rule to one segment: assigns replicants tier by tier, then drops
 * over-replication.
 *
 * <p>For each tier in {@code getTieredReplicants()}, {@code assign} is called when the segment is
 * in the available set, and the difference between expected and loaded replicants is recorded
 * for the later {@code drop} call. A tier with no server queue raises an alert and is skipped.
 *
 * @param coordinator the coordinator (not used directly in this method)
 * @param params      runtime parameters supplying the cluster, replicant lookup and strategy
 * @param segment     the segment the rule is applied to
 * @return the accumulated stats of the assign and drop steps
 */
@Override
public CoordinatorStats run(DruidCoordinator coordinator, DruidCoordinatorRuntimeParams params, DataSegment segment) {
  final CoordinatorStats ruleStats = new CoordinatorStats();
  final Set<DataSegment> available = params.getAvailableSegments();
  final Map<String, Integer> missingReplicantsByTier = Maps.newHashMap();
  final String segmentId = segment.getIdentifier();

  int clusterReplicants = params.getSegmentReplicantLookup().getTotalReplicants(segmentId);

  for (Map.Entry<String, Integer> wanted : getTieredReplicants().entrySet()) {
    final String tierName = wanted.getKey();
    final int expectedInTier = wanted.getValue();
    final int totalInTier = params.getSegmentReplicantLookup().getTotalReplicants(segmentId, tierName);
    final int loadedInTier = params.getSegmentReplicantLookup().getLoadedReplicants(segmentId, tierName);
    final MinMaxPriorityQueue<ServerHolder> tierQueue = params.getDruidCluster().getServersByTier(tierName);

    if (tierQueue == null) {
      log.makeAlert("Tier[%s] has no servers! Check your cluster configuration!", tierName).emit();
    } else {
      final List<ServerHolder> tierServers = Lists.newArrayList(tierQueue);
      final BalancerStrategy balancer = params.getBalancerStrategy();

      if (available.contains(segment)) {
        final CoordinatorStats assigned = assign(
            params.getReplicationManager(),
            tierName,
            clusterReplicants,
            expectedInTier,
            totalInTier,
            balancer,
            tierServers,
            segment
        );
        ruleStats.accumulate(assigned);
        // Count the newly assigned replicants so later tiers see the updated cluster total.
        clusterReplicants += assigned.getPerTierStats().get(assignedCount).get(tierName).get();
      }

      missingReplicantsByTier.put(tierName, expectedInTier - loadedInTier);
    }
  }

  // Remove over-replication
  ruleStats.accumulate(drop(missingReplicantsByTier, segment, params));
  return ruleStats;
}
Aggregations