Example 11 with CoordinatorStats

Use of io.druid.server.coordinator.CoordinatorStats in project druid by druid-io.

From the class DruidCoordinatorBalancer, method run:

@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
    final CoordinatorStats stats = new CoordinatorStats();
    final BalancerStrategy strategy = params.getBalancerStrategy();
    final int maxSegmentsToMove = params.getCoordinatorDynamicConfig().getMaxSegmentsToMove();
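    // Balance each tier independently; segments move only between servers within the same tier.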
    for (Map.Entry<String, MinMaxPriorityQueue<ServerHolder>> entry : params.getDruidCluster().getCluster().entrySet()) {
        String tier = entry.getKey();
        if (currentlyMovingSegments.get(tier) == null) {
            currentlyMovingSegments.put(tier, new ConcurrentHashMap<String, BalancerSegmentHolder>());
        }
        if (!currentlyMovingSegments.get(tier).isEmpty()) {
            reduceLifetimes(tier);
            // Size this tier's in-flight map, not the top-level map of tiers.
            log.info("[%s]: Still waiting on %,d segments to be moved", tier, currentlyMovingSegments.get(tier).size());
            continue;
        }
        final List<ServerHolder> serverHolderList = Lists.newArrayList(entry.getValue());
        if (serverHolderList.size() <= 1) {
            log.info("[%s]: One or fewer servers found.  Cannot balance.", tier);
            continue;
        }
        int numSegments = 0;
        for (ServerHolder server : serverHolderList) {
            numSegments += server.getServer().getSegments().size();
        }
        if (numSegments == 0) {
            log.info("No segments found.  Cannot balance.");
            continue;
        }
        long unmoved = 0L;
        for (int iter = 0; iter < maxSegmentsToMove; iter++) {
            final BalancerSegmentHolder segmentToMove = strategy.pickSegmentToMove(serverHolderList);
            if (segmentToMove != null && params.getAvailableSegments().contains(segmentToMove.getSegment())) {
                final ServerHolder holder = strategy.findNewSegmentHomeBalancer(segmentToMove.getSegment(), serverHolderList);
                if (holder != null) {
                    moveSegment(segmentToMove, holder.getServer(), params);
                } else {
                    ++unmoved;
                }
            }
        }
        if (unmoved == maxSegmentsToMove) {
            // Cluster should be alive and constantly adjusting
            log.info("No good moves found in tier [%s]", tier);
        }
        stats.addToTieredStat("unmovedCount", tier, unmoved);
        stats.addToTieredStat("movedCount", tier, currentlyMovingSegments.get(tier).size());
        if (params.getCoordinatorDynamicConfig().emitBalancingStats()) {
            strategy.emitStats(tier, stats, serverHolderList);
        }
        log.info("[%s]: Segments Moved: [%d] Segments Let Alone: [%d]", tier, currentlyMovingSegments.get(tier).size(), unmoved);
    }
    return params.buildFromExisting().withCoordinatorStats(stats).build();
}
Also used : CoordinatorStats(io.druid.server.coordinator.CoordinatorStats) BalancerStrategy(io.druid.server.coordinator.BalancerStrategy) ServerHolder(io.druid.server.coordinator.ServerHolder) MinMaxPriorityQueue(com.google.common.collect.MinMaxPriorityQueue) BalancerSegmentHolder(io.druid.server.coordinator.BalancerSegmentHolder) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Map(java.util.Map)
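
The tiered stats written above ("unmovedCount", "movedCount") can be read back through the getPerTierStats() accessor shown in Example 13. A minimal sketch; the runtimeParams variable and the log message are illustrative, not part of the Druid code:

CoordinatorStats stats = runtimeParams.getCoordinatorStats();
Map<String, AtomicLong> unmovedPerTier = stats.getPerTierStats().get("unmovedCount");
if (unmovedPerTier != null) {
    for (Map.Entry<String, AtomicLong> entry : unmovedPerTier.entrySet()) {
        log.info("[%s]: %,d segments had no better home this run", entry.getKey(), entry.getValue().get());
    }
}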

Example 12 with CoordinatorStats

Use of io.druid.server.coordinator.CoordinatorStats in project druid by druid-io.

From the class DruidCoordinatorCleanupOvershadowed, method run:

@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
    CoordinatorStats stats = new CoordinatorStats();
    // Unservice old partitions if we've had enough time to make sure we aren't flapping with old data
    if (params.hasDeletionWaitTimeElapsed()) {
        DruidCluster cluster = params.getDruidCluster();
        Map<String, VersionedIntervalTimeline<String, DataSegment>> timelines = Maps.newHashMap();
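        // Build one timeline per data source from all segments currently being served.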
        for (MinMaxPriorityQueue<ServerHolder> serverHolders : cluster.getSortedServersByTier()) {
            for (ServerHolder serverHolder : serverHolders) {
                ImmutableDruidServer server = serverHolder.getServer();
                for (ImmutableDruidDataSource dataSource : server.getDataSources()) {
                    VersionedIntervalTimeline<String, DataSegment> timeline = timelines.get(dataSource.getName());
                    if (timeline == null) {
                        timeline = new VersionedIntervalTimeline<>(Comparators.comparable());
                        timelines.put(dataSource.getName(), timeline);
                    }
                    for (DataSegment segment : dataSource.getSegments()) {
                        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
                    }
                }
            }
        }
        // Remove all segments in the DB that are overshadowed by served segments
        for (DataSegment dataSegment : params.getAvailableSegments()) {
            VersionedIntervalTimeline<String, DataSegment> timeline = timelines.get(dataSegment.getDataSource());
            if (timeline != null && timeline.isOvershadowed(dataSegment.getInterval(), dataSegment.getVersion())) {
                coordinator.removeSegment(dataSegment);
                stats.addToGlobalStat("overShadowedCount", 1);
            }
        }
    }
    return params.buildFromExisting().withCoordinatorStats(stats).build();
}
Also used : CoordinatorStats(io.druid.server.coordinator.CoordinatorStats) ImmutableDruidDataSource(io.druid.client.ImmutableDruidDataSource) ServerHolder(io.druid.server.coordinator.ServerHolder) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) DruidCluster(io.druid.server.coordinator.DruidCluster) DataSegment(io.druid.timeline.DataSegment) ImmutableDruidServer(io.druid.client.ImmutableDruidServer)
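
The timeline-building pattern above generalizes into a small helper. A sketch using only the APIs already seen in this example (VersionedIntervalTimeline#add, #isOvershadowed, ShardSpec#createChunk, plus Guava's Maps, Lists, and Ordering); the method name findOvershadowed and its signature are illustrative, not Druid API:

static List<DataSegment> findOvershadowed(Iterable<DataSegment> servedSegments, Iterable<DataSegment> availableSegments) {
    // Index every served segment into a per-datasource timeline.
    Map<String, VersionedIntervalTimeline<String, DataSegment>> timelines = Maps.newHashMap();
    for (DataSegment segment : servedSegments) {
        VersionedIntervalTimeline<String, DataSegment> timeline = timelines.get(segment.getDataSource());
        if (timeline == null) {
            timeline = new VersionedIntervalTimeline<>(Ordering.<String>natural());
            timelines.put(segment.getDataSource(), timeline);
        }
        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }
    // Any available segment whose interval/version is fully shadowed by served segments is removable.
    List<DataSegment> overshadowed = Lists.newArrayList();
    for (DataSegment segment : availableSegments) {
        VersionedIntervalTimeline<String, DataSegment> timeline = timelines.get(segment.getDataSource());
        if (timeline != null && timeline.isOvershadowed(segment.getInterval(), segment.getVersion())) {
            overshadowed.add(segment);
        }
    }
    return overshadowed;
}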

Example 13 with CoordinatorStats

Use of io.druid.server.coordinator.CoordinatorStats in project druid by druid-io.

From the class DruidCoordinatorLogger, method run:

@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
    DruidCluster cluster = params.getDruidCluster();
    CoordinatorStats stats = params.getCoordinatorStats();
    ServiceEmitter emitter = params.getEmitter();
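    // Per-tier stats are keyed first by stat name, then by tier; each value is an AtomicLong counter.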
    Map<String, AtomicLong> assigned = stats.getPerTierStats().get("assignedCount");
    if (assigned != null) {
        for (Map.Entry<String, AtomicLong> entry : assigned.entrySet()) {
            log.info("[%s] : Assigned %s segments among %,d servers", entry.getKey(), entry.getValue().get(), cluster.get(entry.getKey()).size());
        }
    }
    emitTieredStats(emitter, "segment/assigned/count", assigned);
    Map<String, AtomicLong> dropped = stats.getPerTierStats().get("droppedCount");
    if (dropped != null) {
        for (Map.Entry<String, AtomicLong> entry : dropped.entrySet()) {
            log.info("[%s] : Dropped %s segments among %,d servers", entry.getKey(), entry.getValue().get(), cluster.get(entry.getKey()).size());
        }
    }
    emitTieredStats(emitter, "segment/dropped/count", dropped);
    emitTieredStats(emitter, "segment/cost/raw", stats.getPerTierStats().get("initialCost"));
    emitTieredStats(emitter, "segment/cost/normalization", stats.getPerTierStats().get("normalization"));
    emitTieredStats(emitter, "segment/moved/count", stats.getPerTierStats().get("movedCount"));
    emitTieredStats(emitter, "segment/deleted/count", stats.getPerTierStats().get("deletedCount"));
    Map<String, AtomicLong> normalized = stats.getPerTierStats().get("normalizedInitialCostTimesOneThousand");
    if (normalized != null) {
        emitTieredStats(emitter, "segment/cost/normalized", Maps.transformEntries(normalized, new Maps.EntryTransformer<String, AtomicLong, Number>() {

            @Override
            public Number transformEntry(String key, AtomicLong value) {
                return value.doubleValue() / 1000d;
            }
        }));
    }
    Map<String, AtomicLong> unneeded = stats.getPerTierStats().get("unneededCount");
    if (unneeded != null) {
        for (Map.Entry<String, AtomicLong> entry : unneeded.entrySet()) {
            log.info("[%s] : Removed %s unneeded segments among %,d servers", entry.getKey(), entry.getValue().get(), cluster.get(entry.getKey()).size());
        }
    }
    emitTieredStats(emitter, "segment/unneeded/count", stats.getPerTierStats().get("unneededCount"));
    emitter.emit(new ServiceMetricEvent.Builder().build("segment/overShadowed/count", stats.getGlobalStats().get("overShadowedCount")));
    Map<String, AtomicLong> moved = stats.getPerTierStats().get("movedCount");
    if (moved != null) {
        for (Map.Entry<String, AtomicLong> entry : moved.entrySet()) {
            log.info("[%s] : Moved %,d segment(s)", entry.getKey(), entry.getValue().get());
        }
    }
    final Map<String, AtomicLong> unmoved = stats.getPerTierStats().get("unmovedCount");
    if (unmoved != null) {
        for (Map.Entry<String, AtomicLong> entry : unmoved.entrySet()) {
            log.info("[%s] : Let alone %,d segment(s)", entry.getKey(), entry.getValue().get());
        }
    }
    log.info("Load Queues:");
    for (MinMaxPriorityQueue<ServerHolder> serverHolders : cluster.getSortedServersByTier()) {
        for (ServerHolder serverHolder : serverHolders) {
            ImmutableDruidServer server = serverHolder.getServer();
            LoadQueuePeon queuePeon = serverHolder.getPeon();
            log.info("Server[%s, %s, %s] has %,d left to load, %,d left to drop, %,d bytes queued, %,d bytes served.", server.getName(), server.getType(), server.getTier(), queuePeon.getSegmentsToLoad().size(), queuePeon.getSegmentsToDrop().size(), queuePeon.getLoadQueueSize(), server.getCurrSize());
            if (log.isDebugEnabled()) {
                for (DataSegment segment : queuePeon.getSegmentsToLoad()) {
                    log.debug("Segment to load[%s]", segment);
                }
                for (DataSegment segment : queuePeon.getSegmentsToDrop()) {
                    log.debug("Segment to drop[%s]", segment);
                }
            }
        }
    }
    // Emit coordinator metrics
    final Set<Map.Entry<String, LoadQueuePeon>> peonEntries = params.getLoadManagementPeons().entrySet();
    for (Map.Entry<String, LoadQueuePeon> entry : peonEntries) {
        String serverName = entry.getKey();
        LoadQueuePeon queuePeon = entry.getValue();
        emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, serverName).build("segment/loadQueue/size", queuePeon.getLoadQueueSize()));
        emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, serverName).build("segment/loadQueue/failed", queuePeon.getAndResetFailedAssignCount()));
        emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, serverName).build("segment/loadQueue/count", queuePeon.getSegmentsToLoad().size()));
        emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, serverName).build("segment/dropQueue/count", queuePeon.getSegmentsToDrop().size()));
    }
    for (Map.Entry<String, AtomicLong> entry : coordinator.getSegmentAvailability().entrySet()) {
        String datasource = entry.getKey();
        Long count = entry.getValue().get();
        emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, datasource).build("segment/unavailable/count", count));
    }
    for (Map.Entry<String, CountingMap<String>> entry : coordinator.getReplicationStatus().entrySet()) {
        String tier = entry.getKey();
        CountingMap<String> datasourceAvailabilities = entry.getValue();
        for (Map.Entry<String, AtomicLong> datasourceAvailability : datasourceAvailabilities.entrySet()) {
            String datasource = datasourceAvailability.getKey();
            Long count = datasourceAvailability.getValue().get();
            emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.TIER, tier).setDimension(DruidMetrics.DATASOURCE, datasource).build("segment/underReplicated/count", count));
        }
    }
    // Emit segment metrics
    CountingMap<String> segmentSizes = new CountingMap<String>();
    CountingMap<String> segmentCounts = new CountingMap<String>();
    for (DruidDataSource dataSource : params.getDataSources()) {
        for (DataSegment segment : dataSource.getSegments()) {
            segmentSizes.add(dataSource.getName(), segment.getSize());
            segmentCounts.add(dataSource.getName(), 1L);
        }
    }
    for (Map.Entry<String, Long> entry : segmentSizes.snapshot().entrySet()) {
        String dataSource = entry.getKey();
        Long size = entry.getValue();
        emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/size", size));
    }
    for (Map.Entry<String, Long> entry : segmentCounts.snapshot().entrySet()) {
        String dataSource = entry.getKey();
        Long count = entry.getValue();
        emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/count", count));
    }
    return params;
}
Also used : ServiceEmitter(com.metamx.emitter.service.ServiceEmitter) CoordinatorStats(io.druid.server.coordinator.CoordinatorStats) DataSegment(io.druid.timeline.DataSegment) LoadQueuePeon(io.druid.server.coordinator.LoadQueuePeon) DruidCluster(io.druid.server.coordinator.DruidCluster) DruidDataSource(io.druid.client.DruidDataSource) CountingMap(io.druid.collections.CountingMap) AtomicLong(java.util.concurrent.atomic.AtomicLong) ServerHolder(io.druid.server.coordinator.ServerHolder) ServiceMetricEvent(com.metamx.emitter.service.ServiceMetricEvent) Map(java.util.Map) ImmutableDruidServer(io.druid.client.ImmutableDruidServer)
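
The emitTieredStats helper called throughout this example is not shown. A plausible reconstruction, assuming only the ServiceMetricEvent.Builder and DruidMetrics.TIER usage already visible in run(); treat it as a sketch rather than the actual Druid source:

// Emits one metric event per tier; tolerates a null map, since call sites
// above pass stats.getPerTierStats().get(...) without a null check.
private void emitTieredStats(ServiceEmitter emitter, String metricName, Map<String, ? extends Number> statMap) {
    if (statMap != null) {
        for (Map.Entry<String, ? extends Number> entry : statMap.entrySet()) {
            emitter.emit(new ServiceMetricEvent.Builder()
                .setDimension(DruidMetrics.TIER, entry.getKey())
                .build(metricName, entry.getValue().doubleValue()));
        }
    }
}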

Example 14 with CoordinatorStats

Use of io.druid.server.coordinator.CoordinatorStats in project druid by druid-io.

From the class DruidCoordinatorSegmentMerger, method run:

@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
    DatasourceWhitelist whitelist = whiteListRef.get();
    CoordinatorStats stats = new CoordinatorStats();
    Map<String, VersionedIntervalTimeline<String, DataSegment>> dataSources = Maps.newHashMap();
    // Find serviced segments by using a timeline
    for (DataSegment dataSegment : params.getAvailableSegments()) {
        if (whitelist == null || whitelist.contains(dataSegment.getDataSource())) {
            VersionedIntervalTimeline<String, DataSegment> timeline = dataSources.get(dataSegment.getDataSource());
            if (timeline == null) {
                timeline = new VersionedIntervalTimeline<String, DataSegment>(Ordering.<String>natural());
                dataSources.put(dataSegment.getDataSource(), timeline);
            }
            timeline.add(dataSegment.getInterval(), dataSegment.getVersion(), dataSegment.getShardSpec().createChunk(dataSegment));
        }
    }
    // Find segments to merge
    for (final Map.Entry<String, VersionedIntervalTimeline<String, DataSegment>> entry : dataSources.entrySet()) {
        // Get serviced segments from the timeline
        VersionedIntervalTimeline<String, DataSegment> timeline = entry.getValue();
        List<TimelineObjectHolder<String, DataSegment>> timelineObjects = timeline.lookup(new Interval(new DateTime(0), new DateTime("3000-01-01")));
        // Accumulate timelineObjects greedily until we reach our limits, then backtrack to the maximum complete set
        SegmentsToMerge segmentsToMerge = new SegmentsToMerge();
        for (int i = 0; i < timelineObjects.size(); i++) {
            if (!segmentsToMerge.add(timelineObjects.get(i)) || segmentsToMerge.getByteCount() > params.getCoordinatorDynamicConfig().getMergeBytesLimit() || segmentsToMerge.getSegmentCount() >= params.getCoordinatorDynamicConfig().getMergeSegmentsLimit()) {
                i -= segmentsToMerge.backtrack(params.getCoordinatorDynamicConfig().getMergeBytesLimit());
                if (segmentsToMerge.getSegmentCount() > 1) {
                    stats.addToGlobalStat("mergedCount", mergeSegments(segmentsToMerge, entry.getKey()));
                }
                if (segmentsToMerge.getSegmentCount() == 0) {
                    // Backtracked all the way to zero. Increment by one so we continue to make progress.
                    i++;
                }
                segmentsToMerge = new SegmentsToMerge();
            }
        }
        // Finish any timelineObjects to merge that may have not hit threshold
        segmentsToMerge.backtrack(params.getCoordinatorDynamicConfig().getMergeBytesLimit());
        if (segmentsToMerge.getSegmentCount() > 1) {
            stats.addToGlobalStat("mergedCount", mergeSegments(segmentsToMerge, entry.getKey()));
        }
    }
    log.info("Issued merge requests for %s segments", stats.getGlobalStats().get("mergedCount").get());
    params.getEmitter().emit(new ServiceMetricEvent.Builder().build("coordinator/merge/count", stats.getGlobalStats().get("mergedCount")));
    return params.buildFromExisting().withCoordinatorStats(stats).build();
}
Also used : CoordinatorStats(io.druid.server.coordinator.CoordinatorStats) DatasourceWhitelist(io.druid.server.coordinator.DatasourceWhitelist) DataSegment(io.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) Map(java.util.Map) Interval(org.joda.time.Interval)
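
The accumulate-then-backtrack loop above is subtle: backtrack() returns how many trailing timeline objects it dropped, so "i -= ..." rewinds the index and the next SegmentsToMerge revisits exactly those objects, while the i++ on an empty batch guarantees progress when a single object already exceeds the limits. A self-contained illustration of the same pattern on plain integers (all names here are illustrative, not Druid API):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class BacktrackingBatcher {
    public static void main(String[] args) {
        List<Integer> items = Arrays.asList(3, 9, 2, 8, 4, 7, 1);
        long limit = 10; // stands in for mergeBytesLimit
        List<Integer> batch = new ArrayList<>();
        long batchSum = 0;
        for (int i = 0; i < items.size(); i++) {
            batch.add(items.get(i));
            batchSum += items.get(i);
            if (batchSum > limit) {
                // Backtrack: drop trailing items until the batch fits again.
                int dropped = 0;
                while (batchSum > limit && !batch.isEmpty()) {
                    batchSum -= batch.remove(batch.size() - 1);
                    dropped++;
                }
                i -= dropped; // rewind so the next batch revisits the dropped items
                if (batch.size() > 1) {
                    System.out.println("flush " + batch);
                }
                if (batch.isEmpty()) {
                    i++; // a single oversized item: skip it so the loop still advances
                }
                batch = new ArrayList<>();
                batchSum = 0;
            }
        }
        if (batch.size() > 1) {
            System.out.println("flush " + batch); // flush the tail, as the Druid code does after its loop
        }
    }
}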

Example 15 with CoordinatorStats

Use of io.druid.server.coordinator.CoordinatorStats in project druid by druid-io.

From the class LoadRule, method run:

@Override
public CoordinatorStats run(DruidCoordinator coordinator, DruidCoordinatorRuntimeParams params, DataSegment segment) {
    final CoordinatorStats stats = new CoordinatorStats();
    final Set<DataSegment> availableSegments = params.getAvailableSegments();
    final Map<String, Integer> loadStatus = Maps.newHashMap();
    int totalReplicantsInCluster = params.getSegmentReplicantLookup().getTotalReplicants(segment.getIdentifier());
    for (Map.Entry<String, Integer> entry : getTieredReplicants().entrySet()) {
        final String tier = entry.getKey();
        final int expectedReplicantsInTier = entry.getValue();
        final int totalReplicantsInTier = params.getSegmentReplicantLookup().getTotalReplicants(segment.getIdentifier(), tier);
        final int loadedReplicantsInTier = params.getSegmentReplicantLookup().getLoadedReplicants(segment.getIdentifier(), tier);
        final MinMaxPriorityQueue<ServerHolder> serverQueue = params.getDruidCluster().getServersByTier(tier);
        if (serverQueue == null) {
            log.makeAlert("Tier[%s] has no servers! Check your cluster configuration!", tier).emit();
            continue;
        }
        final List<ServerHolder> serverHolderList = Lists.newArrayList(serverQueue);
        final BalancerStrategy strategy = params.getBalancerStrategy();
        if (availableSegments.contains(segment)) {
            CoordinatorStats assignStats = assign(params.getReplicationManager(), tier, totalReplicantsInCluster, expectedReplicantsInTier, totalReplicantsInTier, strategy, serverHolderList, segment);
            stats.accumulate(assignStats);
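            // assignedCount is a String constant defined on LoadRule; it matches the "assignedCount" per-tier key written by assign().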
            totalReplicantsInCluster += assignStats.getPerTierStats().get(assignedCount).get(tier).get();
        }
        loadStatus.put(tier, expectedReplicantsInTier - loadedReplicantsInTier);
    }
    // Remove over-replication
    stats.accumulate(drop(loadStatus, segment, params));
    return stats;
}
Also used : CoordinatorStats(io.druid.server.coordinator.CoordinatorStats) DataSegment(io.druid.timeline.DataSegment) BalancerStrategy(io.druid.server.coordinator.BalancerStrategy) ServerHolder(io.druid.server.coordinator.ServerHolder) Map(java.util.Map)
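
Taken together, these examples imply the full CoordinatorStats surface: addToGlobalStat and addToTieredStat for writing, getGlobalStats and getPerTierStats for reading, and accumulate for merging the stats produced by sub-steps. A minimal sketch of a class with that shape; the real io.druid class is likely backed by CountingMap (see the imports above), while this sketch uses plain maps for brevity and is not the actual source:

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

public class CoordinatorStatsSketch {
    // Keyed first by stat name, then by tier, as the lookups in Examples 13 and 15 show.
    private final Map<String, Map<String, AtomicLong>> perTierStats = new HashMap<>();
    private final Map<String, AtomicLong> globalStats = new HashMap<>();

    public void addToTieredStat(String statName, String tier, long value) {
        perTierStats.computeIfAbsent(statName, k -> new HashMap<>())
                    .computeIfAbsent(tier, k -> new AtomicLong(0))
                    .addAndGet(value);
    }

    public void addToGlobalStat(String statName, long value) {
        globalStats.computeIfAbsent(statName, k -> new AtomicLong(0)).addAndGet(value);
    }

    // Merge another stats object into this one, as LoadRule does with assign/drop results.
    public void accumulate(CoordinatorStatsSketch other) {
        for (Map.Entry<String, Map<String, AtomicLong>> stat : other.perTierStats.entrySet()) {
            for (Map.Entry<String, AtomicLong> tier : stat.getValue().entrySet()) {
                addToTieredStat(stat.getKey(), tier.getKey(), tier.getValue().get());
            }
        }
        for (Map.Entry<String, AtomicLong> stat : other.globalStats.entrySet()) {
            addToGlobalStat(stat.getKey(), stat.getValue().get());
        }
    }

    public Map<String, Map<String, AtomicLong>> getPerTierStats() { return perTierStats; }
    public Map<String, AtomicLong> getGlobalStats() { return globalStats; }
}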

Aggregations

CoordinatorStats (io.druid.server.coordinator.CoordinatorStats)15 ServerHolder (io.druid.server.coordinator.ServerHolder)12 DataSegment (io.druid.timeline.DataSegment)11 DruidCluster (io.druid.server.coordinator.DruidCluster)9 Map (java.util.Map)9 BalancerStrategy (io.druid.server.coordinator.BalancerStrategy)6 DateTime (org.joda.time.DateTime)6 Interval (org.joda.time.Interval)5 Test (org.junit.Test)5 ImmutableMap (com.google.common.collect.ImmutableMap)4 ListeningExecutorService (com.google.common.util.concurrent.ListeningExecutorService)4 DruidServer (io.druid.client.DruidServer)4 CostBalancerStrategyFactory (io.druid.server.coordinator.CostBalancerStrategyFactory)4 ImmutableDruidServer (io.druid.client.ImmutableDruidServer)3 VersionedIntervalTimeline (io.druid.timeline.VersionedIntervalTimeline)3 ImmutableDruidDataSource (io.druid.client.ImmutableDruidDataSource)2 DruidCoordinatorRuntimeParams (io.druid.server.coordinator.DruidCoordinatorRuntimeParams)2 LoadPeonCallback (io.druid.server.coordinator.LoadPeonCallback)2 LoadQueuePeon (io.druid.server.coordinator.LoadQueuePeon)2 MinMaxPriorityQueue (com.google.common.collect.MinMaxPriorityQueue)1