Search in sources:

Example 1 with TrackerClientState

use of com.linkedin.d2.balancer.strategies.relative.TrackerClientState in project rest.li by linkedin.

the class StateUpdater method calculateBaseHealthScore.

/**
 * Updates the health score and health state of every given tracker client and records the
 * aggregated partition statistics on {@code partitionState}.
 *
 * <p>For a client that already has a {@link TrackerClientState}:
 * <ul>
 *   <li>if {@code doNotLoadBalance()} is set, it is pinned to {@code HEALTHY} with {@code MAX_HEALTH_SCORE};</li>
 *   <li>otherwise its score is lowered by the configured down step when {@code isUnhealthy} holds,
 *       raised (slow-start recovery factor or configured up step) when it is below
 *       {@code MAX_HEALTH_SCORE} and {@code isHealthy} holds, and left unchanged with state
 *       {@code NEUTRAL} in every other case.</li>
 * </ul>
 * A client without a state entry only gets a fresh entry; its call stats are not read in this pass.
 *
 * @param trackerClients    the tracker clients to evaluate
 * @param partitionState    the partition state whose tracker client state map and stats are updated in place
 * @param avgClusterLatency the cluster average latency passed to the health checks and stored in the partition stats
 * @param lastCallStatsMap  the latest call stats per tracker client; must contain an entry for every
 *                          client that already has a state entry
 */
private void calculateBaseHealthScore(Set<TrackerClient> trackerClients, PartitionState partitionState, long avgClusterLatency, Map<TrackerClient, CallTracker.CallStats> lastCallStatsMap) {
    Map<TrackerClient, TrackerClientState> trackerClientStateMap = partitionState.getTrackerClientStateMap();
    // Update health score
    long clusterCallCount = 0;
    long clusterErrorCount = 0;
    for (TrackerClient trackerClient : trackerClients) {
        CallTracker.CallStats latestCallStats = lastCallStatsMap.get(trackerClient);
        // Single lookup instead of containsKey() followed by get()
        TrackerClientState trackerClientState = trackerClientStateMap.get(trackerClient);
        if (trackerClientState == null) {
            // Initializing a new client score
            if (trackerClient.doNotSlowStart() || trackerClient.doNotLoadBalance()) {
                trackerClientStateMap.put(trackerClient, new TrackerClientState(MAX_HEALTH_SCORE, _relativeStrategyProperties.getMinCallCount()));
            } else {
                trackerClientStateMap.put(trackerClient, new TrackerClientState(_relativeStrategyProperties.getInitialHealthScore(), _relativeStrategyProperties.getMinCallCount()));
            }
            continue;
        }

        // Outstanding calls are counted together with the completed ones
        int callCount = latestCallStats.getCallCount() + latestCallStats.getOutstandingCount();
        if (trackerClient.doNotLoadBalance()) {
            // Such clients are always healthy and do not contribute to the cluster counters
            trackerClientState.setHealthState(TrackerClientState.HealthState.HEALTHY);
            trackerClientState.setHealthScore(MAX_HEALTH_SCORE);
            trackerClientState.setCallCount(callCount);
            continue;
        }

        double errorRate = getErrorRate(latestCallStats.getErrorTypeCounts(), callCount);
        long avgLatency = getAvgHostLatency(latestCallStats);
        double oldHealthScore = trackerClientState.getHealthScore();
        double newHealthScore = oldHealthScore;
        clusterCallCount += callCount;
        // The compound assignment narrows the double product back to long (fraction is dropped)
        clusterErrorCount += errorRate * callCount;
        if (isUnhealthy(trackerClientState, avgClusterLatency, callCount, avgLatency, errorRate)) {
            // If it is above high latency, we reduce the health score by down step
            newHealthScore = Double.max(trackerClientState.getHealthScore() - _relativeStrategyProperties.getDownStep(), MIN_HEALTH_SCORE);
            trackerClientState.setHealthState(TrackerClientState.HealthState.UNHEALTHY);
            // Parameterized logging: the message is only formatted when debug is enabled.
            // NOTE(review): "latency" reports the cluster average, not this host's avgLatency - confirm this is intended.
            LOG.debug("Host is unhealthy. Host: {}, errorRate: {}, latency: {}, callCount: {}, healthScore dropped from {} to {}",
                trackerClient, errorRate, avgClusterLatency, callCount, trackerClientState.getHealthScore(), newHealthScore);
        } else if (trackerClientState.getHealthScore() < MAX_HEALTH_SCORE && isHealthy(trackerClientState, avgClusterLatency, callCount, avgLatency, errorRate)) {
            if (oldHealthScore < _relativeStrategyProperties.getSlowStartThreshold()) {
                // Below the slow start threshold: multiply the score by the recovery factor,
                // or restart from the initial slow start score if it is not above the minimum
                newHealthScore = oldHealthScore > MIN_HEALTH_SCORE ? Math.min(MAX_HEALTH_SCORE, SLOW_START_RECOVERY_FACTOR * oldHealthScore) : SLOW_START_INITIAL_HEALTH_SCORE;
            } else {
                // At or above the slow start threshold: increase the health score by up step
                newHealthScore = Math.min(MAX_HEALTH_SCORE, oldHealthScore + _relativeStrategyProperties.getUpStep());
            }
            trackerClientState.setHealthState(TrackerClientState.HealthState.HEALTHY);
        } else {
            trackerClientState.setHealthState(TrackerClientState.HealthState.NEUTRAL);
        }
        trackerClientState.setHealthScore(newHealthScore);
        trackerClientState.setCallCount(callCount);
    }
    partitionState.setPartitionStats(avgClusterLatency, clusterCallCount, clusterErrorCount);
}
Also used : TrackerClient(com.linkedin.d2.balancer.clients.TrackerClient) CallTracker(com.linkedin.util.degrader.CallTracker)

Example 2 with TrackerClientState

use of com.linkedin.d2.balancer.strategies.relative.TrackerClientState in project rest.li by linkedin.

the class RelativeLoadBalancerStrategyJmxTest method mockRelativeLoadBalancerStrategyJmx.

/**
 * Creates a {@code RelativeLoadBalancerStrategyJmx} backed by a mocked strategy whose partition
 * state exposes one {@code TrackerClientState(1, 1)} per given tracker client.
 *
 * @param trackerClients the tracker clients to register in the mocked partition state
 * @return a JMX wrapper around the mocked strategy
 */
private RelativeLoadBalancerStrategyJmx mockRelativeLoadBalancerStrategyJmx(List<TrackerClient> trackerClients) {
    // Every client starts with the same state
    Map<TrackerClient, TrackerClientState> stateByClient = new HashMap<>();
    trackerClients.forEach(client -> stateByClient.put(client, new TrackerClientState(1, 1)));

    // The mocked partition state only answers the state map query
    PartitionState partitionState = Mockito.mock(PartitionState.class);
    Mockito.when(partitionState.getTrackerClientStateMap()).thenReturn(stateByClient);

    // The mocked strategy returns that partition state for any partition id
    RelativeLoadBalancerStrategy mockedStrategy = Mockito.mock(RelativeLoadBalancerStrategy.class);
    Mockito.when(mockedStrategy.getFirstValidPartitionId()).thenReturn(DefaultPartitionAccessor.DEFAULT_PARTITION_ID);
    Mockito.when(mockedStrategy.getPartitionState(anyInt())).thenReturn(partitionState);

    return new RelativeLoadBalancerStrategyJmx(mockedStrategy);
}
Also used : RelativeLoadBalancerStrategy(com.linkedin.d2.balancer.strategies.relative.RelativeLoadBalancerStrategy) TrackerClient(com.linkedin.d2.balancer.clients.TrackerClient) HashMap(java.util.HashMap) TrackerClientState(com.linkedin.d2.balancer.strategies.relative.TrackerClientState) PartitionState(com.linkedin.d2.balancer.strategies.relative.PartitionState)

Example 3 with TrackerClientState

use of com.linkedin.d2.balancer.strategies.relative.TrackerClientState in project rest.li by linkedin.

the class QuarantineManager method enrollNewQuarantineAndRecovery.

/**
 * Walks over every tracker client of the new partition state and tries to enroll it into
 * quarantine; a client that was not quarantined goes through recovery handling instead
 * (normal recovery, or enrollment in the recovery set when fast recovery is enabled).
 *
 * @param newPartitionState The new state of the partition
 * @param oldPartitionState The old state of the partition
 * @param quarantineLatency The latency threshold for D2 quarantine
 * @param currentTime The current time, forwarded to the quarantine enrollment
 */
private void enrollNewQuarantineAndRecovery(PartitionState newPartitionState, PartitionState oldPartitionState, long quarantineLatency, long currentTime) {
    int partitionId = newPartitionState.getPartitionId();
    Map<TrackerClient, LoadBalancerQuarantine> quarantineMap = newPartitionState.getQuarantineMap();
    Map<TrackerClient, LoadBalancerQuarantine> quarantineHistory = newPartitionState.getQuarantineHistory();
    Set<TrackerClient> recoverySet = newPartitionState.getRecoveryTrackerClients();

    for (TrackerClient client : newPartitionState.getTrackerClients()) {
        TrackerClientState clientState = newPartitionState.getTrackerClientStateMap().get(client);
        double weight = client.getPartitionWeight(partitionId);
        int trackedClientCount = newPartitionState.getTrackerClientStateMap().size();

        // Check and enroll quarantine map; quarantined clients need no recovery handling
        if (enrollClientInQuarantineMap(client, clientState, weight, quarantineMap, quarantineHistory, trackedClientCount, quarantineLatency, currentTime)) {
            continue;
        }

        if (_fastRecoveryEnabled) {
            // Only enroll the client into recovery state if fast recovery is enabled
            enrollSingleClientInRecoverySet(client, clientState, weight, recoverySet, oldPartitionState);
        } else {
            performNormalRecovery(clientState);
        }
    }
}
Also used : LoadBalancerQuarantine(com.linkedin.d2.balancer.strategies.LoadBalancerQuarantine) TrackerClient(com.linkedin.d2.balancer.clients.TrackerClient)

Example 4 with TrackerClientState

use of com.linkedin.d2.balancer.strategies.relative.TrackerClientState in project rest.li by linkedin.

the class StateUpdater method updateBaseHealthScoreAndState.

/**
 * Recomputes the base health score of the given tracker clients and, when the cluster has been
 * updated, drops the state of every tracker client that is no longer in {@code trackerClients}.
 *
 * @param trackerClients    the current tracker clients of the service
 * @param partitionState    the partition state to update in place
 * @param clusterAvgLatency the cluster average latency used for the health score calculation
 * @param clusterUpdated    whether the uri list changed, i.e. stale clients have to be removed
 * @param lastCallStatsMap  the latest call stats per tracker client
 */
private void updateBaseHealthScoreAndState(Set<TrackerClient> trackerClients, PartitionState partitionState, long clusterAvgLatency, boolean clusterUpdated, Map<TrackerClient, CallTracker.CallStats> lastCallStatsMap) {
    // The base health score comes first; quarantine and recovery handling may override it later
    calculateBaseHealthScore(trackerClients, partitionState, clusterAvgLatency, lastCallStatsMap);

    Map<TrackerClient, TrackerClientState> stateByClient = partitionState.getTrackerClientStateMap();
    if (!clusterUpdated) {
        return;
    }

    // Collect the stale clients into a list first, then remove them from the partition state
    List<TrackerClient> staleClients = stateByClient.keySet()
        .stream()
        .filter(knownClient -> !trackerClients.contains(knownClient))
        .collect(Collectors.toList());
    staleClients.forEach(partitionState::removeTrackerClient);
}
Also used : Logger(org.slf4j.Logger) ReentrantLock(java.util.concurrent.locks.ReentrantLock) D2RelativeStrategyProperties(com.linkedin.d2.D2RelativeStrategyProperties) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) HashMap(java.util.HashMap) Collectors(java.util.stream.Collectors) PartitionStateUpdateListener(com.linkedin.d2.balancer.strategies.PartitionStateUpdateListener) ConcurrentMap(java.util.concurrent.ConcurrentMap) TimeUnit(java.util.concurrent.TimeUnit) ErrorType(com.linkedin.util.degrader.ErrorType) List(java.util.List) Lock(java.util.concurrent.locks.Lock) CallTracker(com.linkedin.util.degrader.CallTracker) Map(java.util.Map) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) DelegatingRingFactory(com.linkedin.d2.balancer.strategies.DelegatingRingFactory) Ring(com.linkedin.d2.balancer.util.hashing.Ring) URI(java.net.URI) TrackerClient(com.linkedin.d2.balancer.clients.TrackerClient) TrackerClient(com.linkedin.d2.balancer.clients.TrackerClient)

Aggregations

TrackerClient (com.linkedin.d2.balancer.clients.TrackerClient)4 CallTracker (com.linkedin.util.degrader.CallTracker)2 HashMap (java.util.HashMap)2 D2RelativeStrategyProperties (com.linkedin.d2.D2RelativeStrategyProperties)1 DelegatingRingFactory (com.linkedin.d2.balancer.strategies.DelegatingRingFactory)1 LoadBalancerQuarantine (com.linkedin.d2.balancer.strategies.LoadBalancerQuarantine)1 PartitionStateUpdateListener (com.linkedin.d2.balancer.strategies.PartitionStateUpdateListener)1 PartitionState (com.linkedin.d2.balancer.strategies.relative.PartitionState)1 RelativeLoadBalancerStrategy (com.linkedin.d2.balancer.strategies.relative.RelativeLoadBalancerStrategy)1 TrackerClientState (com.linkedin.d2.balancer.strategies.relative.TrackerClientState)1 Ring (com.linkedin.d2.balancer.util.hashing.Ring)1 ErrorType (com.linkedin.util.degrader.ErrorType)1 URI (java.net.URI)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 ConcurrentMap (java.util.concurrent.ConcurrentMap)1 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)1 TimeUnit (java.util.concurrent.TimeUnit)1