Use of com.linkedin.d2.balancer.strategies.relative.PartitionState in the project rest.li by LinkedIn.
Class DegraderLoadBalancerStrategyV3, method updatePartitionState.
/**
 * Recomputes the load balancer state of a single partition and, once the new state has been
 * stored on the partition, applies the changes recorded for each tracker client.
 *
 * <p>When the config requests quarantine (max percent above zero) but quarantine is not enabled
 * yet, a {@code checkQuarantineState} task is handed to the executor, as long as the incremented
 * retry counter has not exceeded {@code MAX_RETRIES_TO_CHECK_QUARANTINE}.
 *
 * @param clusterGenerationId generation id forwarded to {@code doUpdatePartitionState}
 * @param partition partition whose state is read and then replaced
 * @param trackerClients clients that get wrapped in updaters for this partition
 * @param config strategy config used by the quarantine check and the state update
 */
private void updatePartitionState(long clusterGenerationId, Partition partition, List<TrackerClient> trackerClients, DegraderLoadBalancerStrategyConfig config) {
  PartitionDegraderLoadBalancerState currentState = partition.getState();

  // Wrap each client so that changes are recorded on the updater rather than on the client.
  List<TrackerClientUpdater> updaters = new ArrayList<>();
  trackerClients.forEach(client -> updaters.add(new TrackerClientUpdater(client, partition.getId())));

  boolean quarantineEnabled = _state._enableQuarantine.get();

  // Probe the hosts to see whether quarantine can be turned on. The retry counter is only
  // incremented when quarantine is configured but not yet enabled (short-circuit evaluation).
  if (config.getQuarantineMaxPercent() > 0.0
      && !quarantineEnabled
      && _state._retryTimesForQuarantine.incrementAndGet() <= MAX_RETRIES_TO_CHECK_QUARANTINE) {
    _config.getExecutorService().submit(() -> checkQuarantineState(updaters, config));
  }

  // doUpdatePartitionState has no side effects on _state or trackerClients;
  // every change meant for a tracker client is recorded in the updaters instead.
  PartitionDegraderLoadBalancerState updatedState =
      doUpdatePartitionState(clusterGenerationId, partition.getId(), currentState, config, updaters, quarantineEnabled);
  partition.setState(updatedState);

  // Reached only if the state update above did not throw: now apply the recorded changes.
  updaters.forEach(updater -> updater.update());
}
Use of com.linkedin.d2.balancer.strategies.relative.PartitionState in the project rest.li by LinkedIn.
Class DegraderLoadBalancerStrategyV3, method updatePartitionState.
/**
 * Recomputes the load balancer state of a single partition and, once the new state has been
 * stored on the partition, applies the changes recorded for each degrader tracker client.
 *
 * <p>When the config requests quarantine (max percent above zero) but quarantine is not enabled
 * yet, a {@code checkQuarantineState} task is handed to the executor, as long as the incremented
 * retry counter has not exceeded {@code MAX_RETRIES_TO_CHECK_QUARANTINE}.
 *
 * @param clusterGenerationId generation id forwarded to {@code doUpdatePartitionState}
 * @param partition partition whose state is read and then replaced
 * @param trackerClients clients that get wrapped in updaters for this partition
 * @param config strategy config used by the quarantine check and the state update
 */
private void updatePartitionState(long clusterGenerationId, Partition partition, List<DegraderTrackerClient> trackerClients, DegraderLoadBalancerStrategyConfig config) {
  PartitionDegraderLoadBalancerState currentState = partition.getState();

  // Wrap each client so that changes are recorded on the updater rather than on the client.
  List<DegraderTrackerClientUpdater> updaters = new ArrayList<>();
  trackerClients.forEach(client -> updaters.add(new DegraderTrackerClientUpdater(client, partition.getId())));

  boolean quarantineEnabled = _state.isQuarantineEnabled();

  // Probe the hosts to see whether quarantine can be turned on. The retry counter is only
  // incremented when quarantine is configured but not yet enabled (short-circuit evaluation).
  if (config.getQuarantineMaxPercent() > 0.0
      && !quarantineEnabled
      && _state.incrementAndGetQuarantineRetries() <= MAX_RETRIES_TO_CHECK_QUARANTINE) {
    _config.getExecutorService().submit(() -> checkQuarantineState(updaters, config));
  }

  // doUpdatePartitionState has no side effects on _state or trackerClients;
  // every change meant for a tracker client is recorded in the updaters instead.
  PartitionDegraderLoadBalancerState updatedState =
      doUpdatePartitionState(clusterGenerationId, partition.getId(), currentState, config, updaters, quarantineEnabled);
  partition.setState(updatedState);

  // Reached only if the state update above did not throw: now apply the recorded changes.
  updaters.forEach(updater -> updater.update());
}
Use of com.linkedin.d2.balancer.strategies.relative.PartitionState in the project rest.li by LinkedIn.
Class QuarantineManager, method preCheckQuarantineState.
/**
 * Runs a health check against a bounded number of hosts as a pre-check before quarantine is
 * enabled. At most {@code MAX_HOSTS_TO_PRE_CHECK_QUARANTINE} tracker clients are probed, so a
 * large downstream cluster does not trigger too many connections at once.
 *
 * <p>A health check client is looked up in the partition's health check map and is built and
 * stored there when absent. A client whose health check cannot be built is logged and skipped.
 *
 * @param partitionState The state of the partition
 * @param quarantineLatency The quarantine latency threshold handed to newly built health checks
 */
private void preCheckQuarantineState(PartitionState partitionState, long quarantineLatency) {
  // One callback instance is shared by every health check started here.
  Callback<None> sharedCallback = new HealthCheckCallBack<>();
  int visited = 0;
  for (TrackerClient client : partitionState.getTrackerClients()) {
    if (visited >= MAX_HOSTS_TO_PRE_CHECK_QUARANTINE) {
      break;
    }
    visited++;
    try {
      HealthCheck healthCheck = partitionState.getHealthCheckMap().get(client);
      if (healthCheck == null) {
        // No health check known for this client yet: build one and remember it.
        healthCheck = new HealthCheckClientBuilder()
            .setHealthCheckOperations(_healthCheckOperations)
            .setHealthCheckPath(_quarantineProperties.getHealthCheckPath())
            .setServicePath(_servicePath)
            .setClock(_clock)
            .setLatency(quarantineLatency)
            .setMethod(_quarantineProperties.getHealthCheckMethod().toString())
            .setClient(client)
            .build();
        partitionState.getHealthCheckMap().put(client, healthCheck);
      }
      healthCheck.checkHealth(sharedCallback);
    } catch (URISyntaxException e) {
      // Only this client is skipped; the remaining clients are still checked.
      LOG.error("Error to build healthCheckClient ", e);
    }
  }
}
Use of com.linkedin.d2.balancer.strategies.relative.PartitionState in the project rest.li by LinkedIn.
Class QuarantineManager, method checkAndRemoveQuarantine.
/**
 * Re-evaluates the quarantine of every tracker client in the partition.
 * A client whose quarantine reports (via {@code checkUpdateQuarantineState()}) that it no longer
 * applies is removed from the quarantine map, recorded in the quarantine history and placed in
 * the recovery set right away.
 *
 * @param partitionState The current state of the partition
 */
private void checkAndRemoveQuarantine(PartitionState partitionState) {
  Map<TrackerClient, LoadBalancerQuarantine> activeQuarantines = partitionState.getQuarantineMap();
  Map<TrackerClient, LoadBalancerQuarantine> pastQuarantines = partitionState.getQuarantineHistory();
  Set<TrackerClient> recovering = partitionState.getRecoveryTrackerClients();

  for (TrackerClient client : partitionState.getTrackerClients()) {
    LoadBalancerQuarantine entry = activeQuarantines.get(client);
    // Skip clients that are not quarantined or whose quarantine still applies.
    if (entry == null || !entry.checkUpdateQuarantineState()) {
      continue;
    }

    // Evict the client from quarantine and hand it over to recovery.
    activeQuarantines.remove(client);
    pastQuarantines.put(client, entry);
    recovering.add(client);
  }
}
Use of com.linkedin.d2.balancer.strategies.relative.PartitionState in the project rest.li by LinkedIn.
Class StateUpdater, method calculateBaseHealthScore.
/**
 * Updates the health state, health score and call count of every given tracker client, then
 * stores the partition-wide stats (average latency, call count, error count) on
 * {@code partitionState}.
 *
 * <p>For a client that already has a {@link TrackerClientState}:
 * <ul>
 *   <li>if {@code doNotLoadBalance()} is set, it is pinned to HEALTHY with
 *       {@code MAX_HEALTH_SCORE} and does not contribute to the cluster counters;</li>
 *   <li>if {@code isUnhealthy(...)} holds, its score is lowered by the configured down step,
 *       but never below {@code MIN_HEALTH_SCORE};</li>
 *   <li>if its score is below the maximum and {@code isHealthy(...)} holds, its score is raised:
 *       by the slow-start rule when below the slow-start threshold, otherwise by the up step;</li>
 *   <li>otherwise the score is kept and the state becomes NEUTRAL.</li>
 * </ul>
 * A client without a state gets a freshly created one.
 *
 * @param trackerClients clients to update
 * @param partitionState partition state holding the per-client state map and partition stats
 * @param avgClusterLatency average cluster latency, passed to the health predicates and stored
 *                          in the partition stats
 * @param lastCallStatsMap latest call stats per client
 */
private void calculateBaseHealthScore(Set<TrackerClient> trackerClients, PartitionState partitionState, long avgClusterLatency, Map<TrackerClient, CallTracker.CallStats> lastCallStatsMap) {
  Map<TrackerClient, TrackerClientState> trackerClientStateMap = partitionState.getTrackerClientStateMap();
  // Update health score
  long clusterCallCount = 0;
  long clusterErrorCount = 0;
  for (TrackerClient trackerClient : trackerClients) {
    // NOTE(review): assumes lastCallStatsMap has an entry for every already-tracked client;
    // a missing entry would cause a NullPointerException below - confirm against the caller.
    CallTracker.CallStats latestCallStats = lastCallStatsMap.get(trackerClient);
    if (trackerClientStateMap.containsKey(trackerClient)) {
      TrackerClientState trackerClientState = trackerClientStateMap.get(trackerClient);
      int callCount = latestCallStats.getCallCount() + latestCallStats.getOutstandingCount();
      if (trackerClient.doNotLoadBalance()) {
        // Clients excluded from load balancing always stay at full health.
        trackerClientState.setHealthState(TrackerClientState.HealthState.HEALTHY);
        trackerClientState.setHealthScore(MAX_HEALTH_SCORE);
        trackerClientState.setCallCount(callCount);
      } else {
        double errorRate = getErrorRate(latestCallStats.getErrorTypeCounts(), callCount);
        long avgLatency = getAvgHostLatency(latestCallStats);
        double oldHealthScore = trackerClientState.getHealthScore();
        double newHealthScore = oldHealthScore;
        clusterCallCount += callCount;
        // errorRate * callCount is a double. A plain "+=" would silently narrow it to long and
        // truncate toward zero (e.g. 0.9999999999999999 -> 0), so round to the nearest count.
        clusterErrorCount += Math.round(errorRate * callCount);
        if (isUnhealthy(trackerClientState, avgClusterLatency, callCount, avgLatency, errorRate)) {
          // If it is above high latency, we reduce the health score by down step
          newHealthScore = Double.max(trackerClientState.getHealthScore() - _relativeStrategyProperties.getDownStep(), MIN_HEALTH_SCORE);
          trackerClientState.setHealthState(TrackerClientState.HealthState.UNHEALTHY);
          // Guarded so the message is only concatenated when debug logging is on.
          // NOTE(review): "latency" below prints the cluster average, not this host's avgLatency -
          // confirm that this is intended.
          if (LOG.isDebugEnabled()) {
            LOG.debug("Host is unhealthy. Host: " + trackerClient.toString() + ", errorRate: " + errorRate + ", latency: " + avgClusterLatency + ", callCount: " + callCount + ", healthScore dropped from " + trackerClientState.getHealthScore() + " to " + newHealthScore);
          }
        } else if (trackerClientState.getHealthScore() < MAX_HEALTH_SCORE && isHealthy(trackerClientState, avgClusterLatency, callCount, avgLatency, errorRate)) {
          if (oldHealthScore < _relativeStrategyProperties.getSlowStartThreshold()) {
            // Below the slow start threshold: multiply the score by SLOW_START_RECOVERY_FACTOR
            // (capped at the maximum), or restart from SLOW_START_INITIAL_HEALTH_SCORE when the
            // score is not above the minimum.
            newHealthScore = oldHealthScore > MIN_HEALTH_SCORE ? Math.min(MAX_HEALTH_SCORE, SLOW_START_RECOVERY_FACTOR * oldHealthScore) : SLOW_START_INITIAL_HEALTH_SCORE;
          } else {
            // At or above the slow start threshold: increase the health score by up step
            newHealthScore = Math.min(MAX_HEALTH_SCORE, oldHealthScore + _relativeStrategyProperties.getUpStep());
          }
          trackerClientState.setHealthState(TrackerClientState.HealthState.HEALTHY);
        } else {
          trackerClientState.setHealthState(TrackerClientState.HealthState.NEUTRAL);
        }
        trackerClientState.setHealthScore(newHealthScore);
        trackerClientState.setCallCount(callCount);
      }
    } else {
      // Initializing a new client score: full score when slow start or load balancing is
      // disabled for the client, otherwise the configured initial score.
      if (trackerClient.doNotSlowStart() || trackerClient.doNotLoadBalance()) {
        trackerClientStateMap.put(trackerClient, new TrackerClientState(MAX_HEALTH_SCORE, _relativeStrategyProperties.getMinCallCount()));
      } else {
        trackerClientStateMap.put(trackerClient, new TrackerClientState(_relativeStrategyProperties.getInitialHealthScore(), _relativeStrategyProperties.getMinCallCount()));
      }
    }
  }
  partitionState.setPartitionStats(avgClusterLatency, clusterCallCount, clusterErrorCount);
}
Aggregations