Use of com.linkedin.d2.balancer.util.healthcheck.HealthCheckClientBuilder in project rest.li by LinkedIn.
The class DegraderLoadBalancerStrategyV3, method checkQuarantineState.
/**
 * checkQuarantineState decides whether D2Quarantine can be enabled by health
 * checking all the trackerClients once. It enables quarantine only if at least one of the
 * clients returns success for the check.
 *
 * The reasons for this check include:
 *
 * . The default method "OPTIONS" is not always enabled by the service.
 * . The user can configure any path/method for the check. We do a sanity check to
 *   make sure the configuration is correct and that the service/host responds in time.
 *   Otherwise a host could be kept in quarantine forever if we blindly enabled it.
 *
 * This check can also warm up the R2 connection pool by making a connection to
 * each trackerClient. However, since the check happens before any real requests are sent,
 * it generally takes much longer to get the results, due to the hosts' different warm-up
 * requirements. Therefore the check is retried in the next update if the current check
 * fails.
 *
 * This function is supposed to be protected by the update lock.
 *
 * @param clients the trackerClient updaters to health check
 * @param config the load balancer strategy configuration
*/
private void checkQuarantineState(List<TrackerClientUpdater> clients, DegraderLoadBalancerStrategyConfig config) {
  Callback<None> healthCheckCallback = new Callback<None>() {
    @Override
    public void onError(Throwable e) {
      // Do nothing, since quarantine is disabled by default
      _rateLimitedLogger.error("Error enabling quarantine. Health check failed for service {}: ", _state._serviceName, e);
    }

    @Override
    public void onSuccess(None result) {
      if (_state._enableQuarantine.compareAndSet(false, true)) {
        _log.info("Quarantine is enabled for service {}", _state._serviceName);
      }
    }
  };
  // Ideally we would health check all the service hosts (i.e. all TrackerClients), because
  // doing so warms up the R2 connections to the service hosts and therefore speeds up the
  // initial accesses once the d2client starts sending requests to those hosts. However, it
  // can also expose or exacerbate the problem that the d2client host needs too many
  // connections or file handles when the downstream services have a large number of hosts.
  // Until that problem is addressed, we limit the number of hosts to pre-health-check to a
  // small number.
  clients.stream().limit(MAX_HOSTS_TO_CHECK_QUARANTINE).forEach(client -> {
    try {
      HealthCheck healthCheckClient = _state.getHealthCheckClient(client);
      if (healthCheckClient == null) {
        healthCheckClient = new HealthCheckClientBuilder()
            .setHealthCheckOperations(config.getHealthCheckOperations())
            .setHealthCheckPath(config.getHealthCheckPath())
            .setServicePath(config.getServicePath())
            .setClock(config.getClock())
            .setLatency(config.getQuarantineLatency())
            .setMethod(config.getHealthCheckMethod())
            .setClient(client.getTrackerClient())
            .build();
        _state.putHealthCheckClient(client, healthCheckClient);
      }
      healthCheckClient.checkHealth(healthCheckCallback);
    } catch (URISyntaxException e) {
      _log.error("Error building healthCheckClient ", e);
    }
  });
  // Also remove entries whose corresponding trackerClientUpdaters no longer exist
  for (TrackerClientUpdater client : _state._healthCheckMap.keySet()) {
    if (!clients.contains(client)) {
      _state._healthCheckMap.remove(client);
    }
  }
}
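For reference, the builder wiring above can be exercised on its own. The following is a minimal sketch, not taken from the rest.li source: the healthCheckOperations and trackerClient variables are assumed to be supplied by the caller, and the path, method, and latency values are placeholder assumptions rather than real configuration.

// Minimal sketch of building and invoking a HealthCheck client by hand.
// All concrete values here (paths, method, latency) are illustrative assumptions.
HealthCheck healthCheck = new HealthCheckClientBuilder()
    .setHealthCheckOperations(healthCheckOperations) // assumed to come from the strategy config
    .setHealthCheckPath("/admin")                    // hypothetical health check path
    .setServicePath("/myService")                    // hypothetical service path
    .setClock(SystemClock.instance())
    .setLatency(100)                                 // hypothetical latency threshold, in ms
    .setMethod("OPTIONS")                            // the default method mentioned in the javadoc
    .setClient(trackerClient)                        // assumed TrackerClient for the host to probe
    .build();                                        // throws URISyntaxException on a bad path

healthCheck.checkHealth(new Callback<None>() {
  @Override
  public void onError(Throwable e) {
    // The host did not answer successfully within the latency bound; quarantine stays disabled.
  }

  @Override
  public void onSuccess(None result) {
    // The configured path/method works and the host answered in time.
  }
});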
Use of com.linkedin.d2.balancer.util.healthcheck.HealthCheckClientBuilder in project rest.li by LinkedIn.
The class QuarantineManager, method preCheckQuarantineState.
/**
 * Pre-check whether quarantine can be enabled before enabling it directly.
 * We limit the number of server hosts so that we do not open too many connections at once
 * when the downstream cluster is large.
 *
 * @param partitionState The state of the partition
 * @param quarantineLatency The quarantine latency threshold
*/
private void preCheckQuarantineState(PartitionState partitionState, long quarantineLatency) {
  Callback<None> healthCheckCallback = new HealthCheckCallBack<>();
  partitionState.getTrackerClients().stream().limit(MAX_HOSTS_TO_PRE_CHECK_QUARANTINE).forEach(client -> {
    try {
      HealthCheck healthCheckClient = partitionState.getHealthCheckMap().get(client);
      if (healthCheckClient == null) {
        healthCheckClient = new HealthCheckClientBuilder()
            .setHealthCheckOperations(_healthCheckOperations)
            .setHealthCheckPath(_quarantineProperties.getHealthCheckPath())
            .setServicePath(_servicePath)
            .setClock(_clock)
            .setLatency(quarantineLatency)
            .setMethod(_quarantineProperties.getHealthCheckMethod().toString())
            .setClient(client)
            .build();
        partitionState.getHealthCheckMap().put(client, healthCheckClient);
      }
      healthCheckClient.checkHealth(healthCheckCallback);
    } catch (URISyntaxException e) {
      LOG.error("Error building healthCheckClient ", e);
    }
  });
}
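The HealthCheckCallBack instantiated above is defined elsewhere in QuarantineManager and is not part of this excerpt. A plausible minimal sketch follows; the _preCheckSucceeded field (an AtomicBoolean on the manager) and the log wording are assumptions made for illustration, not the actual implementation.

// Hypothetical sketch of the callback used by preCheckQuarantineState; the field
// and message names are assumptions, not the real QuarantineManager code.
private class HealthCheckCallBack<T> implements Callback<T> {
  @Override
  public void onError(Throwable e) {
    // Quarantine is off by default, so a failed pre-check only needs to be logged.
    LOG.warn("Quarantine pre-check health request failed", e);
  }

  @Override
  public void onSuccess(T result) {
    // One successful response shows the configured path/method works, which is
    // the signal preCheckQuarantineState is waiting for.
    if (_preCheckSucceeded.compareAndSet(false, true)) {
      LOG.info("Quarantine pre-check passed; quarantine can be enabled");
    }
  }
}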
Use of com.linkedin.d2.balancer.util.healthcheck.HealthCheckClientBuilder in project rest.li by LinkedIn.
The class DegraderLoadBalancerStrategyV3, method checkQuarantineState.
/**
 * checkQuarantineState decides whether D2Quarantine can be enabled by health
 * checking all the trackerClients once. It enables quarantine only if at least one of the
 * clients returns success for the check.
 *
 * The reasons for this check include:
 *
 * . The default method "OPTIONS" is not always enabled by the service.
 * . The user can configure any path/method for the check. We do a sanity check to
 *   make sure the configuration is correct and that the service/host responds in time.
 *   Otherwise a host could be kept in quarantine forever if we blindly enabled it.
 *
 * This check can also warm up the R2 connection pool by making a connection to
 * each trackerClient. However, since the check happens before any real requests are sent,
 * it generally takes much longer to get the results, due to the hosts' different warm-up
 * requirements. Therefore the check is retried in the next update if the current check
 * fails.
 *
 * This function is supposed to be protected by the update lock.
 *
 * @param clients the trackerClient updaters to health check
 * @param config the load balancer strategy configuration
*/
private void checkQuarantineState(List<DegraderTrackerClientUpdater> clients, DegraderLoadBalancerStrategyConfig config) {
  Callback<None> healthCheckCallback = new Callback<None>() {
    @Override
    public void onError(Throwable e) {
      // Do nothing, since quarantine is disabled by default; there is also no need
      // to log the error if quarantine is already enabled
      if (!_state.isQuarantineEnabled()) {
        _rateLimitedLogger.warn("Error enabling quarantine. Health check failed for service {}: ", _state.getServiceName(), e);
      }
    }

    @Override
    public void onSuccess(None result) {
      if (_state.tryEnableQuarantine()) {
        _log.info("Quarantine is enabled for service {}", _state.getServiceName());
      }
    }
  };
  // Ideally we would health check all the service hosts (i.e. all TrackerClients), because
  // doing so warms up the R2 connections to the service hosts and therefore speeds up the
  // initial accesses once the d2client starts sending requests to those hosts. However, it
  // can also expose or exacerbate the problem that the d2client host needs too many
  // connections or file handles when the downstream services have a large number of hosts.
  // Until that problem is addressed, we limit the number of hosts to pre-health-check to a
  // small number.
  clients.stream().limit(MAX_HOSTS_TO_CHECK_QUARANTINE).forEach(client -> {
    try {
      HealthCheck healthCheckClient = _state.getHealthCheckMap().get(client);
      if (healthCheckClient == null) {
        // create a new client if one does not exist yet
        healthCheckClient = new HealthCheckClientBuilder()
            .setHealthCheckOperations(config.getHealthCheckOperations())
            .setHealthCheckPath(config.getHealthCheckPath())
            .setServicePath(config.getServicePath())
            .setClock(config.getClock())
            .setLatency(config.getQuarantineLatency())
            .setMethod(config.getHealthCheckMethod())
            .setClient(client.getTrackerClient())
            .build();
        _state.putHealthCheckClient(client, healthCheckClient);
      }
      healthCheckClient.checkHealth(healthCheckCallback);
    } catch (URISyntaxException e) {
      _log.error("Error building healthCheckClient ", e);
    }
  });
  // Also remove entries whose corresponding trackerClientUpdaters no longer exist
  for (DegraderTrackerClientUpdater client : _state.getHealthCheckMap().keySet()) {
    if (!clients.contains(client)) {
      _state.getHealthCheckMap().remove(client);
    }
  }
}
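The cleanup loop at the end of both checkQuarantineState variants removes stale entries while iterating over the map's key set, which is safe only if the health-check map tolerates concurrent removal (e.g. a ConcurrentHashMap; a plain HashMap would throw ConcurrentModificationException). Under that same assumption, the loop can be written more compactly with removeIf:

// Equivalent stale-entry cleanup in a single call; behaves the same as the loop
// above when the map is a ConcurrentHashMap, whose key-set view supports removal.
_state.getHealthCheckMap().keySet().removeIf(client -> !clients.contains(client));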