use of com.linkedin.d2.balancer.util.healthcheck.HealthCheck in project rest.li by linkedin.
the class DegraderLoadBalancerStrategyV3 method checkQuarantineState.
/**
 * Decides whether D2 quarantine can be enabled by health checking up to
 * {@code MAX_HOSTS_TO_CHECK_QUARANTINE} of the given trackerClients once. Quarantine is
 * enabled as soon as at least one of the checked clients reports success.
 *
 * The reasons for this checking include:
 *
 * . The default method "OPTIONS" is not always enabled by the service
 * . The user can config any path/method for checking. We do a sanity check to
 *   make sure the configuration is correct, and the service/host responds in time.
 *   Otherwise the host can be kept in quarantine forever if we blindly enable it.
 *
 * This check can also warm up the R2 connection pool by making a connection to
 * each checked trackerClient. However since the check happens before any real requests
 * are sent, it generally takes much longer to get the results, due to different warming up
 * requirements. Therefore the checking will be retried in the next update if the current
 * check fails.
 *
 * This function is supposed to be protected by the update lock.
 *
 * @param clients the current tracker client updaters; only the first
 *                {@code MAX_HOSTS_TO_CHECK_QUARANTINE} are health checked, and any cached
 *                health check client whose updater is not in this list is discarded
 * @param config supplies the health check operations, path, method, service path, clock and
 *               quarantine latency used to build missing health check clients
 */
private void checkQuarantineState(List<TrackerClientUpdater> clients, DegraderLoadBalancerStrategyConfig config) {
  Callback<None> healthCheckCallback = new Callback<None>() {
    @Override
    public void onError(Throwable e) {
      // Do nothing as the quarantine is disabled by default
      _rateLimitedLogger.error("Error to enable quarantine. Health checking failed for service {}: ", _state._serviceName, e);
    }

    @Override
    public void onSuccess(None result) {
      // compareAndSet guarantees the message is logged only by the callback that flips the flag
      if (_state._enableQuarantine.compareAndSet(false, true)) {
        _log.info("Quarantine is enabled for service {}", _state._serviceName);
      }
    }
  };

  // Ideally we would like to health check all the service hosts (ie all TrackerClients) because
  // this can help to warm up the R2 connections to the service hosts, thus speed up the initial access
  // speed when d2client starts to access those hosts. However this can expose/expedite the problem that
  // the d2client host needs too many connections or file handles to all the hosts, when the downstream
  // services have a large amount of hosts. Before that problem is addressed, we limit the number of hosts
  // for pre-healthchecking to a small number
  clients.stream().limit(MAX_HOSTS_TO_CHECK_QUARANTINE).forEach(client -> {
    try {
      HealthCheck healthCheckClient = _state.getHealthCheckClient(client);
      if (healthCheckClient == null) {
        // No cached client for this updater yet: build one from the config and cache it
        healthCheckClient = new HealthCheckClientBuilder()
            .setHealthCheckOperations(config.getHealthCheckOperations())
            .setHealthCheckPath(config.getHealthCheckPath())
            .setServicePath(config.getServicePath())
            .setClock(config.getClock())
            .setLatency(config.getQuarantineLatency())
            .setMethod(config.getHealthCheckMethod())
            .setClient(client.getTrackerClient())
            .build();
        _state.putHealthCheckClient(client, healthCheckClient);
      }
      healthCheckClient.checkHealth(healthCheckCallback);
    } catch (URISyntaxException e) {
      _log.error("Error to build healthCheckClient ", e);
    }
  });

  // Also remove the entries whose trackerClientUpdaters do not exist anymore. The removal goes
  // through the key set view (removeIf) instead of calling remove() on the map while iterating
  // over its keys, which is not safe on fail-fast map implementations.
  _state._healthCheckMap.keySet().removeIf(updater -> !clients.contains(updater));
}
use of com.linkedin.d2.balancer.util.healthcheck.HealthCheck in project rest.li by linkedin.
the class QuarantineManager method preCheckQuarantineState.
/**
 * Runs a health check against a bounded number of hosts in the partition before quarantine
 * is enabled. At most {@code MAX_HOSTS_TO_PRE_CHECK_QUARANTINE} tracker clients are checked,
 * to prevent too many connections being made at once when the downstream cluster is large.
 * A health check client is built and stored in the partition's health check map for every
 * checked host that does not have one yet.
 *
 * @param partitionState The state of the partition
 * @param quarantineLatency The quarantine latency threshold
 */
private void preCheckQuarantineState(PartitionState partitionState, long quarantineLatency) {
  // One callback instance is shared by every health check issued below
  Callback<None> sharedCallback = new HealthCheckCallBack<>();

  partitionState.getTrackerClients()
      .stream()
      .limit(MAX_HOSTS_TO_PRE_CHECK_QUARANTINE)
      .forEach(client -> {
        try {
          HealthCheck cached = partitionState.getHealthCheckMap().get(client);
          if (cached != null) {
            // Reuse the health check client that was already created for this host
            cached.checkHealth(sharedCallback);
            return;
          }

          // First time this host is seen: build its health check client and remember it
          HealthCheck created = new HealthCheckClientBuilder()
              .setHealthCheckOperations(_healthCheckOperations)
              .setHealthCheckPath(_quarantineProperties.getHealthCheckPath())
              .setServicePath(_servicePath)
              .setClock(_clock)
              .setLatency(quarantineLatency)
              .setMethod(_quarantineProperties.getHealthCheckMethod().toString())
              .setClient(client)
              .build();
          partitionState.getHealthCheckMap().put(client, created);
          created.checkHealth(sharedCallback);
        } catch (URISyntaxException e) {
          LOG.error("Error to build healthCheckClient ", e);
        }
      });
}
use of com.linkedin.d2.balancer.util.healthcheck.HealthCheck in project rest.li by linkedin.
the class DegraderLoadBalancerTest method testHealthCheckRequestContextNotShared.
/**
 * Issues two health checks through the same health check client and verifies that the
 * request context and wire attributes captured by the test transport client are equal
 * in content but are distinct instances for each request.
 */
@Test
public void testHealthCheckRequestContextNotShared() {
  final DegraderLoadBalancerStrategyConfig strategyConfig = new DegraderLoadBalancerStrategyConfig(1000);
  final TestClock testClock = new TestClock();
  final DegraderImpl.Config emptyDegraderConfig = DegraderConfigFactory.toDegraderConfig(Collections.emptyMap());

  // A single tracker client is enough; its transport client records what each request carried
  final List<DegraderTrackerClient> createdClients = createTrackerClient(1, testClock, emptyDegraderConfig);
  final DegraderTrackerClient onlyClient = createdClients.get(0);
  final TestLoadBalancerClient recordingTransport = (TestLoadBalancerClient) onlyClient.getTransportClient();

  final DegraderTrackerClientUpdater updater = new DegraderTrackerClientUpdater(onlyClient, DEFAULT_PARTITION_ID);
  final LoadBalancerQuarantine loadBalancerQuarantine =
      new LoadBalancerQuarantine(updater.getTrackerClient(), strategyConfig, "abc0");
  final TransportHealthCheck transportHealthCheck =
      (TransportHealthCheck) loadBalancerQuarantine.getHealthCheckClient();

  // First request: capture what the transport client saw
  transportHealthCheck.checkHealth(Callbacks.empty());
  final RequestContext firstContext = recordingTransport._requestContext;
  final Map<String, String> firstWireAttrs = recordingTransport._wireAttrs;

  // Second request: capture again after the transport client has been invoked once more
  transportHealthCheck.checkHealth(Callbacks.empty());
  final RequestContext secondContext = recordingTransport._requestContext;
  final Map<String, String> secondWireAttrs = recordingTransport._wireAttrs;

  // Same content, but never the same object
  Assert.assertEquals(firstContext, secondContext);
  Assert.assertNotSame(firstContext, secondContext, "RequestContext should not be shared between requests.");
  Assert.assertEquals(firstWireAttrs, secondWireAttrs);
  Assert.assertNotSame(firstWireAttrs, secondWireAttrs, "Wire attributes should not be shared between requests.");
}
use of com.linkedin.d2.balancer.util.healthcheck.HealthCheck in project rest.li by linkedin.
the class DegraderLoadBalancerTest method DegraderLoadBalancerQuarantineTest.
/**
 * DegraderLoadBalancerQuarantineTest
 *
 * Checks the URI and HTTP method of the health check request built by
 * {@code LoadBalancerQuarantine} for three configurations: the default strategy config,
 * a config with method "GET" and path "/test/admin", and a config with method "OPTIONS"
 * and no health check path.
 */
@Test(groups = { "small", "back-end" })
public void DegraderLoadBalancerQuarantineTest() {
  DegraderLoadBalancerStrategyConfig config = new DegraderLoadBalancerStrategyConfig(1000);
  TestClock clock = new TestClock();
  DegraderImpl.Config degraderConfig = DegraderConfigFactory.toDegraderConfig(Collections.emptyMap());
  List<DegraderTrackerClient> trackerClients = createTrackerClient(3, clock, degraderConfig);

  // Case 1: default strategy config, first tracker client
  DegraderTrackerClientUpdater degraderTrackerClientUpdater =
      new DegraderTrackerClientUpdater(trackerClients.get(0), DEFAULT_PARTITION_ID);
  LoadBalancerQuarantine quarantine =
      new LoadBalancerQuarantine(degraderTrackerClientUpdater.getTrackerClient(), config, "abc0");
  TransportHealthCheck healthCheck = (TransportHealthCheck) quarantine.getHealthCheckClient();
  RestRequest restRequest = healthCheck.getRestRequest();
  // assertEquals (actual, expected) reports both values on failure, unlike assertTrue(a.equals(b))
  Assert.assertEquals(restRequest.getURI(), URI.create("http://test.linkedin.com:10010/abc0"));
  Assert.assertEquals(restRequest.getMethod(), "OPTIONS");

  // Case 2: explicit health check method and path, second tracker client
  DegraderLoadBalancerStrategyConfig config1 = quarantineConfigWithHealthCheck("GET", "/test/admin");
  DegraderTrackerClientUpdater updater1 =
      new DegraderTrackerClientUpdater(trackerClients.get(1), DEFAULT_PARTITION_ID);
  quarantine = new LoadBalancerQuarantine(updater1.getTrackerClient(), config1, "abc0");
  healthCheck = (TransportHealthCheck) quarantine.getHealthCheckClient();
  restRequest = healthCheck.getRestRequest();
  Assert.assertEquals(restRequest.getURI(), URI.create("http://test.linkedin.com:10010/test/admin"));
  Assert.assertEquals(restRequest.getMethod(), "GET");

  // Case 3: explicit "OPTIONS" method with no health check path, third tracker client
  DegraderLoadBalancerStrategyConfig config2 = quarantineConfigWithHealthCheck("OPTIONS", null);
  DegraderTrackerClientUpdater updater2 =
      new DegraderTrackerClientUpdater(trackerClients.get(2), DEFAULT_PARTITION_ID);
  quarantine = new LoadBalancerQuarantine(updater2.getTrackerClient(), config2, "abc0");
  healthCheck = (TransportHealthCheck) quarantine.getHealthCheckClient();
  restRequest = healthCheck.getRestRequest();
  Assert.assertEquals(restRequest.getURI(), URI.create("http://test.linkedin.com:10010/abc2"));
  Assert.assertEquals(restRequest.getMethod(), "OPTIONS");
}

/**
 * Builds the strategy config used by the quarantine test cases; every argument is fixed
 * except the health check method and path.
 *
 * @param healthCheckMethod the health check method passed to the config, e.g. "GET"
 * @param healthCheckPath the health check path passed to the config, may be {@code null}
 * @return the strategy config carrying the given health check method and path
 */
private static DegraderLoadBalancerStrategyConfig quarantineConfigWithHealthCheck(String healthCheckMethod,
    String healthCheckPath) {
  return new DegraderLoadBalancerStrategyConfig(
      1000,
      DegraderLoadBalancerStrategyConfig.DEFAULT_UPDATE_ONLY_AT_INTERVAL,
      100,
      null,
      Collections.<String, Object>emptyMap(),
      DegraderLoadBalancerStrategyConfig.DEFAULT_CLOCK,
      DegraderLoadBalancerStrategyConfig.DEFAULT_INITIAL_RECOVERY_LEVEL,
      DegraderLoadBalancerStrategyConfig.DEFAULT_RAMP_FACTOR,
      DegraderLoadBalancerStrategyConfig.DEFAULT_HIGH_WATER_MARK,
      DegraderLoadBalancerStrategyConfig.DEFAULT_LOW_WATER_MARK,
      DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_UP,
      DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_DOWN,
      DegraderLoadBalancerStrategyConfig.DEFAULT_CLUSTER_MIN_CALL_COUNT_HIGH_WATER_MARK,
      DegraderLoadBalancerStrategyConfig.DEFAULT_CLUSTER_MIN_CALL_COUNT_LOW_WATER_MARK,
      DegraderLoadBalancerStrategyConfig.DEFAULT_HASHRING_POINT_CLEANUP_RATE,
      null,
      DegraderLoadBalancerStrategyConfig.DEFAULT_NUM_PROBES,
      DegraderLoadBalancerStrategyConfig.DEFAULT_POINTS_PER_HOST,
      DegraderLoadBalancerStrategyConfig.DEFAULT_BOUNDED_LOAD_BALANCING_FACTOR,
      null,
      DegraderLoadBalancerStrategyConfig.DEFAULT_QUARANTINE_MAXPERCENT,
      null,
      null,
      healthCheckMethod,
      healthCheckPath,
      DegraderImpl.DEFAULT_LOW_LATENCY,
      null,
      DegraderLoadBalancerStrategyConfig.DEFAULT_LOW_EVENT_EMITTING_INTERVAL,
      DegraderLoadBalancerStrategyConfig.DEFAULT_HIGH_EVENT_EMITTING_INTERVAL,
      DegraderLoadBalancerStrategyConfig.DEFAULT_CLUSTER_NAME);
}
use of com.linkedin.d2.balancer.util.healthcheck.HealthCheck in project rest.li by linkedin.
the class DegraderLoadBalancerStrategyV3 method checkQuarantineState.
/**
 * Decides whether D2 quarantine can be enabled by health checking up to
 * {@code MAX_HOSTS_TO_CHECK_QUARANTINE} of the given trackerClients once. Quarantine is
 * enabled as soon as at least one of the checked clients reports success.
 *
 * The reasons for this checking include:
 *
 * . The default method "OPTIONS" is not always enabled by the service
 * . The user can config any path/method for checking. We do a sanity check to
 *   make sure the configuration is correct, and the service/host responds in time.
 *   Otherwise the host can be kept in quarantine forever if we blindly enable it.
 *
 * This check can also warm up the R2 connection pool by making a connection to
 * each checked trackerClient. However since the check happens before any real requests
 * are sent, it generally takes much longer to get the results, due to different warming up
 * requirements. Therefore the checking will be retried in the next update if the current
 * check fails.
 *
 * This function is supposed to be protected by the update lock.
 *
 * @param clients the current tracker client updaters; only the first
 *                {@code MAX_HOSTS_TO_CHECK_QUARANTINE} are health checked, and any cached
 *                health check client whose updater is not in this list is discarded
 * @param config supplies the health check operations, path, method, service path, clock and
 *               quarantine latency used to build missing health check clients
 */
private void checkQuarantineState(List<DegraderTrackerClientUpdater> clients, DegraderLoadBalancerStrategyConfig config) {
  Callback<None> healthCheckCallback = new Callback<None>() {
    @Override
    public void onError(Throwable e) {
      // Do nothing as the quarantine is disabled by default
      if (!_state.isQuarantineEnabled()) {
        // No need to log the error message if quarantine is already enabled
        _rateLimitedLogger.warn("Error enabling quarantine. Health checking failed for service {}: ", _state.getServiceName(), e);
      }
    }

    @Override
    public void onSuccess(None result) {
      // Log only when this callback is the one that actually turned quarantine on
      if (_state.tryEnableQuarantine()) {
        _log.info("Quarantine is enabled for service {}", _state.getServiceName());
      }
    }
  };

  // Ideally we would like to health check all the service hosts (ie all TrackerClients) because
  // this can help to warm up the R2 connections to the service hosts, thus speed up the initial access
  // speed when d2client starts to access those hosts. However this can expose/expedite the problem that
  // the d2client host needs too many connections or file handles to all the hosts, when the downstream
  // services have a large amount of hosts. Before that problem is addressed, we limit the number of hosts
  // for pre-healthchecking to a small number
  clients.stream().limit(MAX_HOSTS_TO_CHECK_QUARANTINE).forEach(client -> {
    try {
      HealthCheck healthCheckClient = _state.getHealthCheckMap().get(client);
      if (healthCheckClient == null) {
        // create a new client if it does not exist yet, and cache it for later checks
        healthCheckClient = new HealthCheckClientBuilder()
            .setHealthCheckOperations(config.getHealthCheckOperations())
            .setHealthCheckPath(config.getHealthCheckPath())
            .setServicePath(config.getServicePath())
            .setClock(config.getClock())
            .setLatency(config.getQuarantineLatency())
            .setMethod(config.getHealthCheckMethod())
            .setClient(client.getTrackerClient())
            .build();
        _state.putHealthCheckClient(client, healthCheckClient);
      }
      healthCheckClient.checkHealth(healthCheckCallback);
    } catch (URISyntaxException e) {
      _log.error("Error to build healthCheckClient ", e);
    }
  });

  // Also remove the entries whose trackerClientUpdaters do not exist anymore. The removal goes
  // through the key set view (removeIf) instead of calling remove() on the map while iterating
  // over its keys, which is not safe on fail-fast map implementations.
  _state.getHealthCheckMap().keySet().removeIf(updater -> !clients.contains(updater));
}
Aggregations