Use of com.linkedin.d2.balancer.clients.DegraderTrackerClientImpl in project rest.li by LinkedIn.
The class DegraderLoadBalancerTest, method createTrackerClient.
/**
 * Creates multiple tracker clients that share the same clock.
 * @return the list of tracker clients
 */
private List<DegraderTrackerClient> createTrackerClient(int n, TestClock clock, DegraderImpl.Config config) {
  String baseUri = "http://test.linkedin.com:10010/abc";
  List<DegraderTrackerClient> result = new LinkedList<>();
  for (int i = 0; i < n; i++) {
    URI uri = URI.create(baseUri + i);
    DegraderTrackerClient client = new DegraderTrackerClientImpl(uri, getDefaultPartitionData(1d), new TestLoadBalancerClient(uri), clock, config);
    result.add(client);
  }
  return result;
}
Use of com.linkedin.d2.balancer.clients.DegraderTrackerClientImpl in project rest.li by LinkedIn.
The class DegraderLoadBalancerTest, method testWeightedAndLatencyDegradationBalancingRingWithPartitions.
@Test(groups = { "small", "back-end" }, dataProvider = "consistentHashAlgorithms")
public void testWeightedAndLatencyDegradationBalancingRingWithPartitions(String consistentHashAlgorithm) throws URISyntaxException {
  DegraderLoadBalancerStrategyV3 strategy = getStrategy(consistentHashAlgorithm);
  Map<URI, TrackerClient> clientsForPartition0 = new HashMap<>();
  Map<URI, TrackerClient> clientsForPartition1 = new HashMap<>();
  URI uri1 = URI.create("http://someTestService/someTestUrl");
  URI uri2 = URI.create("http://abcxfweuoeueoueoueoukeueoueoueoueoueouo/2354");
  URI uri3 = URI.create("http://slashdot/blah");
  URI uri4 = URI.create("http://idle/server");
  TestClock clock1 = new TestClock();
  TestClock clock2 = new TestClock();
  TestClock clock3 = new TestClock();
  @SuppressWarnings("serial")
  DegraderTrackerClient client1 = new DegraderTrackerClientImpl(uri1, new HashMap<Integer, PartitionData>() {
    {
      put(0, new PartitionData(1d));
    }
  }, new TestLoadBalancerClient(uri1), clock1, null);
  @SuppressWarnings("serial")
  DegraderTrackerClient client2 = new DegraderTrackerClientImpl(uri2, new HashMap<Integer, PartitionData>() {
    {
      put(0, new PartitionData(0.5d));
      put(1, new PartitionData(0.5d));
    }
  }, new TestLoadBalancerClient(uri2), clock2, null);
  @SuppressWarnings("serial")
  DegraderTrackerClient client3 = new DegraderTrackerClientImpl(uri3, new HashMap<Integer, PartitionData>() {
    {
      put(1, new PartitionData(1d));
    }
  }, new TestLoadBalancerClient(uri3), clock3, null);
  final int partitionId0 = 0;
  clientsForPartition0.put(client1.getUri(), client1);
  clientsForPartition0.put(client2.getUri(), client2);
  final int partitionId1 = 1;
  clientsForPartition1.put(client2.getUri(), client2);
  clientsForPartition1.put(client3.getUri(), client3);
  // force client2 to be disabled
  DegraderControl dcClient2Partition0 = client2.getDegraderControl(0);
  DegraderControl dcClient2Partition1 = client2.getDegraderControl(1);
  dcClient2Partition0.setOverrideMinCallCount(1);
  dcClient2Partition0.setMinCallCount(1);
  dcClient2Partition0.setMaxDropRate(1d);
  dcClient2Partition0.setUpStep(0.4d);
  dcClient2Partition0.setHighErrorRate(0);
  dcClient2Partition1.setOverrideMinCallCount(1);
  dcClient2Partition1.setMinCallCount(1);
  dcClient2Partition1.setMaxDropRate(1d);
  dcClient2Partition1.setUpStep(0.4d);
  dcClient2Partition1.setHighErrorRate(0);
  CallCompletion cc = client2.getCallTracker().startCall();
  clock2.addMs(1);
  cc.endCallWithError();
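  // Explanatory note: a single errored call is enough to trip the degrader here,
  // since minCallCount is 1 and highErrorRate is 0; on the next interval client2's
  // drop rate steps up by upStep (0.4) in both partitions.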
  // force client3 to be disabled
  DegraderControl dcClient3Partition1 = client3.getDegraderControl(1);
  dcClient3Partition1.setOverrideMinCallCount(1);
  dcClient3Partition1.setMinCallCount(1);
  dcClient3Partition1.setMaxDropRate(1d);
  dcClient3Partition1.setHighErrorRate(0);
  dcClient3Partition1.setUpStep(0.2d);
  CallCompletion cc3 = client3.getCallTracker().startCall();
  clock3.addMs(1);
  cc3.endCallWithError();
  clock1.addMs(15000);
  clock2.addMs(5000);
  clock3.addMs(5000);
  // trigger a state update
  assertNotNull(strategy.getTrackerClient(null, new RequestContext(), 1, partitionId0, clientsForPartition0));
  assertNotNull(strategy.getTrackerClient(null, new RequestContext(), 1, partitionId1, clientsForPartition1));
  assertNotNull(strategy.getRing(1, partitionId0, clientsForPartition0));
  assertNotNull(strategy.getRing(1, partitionId1, clientsForPartition1));
  // now do a basic verification that getTrackerClient is weighting things properly
  int calls = 10000;
  int client1Count = 0;
  int client2Count = 0;
  double tolerance = 0.05d;
  for (int i = 0; i < calls; ++i) {
    TrackerClient client = strategy.getTrackerClient(null, new RequestContext(), 1, partitionId0, clientsForPartition0);
    assertNotNull(client);
    if (client.getUri().equals(uri1)) {
      ++client1Count;
    } else {
      ++client2Count;
    }
  }
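  // Expected split for partition 0 (assuming the strategy default of 100 ring points
  // per 1.0 weight): client1 (weight 1.0, healthy) keeps ~100 points, while client2
  // (weight 0.5, drop rate 0.4) is cut to ~100 * 0.5 * (1 - 0.4) = 30 points,
  // i.e. roughly 100/130 vs. 30/130 of the traffic.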
  assertTrue(Math.abs((client1Count / (double) calls) - (100 / 130d)) < tolerance);
  assertTrue(Math.abs((client2Count / (double) calls) - (30 / 130d)) < tolerance);
  client2Count = 0;
  int client3Count = 0;
  int client4Count = 0;
  for (int i = 0; i < calls; ++i) {
    TrackerClient client = strategy.getTrackerClient(null, new RequestContext(), 1, partitionId1, clientsForPartition1);
    assertNotNull(client);
    if (client.getUri().equals(uri3)) {
      ++client3Count;
    } else if (client.getUri().equals(uri2)) {
      ++client2Count;
    } else {
      ++client4Count;
    }
  }
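  // Same arithmetic for partition 1: client3 (weight 1.0, drop rate 0.2) keeps
  // ~100 * (1 - 0.2) = 80 points and client2 keeps ~30, i.e. roughly 80/110 vs.
  // 30/110. client1 is not registered in this partition, so client4Count must stay 0.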
  assertTrue(Math.abs((client3Count / (double) calls) - (80 / 110d)) < tolerance);
  assertTrue(Math.abs((client2Count / (double) calls) - (30 / 110d)) < tolerance);
  assertTrue(client4Count == 0);
}
Use of com.linkedin.d2.balancer.clients.DegraderTrackerClientImpl in project rest.li by LinkedIn.
The class DegraderLoadBalancerTest, method stressTest.
@Test(groups = { "small", "back-end" })
public void stressTest() {
  final DegraderLoadBalancerStrategyV3 strategyV3 = getStrategy();
  TestClock testClock = new TestClock();
  String baseUri = "http://linkedin.com:9999";
  int numberOfPartitions = 10;
  Map<String, String> degraderProperties = new HashMap<>();
  degraderProperties.put(PropertyKeys.DEGRADER_HIGH_ERROR_RATE, "0.5");
  degraderProperties.put(PropertyKeys.DEGRADER_LOW_ERROR_RATE, "0.2");
  DegraderImpl.Config degraderConfig = DegraderConfigFactory.toDegraderConfig(degraderProperties);
  final List<DegraderTrackerClient> clients = new ArrayList<>();
  for (int i = 0; i < numberOfPartitions; i++) {
    URI uri = URI.create(baseUri + i);
    DegraderTrackerClient client = new DegraderTrackerClientImpl(uri, getDefaultPartitionData(1, numberOfPartitions), new TestLoadBalancerClient(uri), testClock, degraderConfig);
    clients.add(client);
  }
  final ExecutorService executor = Executors.newFixedThreadPool(100);
  final CountDownLatch startLatch = new CountDownLatch(1);
  final CountDownLatch finishLatch = new CountDownLatch(100);
  try {
    for (int i = 0; i < numberOfPartitions; i++) {
      Assert.assertFalse(strategyV3.getState().getPartitionState(i).isInitialized());
    }
    for (int i = 0; i < 100; i++) {
      final int partitionId = i % numberOfPartitions;
      executor.submit(new Runnable() {
        @Override
        public void run() {
          try {
            startLatch.await();
          } catch (InterruptedException ex) {
            // ignore: the test thread will fail on the finish-latch timeout instead
          }
          strategyV3.getRing(1, partitionId, toMap(clients));
          finishLatch.countDown();
        }
      });
    }
    // all threads try to call getRing simultaneously
    startLatch.countDown();
    if (!finishLatch.await(10, TimeUnit.SECONDS)) {
      fail("Stress test failed to finish within 10 seconds");
    }
    for (int i = 0; i < numberOfPartitions; i++) {
      Assert.assertTrue(strategyV3.getState().getPartitionState(i).isInitialized());
    }
  } catch (InterruptedException ex) {
    // interrupted while waiting; fall through to shutdown
  } finally {
    executor.shutdownNow();
  }
}
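The test uses a standard two-latch pattern to maximize contention: every worker parks on a start latch, the main thread releases them all at once, and a finish latch bounds the wait. A self-contained sketch of the pattern (names here are illustrative, not from rest.li):

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class TwoLatchDemo {
  public static void main(String[] args) throws InterruptedException {
    final int workers = 100;
    ExecutorService executor = Executors.newFixedThreadPool(workers);
    final CountDownLatch start = new CountDownLatch(1);
    final CountDownLatch done = new CountDownLatch(workers);
    for (int i = 0; i < workers; i++) {
      executor.submit(() -> {
        try {
          start.await(); // park until the main thread fires the starting gun
          // ... the contended call under test goes here ...
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
        } finally {
          done.countDown();
        }
      });
    }
    start.countDown(); // release all workers at once
    if (!done.await(10, TimeUnit.SECONDS)) {
      throw new AssertionError("workers did not finish in time");
    }
    executor.shutdownNow();
  }
}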
Use of com.linkedin.d2.balancer.clients.DegraderTrackerClientImpl in project rest.li by LinkedIn.
The class DegraderLoadBalancerTest, method testWeightedAndLatencyDegradationBalancingRing.
@Test(groups = { "small", "back-end" }, dataProvider = "consistentHashAlgorithms")
public void testWeightedAndLatencyDegradationBalancingRing(String consistentHashAlgorithm) throws URISyntaxException {
  DegraderLoadBalancerStrategyV3 strategy = getStrategy(consistentHashAlgorithm);
  List<DegraderTrackerClient> clients = new ArrayList<>();
  URI uri1 = URI.create("http://test.linkedin.com:3242/fdsaf");
  URI uri2 = URI.create("http://test.linkedin.com:3243/fdsaf");
  TestClock clock1 = new TestClock();
  TestClock clock2 = new TestClock();
  DegraderTrackerClient client1 = new DegraderTrackerClientImpl(uri1, getDefaultPartitionData(1d), new TestLoadBalancerClient(uri1), clock1, null);
  DegraderTrackerClient client2 = new DegraderTrackerClientImpl(uri2, getDefaultPartitionData(0.8d), new TestLoadBalancerClient(uri2), clock2, null);
  clients.add(client1);
  clients.add(client2);
  DegraderControl dcClient2Default = client2.getDegraderControl(DEFAULT_PARTITION_ID);
  dcClient2Default.setOverrideMinCallCount(1);
  dcClient2Default.setMinCallCount(1);
  dcClient2Default.setMaxDropRate(1d);
  dcClient2Default.setUpStep(0.4d);
  dcClient2Default.setHighErrorRate(0);
  CallCompletion cc = client2.getCallTracker().startCall();
  clock2.addMs(1);
  cc.endCallWithError();
  clock1.addMs(15000);
  clock2.addMs(5000);
  // trigger a state update
  assertNotNull(getTrackerClient(strategy, null, new RequestContext(), 1, clients));
  // now do a basic verification that getTrackerClient is weighting things properly
  double calls = 10000d;
  int client1Count = 0;
  int client2Count = 0;
  double tolerance = 0.05d;
  for (int i = 0; i < calls; ++i) {
    TrackerClient client = getTrackerClient(strategy, null, new RequestContext(), 1, clients);
    assertNotNull(client);
    if (client.getUri().equals(uri1)) {
      ++client1Count;
    } else {
      ++client2Count;
    }
  }
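  // Expected split (assuming the default 100 ring points per 1.0 weight): client1
  // (weight 1.0, healthy) keeps ~100 points; client2 (weight 0.8, drop rate 0.4)
  // keeps ~100 * 0.8 * (1 - 0.4) = 48 points, i.e. roughly 100/148 vs. 48/148.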
  assertTrue(Math.abs((client1Count / calls) - (100 / 148d)) < tolerance);
  assertTrue(Math.abs((client2Count / calls) - (48 / 148d)) < tolerance);
}
Use of com.linkedin.d2.balancer.clients.DegraderTrackerClientImpl in project rest.li by LinkedIn.
The class DegraderLoadBalancerTest, method testDegraderLoadBalancerSimulator.
private void testDegraderLoadBalancerSimulator(DegraderLoadBalancerStrategyAdapter adapter, TestClock clock, long timeInterval, List<DegraderTrackerClient> clients, double qps, DegraderImpl.Config degraderConfig) {
  long clusterGenerationId = 1;
  double overrideDropRate = 0.0;
  // simulate a latency of 4000 ms
  // 1st round: we use the LOAD_BALANCING strategy. Since latency is high, the number of points
  // decreases from 100 to 80 (transmissionRate * points per weight).
  TrackerClient resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 80, true, 0.0, 4000, false, false);
  assertNotNull(resultTC);
  // 2nd round: the drop rate should be increased by DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_UP
  overrideDropRate += DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_UP;
  resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 80, false, overrideDropRate, 4000, false, false);
  // 3rd round: we alternate back to the LOAD_BALANCING strategy and drop the points even further
  resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 39, true, overrideDropRate, 4000, false, false);
  // 4th round: the drop rate should be increased again, as in the 2nd round
  overrideDropRate += DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_UP;
  resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 39, false, overrideDropRate, 4000, false, false);
  // 5th round: alternate to changing the hash ring again
  resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, true, overrideDropRate, 4000, false, false);
  // 6th round: as in the 5th round, we increase the drop rate
  overrideDropRate += DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_UP;
  resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, false, overrideDropRate, 4000, false, false);
  // 7th round: the number of points in the hash ring is at the minimum, so it can't decrease
  // further. At this point the client is in recovery mode, but since the hash ring can't change
  // anymore, we'll always be in CALL_DROPPING mode, so the next strategy is expected to be
  // LOAD_BALANCING.
  overrideDropRate += DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_UP;
  resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, false, overrideDropRate, 4000, false, false);
  // 8th round: we increase the drop rate to the max
  overrideDropRate += DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_UP;
  resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, false, overrideDropRate, 4000, false, false);
  // 9th round: simulate that there is still a call even though we drop 100% of requests to the
  // tracker client. The assumption is that some thread still holds the tracker client, and we
  // want to make sure the request is handled without degrading the cluster even further.
  resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, false, overrideDropRate, 4000, false, false);
  // 10th round: simulate that there are no calls because all requests were dropped. Even though
  // we are in LOAD_BALANCING mode and this tracker client is in recovery mode, with no calls the
  // hash ring doesn't change, so we go back to reducing the drop rate to 0.8, which means the
  // next strategy is LOAD_BALANCING.
  overrideDropRate -= DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_DOWN;
  resultTC = simulateAndTestOneInterval(timeInterval, clock, 0.0, clients, adapter, clusterGenerationId, 1, false, overrideDropRate, 4000, false, false);
  // 11th round: latency is now 1000 ms (within the low and high watermarks). The drop rate
  // and everything else should stay the same.
  resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, false, overrideDropRate, 1000, false, false);
  // simulate the clients dying one by one until all of them are gone
  int numberOfClients = clients.size();
  HashSet<URI> uris = new HashSet<>();
  HashSet<URI> removedUris = new HashSet<>();
  for (TrackerClient client : clients) {
    uris.add(client.getUri());
  }
  LinkedList<TrackerClient> removedClients = new LinkedList<>();
  // the LOAD_BALANCING strategy will always be picked because there are no hash ring changes
  boolean isLoadBalancingStrategyTurn = true;
  for (int i = numberOfClients; i > 0; i--) {
    TrackerClient removed = clients.remove(0);
    uris.remove(removed.getUri());
    removedClients.addLast(removed);
    removedUris.add(removed.getUri());
    clusterGenerationId++;
    resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, isLoadBalancingStrategyTurn, overrideDropRate, 1000, false, false);
    if (i == 1) {
      assertNull(resultTC);
    } else {
      // with the override drop rate at 0.8, getTrackerClient may return null; when it does
      // return a client, it must be one that is still in the cluster
      if (resultTC != null) {
        assertTrue(uris.contains(resultTC.getUri()));
        assertFalse(removedUris.contains(resultTC.getUri()));
      }
    }
  }
  assertTrue(uris.isEmpty());
  assertTrue(clients.isEmpty());
  assertEquals(removedUris.size(), numberOfClients);
  assertEquals(removedClients.size(), numberOfClients);
  // simulate the clients reviving one by one until all of them are back up
  for (int i = numberOfClients; i > 0; i--) {
    TrackerClient added = removedClients.remove(0);
    // we have to create a new client: the old client has a degraded DegraderImpl, and in a
    // production environment a new client joining a cluster should be in a good state. This
    // means there should be 100 points in the hash ring for this client.
    DegraderTrackerClient newClient = new DegraderTrackerClientImpl(added.getUri(), getDefaultPartitionData(1d), new TestLoadBalancerClient(added.getUri()), clock, degraderConfig);
    clients.add(newClient);
    uris.add(added.getUri());
    removedUris.remove(added.getUri());
    clusterGenerationId++;
    resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 100, isLoadBalancingStrategyTurn, overrideDropRate, 1000, false, false);
    if (resultTC != null) {
      assertTrue(uris.contains(resultTC.getUri()));
      assertFalse(removedUris.contains(resultTC.getUri()));
    }
  }
  // step the override drop rate back down; the number of points stays the same because there
  // are no hash ring changes
  for (overrideDropRate -= DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_DOWN; overrideDropRate >= 0; overrideDropRate -= DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_DOWN) {
    resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 100, false, overrideDropRate, 300, false, false);
  }
  // we should have recovered fully by this time
  overrideDropRate = 0.0;
  resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 100, false, overrideDropRate, 300, false, false);
  assertNotNull(resultTC);
  clusterGenerationId++;
  // simulate that an increased rate of certain errors (connect exception,
  // closedChannelException) causes degradation
  simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 80, true, 0.0, 300, false, true);
  // switch to the call-dropping strategy
  simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 80, false, 0.0, 300, false, true);
  // continue the degradation
  simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 39, true, 0.0, 300, false, true);
  simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 39, false, 0.0, 300, false, true);
  simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, true, 0.0, 300, false, true);
  // now remove all the errors and watch the cluster recover; we have to wait until the next
  // round because this round uses the CALL_DROPPING strategy
  simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, false, 0.0, 300, false, false);
  simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 39, true, 0.0, 300, false, false);
  simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 39, false, 0.0, 300, false, false);
  simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 80, true, 0.0, 300, false, false);
  simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 80, false, 0.0, 300, false, false);
  simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 100, true, 0.0, 300, false, false);
  // make sure that errors other than CONNECT_EXCEPTION or CLOSED_CHANNEL_EXCEPTION do not
  // cause degradation
  simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 100, false, 0.0, 300, true, false);
  // since there is no change in the hash ring for errors NOT of CONNECT_EXCEPTION or
  // CLOSED_CHANNEL_EXCEPTION, the strategy won't change to CALL_DROPPING
  simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 100, false, 0.0, 300, true, false);
}