Search in sources :

Example 6 with Point

use of com.linkedin.d2.balancer.util.hashing.ConsistentHashRing.Point in project rest.li by linkedin.

the class DegraderLoadBalancerStrategyV2_1 method getUnhealthyTrackerClients.

private static List<String> getUnhealthyTrackerClients(List<TrackerClientUpdater> trackerClientUpdaters, Map<URI, Integer> pointsMap, DegraderLoadBalancerStrategyConfig config) {
    List<String> unhealthyClients = new ArrayList<String>();
    for (TrackerClientUpdater clientUpdater : trackerClientUpdaters) {
        TrackerClient client = clientUpdater.getTrackerClient();
        int perfectHealth = (int) (client.getPartitionWeight(DEFAULT_PARTITION_ID) * config.getPointsPerWeight());
        Integer point = pointsMap.get(client.getUri());
        if (point < perfectHealth) {
            unhealthyClients.add(client.getUri() + ":" + point + "/" + perfectHealth);
        }
    }
    return unhealthyClients;
}
Also used : TrackerClient(com.linkedin.d2.balancer.clients.TrackerClient) ArrayList(java.util.ArrayList)

Example 7 with Point

use of com.linkedin.d2.balancer.util.hashing.ConsistentHashRing.Point in project rest.li by linkedin.

the class DegraderLoadBalancerTest method testHighLowWatermarks.

@Test(groups = { "small", "back-end" })
public void testHighLowWatermarks() {
    final int NUM_CHECKS = 5;
    Map<String, Object> myMap = new HashMap<String, Object>();
    Long timeInterval = 5000L;
    double globalStepUp = 0.4;
    double globalStepDown = 0.4;
    double highWaterMark = 1000;
    double lowWaterMark = 50;
    TestClock clock = new TestClock();
    myMap.put(PropertyKeys.CLOCK, clock);
    myMap.put(PropertyKeys.HTTP_LB_STRATEGY_PROPERTIES_UPDATE_INTERVAL_MS, timeInterval);
    myMap.put(PropertyKeys.HTTP_LB_GLOBAL_STEP_UP, globalStepUp);
    myMap.put(PropertyKeys.HTTP_LB_GLOBAL_STEP_DOWN, globalStepDown);
    myMap.put(PropertyKeys.HTTP_LB_HIGH_WATER_MARK, highWaterMark);
    myMap.put(PropertyKeys.HTTP_LB_LOW_WATER_MARK, lowWaterMark);
    myMap.put(PropertyKeys.HTTP_LB_CLUSTER_MIN_CALL_COUNT_HIGH_WATER_MARK, 1l);
    myMap.put(PropertyKeys.HTTP_LB_CLUSTER_MIN_CALL_COUNT_LOW_WATER_MARK, 1l);
    DegraderLoadBalancerStrategyConfig config = DegraderLoadBalancerStrategyConfig.createHttpConfigFromMap(myMap);
    DegraderLoadBalancerStrategyV3 strategy = new DegraderLoadBalancerStrategyV3(config, "DegraderLoadBalancerTest", null);
    List<TrackerClient> clients = new ArrayList<TrackerClient>();
    URI uri1 = URI.create("http://test.linkedin.com:3242/fdsaf");
    URIRequest request = new URIRequest(uri1);
    TrackerClient client1 = new TrackerClient(uri1, getDefaultPartitionData(1d), new TestLoadBalancerClient(uri1), clock, null);
    clients.add(client1);
    DegraderControl dcClient1Default = client1.getDegraderControl(DEFAULT_PARTITION_ID);
    dcClient1Default.setOverrideMinCallCount(5);
    dcClient1Default.setMinCallCount(5);
    List<CallCompletion> ccList = new ArrayList<CallCompletion>();
    CallCompletion cc;
    TrackerClient resultTC = getTrackerClient(strategy, request, new RequestContext(), 1, clients);
    // The override drop rate should be zero at this point.
    assertEquals(dcClient1Default.getOverrideDropRate(), 0.0);
    // make high latency calls to the tracker client, verify the override drop rate doesn't change
    for (int j = 0; j < NUM_CHECKS; j++) {
        cc = client1.getCallTracker().startCall();
        ccList.add(cc);
    }
    clock.addMs((long) highWaterMark);
    for (Iterator<CallCompletion> iter = ccList.listIterator(); iter.hasNext(); ) {
        cc = iter.next();
        cc.endCall();
        iter.remove();
    }
    // go to next time interval.
    clock.addMs(timeInterval);
    // try call dropping on the next updateState
    strategy.setStrategy(DEFAULT_PARTITION_ID, DegraderLoadBalancerStrategyV3.PartitionDegraderLoadBalancerState.Strategy.CALL_DROPPING);
    resultTC = getTrackerClient(strategy, request, new RequestContext(), 1, clients);
    // we now expect that the override drop rate stepped up because updateState
    // made that decision.
    assertEquals(dcClient1Default.getOverrideDropRate(), globalStepUp);
    // make mid latency calls to the tracker client, verify the override drop rate doesn't change
    for (int j = 0; j < NUM_CHECKS; j++) {
        // need to use client1 because the resultTC may be null
        cc = client1.getCallTracker().startCall();
        ccList.add(cc);
    }
    clock.addMs((long) highWaterMark - 1);
    for (Iterator<CallCompletion> iter = ccList.listIterator(); iter.hasNext(); ) {
        cc = iter.next();
        cc.endCall();
        iter.remove();
    }
    // go to next time interval.
    clock.addMs(timeInterval);
    double previousOverrideDropRate = dcClient1Default.getOverrideDropRate();
    // try call dropping on the next updateState
    strategy.setStrategy(DEFAULT_PARTITION_ID, DegraderLoadBalancerStrategyV3.PartitionDegraderLoadBalancerState.Strategy.CALL_DROPPING);
    resultTC = getTrackerClient(strategy, request, new RequestContext(), 1, clients);
    assertEquals(dcClient1Default.getOverrideDropRate(), previousOverrideDropRate);
    // make low latency calls to the tracker client, verify the override drop rate decreases
    for (int j = 0; j < NUM_CHECKS; j++) {
        cc = client1.getCallTracker().startCall();
        ccList.add(cc);
    }
    clock.addMs((long) lowWaterMark);
    for (Iterator<CallCompletion> iter = ccList.listIterator(); iter.hasNext(); ) {
        cc = iter.next();
        cc.endCall();
        iter.remove();
    }
    // go to next time interval.
    clock.addMs(timeInterval);
    // try Call dropping on this updateState
    strategy.setStrategy(DEFAULT_PARTITION_ID, DegraderLoadBalancerStrategyV3.PartitionDegraderLoadBalancerState.Strategy.CALL_DROPPING);
    resultTC = getTrackerClient(strategy, request, new RequestContext(), 1, clients);
    assertEquals(resultTC.getDegraderControl(DEFAULT_PARTITION_ID).getOverrideDropRate(), 0.0);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) URIRequest(com.linkedin.d2.balancer.util.URIRequest) DegraderControl(com.linkedin.util.degrader.DegraderControl) URI(java.net.URI) TrackerClient(com.linkedin.d2.balancer.clients.TrackerClient) CallCompletion(com.linkedin.util.degrader.CallCompletion) AtomicLong(java.util.concurrent.atomic.AtomicLong) RequestContext(com.linkedin.r2.message.RequestContext) Test(org.testng.annotations.Test) TrackerClientTest(com.linkedin.d2.balancer.clients.TrackerClientTest)

Example 8 with Point

use of com.linkedin.d2.balancer.util.hashing.ConsistentHashRing.Point in project rest.li by linkedin.

the class DegraderLoadBalancerTest method testDegraderLoadBalancerSimulator.

private void testDegraderLoadBalancerSimulator(DegraderLoadBalancerStrategyAdapter adapter, TestClock clock, long timeInterval, List<TrackerClient> clients, double qps, DegraderImpl.Config degraderConfig) {
    long clusterGenerationId = 1;
    double overrideDropRate = 0.0;
    //simulate latency 4000 ms
    //1st round we use LOAD_BALANCING strategy. Since we have a high latency we will decrease the number of points
    //from 100 to 80 (transmissionRate * points per weight).
    TrackerClient resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 80, true, 0.0, 4000, false, false);
    assertNotNull(resultTC);
    //2nd round drop rate should be increased by DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_UP
    overrideDropRate += DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_UP;
    resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 80, false, overrideDropRate, 4000, false, false);
    //3rd round. We alternate back to LOAD_BALANCING strategy and we drop the points even more
    resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 39, true, overrideDropRate, 4000, false, false);
    //4th round. The drop rate should be increased again like 2nd round
    overrideDropRate += DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_UP;
    resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 39, false, overrideDropRate, 4000, false, false);
    //5th round. Alternate to changing hash ring again.
    resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, true, overrideDropRate, 4000, false, false);
    //6th round. Same as 5th round, we'll increase the drop rate
    overrideDropRate += DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_UP;
    resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, false, overrideDropRate, 4000, false, false);
    //7th round. The # of point in hashring is at the minimum so we can't decrease it further. At this point the client
    //is in recovery mode. But since we can't change the hashring anymore, we'll always in CALL_DROPPING mode
    //so the next strategy is expected to be LOAD_BALANCING mode.
    overrideDropRate += DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_UP;
    resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, false, overrideDropRate, 4000, false, false);
    //8th round. We'll increase the drop rate to the max.
    overrideDropRate += DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_UP;
    resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, false, overrideDropRate, 4000, false, false);
    //9th round, now we'll simulate as if there still a call even though we drop 100% of all request to get
    //tracker client. The assumption is there's some thread that still holds tracker client and we want
    //to make sure we can handle the request and we can't degrade the cluster even further.
    resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, false, overrideDropRate, 4000, false, false);
    //10th round, now we'll simulate as if there's no call because we dropped all request
    //even though we are in LOAD_BALANCING mode and this tracker client is in recovery mode and there's no call
    //so the hashring doesn't change so we go back to reducing the drop rate to 0.8 and that means the next
    //strategy is LOAD_BALANCE
    overrideDropRate -= DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_DOWN;
    resultTC = simulateAndTestOneInterval(timeInterval, clock, 0.0, clients, adapter, clusterGenerationId, 1, false, overrideDropRate, 4000, false, false);
    //11th round, this time we'll simulate the latency is now 1000 ms (so it's within low and high watermark). Drop rate
    //should stay the same and everything else should stay the same
    resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, false, overrideDropRate, 1000, false, false);
    //we'll simulate the client dying one by one until all the clients are gone
    int numberOfClients = clients.size();
    HashSet<URI> uris = new HashSet<URI>();
    HashSet<URI> removedUris = new HashSet<URI>();
    for (TrackerClient client : clients) {
        uris.add(client.getUri());
    }
    LinkedList<TrackerClient> removedClients = new LinkedList<TrackerClient>();
    //loadBalancing strategy will always be picked because there is no hash ring changes
    boolean isLoadBalancingStrategyTurn = true;
    for (int i = numberOfClients; i > 0; i--) {
        TrackerClient removed = clients.remove(0);
        uris.remove(removed.getUri());
        removedClients.addLast(removed);
        removedUris.add(removed.getUri());
        clusterGenerationId++;
        resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, isLoadBalancingStrategyTurn, overrideDropRate, 1000, false, false);
        if (i == 1) {
            assertNull(resultTC);
        } else {
            //the override drop rate is 0.8)
            if (resultTC != null) {
                assertTrue(uris.contains(resultTC.getUri()));
                assertFalse(removedUris.contains(resultTC.getUri()));
            }
        }
    }
    assertTrue(uris.isEmpty());
    assertTrue(clients.isEmpty());
    assertEquals(removedUris.size(), numberOfClients);
    assertEquals(removedClients.size(), numberOfClients);
    //we'll simulate the client start reviving one by one until all clients are back up again
    for (int i = numberOfClients; i > 0; i--) {
        TrackerClient added = removedClients.remove(0);
        //we have to create a new client. The old client has a degraded DegraderImpl. And in production enviroment
        //when a new client join a cluster, it should be in good state. This means there should be 100 points
        //in the hash ring for this client
        TrackerClient newClient = new TrackerClient(added.getUri(), getDefaultPartitionData(1d), new TestLoadBalancerClient(added.getUri()), clock, degraderConfig);
        clients.add(newClient);
        uris.add(added.getUri());
        removedUris.remove(added.getUri());
        clusterGenerationId++;
        resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 100, isLoadBalancingStrategyTurn, overrideDropRate, 1000, false, false);
        if (resultTC != null) {
            assertTrue(uris.contains(resultTC.getUri()));
            assertFalse(removedUris.contains(resultTC.getUri()));
        }
    }
    //the number of points because there is no hash ring changes
    for (overrideDropRate -= DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_DOWN; overrideDropRate >= 0; overrideDropRate -= DegraderLoadBalancerStrategyConfig.DEFAULT_GLOBAL_STEP_DOWN) {
        resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 100, false, overrideDropRate, 300, false, false);
    }
    //we should have recovered fully by this time
    overrideDropRate = 0.0;
    resultTC = simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 100, false, overrideDropRate, 300, false, false);
    assertNotNull(resultTC);
    clusterGenerationId++;
    //simulate the increase of certain error (connect exception, closedChannelException) rate will cause degradation.
    simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 80, true, 0.0, 300, false, true);
    //switching to call dropping strategy
    simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 80, false, 0.0, 300, false, true);
    //continue the degradation
    simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 39, true, 0.0, 300, false, true);
    simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 39, false, 0.0, 300, false, true);
    simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, true, 0.0, 300, false, true);
    //now let's remove all the error and see how the cluster recover but we have to wait until next round because
    //this round is CALL_DROP strategy
    simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 1, false, 0.0, 300, false, false);
    simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 39, true, 0.0, 300, false, false);
    simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 39, false, 0.0, 300, false, false);
    simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 80, true, 0.0, 300, false, false);
    simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 80, false, 0.0, 300, false, false);
    simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 100, true, 0.0, 300, false, false);
    //make sure if we have error that is not from CONNECT_EXCEPTION or CLOSED_CHANNEL_EXCEPTION we don't degrade
    simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 100, false, 0.0, 300, true, false);
    //since there's no change in hash ring due to error NOT of CONNECT_EXCEPTION or CLOSED_CHANNEL_EXCEPTION,
    //the strategy won't change to CALL_DROPPING
    simulateAndTestOneInterval(timeInterval, clock, qps, clients, adapter, clusterGenerationId, 100, false, 0.0, 300, true, false);
}
Also used : TrackerClient(com.linkedin.d2.balancer.clients.TrackerClient) URI(java.net.URI) LinkedList(java.util.LinkedList) HashSet(java.util.HashSet)

Example 9 with Point

use of com.linkedin.d2.balancer.util.hashing.ConsistentHashRing.Point in project rest.li by linkedin.

the class DegraderLoadBalancerTest method testClusterRecoveryFast1TC.

@Test(groups = { "small", "back-end" })
public void testClusterRecoveryFast1TC() {
    Map<String, Object> myMap = new HashMap<String, Object>();
    Long timeInterval = 5000L;
    TestClock clock = new TestClock();
    myMap.put(PropertyKeys.CLOCK, clock);
    // This recovery level will put one point into the hash ring, which is good enough to
    // send traffic to it because it is the only member of the cluster.
    myMap.put("initialRecoverLevel", 0.01);
    myMap.put(PropertyKeys.HTTP_LB_RING_RAMP_FACTOR, 2.0);
    myMap.put(PropertyKeys.HTTP_LB_STRATEGY_PROPERTIES_UPDATE_INTERVAL_MS, timeInterval);
    int stepsToFullRecovery = 0;
    //test Strategy V3
    DegraderLoadBalancerStrategyConfig config = DegraderLoadBalancerStrategyConfig.createHttpConfigFromMap(myMap);
    DegraderLoadBalancerStrategyV3 strategyV3 = new DegraderLoadBalancerStrategyV3(config, "DegraderLoadBalancerTest", null);
    DegraderLoadBalancerStrategyAdapter strategy = new DegraderLoadBalancerStrategyAdapter(strategyV3);
    clusterRecovery1TC(myMap, clock, stepsToFullRecovery, timeInterval, strategy, null, DegraderLoadBalancerStrategyV3.PartitionDegraderLoadBalancerState.Strategy.LOAD_BALANCE);
    //test Strategy V2
    config = DegraderLoadBalancerStrategyConfig.createHttpConfigFromMap(myMap);
    DegraderLoadBalancerStrategyV2_1 strategyV2 = new DegraderLoadBalancerStrategyV2_1(config, "DegraderLoadBalancerTest", null);
    strategy = new DegraderLoadBalancerStrategyAdapter(strategyV2);
    clusterRecovery1TC(myMap, clock, stepsToFullRecovery, timeInterval, strategy, DegraderLoadBalancerStrategyV2_1.DegraderLoadBalancerState.Strategy.LOAD_BALANCE, null);
}
Also used : HashMap(java.util.HashMap) AtomicLong(java.util.concurrent.atomic.AtomicLong) Test(org.testng.annotations.Test) TrackerClientTest(com.linkedin.d2.balancer.clients.TrackerClientTest)

Example 10 with Point

use of com.linkedin.d2.balancer.util.hashing.ConsistentHashRing.Point in project rest.li by linkedin.

the class DegraderLoadBalancerTest method simulateAndTestOneInterval.

/**
   * simulates calling all the clients with the given QPS according to the given interval.
   * Then verify that the DegraderLoadBalancerState behaves as expected.
   * @param expectedPointsPerClient we'll verify if the points are smaller than the number given here
   * @param isCalledWithError
   * @param isCalledWithErrorForLoadBalancing
   */
private TrackerClient simulateAndTestOneInterval(long timeInterval, TestClock clock, double qps, List<TrackerClient> clients, DegraderLoadBalancerStrategyAdapter adapter, long clusterGenerationId, Integer expectedPointsPerClient, boolean isExpectingDropCallStrategyForNewState, double expectedClusterOverrideDropRate, long latency, boolean isCalledWithError, boolean isCalledWithErrorForLoadBalancing) {
    callClients(latency, qps, clients, clock, timeInterval, isCalledWithError, isCalledWithErrorForLoadBalancing);
    //create any random URIRequest because we just need a URI to be hashed to get the point in hash ring anyway
    if (clients != null && !clients.isEmpty()) {
        URIRequest request = new URIRequest(clients.get(0).getUri());
        TrackerClient client = getTrackerClient(adapter, request, new RequestContext(), clusterGenerationId, clients);
        Map<URI, Integer> pointsMap = adapter.getPointsMap();
        for (TrackerClient trackerClient : clients) {
            Integer pointsInTheRing = pointsMap.get(trackerClient.getUri());
            assertEquals(pointsInTheRing, expectedPointsPerClient);
        }
        if (isExpectingDropCallStrategyForNewState) {
            assertTrue(adapter.isStrategyCallDrop());
        } else {
            assertFalse(adapter.isStrategyCallDrop());
        }
        assertEquals(adapter.getCurrentOverrideDropRate(), expectedClusterOverrideDropRate);
        return client;
    }
    return null;
}
Also used : TrackerClient(com.linkedin.d2.balancer.clients.TrackerClient) URIRequest(com.linkedin.d2.balancer.util.URIRequest) RequestContext(com.linkedin.r2.message.RequestContext) URI(java.net.URI)

Aggregations

TrackerClient (com.linkedin.d2.balancer.clients.TrackerClient)14 URI (java.net.URI)11 HashMap (java.util.HashMap)9 ArrayList (java.util.ArrayList)7 Test (org.testng.annotations.Test)6 TrackerClientTest (com.linkedin.d2.balancer.clients.TrackerClientTest)3 URIRequest (com.linkedin.d2.balancer.util.URIRequest)3 Point (com.linkedin.d2.balancer.util.hashing.ConsistentHashRing.Point)3 RequestContext (com.linkedin.r2.message.RequestContext)3 DegraderControl (com.linkedin.util.degrader.DegraderControl)3 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)3 AtomicLong (java.util.concurrent.atomic.AtomicLong)3 None (com.linkedin.common.util.None)2 D2Client (com.linkedin.d2.balancer.D2Client)2 D2ClientBuilder (com.linkedin.d2.balancer.D2ClientBuilder)2 RestRequest (com.linkedin.r2.message.rest.RestRequest)2 RestRequestBuilder (com.linkedin.r2.message.rest.RestRequestBuilder)2 RestResponse (com.linkedin.r2.message.rest.RestResponse)2 CallCompletion (com.linkedin.util.degrader.CallCompletion)2 HashSet (java.util.HashSet)2