Example 11 with ZooKeeper

use of com.linkedin.d2.discovery.stores.zk.ZooKeeper in project rest.li by linkedin.

the class ExampleD2Client method main.

public static void main(String[] args) throws IOException, ParseException, InterruptedException {
    //get client configuration
    JSONObject json = parseConfig();
    String zkConnectString = (String) json.get("zkConnectString");
    Long zkSessionTimeout = (Long) json.get("zkSessionTimeout");
    String zkBasePath = (String) json.get("zkBasePath");
    Long zkStartupTimeout = (Long) json.get("zkStartupTimeout");
    Long zkLoadBalancerNotificationTimeout = (Long) json.get("zkLoadBalancerNotificationTimeout");
    String zkFlagFile = (String) json.get("zkFlagFile");
    String fsBasePath = (String) json.get("fsBasePath");
    final Map<String, Long> trafficProportion = (Map<String, Long>) json.get("trafficProportion");
    final Long clientShutdownTimeout = (Long) json.get("clientShutdownTimeout");
    final Long clientStartTimeout = (Long) json.get("clientStartTimeout");
    Long rate = (Long) json.get("rateMillisecond");
    System.out.println("Finished parsing client config");
    //create d2 client
    final D2Client d2Client = new D2ClientBuilder()
            .setZkHosts(zkConnectString)
            .setZkSessionTimeout(zkSessionTimeout, TimeUnit.MILLISECONDS)
            .setZkStartupTimeout(zkStartupTimeout, TimeUnit.MILLISECONDS)
            .setLbWaitTimeout(zkLoadBalancerNotificationTimeout, TimeUnit.MILLISECONDS)
            .setFlagFile(zkFlagFile)
            .setBasePath(zkBasePath)
            .setFsBasePath(fsBasePath)
            .build();
    System.out.println("Finished creating d2 client, starting d2 client...");
    ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor();
    final CountDownLatch latch = new CountDownLatch(1);
    //start d2 client by connecting to zookeeper
    startClient(d2Client, executorService, clientStartTimeout, new Callback<None>() {

        @Override
        public void onError(Throwable e) {
            System.exit(1);
        }

        @Override
        public void onSuccess(None result) {
            latch.countDown();
        }
    });
    latch.await();
    System.out.println("D2 client is sending traffic");
    ScheduledFuture<?> task = executorService.scheduleAtFixedRate(new Runnable() {

        @Override
        public void run() {
            try {
                sendTraffic(trafficProportion, d2Client);
            } catch (URISyntaxException e) {
                e.printStackTrace();
            }
        }
    }, 0, rate, TimeUnit.MILLISECONDS);
    System.out.println("Press enter to stop D2 client...");
    System.in.read();
    task.cancel(false);
    System.out.println("Shutting down...");
    shutdown(d2Client, executorService, clientShutdownTimeout);
}
Also used : ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) D2Client(com.linkedin.d2.balancer.D2Client) D2ClientBuilder(com.linkedin.d2.balancer.D2ClientBuilder) URISyntaxException(java.net.URISyntaxException) CountDownLatch(java.util.concurrent.CountDownLatch) ScheduledFuture(java.util.concurrent.ScheduledFuture) JSONObject(org.json.simple.JSONObject) Map(java.util.Map) None(com.linkedin.common.util.None)
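
The parseConfig() helper is elided above. A minimal sketch, assuming the client configuration is a json-simple JSON file read the same way ConfigRunner reads d2Config.json below (the file location is hypothetical):

private static JSONObject parseConfig() throws IOException, ParseException {
    // hypothetical location of the client config; adjust to wherever the example keeps it
    String path = new File(new File(".").getAbsolutePath()).getCanonicalPath() + "/src/main/config/clientConfig.json";
    // json-simple's JSONParser.parse(Reader) throws both IOException and ParseException,
    // matching the throws clause of main() above
    return (JSONObject) new JSONParser().parse(new FileReader(path));
}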

Example 12 with ZooKeeper

use of com.linkedin.d2.discovery.stores.zk.ZooKeeper in project rest.li by linkedin.

the class ConfigRunner method main.

public static void main(String[] args) throws Exception {
    //get server configuration
    String path = new File(new File(".").getAbsolutePath()).getCanonicalPath() + "/src/main/d2Config/d2Config.json";
    JSONParser parser = new JSONParser();
    Object object = parser.parse(new FileReader(path));
    JSONObject json = (JSONObject) object;
    System.out.println("Finished parsing d2 topology config");
    String zkConnectString = (String) json.get("zkConnectString");
    int zkSessionTimeout = ((Long) json.get("zkSessionTimeout")).intValue();
    String zkBasePath = (String) json.get("zkBasePath");
    int zkRetryLimit = ((Long) json.get("zkRetryLimit")).intValue();
    Map<String, Object> serviceDefaults = (Map<String, Object>) json.get("defaultServiceProperties");
    //this contains the topology of our system
    Map<String, Object> clusterServiceConfigurations = (Map<String, Object>) json.get("d2Clusters");
    // 'comment' has no special meaning in JSON; it is documentation only, so strip it before use
    clusterServiceConfigurations.remove("comment");
    System.out.println("Populating zookeeper with d2 configuration");
    //d2Config is the utility class for populating zookeeper with our topology.
    //some of the params are not needed for this simple example, so we just use
    //default values by passing empty maps
    D2Config d2Config = new D2Config(zkConnectString, zkSessionTimeout, zkBasePath,
            zkSessionTimeout, zkRetryLimit,
            Collections.<String, Object>emptyMap(), serviceDefaults,
            clusterServiceConfigurations,
            Collections.<String, Object>emptyMap(),
            Collections.<String, Object>emptyMap());
    //populate zookeeper
    d2Config.configure();
    System.out.println("Finished populating zookeeper with d2 configuration");
}
Also used : JSONObject(org.json.simple.JSONObject) JSONParser(org.json.simple.parser.JSONParser) JSONObject(org.json.simple.JSONObject) FileReader(java.io.FileReader) D2Config(com.linkedin.d2.discovery.util.D2Config) File(java.io.File) Map(java.util.Map)
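
The d2Clusters topology itself comes from d2Config.json, which is not shown. As a rough, hypothetical sketch of what one entry might deserialize into (a cluster "ExampleCluster" exposing a service "exampleService" at request path "/example"; the exact keys are an assumption, and the authoritative schema is whatever D2Config consumes):

// build the same structure in code instead of JSON, for illustration only
Map<String, Object> exampleService = new HashMap<String, Object>();
exampleService.put("path", "/example");
Map<String, Object> services = new HashMap<String, Object>();
services.put("exampleService", exampleService);
Map<String, Object> exampleCluster = new HashMap<String, Object>();
exampleCluster.put("services", services);
Map<String, Object> d2Clusters = new HashMap<String, Object>();
d2Clusters.put("ExampleCluster", exampleCluster);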

Example 13 with ZooKeeper

use of com.linkedin.d2.discovery.stores.zk.ZooKeeper in project rest.li by linkedin.

the class SimpleLoadBalancerStateTest method testGetClientAfterBadProperties.

@Test(groups = { "small", "back-end" })
public void testGetClientAfterBadProperties() throws URISyntaxException, InterruptedException {
    reset();
    URI uri = URI.create("http://cluster-1/test");
    List<String> schemes = new ArrayList<String>();
    Map<Integer, PartitionData> partitionData = new HashMap<Integer, PartitionData>(1);
    partitionData.put(DefaultPartitionAccessor.DEFAULT_PARTITION_ID, new PartitionData(1d));
    Map<URI, Map<Integer, PartitionData>> uriData = new HashMap<URI, Map<Integer, PartitionData>>();
    uriData.put(uri, partitionData);
    schemes.add("http");
    assertNull(_state.getClient("service-1", uri));
    Map<String, Object> transportProperties = new HashMap<String, Object>();
    transportProperties.put("foobar", "unsupportedValue");
    _serviceRegistry.put("service-1", new ServiceProperties("service-1", "cluster-1", "/test", Arrays.asList("random"), Collections.<String, Object>emptyMap(), transportProperties, null, schemes, null));
    // we add the property first before listening to the service because the MockStore will
    // immediately publish to the eventBus when listenToService() is called, whereas the
    // ZooKeeper stores wait until we get a response back from zookeeper, which triggers handlePut.
    CountDownLatch cdl1 = new CountDownLatch(1);
    _state.listenToService("service-1", new SimpleLoadBalancer.SimpleLoadBalancerCountDownCallback(cdl1));
    // Verify the callback did NOT get invoked, i.e., the exception was thrown during handlePut()
    assertEquals(cdl1.getCount(), 1);
    // set up state
    CountDownLatch cdl2 = new CountDownLatch(1);
    _state.listenToCluster("cluster-1", new SimpleLoadBalancer.SimpleLoadBalancerCountDownCallback(cdl2));
    assertTrue(cdl2.await(60, TimeUnit.SECONDS));
    _uriRegistry.put("cluster-1", new UriProperties("cluster-1", uriData));
    assertNull(_state.getClient("service-1", uri));
    _serviceRegistry.put("service-1", new ServiceProperties("service-1", "cluster-1", "/test", Arrays.asList("random"), Collections.<String, Object>emptyMap(), null, null, schemes, null));
    CountDownLatch cdl = new CountDownLatch(1);
    _state.listenToService("service-1", new SimpleLoadBalancer.SimpleLoadBalancerCountDownCallback(cdl));
    assertTrue(cdl.await(60, TimeUnit.SECONDS));
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CountDownLatch(java.util.concurrent.CountDownLatch) URI(java.net.URI) ServiceProperties(com.linkedin.d2.balancer.properties.ServiceProperties) PartitionData(com.linkedin.d2.balancer.properties.PartitionData) UriProperties(com.linkedin.d2.balancer.properties.UriProperties) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.testng.annotations.Test) DegraderLoadBalancerTest(com.linkedin.d2.balancer.strategies.degrader.DegraderLoadBalancerTest)
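
SimpleLoadBalancer.SimpleLoadBalancerCountDownCallback is referenced but not shown. A plausible sketch, assuming it is a LoadBalancerStateListenerCallback that simply counts down the supplied latch when the listener fires:

// hypothetical shape; the real class is nested in SimpleLoadBalancer
static class SimpleLoadBalancerCountDownCallback implements LoadBalancerState.LoadBalancerStateListenerCallback {
    private final CountDownLatch _latch;

    SimpleLoadBalancerCountDownCallback(CountDownLatch latch) {
        _latch = latch;
    }

    @Override
    public void done(int type, String name) {
        // signal the test that the service/cluster listener completed
        _latch.countDown();
    }
}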

Example 14 with ZooKeeper

use of com.linkedin.d2.discovery.stores.zk.ZooKeeper in project rest.li by linkedin.

the class SimpleLoadBalancerStateTest method testRefreshWithConcurrentGetTC.

// This test is to verify a fix for a specific bug, where the d2 client receives a zookeeper
// update and concurrent getTrackerClient requests. In that case, all but the first concurrent
// request got a null tracker client, because the degraderLoadBalancerState was not fully
// initialized (the hash ring was empty), and this continued until the first request had
// atomically swapped in a fully initialized state for other requests to use. This test failed
// on pre-fix code; it now succeeds.
@Test(groups = { "small", "back-end" })
public void testRefreshWithConcurrentGetTC() throws URISyntaxException, InterruptedException {
    reset();
    LinkedList<String> strategyList = new LinkedList<String>();
    URI uri = URI.create("http://cluster-1/test");
    final List<String> schemes = new ArrayList<String>();
    schemes.add("http");
    strategyList.add("degraderV3");
    // set up state
    _state.listenToService("service-1", new NullStateListenerCallback());
    _state.listenToCluster("cluster-1", new NullStateListenerCallback());
    assertNull(_state.getStrategy("service-1", "http"));
    // Use the _clusterRegistry.put to populate the _state.clusterProperties, used by
    // _state.refreshServiceStrategies
    _clusterRegistry.put("cluster-1", new ClusterProperties("cluster-1"));
    _serviceRegistry.put("service-1", new ServiceProperties("service-1", "cluster-1", "/test", strategyList, Collections.<String, Object>emptyMap(), Collections.<String, Object>emptyMap(), Collections.<String, String>emptyMap(), schemes, Collections.<URI>emptySet()));
    LoadBalancerStrategy strategy = _state.getStrategy("service-1", "http");
    assertNotNull(strategy, "got null strategy in setup");
    // test serial to make sure things are working before concurrent test
    TransportClient resultTC = _state.getClient("service-1", "http");
    assertNotNull(resultTC, "got null tracker client in non-concurrent env");
    ExecutorService myExecutor = Executors.newCachedThreadPool();
    ArrayList<TcCallable> cArray = new ArrayList<TcCallable>();
    List<TrackerClient> clients = new ArrayList<TrackerClient>();
    Map<Integer, PartitionData> partitionDataMap = new HashMap<Integer, PartitionData>(2);
    partitionDataMap.put(DefaultPartitionAccessor.DEFAULT_PARTITION_ID, new PartitionData(1d));
    clients.add(new TrackerClient(uri, partitionDataMap, new DegraderLoadBalancerTest.TestLoadBalancerClient(uri), SystemClock.instance(), null));
    for (int i = 0; i < 20; i++) {
        cArray.add(i, new TcCallable(clients, _state));
    }
    Runnable refreshTask = new Runnable() {

        @Override
        public void run() {
            while (true) {
                List<String> myStrategyList = new LinkedList<String>();
                myStrategyList.add("degraderV3");
                _state.refreshServiceStrategies(new ServiceProperties("service-1", "cluster-1", "/test", myStrategyList, Collections.<String, Object>emptyMap(), Collections.<String, Object>emptyMap(), Collections.<String, String>emptyMap(), schemes, Collections.<URI>emptySet()));
                if (Thread.interrupted()) {
                    return;
                }
            }
        }
    };
    myExecutor.execute(refreshTask);
    int badResults = 0;
    ArrayList<Future<Integer>> myList = new ArrayList<Future<Integer>>();
    for (int i = 0; i < cArray.size(); i++) {
        @SuppressWarnings("unchecked") Callable<Integer> c = (Callable) cArray.get(i);
        myList.add(i, myExecutor.submit(c));
    }
    try {
        for (int i = 0; i < cArray.size(); i++) {
            badResults += myList.get(i).get();
        }
    } catch (ExecutionException e) {
        Assert.fail("got ExecutionException");
    } finally {
        try {
            // call shutdownNow() to send an interrupt to the refreshTask
            myExecutor.shutdownNow();
            boolean terminated = myExecutor.awaitTermination(5, TimeUnit.SECONDS);
            if (!terminated) {
                Assert.fail("failed to shutdown threads correctly");
            }
        } catch (InterruptedException ie) {
            // this thread was interrupted
            myExecutor.shutdownNow();
        }
    }
    Assert.assertTrue(badResults == 0, "getTrackerClients returned null");
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) URI(java.net.URI) Callable(java.util.concurrent.Callable) TrackerClient(com.linkedin.d2.balancer.clients.TrackerClient) PartitionData(com.linkedin.d2.balancer.properties.PartitionData) ExecutionException(java.util.concurrent.ExecutionException) TransportClient(com.linkedin.r2.transport.common.bridge.client.TransportClient) LoadBalancerStrategy(com.linkedin.d2.balancer.strategies.LoadBalancerStrategy) RandomLoadBalancerStrategy(com.linkedin.d2.balancer.strategies.random.RandomLoadBalancerStrategy) LinkedList(java.util.LinkedList) NullStateListenerCallback(com.linkedin.d2.balancer.LoadBalancerState.NullStateListenerCallback) ServiceProperties(com.linkedin.d2.balancer.properties.ServiceProperties) SynchronousExecutorService(com.linkedin.d2.discovery.event.SynchronousExecutorService) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ExecutorService(java.util.concurrent.ExecutorService) ClusterProperties(com.linkedin.d2.balancer.properties.ClusterProperties) Future(java.util.concurrent.Future) Test(org.testng.annotations.Test) DegraderLoadBalancerTest(com.linkedin.d2.balancer.strategies.degrader.DegraderLoadBalancerTest)
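
TcCallable is elided above. A rough sketch consistent with how the test consumes it, assuming each callable repeatedly asks the load balancer state for a client and returns the number of lookups that came back null (the loop count is arbitrary):

// hypothetical sketch; returns how many getClient() calls raced to a null result
private static class TcCallable implements Callable<Integer> {
    private final List<TrackerClient> _clients;
    private final SimpleLoadBalancerState _state;

    TcCallable(List<TrackerClient> clients, SimpleLoadBalancerState state) {
        _clients = clients;
        _state = state;
    }

    @Override
    public Integer call() throws Exception {
        int badResults = 0;
        for (int i = 0; i < 100; i++) {
            // under the pre-fix race, concurrent refreshes made this return null
            if (_state.getClient("service-1", "http") == null) {
                badResults++;
            }
        }
        return badResults;
    }
}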

Example 15 with ZooKeeper

use of com.linkedin.d2.discovery.stores.zk.ZooKeeper in project rest.li by linkedin.

the class DegraderLoadBalancerStrategyV3 method doUpdatePartitionState.

/**
   * doUpdatePartitionState
   *
   * We have two mechanisms to influence the health and traffic patterns of the client: load
   * balancing (switching traffic from one host to another) and degrading service (dropping
   * calls). We load balance by allocating points in a consistent hash ring based on the
   * computedDropRate of the individual TrackerClients, which takes into account the latency
   * seen by that TrackerClient's requests. Alternatively, if the cluster is unhealthy (as
   * judged by a high latency watermark), we can drop a portion of traffic across all tracker
   * clients corresponding to this cluster.
   *
   * The reason we do not currently consider error rate when adjusting the hash ring is that
   * there are legitimate errors that servers can send back for clients to handle, such as
   * 400 return codes. A potential improvement would be to catch transport level exceptions and 500
   * level return codes, but the implication of that would need to be carefully understood and documented.
   *
   * We don't want to both reduce hash points and allow clients to manage their own drop rates,
   * because the clients do not have the global view that the load balancing strategy does. Without
   * a global view, a client won't know whether it already has a reduced number of hash points. If
   * the client continued to drop at the same rate after its points were reduced, its outbound
   * requests would be cut twice: once by the reduction in points and again by the client's own
   * drop rate. To avoid this, the drop rate is managed globally by the load balancing strategy and
   * provided to each client. The strategy alternates between adjusting the hash ring points and
   * the global drop rate in order to avoid double penalizing a client.
   *
   * We also have a mechanism for recovery if the number of points in the hash ring is not
   * enough to receive traffic. The initialRecoveryLevel is a number between 0.0 and 1.0, and
   * corresponds to a weight of the tracker client's full hash points.
   * The reason for the weight is to allow an initialRecoveryLevel that corresponds to
   * less than one hash point. This is useful if a "cooling off" period is desirable for
   * misbehaving tracker clients, i.e., given a full weight of 100 hash points, 0.005 means
   * there will be one cooling-off period before the client is reintroduced into the hash ring.
   *
   * The second configuration, rampFactor, will geometrically increase the
   * previous recoveryLevel if traffic still hasn't been seen for that tracker client.
   *
   * For example, given initialRecoveryLevel = 0.01, rampFactor = 2, and a default tracker client
   * full weight of 100 hash points, the recovery level will increase on successive update states
   * as 0.01, 0.02, 0.04, 0.08, 0.16, 0.32, etc. (i.e., 1, 2, 4, 8, 16, 32 hash points), aborting
   * as soon as calls are recorded for that tracker client.
   *
   * We also have highWaterMark and lowWaterMark as properties of the DegraderLoadBalancer strategy
   * so that the strategy can make decisions on whether to start dropping traffic globally across
   * all tracker clients for this cluster. The amount of traffic to drop is controlled by the
   * globalStepUp and globalStepDown properties, where globalStepUp controls how much the global
   * drop rate increases per interval, and globalStepDown controls how much the global drop rate
   * decreases per interval. We only step up the global drop rate when the average cluster latency
   * is higher than the highWaterMark, and only step down the global drop rate when the average
   * cluster latency is lower than the lowWaterMark.
   *
   * This code is reentrant. Multiple threads can potentially call this concurrently, so
   * callers must pass in the DegraderLoadBalancerState that they based their shouldUpdate() call on.
   * The threads may have different views of the trackerClients' latency, but this is
   * ok because the new state in the end will have taken only one action (either load balancing or
   * call-dropping with at most one step). Currently we will not call this concurrently, as
   * checkUpdatePartitionState will control entry to a single thread.
   *
   * @param clusterGenerationId generation id of the cluster these updaters were built from
   * @param partitionId the partition whose state is being updated
   * @param oldState the state the caller based its shouldUpdate() call on
   * @param config the strategy configuration (watermarks, recovery levels, quarantine settings)
   * @param trackerClientUpdaters updaters for the tracker clients in this partition
   * @param isQuarantineEnabled whether quarantine is enabled for this service
   */
private static PartitionDegraderLoadBalancerState doUpdatePartitionState(long clusterGenerationId, int partitionId,
        PartitionDegraderLoadBalancerState oldState, DegraderLoadBalancerStrategyConfig config,
        List<TrackerClientUpdater> trackerClientUpdaters, boolean isQuarantineEnabled) {
    debug(_log, "updating state for: ", trackerClientUpdaters);
    double sumOfClusterLatencies = 0.0;
    long totalClusterCallCount = 0;
    double newMaxDropRate;
    boolean hashRingChanges = false;
    boolean recoveryMapChanges = false;
    boolean quarantineMapChanged = false;
    PartitionDegraderLoadBalancerState.Strategy strategy = oldState.getStrategy();
    Map<TrackerClient, Double> oldRecoveryMap = oldState.getRecoveryMap();
    Map<TrackerClient, Double> newRecoveryMap = new HashMap<TrackerClient, Double>(oldRecoveryMap);
    double currentOverrideDropRate = oldState.getCurrentOverrideDropRate();
    double initialRecoveryLevel = config.getInitialRecoveryLevel();
    double ringRampFactor = config.getRingRampFactor();
    int pointsPerWeight = config.getPointsPerWeight();
    PartitionDegraderLoadBalancerState newState;
    Map<TrackerClient, DegraderLoadBalancerQuarantine> quarantineMap = oldState.getQuarantineMap();
    Map<TrackerClient, DegraderLoadBalancerQuarantine> quarantineHistory = oldState.getQuarantineHistory();
    Set<TrackerClient> activeClients = new HashSet<>();
    long clk = config.getClock().currentTimeMillis();
    for (TrackerClientUpdater clientUpdater : trackerClientUpdaters) {
        TrackerClient client = clientUpdater.getTrackerClient();
        DegraderControl degraderControl = client.getDegraderControl(partitionId);
        double averageLatency = degraderControl.getLatency();
        long callCount = degraderControl.getCallCount();
        oldState.getPreviousMaxDropRate().put(client, clientUpdater.getMaxDropRate());
        sumOfClusterLatencies += averageLatency * callCount;
        totalClusterCallCount += callCount;
        boolean recoveryMapContainsClient = newRecoveryMap.containsKey(client);
        if (isQuarantineEnabled) {
            activeClients.add(client);
            // Check/update quarantine state if current client is already under quarantine
            DegraderLoadBalancerQuarantine quarantine = quarantineMap.get(client);
            if (quarantine != null && quarantine.checkUpdateQuarantineState()) {
                // Evict client from quarantine
                quarantineMap.remove(client);
                quarantineHistory.put(client, quarantine);
                _log.info("TrackerClient {} evicted from quarantine @ {}", client.getUri(), clk);
                // Next need to put the client to slow-start/recovery mode to gradually pick up traffic.
                // For now simply force the weight to the initialRecoveryLevel so the client can gradually recover
                // RecoveryMap is used here to track the clients that just evicted from quarantine
                // They'll not be quarantined again in the recovery phase even though the effective
                // weight is within the range.
                newRecoveryMap.put(client, degraderControl.getMaxDropRate());
                clientUpdater.setMaxDropRate(1.0 - initialRecoveryLevel);
                quarantineMapChanged = true;
            }
        }
        if (recoveryMapContainsClient) {
            // Handle the recovery program: gradually restore hash ring
            // points for clients that are enrolled in it.
            if (callCount == 0) {
                // if this client is enrolled in the program, decrease the maxDropRate
                // it is important to note that this excludes clients that haven't gotten traffic
                // due solely to low volume.
                double oldMaxDropRate = clientUpdater.getMaxDropRate();
                double transmissionRate = 1.0 - oldMaxDropRate;
                if (transmissionRate <= 0.0) {
                    // We use the initialRecoveryLevel to indicate how many points to initially set
                    // the tracker client to when traffic has stopped flowing to this node.
                    transmissionRate = initialRecoveryLevel;
                } else {
                    transmissionRate *= ringRampFactor;
                    transmissionRate = Math.min(transmissionRate, 1.0);
                }
                newMaxDropRate = 1.0 - transmissionRate;
                if (strategy == PartitionDegraderLoadBalancerState.Strategy.LOAD_BALANCE) {
                    // if it's the hash ring's turn to adjust, then adjust the maxDropRate.
                    // Otherwise, we let the call dropping strategy take its turn, even if
                    // it may do nothing.
                    clientUpdater.setMaxDropRate(newMaxDropRate);
                }
                recoveryMapChanges = true;
            } else {
                // else if the recovery map contains the client and the call count was > 0:
                // tough love here; once the rehab clients start taking traffic, we
                // restore their maxDropRate to its original value and unenroll them
                // from the program.
                // This is safe because the hash ring points are controlled by the
                // computedDropRate variable, and the call dropping rate is controlled by
                // the overrideDropRate. The maxDropRate only serves to cap the computedDropRate and
                // overrideDropRate.
                // We store the maxDropRate and restore it here because the initialRecoveryLevel could
                // potentially be higher than what the default maxDropRate allowed (the maxDropRate doesn't
                // necessarily have to be 1.0). For instance, if the maxDropRate was 0.99 and the
                // initialRecoveryLevel was 0.05, then we need to store the old maxDropRate.
                clientUpdater.setMaxDropRate(newRecoveryMap.get(client));
                newRecoveryMap.remove(client);
                recoveryMapChanges = true;
            }
        }
    }
    // Remove clients from the quarantine map and history if they no longer appear
    // in trackerClientUpdaters -- those URIs were removed from zookeeper
    if (isQuarantineEnabled) {
        quarantineMap.entrySet().removeIf(e -> !activeClients.contains(e.getKey()));
        quarantineHistory.entrySet().removeIf(e -> !activeClients.contains(e.getKey()));
    }
    if (oldState.getClusterGenerationId() == clusterGenerationId && totalClusterCallCount <= 0 && !recoveryMapChanges && !quarantineMapChanged) {
        // if the cluster has not been called recently (total cluster call count is <= 0)
        // and we already have a state with the same set of URIs (same cluster generation),
        // and no clients are in rehab or evicted from quarantine, then don't change anything.
        debug(_log, "New state is the same as the old state so we're not changing anything. Old state = ", oldState, ", config= ", config);
        return new PartitionDegraderLoadBalancerState(oldState, clusterGenerationId, config.getClock().currentTimeMillis());
    }
    // update our overrides.
    double newCurrentAvgClusterLatency = -1;
    if (totalClusterCallCount > 0) {
        newCurrentAvgClusterLatency = sumOfClusterLatencies / totalClusterCallCount;
    }
    debug(_log, "average cluster latency: ", newCurrentAvgClusterLatency);
    // This points map stores how many hash ring points to allocate for each tracker client.
    Map<URI, Integer> points = new HashMap<URI, Integer>();
    Map<URI, Integer> oldPointsMap = oldState.getPointsMap();
    for (TrackerClientUpdater clientUpdater : trackerClientUpdaters) {
        TrackerClient client = clientUpdater.getTrackerClient();
        double successfulTransmissionWeight;
        URI clientUri = client.getUri();
        // Don't take into account cluster health when calculating the number of points
        // for each client. This is because the individual clients already take into account
        // latency and errors, and a successfulTransmissionWeight can and should be made
        // independent of other nodes in the cluster. Otherwise, one unhealthy client in a small
        // cluster can take down the entire cluster if the avg latency is too high.
        // The global drop rate will take into account the cluster latency. High cluster-wide error
        // rates are not something d2 can address.
        //
        // this client's maxDropRate and currentComputedDropRate may have been adjusted if it's in the
        // rehab program (to gradually send traffic it's way).
        DegraderControl degraderControl = client.getDegraderControl(partitionId);
        double dropRate = Math.min(degraderControl.getCurrentComputedDropRate(), clientUpdater.getMaxDropRate());
        // calculate the weight as the probability of successful transmission to this
        // node divided by the probability of successful transmission to the entire
        // cluster
        double clientWeight = client.getPartitionWeight(partitionId);
        successfulTransmissionWeight = clientWeight * (1.0 - dropRate);
        // calculate the weight as the probability of a successful transmission to this node
        // multiplied by the client's self-defined weight. thus, the node's final weight
        // takes into account both the self defined weight (to account for different
        // hardware in the same cluster) and the performance of the node (as defined by the
        // node's degrader).
        debug(_log, "computed new weight for uri ", clientUri, ": ", successfulTransmissionWeight);
        // keep track if we're making actual changes to the Hash Ring in this updatePartitionState.
        int newPoints = (int) (successfulTransmissionWeight * pointsPerWeight);
        boolean quarantineEffect = false;
        if (isQuarantineEnabled) {
            if (quarantineMap.containsKey(client)) {
                // If the client is still in quarantine, keep the points to 0 so no real traffic will be used
                newPoints = 0;
                quarantineEffect = true;
            } else if (successfulTransmissionWeight <= 0.0 && clientWeight > EPSILON && degraderControl.isHigh()) {
                // Quarantine candidate: the effective weight has dropped to 0, the configured
                // weight is non-trivial, and the degrader reports high latency. The number of
                // quarantined clients is capped at quarantineMaxPercent
                // (HTTP_LB_QUARANTINE_MAX_PERCENT) of the cluster, checked below.
                if (1.0 * quarantineMap.size() < Math.ceil(trackerClientUpdaters.size() * config.getQuarantineMaxPercent())) {
                    // Put the client into quarantine
                    DegraderLoadBalancerQuarantine quarantine = quarantineHistory.remove(client);
                    if (quarantine == null) {
                        quarantine = new DegraderLoadBalancerQuarantine(clientUpdater, config, oldState.getServiceName());
                    }
                    // If the trackerClient was just recently evicted from quarantine, it is possible that
                    // the service is already in trouble while the quarantine probing approach works
                    // fine. In such case we'll reuse the previous waiting duration instead of starting
                    // from scratch again
                    quarantine.reset((clk - quarantine.getLastChecked()) > DegraderLoadBalancerStrategyConfig.DEFAULT_QUARANTINE_REENTRY_TIME);
                    quarantineMap.put(client, quarantine);
                    // reduce the points to 0 so no real traffic will be used
                    newPoints = 0;
                    _log.warn("TrackerClient {} is put into quarantine {}. OverrideDropRate = {}, callCount = {}, latency = {}," + " errorRate = {}", new Object[] { client.getUri(), quarantine, degraderControl.getMaxDropRate(), degraderControl.getCallCount(), degraderControl.getLatency(), degraderControl.getErrorRate() });
                    quarantineEffect = true;
                } else {
                    _log.error("Quarantine for service {} is full! Could not add {}", oldState.getServiceName(), client);
                }
            }
        }
        // We only enroll a client in the recovery program when its configured weight is
        // non-zero (clientWeight > EPSILON); when the weight is zero, we don't want this
        // tracker client to get any traffic.
        if (!quarantineEffect && newPoints == 0 && clientWeight > EPSILON) {
            // We are choking off traffic to this tracker client.
            // Enroll this tracker client in the recovery program so that
            // we can make sure it still gets some traffic
            Double oldMaxDropRate = clientUpdater.getMaxDropRate();
            // set the default recovery level.
            newPoints = (int) (initialRecoveryLevel * pointsPerWeight);
            // Keep track of the original maxDropRate
            if (!newRecoveryMap.containsKey(client)) {
                // keep track of this client,
                newRecoveryMap.put(client, oldMaxDropRate);
                clientUpdater.setMaxDropRate(1.0 - initialRecoveryLevel);
            }
        }
        points.put(clientUri, newPoints);
        if (!oldPointsMap.containsKey(clientUri) || oldPointsMap.get(clientUri) != newPoints) {
            hashRingChanges = true;
        }
    }
    // Rebuild the ring if load balancing produced hash ring changes, or
    // if there were changes to the members of the cluster (a new cluster generation id).
    if ((strategy == PartitionDegraderLoadBalancerState.Strategy.LOAD_BALANCE && hashRingChanges) || oldState.getClusterGenerationId() != clusterGenerationId) {
        // atomic overwrite
        // try Call Dropping next time we updatePartitionState.
        newState = new PartitionDegraderLoadBalancerState(clusterGenerationId, config.getClock().currentTimeMillis(), true, oldState.getRingFactory(), points, PartitionDegraderLoadBalancerState.Strategy.CALL_DROPPING, currentOverrideDropRate, newCurrentAvgClusterLatency, newRecoveryMap, oldState.getServiceName(), oldState.getDegraderProperties(), totalClusterCallCount, quarantineMap, quarantineHistory);
        logState(oldState, newState, partitionId, config, trackerClientUpdaters);
    } else {
        // time to try call dropping strategy, if necessary.
        // we are explicitly setting the override drop rate to a number between 0 and 1, inclusive.
        double newDropLevel = Math.max(0.0, currentOverrideDropRate);
        // Only adjust the drop rate when call volume is high enough for the average
        // cluster latency to be statistically significant.
        if (newCurrentAvgClusterLatency > 0 && totalClusterCallCount >= config.getMinClusterCallCountHighWaterMark()) {
            if (newCurrentAvgClusterLatency >= config.getHighWaterMark() && currentOverrideDropRate != 1.0) {
                // if the cluster latency is too high and we can drop more traffic
                newDropLevel = Math.min(1.0, newDropLevel + config.getGlobalStepUp());
            } else if (newCurrentAvgClusterLatency <= config.getLowWaterMark() && currentOverrideDropRate != 0.0) {
                // else if the cluster latency is good and we can reduce the override drop rate
                newDropLevel = Math.max(0.0, newDropLevel - config.getGlobalStepDown());
            }
        // else the averageClusterLatency is between Low and High, or we can't change anything more,
        // then do not change anything.
        } else if (newCurrentAvgClusterLatency > 0 && totalClusterCallCount >= config.getMinClusterCallCountLowWaterMark()) {
            // but we might recover a bit if the latency is healthy
            if (newCurrentAvgClusterLatency <= config.getLowWaterMark() && currentOverrideDropRate != 0.0) {
                // the cluster latency is good and we can reduce the override drop rate
                newDropLevel = Math.max(0.0, newDropLevel - config.getGlobalStepDown());
            }
        // else the averageClusterLatency is somewhat high but since the qps is not that high, we shouldn't degrade
        } else {
            // if we enter here, that means we have very low traffic. We should reduce the overrideDropRate, if possible.
            // When we have below 1 QPS of traffic, we should be pretty confident that the cluster can handle such very
            // low traffic. Of course this depends on the MinClusterCallCountLowWaterMark that the service owner sets.
            // We might also end up here if we had somehow choked off all traffic to the cluster, most
            // likely in a one-node/small-cluster scenario. Obviously, we can't check latency here;
            // we'll have to rely on the metric in the next updatePartitionState. If the cluster is still having
            // latency problems, then we will oscillate between off and letting a little traffic through,
            // and that is acceptable. If the latency, though high, is deemed acceptable, then the
            // watermarks can be adjusted to let more traffic through.
            newDropLevel = Math.max(0.0, newDropLevel - config.getGlobalStepDown());
        }
        if (newDropLevel != currentOverrideDropRate) {
            overrideClusterDropRate(partitionId, newDropLevel, trackerClientUpdaters);
        }
        // don't change the points map or the recoveryMap, but try load balancing strategy next time.
        newState = new PartitionDegraderLoadBalancerState(clusterGenerationId, config.getClock().currentTimeMillis(), true, oldState.getRingFactory(), oldPointsMap, PartitionDegraderLoadBalancerState.Strategy.LOAD_BALANCE, newDropLevel, newCurrentAvgClusterLatency, isQuarantineEnabled ? newRecoveryMap : oldRecoveryMap, oldState.getServiceName(), oldState.getDegraderProperties(), totalClusterCallCount, quarantineMap, quarantineHistory);
        logState(oldState, newState, partitionId, config, trackerClientUpdaters);
        points = oldPointsMap;
    }
    // adjust the min call count for each client based on the hash ring reduction and call dropping
    // fraction.
    overrideMinCallCount(partitionId, currentOverrideDropRate, trackerClientUpdaters, points, pointsPerWeight);
    return newState;
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) DegraderControl(com.linkedin.util.degrader.DegraderControl) URI(java.net.URI) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TrackerClient(com.linkedin.d2.balancer.clients.TrackerClient) HashSet(java.util.HashSet)
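
As a tiny worked example of the recovery ramp described in the javadoc above (values taken from the doc: initialRecoveryLevel = 0.01, rampFactor = 2, a full weight of 100 hash points; the loop itself is illustrative only):

double initialRecoveryLevel = 0.01;
double ringRampFactor = 2.0;
int pointsPerWeight = 100;
double transmissionRate = initialRecoveryLevel;
for (int update = 1; update <= 6; update++) {
    // successive updates yield 1, 2, 4, 8, 16, 32 points until calls are recorded
    int points = (int) (transmissionRate * pointsPerWeight);
    System.out.println("update " + update + ": recoveryLevel=" + transmissionRate + ", points=" + points);
    transmissionRate = Math.min(1.0, transmissionRate * ringRampFactor);
}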

Aggregations

Map (java.util.Map)8 ArrayList (java.util.ArrayList)6 CountDownLatch (java.util.concurrent.CountDownLatch)6 JSONObject (org.json.simple.JSONObject)6 None (com.linkedin.common.util.None)5 IOException (java.io.IOException)5 HashMap (java.util.HashMap)5 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)5 D2Client (com.linkedin.d2.balancer.D2Client)4 D2ClientBuilder (com.linkedin.d2.balancer.D2ClientBuilder)4 ZooKeeper (com.linkedin.d2.discovery.stores.zk.ZooKeeper)4 List (java.util.List)4 ScheduledFuture (java.util.concurrent.ScheduledFuture)4 URI (java.net.URI)3 AsyncCallback (org.apache.zookeeper.AsyncCallback)3 KeeperException (org.apache.zookeeper.KeeperException)3 Stat (org.apache.zookeeper.data.Stat)3 ParseException (org.json.simple.parser.ParseException)3 Test (org.testng.annotations.Test)3 TrackerClient (com.linkedin.d2.balancer.clients.TrackerClient)2