Search in sources :

Example 1 with Point

use of com.linkedin.d2.balancer.util.hashing.ConsistentHashRing.Point in project rest.li by linkedin.

the class SimpleLoadBalancerState method refreshTransportClientsPerService.

/**
 * Rebuilds the transport clients and tracker clients registered for a service.
 *
 * A new set of transport clients is created and published in {@code _serviceClients}, a new
 * clients-by-uri map is published in {@code _trackerClients}, and the transport clients that
 * were previously registered for the service are shut down.
 *
 * @param serviceProperties properties of the service whose clients are being refreshed
 */
void refreshTransportClientsPerService(ServiceProperties serviceProperties) {
    final String serviceName = serviceProperties.getServiceName();

    // Build the replacement transport clients. The clients-by-scheme map is only ever swapped
    // as a whole, never edited in place, so it is published behind a read-only view.
    final Map<String, TransportClient> freshTransportClients = Collections.unmodifiableMap(createAndInsertTransportClientTo(serviceProperties));
    final Map<String, TransportClient> staleTransportClients = _serviceClients.put(serviceName, freshTransportClients);

    // Degrader configuration applied to every tracker client created below; stays null
    // (i.e. default configuration) when the service carries no degrader properties.
    final DegraderImpl.Config degraderConfig;
    if (serviceProperties.getDegraderProperties() == null || serviceProperties.getDegraderProperties().isEmpty()) {
        debug(_log, "trying to see if there's a special degraderImpl properties but serviceInfo.getDegraderImpl() is null" + " for service name = " + serviceName + " so we'll set config to default");
        degraderConfig = null;
    } else {
        degraderConfig = DegraderConfigFactory.toDegraderConfig(serviceProperties.getDegraderProperties());
    }

    // The strategy properties may override the clock; otherwise the system clock is used.
    final Map<String, Object> strategyProperties = serviceProperties.getLoadBalancerStrategyProperties();
    final Clock clock = (strategyProperties == null)
        ? SystemClock.instance()
        : MapUtil.getWithDefault(strategyProperties, PropertyKeys.CLOCK, SystemClock.instance(), Clock.class);

    // Re-create every tracker client of the service's cluster so that it picks up the new configuration.
    final LoadBalancerStateItem<UriProperties> clusterUriItem = _uriProperties.get(serviceProperties.getClusterName());
    final UriProperties clusterUris = (clusterUriItem != null) ? clusterUriItem.getProperty() : null;

    // In both branches the clients-by-uri map may be edited later by UriPropertiesListener.handlePut,
    // hence a concurrent map.
    final Map<URI, TrackerClient> trackerClientsByUri;
    if (clusterUris == null) {
        trackerClientsByUri = new ConcurrentHashMap<URI, TrackerClient>(16, 0.75f, 1);
    } else {
        final Set<URI> knownUris = clusterUris.Uris();
        trackerClientsByUri = new ConcurrentHashMap<URI, TrackerClient>(CollectionUtils.getMapInitialCapacity(knownUris.size(), 0.75f), 0.75f, 1);
        final long trackerClientInterval = getTrackerClientInterval(serviceProperties);
        final String errorStatusPattern = getErrorStatusPattern(serviceProperties);
        for (URI knownUri : knownUris) {
            final TrackerClient created = getTrackerClient(serviceName, knownUri, clusterUris.getPartitionDataMap(knownUri), degraderConfig, clock, trackerClientInterval, errorStatusPattern);
            if (created == null) {
                continue;
            }
            trackerClientsByUri.put(knownUri, created);
        }
    }

    // Replace the previous tracker clients with the new ones.
    _trackerClients.put(serviceName, trackerClientsByUri);

    // The old tracker clients need no shutdown of their own, because they all point directly to
    // the TransportClient for the service; the old transport clients, however, must be shut down.
    shutdownTransportClients(staleTransportClients, serviceName);
}
Also used : TransportClient(com.linkedin.r2.transport.common.bridge.client.TransportClient) DegraderImpl(com.linkedin.util.degrader.DegraderImpl) SystemClock(com.linkedin.util.clock.SystemClock) Clock(com.linkedin.util.clock.Clock) URI(java.net.URI) TrackerClient(com.linkedin.d2.balancer.clients.TrackerClient) UriProperties(com.linkedin.d2.balancer.properties.UriProperties)

Example 2 with Point

use of com.linkedin.d2.balancer.util.hashing.ConsistentHashRing.Point in project rest.li by linkedin.

the class DegraderLoadBalancerStrategyV2 method getUnhealthyTrackerClients.

/**
 * Lists the tracker clients whose allocated hash-ring points are below their full allocation.
 *
 * A client's full allocation is its weight in the default partition multiplied by the
 * configured points-per-weight, truncated to an int. A client that has no entry in
 * {@code pointsMap} is treated as having 0 points instead of triggering a
 * NullPointerException on auto-unboxing.
 *
 * @param trackerClients clients to inspect
 * @param pointsMap points currently allocated per client URI
 * @param config strategy configuration supplying the points-per-weight factor
 * @return one {@code uri:points/fullPoints} entry for every client below its full allocation
 */
private static List<String> getUnhealthyTrackerClients(List<TrackerClient> trackerClients, Map<URI, Integer> pointsMap, DegraderLoadBalancerStrategyConfig config) {
    List<String> unhealthyClients = new ArrayList<String>();
    for (TrackerClient client : trackerClients) {
        URI uri = client.getUri();
        int perfectHealth = (int) (client.getPartitionWeight(DEFAULT_PARTITION_ID) * config.getPointsPerWeight());
        // Map.get returns null for an absent key; unboxing that null in the comparison below would throw.
        Integer allocated = pointsMap.get(uri);
        int point = (allocated == null) ? 0 : allocated.intValue();
        if (point < perfectHealth) {
            unhealthyClients.add(uri + ":" + point + "/" + perfectHealth);
        }
    }
    return unhealthyClients;
}
Also used : TrackerClient(com.linkedin.d2.balancer.clients.TrackerClient) ArrayList(java.util.ArrayList)

Example 3 with Point

use of com.linkedin.d2.balancer.util.hashing.ConsistentHashRing.Point in project rest.li by linkedin.

the class TestRouteLookupClient method testRouteLookupClientCallback.

/**
 * Sends a request through a RouteLookupClient and verifies, via the message of the resulting
 * failure, that the request was rewritten to target the service name
 * {@code simple_uriWestCoast5555Foo}.
 */
@Test
public void testRouteLookupClientCallback() throws InterruptedException, ExecutionException, TimeoutException {
    RouteLookup routeLookup = new SimpleTestRouteLookup();
    final D2Client d2Client = new D2ClientBuilder().setZkHosts("localhost:2121").build();
    d2Client.start(new FutureCallback<None>());
    RouteLookupClient routeLookupClient = new RouteLookupClient(d2Client, routeLookup, "WestCoast");
    RestRequest dummyRestRequest = new RestRequestBuilder(URI.create("d2://simple_uri")).build();
    FutureCallback<RestResponse> futureCallback = new FutureCallback<RestResponse>();
    routeLookupClient.restRequest(dummyRestRequest, futureCallback, "5555");
    try {
        // The response itself is irrelevant: the call is expected to fail.
        futureCallback.get(10, TimeUnit.SECONDS);
        Assert.fail("Unexpected success, request should have thrown a ServiceUnavailableException");
    } catch (Exception e) {
        // getMessage() may be null (for instance when the wait times out); report that as a
        // regular assertion failure carrying the original exception, not as a NullPointerException.
        String message = e.getMessage();
        if (message == null || !message.contains("_serviceName=simple_uriWestCoast5555Foo")) {
            Assert.fail("request was not rewritten to point at the d2 service simple_uriWestCoast5555Foo", e);
        }
    }
}
Also used : D2Client(com.linkedin.d2.balancer.D2Client) RestResponse(com.linkedin.r2.message.rest.RestResponse) D2ClientBuilder(com.linkedin.d2.balancer.D2ClientBuilder) TimeoutException(java.util.concurrent.TimeoutException) ExecutionException(java.util.concurrent.ExecutionException) RestRequest(com.linkedin.r2.message.rest.RestRequest) RestRequestBuilder(com.linkedin.r2.message.rest.RestRequestBuilder) None(com.linkedin.common.util.None) FutureCallback(com.linkedin.common.callback.FutureCallback) Test(org.testng.annotations.Test)

Example 4 with Point

use of com.linkedin.d2.balancer.util.hashing.ConsistentHashRing.Point in project rest.li by linkedin.

the class DegraderLoadBalancerStrategyV2_1 method doUpdateState.

/**
   * updateState
   *
   * We have two mechanisms to influence the health and traffic patterns of the client. They are
   * by load balancing (switching traffic from one host to another) and by degrading service
   * (dropping calls). We load balance by allocating points in a consistent hash ring based on the
   * computedDropRate of the individual TrackerClients, which takes into account the latency
   * seen by that TrackerClient's requests. We can alternatively, if the cluster is
   * unhealthy (by using a high latency watermark) drop a portion of traffic across all tracker
   * clients corresponding to this cluster.
   *
   * The reason we do not currently consider error rate when adjusting the hash ring is that
   * there are legitimate errors that servers can send back for clients to handle, such as
   * 400 return codes. A potential improvement would be to catch transport level exceptions and 500
   * level return codes, but the implication of that would need to be carefully understood and documented.
   *
   * We don't want both to reduce hash points and allow clients to manage their own drop rates
   * because the clients do not have a global view that the load balancing strategy does. Without
   * a global view, the clients won't know if it already has a reduced number of hash points. If the
   * client continues to drop at the same drop rate as before their points have been reduced, then
   * the client would have its outbound request reduced by both reduction in points and the client's
   * drop rate. To avoid this, the drop rate is managed globally by the load balancing strategy and
   * provided to each client. The strategy will ALTERNATE between adjusting the hash ring points or
   * the global drop rate in order to avoid double penalizing a client. See below:
   *
   * Period 1
   * We found the average latency is greater than high water mark.
   * Then increase the global drop rate for this cluster (let's say from 0% to 20%)
   * so 20% of all calls gets dropped.
   * .
   * .
   * Period 2
   * The average latency is still higher than high water mark and we found
   * it is especially high for few specific clients in the cluster
   * Then reduce the number of hash points for those clients in the hash ring, with the hope we'll
   * redirect the traffic to "healthier" client and reduce the average latency
   * .
   * .
   * Period 3
   * The average latency is still higher than high water mark
   * Then we will alternate strategy by increasing the global rate for the whole cluster again
   * .
   * .
   * repeat until the latency becomes smaller than high water mark and higher than low water mark
   * to maintain the state. If the latency becomes lower than low water mark that means the cluster
   * is getting healthier so we can serve more traffic so we'll start recovery as explained below
   *
   * We also have a mechanism for recovery if the number of points in the hash ring is not
   * enough to receive traffic. The initialRecoveryLevel is a number between 0.0 and 1.0, and
   * corresponds to a weight of the tracker client's full hash points. e.g. if a client
   * has a default 100 hash points in a ring, 0.0 means there's 0 point for the client in the ring
   * and 1.0 means there are 100 points in the ring for the client.
   * The second configuration, rampFactor, will geometrically increase the
   * previous recoveryLevel if traffic still hasn't been seen for that tracker client.
   *
   * The reason for using weight instead of real points is to allow an initialRecoveryLevel that corresponds to
   * less than one hash point. This would be useful if a "cooling off" period is desirable for the
   * misbehaving tracker clients i.e. given a full weight of 100 hash points, 0.005 initialRecoverylevel
   * 0 hashpoints at start and rampFactor = 2 means that there will be one cooling off period before the
   * client is reintroduced into the hash ring (see below).
   *
   * Period 1
   * 100 * 0.005 = 0.5 point -> So nothing in the hashring
   *
   * Period 2
   * 100 * (0.005 * 2 because of rampfactor) = 1 point -> So we'll add one point in the hashring
   *
   * Another example, given initialRecoveryLevel = 0.01, rampFactor = 2, and default tracker client hash
   * points of 100, we will increase the hash points in this pattern on successive update States:
   * 0.01, 0.02, 0.04, 0.08, 0.16, 0.32, etc. -> 1, 2, 4, 8, 16, 32 points in the hashring and aborting
   * as soon as calls are recorded for that tracker client.
   *
   * We also have highWaterMark and lowWaterMark as properties of the DegraderLoadBalancer strategy
   * so that the strategy can make decisions on whether to start dropping traffic GLOBALLY across
   * all tracker clients for this cluster. The amount of traffic to drop is controlled by the
   * globalStepUp and globalStepDown properties, where globalStepUp controls how much the global
   * drop rate increases per interval, and globalStepDown controls how much the global drop rate
   * decreases per interval. We only step up the global drop rate when the average cluster latency
   * is at or above the highWaterMark (and the cluster call count reaches the high call-count
   * water mark). We step down the global drop rate when the average cluster latency is at or
   * below the lowWaterMark, or when the cluster call count is too low to judge latency at all.
   *
   * This code is thread reentrant. Multiple threads can potentially call this concurrently, and so
   * callers must pass in the DegraderLoadBalancerState that they based their shouldUpdate() call on.
   * The multiple threads may have different views of the trackerClients latency, but this is
   * ok as the new state in the end will have only taken one action (either loadbalance or
   * call-dropping with at most one step). Currently we will not call this concurrently, as
   * checkUpdateState will control entry to a single thread.
   *
   * Note that besides building the returned state, this method writes each client's current
   * maxDropRate into oldState.getPreviousMaxDropRate() and may change the maxDropRate of the
   * given updaters.
   *
   * @param clusterGenerationId generation id of the current cluster membership; when it differs from
   *                            the one stored in oldState a new points map is always published
   * @param oldState the state the caller based its update decision on
   * @param config strategy configuration (recovery level, ramp factor, points per weight,
   *               water marks, global step sizes, update interval and clock)
   * @param trackerClientUpdaters updaters wrapping the tracker clients of the cluster
   * @return the state to use from now on
   */
private static DegraderLoadBalancerState doUpdateState(long clusterGenerationId, DegraderLoadBalancerState oldState, DegraderLoadBalancerStrategyConfig config, List<TrackerClientUpdater> trackerClientUpdaters) {
    debug(_log, "updating state for: ", trackerClientUpdaters);
    double sumOfClusterLatencies = 0.0;
    double computedClusterDropSum = 0.0;
    double computedClusterWeight = 0.0;
    long totalClusterCallCount = 0;
    boolean hashRingChanges = false;
    boolean recoveryMapChanges = false;
    DegraderLoadBalancerState.Strategy strategy = oldState.getStrategy();
    Map<TrackerClient, Double> oldRecoveryMap = oldState.getRecoveryMap();
    // work on a copy so that the old state's recovery map is left untouched
    Map<TrackerClient, Double> newRecoveryMap = new HashMap<TrackerClient, Double>(oldRecoveryMap);
    double currentOverrideDropRate = oldState.getCurrentOverrideDropRate();
    double initialRecoveryLevel = config.getInitialRecoveryLevel();
    double ringRampFactor = config.getRingRampFactor();
    int pointsPerWeight = config.getPointsPerWeight();
    DegraderLoadBalancerState newState;
    // First pass: accumulate the cluster-wide statistics and handle clients enrolled in recovery.
    for (TrackerClientUpdater clientUpdater : trackerClientUpdaters) {
        TrackerClient client = clientUpdater.getTrackerClient();
        double averageLatency = client.getDegraderControl(DEFAULT_PARTITION_ID).getLatency();
        long callCount = client.getDegraderControl(DEFAULT_PARTITION_ID).getCallCount();
        // remember the maxDropRate the client had before this update (stored on the old state)
        oldState.getPreviousMaxDropRate().put(client, clientUpdater.getMaxDropRate());
        // weight each client's average latency by its call count so that the cluster average is per call
        sumOfClusterLatencies += averageLatency * callCount;
        totalClusterCallCount += callCount;
        double clientDropRate = client.getDegraderControl(DEFAULT_PARTITION_ID).getCurrentComputedDropRate();
        computedClusterDropSum += client.getPartitionWeight(DEFAULT_PARTITION_ID) * clientDropRate;
        computedClusterWeight += client.getPartitionWeight(DEFAULT_PARTITION_ID);
        boolean recoveryMapContainsClient = newRecoveryMap.containsKey(client);
        // A client that received no calls in this interval may simply have too few
        // points in the hash ring for the clients.
        if (callCount == 0) {
            // Only clients already enrolled in recovery are ramped up; for any other client the
            // lack of calls is not acted upon here (it may be due solely to low volume).
            if (recoveryMapContainsClient) {
                // The ramp-up is only applied when the current strategy is LOAD_BALANCE; otherwise
                // it may do nothing.
                if (strategy == DegraderLoadBalancerState.Strategy.LOAD_BALANCE) {
                    double oldMaxDropRate = clientUpdater.getMaxDropRate();
                    double transmissionRate = 1.0 - oldMaxDropRate;
                    if (transmissionRate <= 0.0) {
                        // We use the initialRecoveryLevel to indicate how many points to initially set
                        // the tracker client to when traffic has stopped flowing to this node.
                        transmissionRate = initialRecoveryLevel;
                    } else {
                        // geometric ramp-up of the transmission rate, capped at 1.0
                        transmissionRate *= ringRampFactor;
                        transmissionRate = Math.min(transmissionRate, 1.0);
                    }
                    double newMaxDropRate = 1.0 - transmissionRate;
                    clientUpdater.setMaxDropRate(newMaxDropRate);
                }
                recoveryMapChanges = true;
            }
        } else // calls were recorded: only a client enrolled in recovery needs its maxDropRate changed.
        if (recoveryMapContainsClient) {
            // else if the recovery map contains the client and the call count was > 0
            // tough love here, once the rehab clients start taking traffic, we
            // restore their maxDropRate to its original value, and unenroll them
            // from the program.
            // This is safe because the hash ring points are controlled by the
            // computedDropRate variable, and the call dropping rate is controlled by
            // the overrideDropRate. The maxDropRate only serves to cap the computedDropRate and
            // overrideDropRate.
            // We store the maxDropRate and restore it here because the initialRecoveryLevel could
            // potentially be higher than what the default maxDropRate allowed. (the maxDropRate doesn't
            // necessarily have to be 1.0). For instance, if the maxDropRate was 0.99, and the
            // initialRecoveryLevel was 0.05  then we need to store the old maxDropRate.
            clientUpdater.setMaxDropRate(newRecoveryMap.get(client));
            newRecoveryMap.remove(client);
            recoveryMapChanges = true;
        }
    }
    // Weighted average of the clients' computed drop rates; only used for the debug log below.
    // With a total weight of 0 (e.g. no clients) this floating-point division yields NaN.
    double computedClusterDropRate = computedClusterDropSum / computedClusterWeight;
    debug(_log, "total cluster call count: ", totalClusterCallCount);
    debug(_log, "computed cluster drop rate for ", trackerClientUpdaters.size(), " nodes: ", computedClusterDropRate);
    if (oldState.getClusterGenerationId() == clusterGenerationId && totalClusterCallCount <= 0 && !recoveryMapChanges) {
        // if the cluster has not been called recently (total cluster call count is <= 0)
        // and we already have a state with the same set of URIs (same cluster generation),
        // and no clients are in rehab, then don't change anything.
        debug(_log, "New state is the same as the old state so we're not changing anything. Old state = ", oldState, ", config=", config);
        return new DegraderLoadBalancerState(oldState, clusterGenerationId, config.getUpdateIntervalMs(), config.getClock().currentTimeMillis());
    }
    // update our overrides.
    // -1 marks "no calls seen, so no average latency available"
    double newCurrentAvgClusterLatency = -1;
    if (totalClusterCallCount > 0) {
        newCurrentAvgClusterLatency = sumOfClusterLatencies / totalClusterCallCount;
    }
    debug(_log, "average cluster latency: ", newCurrentAvgClusterLatency);
    // This points map stores how many hash map points to allocate for each tracker client.
    Map<URI, Integer> points = new HashMap<URI, Integer>();
    Map<URI, Integer> oldPointsMap = oldState.getPointsMap();
    // Second pass: compute the hash ring points of every client.
    for (TrackerClientUpdater clientUpdater : trackerClientUpdaters) {
        TrackerClient client = clientUpdater.getTrackerClient();
        double successfulTransmissionWeight;
        URI clientUri = client.getUri();
        // Don't take into account cluster health when calculating the number of points
        // for each client. This is because the individual clients already take into account
        // latency, and a successfulTransmissionWeight can and should be made
        // independent of other nodes in the cluster. Otherwise, one unhealthy client in a small
        // cluster can take down the entire cluster if the avg latency is too high.
        // The global drop rate will take into account the cluster latency. High cluster-wide error
        // rates are not something d2 can address.
        //
        // this client's maxDropRate and currentComputedDropRate may have been adjusted if it's in the
        // rehab program (to gradually send traffic its way).
        double dropRate = Math.min(client.getDegraderControl(DEFAULT_PARTITION_ID).getCurrentComputedDropRate(), clientUpdater.getMaxDropRate());
        // calculate the weight as the probability of a successful transmission to this node
        // multiplied by the client's self-defined weight. thus, the node's final weight
        // takes into account both the self defined weight (to account for different
        // hardware in the same cluster) and the performance of the node (as defined by the
        // node's degrader).
        successfulTransmissionWeight = client.getPartitionWeight(DEFAULT_PARTITION_ID) * (1.0 - dropRate);
        debug(_log, "computed new weight for uri ", clientUri, ": ", successfulTransmissionWeight);
        // keep track if we're making actual changes to the Hash Ring in this updateState.
        // (the cast truncates, so any weight worth less than one point yields 0 points)
        int newPoints = (int) (successfulTransmissionWeight * pointsPerWeight);
        if (newPoints == 0) {
            // We are choking off traffic to this tracker client.
            // Enroll this tracker client in the recovery program so that
            // we can make sure it still gets some traffic
            Double oldMaxDropRate = clientUpdater.getMaxDropRate();
            // set the default recovery level.
            newPoints = (int) (initialRecoveryLevel * pointsPerWeight);
            // Keep track of the original maxDropRate
            if (!newRecoveryMap.containsKey(client)) {
                // keep track of this client,
                newRecoveryMap.put(client, oldMaxDropRate);
                clientUpdater.setMaxDropRate(1.0 - initialRecoveryLevel);
            }
        }
        points.put(clientUri, newPoints);
        // the Integer from the old map is unboxed for this comparison, so it compares values
        if (!oldPointsMap.containsKey(clientUri) || oldPointsMap.get(clientUri) != newPoints) {
            hashRingChanges = true;
        }
    }
    // Publish a new hash ring if it is the load balancing turn and the points changed, or
    // if there were changes to the members of the cluster
    if ((strategy == DegraderLoadBalancerState.Strategy.LOAD_BALANCE && hashRingChanges == true) || // or the cluster generation changed
    oldState.getClusterGenerationId() != clusterGenerationId) {
        // atomic overwrite
        // try Call Dropping next time we updateState.
        newState = new DegraderLoadBalancerState(config.getUpdateIntervalMs(), clusterGenerationId, points, config.getClock().currentTimeMillis(), DegraderLoadBalancerState.Strategy.CALL_DROPPING, currentOverrideDropRate, newCurrentAvgClusterLatency, true, newRecoveryMap, oldState.getServiceName(), oldState.getDegraderProperties(), totalClusterCallCount);
        logState(oldState, newState, config, trackerClientUpdaters);
    } else {
        // time to try call dropping strategy, if necessary.
        // we are explicitly setting the override drop rate to a number between 0 and 1, inclusive.
        double newDropLevel = Math.max(0.0, currentOverrideDropRate);
        // Adjust the override drop rate by at most one step per update
        // to get the cluster latency stabilized
        if (newCurrentAvgClusterLatency > 0 && totalClusterCallCount >= config.getMinClusterCallCountHighWaterMark()) {
            // the call count reached the high call-count water mark, so the latency is treated as
            // statistically significant
            if (newCurrentAvgClusterLatency >= config.getHighWaterMark() && currentOverrideDropRate != 1.0) {
                // if the cluster latency is too high and we can drop more traffic
                newDropLevel = Math.min(1.0, newDropLevel + config.getGlobalStepUp());
            } else if (newCurrentAvgClusterLatency <= config.getLowWaterMark() && currentOverrideDropRate != 0.0) {
                // else if the cluster latency is good and we can reduce the override drop rate
                newDropLevel = Math.max(0.0, newDropLevel - config.getGlobalStepDown());
            }
        // else the averageClusterLatency is between Low and High, or we can't change anything more,
        // then do not change anything.
        } else if (newCurrentAvgClusterLatency > 0 && totalClusterCallCount >= config.getMinClusterCallCountLowWaterMark()) {
            // the call count is between the low and high call-count water marks: never step up here,
            //but we might recover a bit if the latency is healthy
            if (newCurrentAvgClusterLatency <= config.getLowWaterMark() && currentOverrideDropRate != 0.0) {
                // the cluster latency is good and we can reduce the override drop rate
                newDropLevel = Math.max(0.0, newDropLevel - config.getGlobalStepDown());
            }
        // else the averageClusterLatency is somewhat high but since the qps is not that high, we shouldn't degrade
        } else {
            // if we enter here that means we have very low traffic. We should reduce the overrideDropRate, if possible.
            // when we have below 1 QPS traffic, we should be pretty confident that the cluster can handle very low
            // traffic. Of course this is depending on the MinClusterCallCountLowWaterMark that the service owner sets.
            // Another possible cause for this is if we had somehow choked off all traffic to the cluster, most
            // likely in a one node/small cluster scenario. Obviously, we can't check latency here,
            // we'll have to rely on the metric in the next updateState. If the cluster is still having
            // latency problems, then we will oscillate between off and letting a little traffic through,
            // and that is acceptable. If the latency, though high, is deemed acceptable, then the
            // watermarks can be adjusted to let more traffic through.
            newDropLevel = Math.max(0.0, newDropLevel - config.getGlobalStepDown());
        }
        // only push the drop rate to the clients when it actually changed
        if (newDropLevel != currentOverrideDropRate) {
            overrideClusterDropRate(newDropLevel, trackerClientUpdaters);
        }
        // don't change the points map or the recoveryMap, but try load balancing strategy next time.
        // NOTE(review): oldRecoveryMap is stored here although the loops above may already have
        // changed clients' maxDropRate based on newRecoveryMap - confirm this is intended.
        newState = new DegraderLoadBalancerState(config.getUpdateIntervalMs(), clusterGenerationId, oldPointsMap, config.getClock().currentTimeMillis(), DegraderLoadBalancerState.Strategy.LOAD_BALANCE, newDropLevel, newCurrentAvgClusterLatency, true, oldRecoveryMap, oldState.getServiceName(), oldState.getDegraderProperties(), totalClusterCallCount);
        logState(oldState, newState, config, trackerClientUpdaters);
        points = oldPointsMap;
    }
    // adjust the min call count for each client based on the hash ring reduction and call dropping
    // fraction.
    // NOTE(review): this passes the override drop rate of the OLD state (currentOverrideDropRate),
    // not newDropLevel computed above - confirm this is intended.
    overrideMinCallCount(currentOverrideDropRate, trackerClientUpdaters, points, pointsPerWeight);
    return newState;
}
Also used : HashMap(java.util.HashMap) URI(java.net.URI) TrackerClient(com.linkedin.d2.balancer.clients.TrackerClient)

Example 5 with Point

use of com.linkedin.d2.balancer.util.hashing.ConsistentHashRing.Point in project rest.li by linkedin.

the class D2Config method configure.

/**
 * Builds the cluster, service and service-group ("service variant") configuration maps from the
 * merged {@code _clusterServiceConfigurations}, then starts {@code _zkConnection} and writes the
 * results out through {@code writeConfig} and {@code writeChildren}.
 *
 * <p>All validation and map construction happens before {@code _zkConnection.start()}, so every
 * early error return in the first part of the method leaves the store untouched. The connection is
 * shut down in a {@code finally} block once it has been started.
 *
 * <p>Side effects visible in this method: {@code PropertyKeys.DEFAULT_COLO} is removed from
 * {@code _clusterDefaults}, and the per-cluster / per-service maps obtained from the merged
 * configuration are mutated in place (keys are removed from and added to them).
 *
 * @return {@code NO_ERROR_EXIT_CODE} when everything was written; the status returned by
 *         {@code handlePartitionProperties}, {@code addServicesToServicesMap} or
 *         {@code handleClusterVariants} when one of them reports a failure; or
 *         {@code EXCEPTION_EXIT_CODE} when a service group references an unknown cluster, pulls in
 *         the same service twice, or declares an unknown type
 * @throws IllegalStateException if a cluster declares colo variants that do not contain the
 *         default colo, or do not contain its master colo while symlinks are not enabled
 * @throws Exception declared by the signature; presumably propagated from the helper and
 *         connection calls below (which of them throw is not visible here)
 */
public int configure() throws Exception {
    // original map derived from properties file
    Map<String, Object> clusterServiceConfiguration = merge(_clusterServiceConfigurations);
    // map of clusterName -> cluster configuration
    Map<String, Map<String, Object>> clusters = new HashMap<String, Map<String, Object>>();
    // map of serviceName -> service configuration
    Map<String, Map<String, Object>> services = new HashMap<String, Map<String, Object>>();
    // Ugly. But this is a map of service groups, so it needs to reflect multiple services maps.
    Map<String, Map<String, Map<String, Object>>> serviceVariants = new HashMap<String, Map<String, Map<String, Object>>>();
    // temporary mapping from cluster name to services map, to aid in create cluster variants and
    // service groups.
    Map<String, Map<String, Map<String, Object>>> clusterToServiceMapping = new HashMap<String, Map<String, Map<String, Object>>>();
    int status;
    // temporary mapping from cluster name to the list of colo variants it has.
    Map<String, List<String>> variantToVariantsMapping = new HashMap<String, List<String>>();
    // temporary mapping from cluster name to coloVariant ClusterNames list.
    Map<String, List<String>> clusterToColoClustersMapping = new HashMap<String, List<String>>();
    // mapping from regular cluster name to the list of containing services
    // which will be added as children of the regular cluster znode.
    Map<String, List<String>> regularClusterToServicesMapping = new HashMap<>();
    _log.info("basePath: " + _basePath);
    _log.info("clusterDefaults: " + _clusterDefaults);
    _log.info("serviceDefaults: " + _serviceDefaults);
    // the default colo is taken out of (not just read from) the cluster defaults, so it is no
    // longer part of _clusterDefaults when those are handed to writeConfig below.
    final String defaultColo = (String) _clusterDefaults.remove(PropertyKeys.DEFAULT_COLO);
    // Solution 2 is the approach taken below.
    // NOTE(review): the list of candidate solutions this comment refers to is not part of this
    // method as seen here.
    for (String clusterName : clusterServiceConfiguration.keySet()) {
        @SuppressWarnings("unchecked") Map<String, Object> clusterConfig = (Map<String, Object>) clusterServiceConfiguration.get(clusterName);
        clusterConfig.put(PropertyKeys.CLUSTER_NAME, clusterName);
        // The structural properties below are removed from clusterConfig as they are read, so
        // they are not part of the cluster configuration stored in the clusters map later on.
        final Object servicesProperty = clusterConfig.remove(PropertyKeys.SERVICES);
        @SuppressWarnings("unchecked") Map<String, Map<String, Object>> servicesConfigs = (Map<String, Map<String, Object>>) servicesProperty;
        final Object clusterVariantProperty = clusterConfig.remove(PropertyKeys.CLUSTER_VARIANTS);
        @SuppressWarnings("unchecked") Map<String, Map<String, Object>> clusterVariantConfig = (Map<String, Map<String, Object>>) clusterVariantProperty;
        final Object coloVariantsProperty = clusterConfig.remove(PropertyKeys.COLO_VARIANTS);
        @SuppressWarnings("unchecked") List<String> coloVariants = (List<String>) coloVariantsProperty;
        final String masterColo = (String) clusterConfig.remove(PropertyKeys.MASTER_COLO);
        final String enableSymlinkString = (String) clusterConfig.remove(PropertyKeys.ENABLE_SYMLINK);
        final boolean enableSymlink;
        // remember the service names under the regular (un-suffixed) cluster name; this mapping is
        // what writeChildren receives at the end of the method.
        // NOTE(review): servicesConfigs is dereferenced without a null check, so a cluster entry
        // with no services property would throw a NullPointerException here - confirm that the
        // input is guaranteed to carry one.
        regularClusterToServicesMapping.put(clusterName, servicesConfigs.keySet().stream().collect(Collectors.toList()));
        // symlinks are enabled only when the property is present and equals "true" (ignoring case)
        if (enableSymlinkString != null && "true".equalsIgnoreCase(enableSymlinkString)) {
            enableSymlink = true;
        } else {
            enableSymlink = false;
        }
        // do some sanity check for partitions if any
        // Moving handling of partitionProperties before any coloVariant manipulations
        // (partition properties are read with get(), not remove(), so they stay in clusterConfig)
        final Object partitionPropertiesProperty = clusterConfig.get(PropertyKeys.PARTITION_PROPERTIES);
        @SuppressWarnings("unchecked") Map<String, Object> partitionProperties = (Map<String, Object>) partitionPropertiesProperty;
        if (partitionProperties != null) {
            status = handlePartitionProperties(partitionProperties, clusterConfig, clusterName);
            if (status != 0) {
                return status;
            }
        }
        // Re-publish the colo variants, master colo and cluster variant names as a flat
        // String -> String map under CLUSTER_PROPERTIES. Empty values are left out: a colo variant
        // list that is empty or holds only "", an empty master colo, an empty cluster variant map.
        Map<String, String> clusterProperties = new HashMap<>();
        if (coloVariants != null && coloVariants.size() > 0 && !(coloVariants.size() == 1 && coloVariants.contains(""))) {
            clusterProperties.put(PropertyKeys.COLO_VARIANTS, String.join(LIST_SEPARATOR, coloVariants));
        }
        if (masterColo != null && !masterColo.equals("")) {
            clusterProperties.put(PropertyKeys.MASTER_COLO, masterColo);
        }
        if (clusterVariantConfig != null && clusterVariantConfig.size() > 0) {
            clusterProperties.put(PropertyKeys.CLUSTER_VARIANTS, String.join(LIST_SEPARATOR, clusterVariantConfig.keySet()));
        }
        clusterConfig.put(PropertyKeys.CLUSTER_PROPERTIES, clusterProperties);
        // lots of if/else.
        // Normalize coloVariants: a missing list, or a list holding only the empty string, is
        // replaced by a single "" entry so the colo loop below runs exactly once for the cluster.
        if (coloVariants == null || (coloVariants.size() == 1 && coloVariants.contains(""))) {
            coloVariants = Collections.singletonList("");
        } else {
            // the default colo must be one of the peer colos; the master colo must be one as well
            // unless symlinks are enabled for this cluster.
            if (!coloVariants.contains(defaultColo)) {
                throw new IllegalStateException("The default colo: " + defaultColo + " is not one of the peer colos = " + coloVariants);
            }
            if (masterColo != null && !coloVariants.contains(masterColo) && !enableSymlink) {
                throw new IllegalStateException("The master colo: " + masterColo + " is not one of the peer colos = " + coloVariants);
            }
        }
        // flipped to true at the end of the first colo iteration that created the default services
        boolean defaultServicesCreated = false;
        for (String colo : coloVariants) {
            // the coloClusterName will be equal to the original cluster name if colo is the empty string
            String coloClusterName = D2Utils.addSuffixToBaseName(clusterName, colo);
            // coloServicesConfigs are the set of d2 services in this cluster in this colo
            // for the regular cluster case I could avoid creation of a new HashMap for both coloServicesConfig
            // and coloServiceConfig, as an optimization at the expense of simplicity.
            Map<String, Map<String, Object>> coloServicesConfigs = new HashMap<String, Map<String, Object>>();
            // Only create the default services once, and only when we have an empty colo string or the
            // colo matches the default colo.
            boolean createDefaultServices = (defaultServicesCreated == false) ? shouldCreateDefaultServices(colo, defaultColo) : false;
            for (String serviceName : servicesConfigs.keySet()) {
                // "resource" level config
                Map<String, Object> serviceConfig = servicesConfigs.get(serviceName);
                // There are some cases where we may not want to create colo variants of a particular service
                // We can't remove properties from the serviceConfig here because we might need to loop
                // over it multiple times.
                String createColoVariants = (String) serviceConfig.get(PropertyKeys.HAS_COLO_VARIANTS);
                boolean createColoVariantsForService = shouldCreateColoVariantsForService(colo, createColoVariants);
                String coloServiceName = serviceName;
                // true when the service's DEFAULT_ROUTING property equals PropertyKeys.MASTER_SUFFIX
                final boolean defaultRoutingToMasterColo = serviceConfig.containsKey(PropertyKeys.DEFAULT_ROUTING) && PropertyKeys.MASTER_SUFFIX.equals(serviceConfig.get(PropertyKeys.DEFAULT_ROUTING));
                // the service name only gets the colo suffix when we are supposed to create
                // colo variants of that serviceName.
                if (createColoVariantsForService) {
                    coloServiceName = D2Utils.addSuffixToBaseName(serviceName, colo);
                }
                final Object transportClientProperty = serviceConfig.get(PropertyKeys.TRANSPORT_CLIENT_PROPERTIES);
                @SuppressWarnings("unchecked") Map<String, Object> transportClientConfig = (Map<String, Object>) transportClientProperty;
                // NOTE(review): this stores back the value that was just read; the only visible
                // effect is that the key exists afterwards (with a null value if it was absent) -
                // confirm whether that is intended.
                serviceConfig.put(PropertyKeys.TRANSPORT_CLIENT_PROPERTIES, transportClientConfig);
                // copy taken here so the colo-specific entry below can set its own cluster and
                // service name without touching the shared serviceConfig.
                Map<String, Object> coloServiceConfig = new HashMap<String, Object>(serviceConfig);
                // so it does not have to know about what are the default services.
                // (createDefaultServices is already false once defaultServicesCreated is true, so
                // the second half of this condition is a redundant safeguard.)
                if (createDefaultServices && !defaultServicesCreated) {
                    // create the Master version of this service.
                    if (masterColo != null && createColoVariantsForService) {
                        // we need to create a "Master" version of this service to point to the current Master
                        // Cluster. Why not just use the original service name? We will point the original
                        // service name at the local cluster, as well as to make it explicit that requests
                        // sent to this service might cross colos, if the master is located in another colo.
                        Map<String, Object> masterServiceConfig = new HashMap<String, Object>(serviceConfig);
                        String masterServiceName = serviceName + PropertyKeys.MASTER_SUFFIX;
                        String masterClusterName;
                        // with symlinks the master service targets the cluster's master symlink
                        // name, otherwise the cluster name suffixed with the master colo.
                        if (enableSymlink) {
                            masterClusterName = D2Utils.getSymlinkNameForMaster(clusterName);
                        } else {
                            masterClusterName = D2Utils.addSuffixToBaseName(clusterName, masterColo);
                        }
                        masterServiceConfig.put(PropertyKeys.CLUSTER_NAME, masterClusterName);
                        masterServiceConfig.put(PropertyKeys.SERVICE_NAME, masterServiceName);
                        masterServiceConfig.put(PropertyKeys.IS_MASTER_SERVICE, "true");
                        coloServicesConfigs.put(masterServiceName, masterServiceConfig);
                    }
                    // this block will handle:
                    // the colo-agnostic service -> colo-specific default cluster mapping (fooService -> FooCluster-WestCoast)
                    // the colo-agnostic service -> colo-agnostic cluster mapping (fooService -> FooCluster)
                    // the latter only being done for regular clusters, the former only being done for clusters
                    // that have coloVariants specified.
                    Map<String, Object> regularServiceConfig = new HashMap<String, Object>(serviceConfig);
                    if (createColoVariantsForService) {
                        // we set isDefaultService flag only if it is a multi-colo aware service.
                        regularServiceConfig.put(PropertyKeys.IS_DEFAULT_SERVICE, "true");
                        if (defaultRoutingToMasterColo) {
                            regularServiceConfig.put(PropertyKeys.DEFAULT_ROUTING_TO_MASTER, "true");
                        }
                    }
                    final String defaultColoClusterName = clusterNameWithRouting(clusterName, colo, defaultColo, masterColo, defaultRoutingToMasterColo, enableSymlink);
                    regularServiceConfig.put(PropertyKeys.CLUSTER_NAME, defaultColoClusterName);
                    regularServiceConfig.put(PropertyKeys.SERVICE_NAME, serviceName);
                    coloServicesConfigs.put(serviceName, regularServiceConfig);
                }
                if (!serviceName.equals(coloServiceName)) {
                    // this block will handle:
                    // the colo-specific service-> colo-specific cluster mapping (fooService-WestCoast -> FooCluster-WestCoast,
                    // fooService-EastCoast -> FooCluster-EastCoast)
                    coloServiceConfig.put(PropertyKeys.CLUSTER_NAME, coloClusterName);
                    coloServiceConfig.put(PropertyKeys.SERVICE_NAME, coloServiceName);
                    coloServicesConfigs.put(coloServiceName, coloServiceConfig);
                }
            }
            // end for each service
            status = addServicesToServicesMap(coloServicesConfigs, services, coloClusterName);
            if (status != NO_ERROR_EXIT_CODE) {
                return status;
            }
            // Now that we've created colo-specific service to colo-specific cluster mappings, we now need
            // to actually create those colo-specific clusters.
            // When the colo cluster name differs from the plain cluster name, a copy of the cluster
            // config is registered under the colo name; the un-suffixed cluster is additionally
            // registered in the iteration that creates the default services.
            Map<String, Object> coloClusterConfig = clusterConfig;
            if (!clusterName.equals(coloClusterName)) {
                coloClusterConfig = new HashMap<String, Object>(clusterConfig);
                coloClusterConfig.put(PropertyKeys.CLUSTER_NAME, coloClusterName);
                if (createDefaultServices) {
                    clusters.put(clusterName, clusterConfig);
                }
            }
            clusters.put(coloClusterName, coloClusterConfig);
            // list before the cluster variants.
            // NOTE(review): the first part of the comment above is missing in this view.
            // handleClusterVariants is given a fresh copy of the variant map for each colo.
            if (clusterVariantConfig != null) {
                Map<String, Map<String, Object>> coloClusterVariantConfig = new HashMap<String, Map<String, Object>>(clusterVariantConfig);
                status = handleClusterVariants(coloClusterVariantConfig, clusterConfig, clusters, coloServicesConfigs, clusterToServiceMapping, colo, variantToVariantsMapping, masterColo, enableSymlink);
                if (status != 0) {
                    return status;
                }
            } else {
                // even if clusterVariant is not defined, it is still needed to save the coloServicesConfigs
                // in case the serviceGroup directly refers the cluster name
                clusterToServiceMapping.put(coloClusterName, coloServicesConfigs);
                // also save the coloClusterName
                addNewVariantToVariantsList(clusterToColoClustersMapping, clusterName, coloClusterName);
            }
            // then set the flag marking the default services for this cluster as created.
            if (!defaultServicesCreated && createDefaultServices == true) {
                defaultServicesCreated = true;
            }
        }
    // end for each colo variant
    }
    // there are service variants
    if (_serviceVariants != null) {
        for (String serviceGroup : _serviceVariants.keySet()) {
            // each service group contains a list of cluster names and a type field that
            // describes how to treat the list. We group together the services described by these
            // listed clusters, and prep that for writing to a different znode than the default service
            // znode directory. Note that we had already pointed those services to the appropriate cluster
            // variant earlier.
            Map<String, Map<String, Object>> servicesGroupConfig = new HashMap<String, Map<String, Object>>();
            @SuppressWarnings("unchecked") Map<String, Object> configGroupMap = (Map<String, Object>) _serviceVariants.get(serviceGroup);
            String type = (String) configGroupMap.get(PropertyKeys.TYPE);
            final Object clusterListProperty = configGroupMap.get(PropertyKeys.CLUSTER_LIST);
            @SuppressWarnings("unchecked") List<String> clusterList = (List<String>) clusterListProperty;
            // create an alternate service table for the services specified by these cluster variants
            for (Iterator<String> iter = clusterList.listIterator(); iter.hasNext(); ) {
                String clusterItem = iter.next();
                // first try to resolve the entry as a cluster variant name
                List<String> coloClusterVariantList = variantToVariantsMapping.get(clusterItem);
                if (coloClusterVariantList == null && PropertyKeys.FULL_CLUSTER_LIST.equals(type)) {
                    // For full_cluster_list type, it is allowed to specify real cluster name, not
                    // necessarily always clusterVariant. Check the clusterToColoClustersMappings.
                    coloClusterVariantList = clusterToColoClustersMapping.get(clusterItem);
                }
                if (coloClusterVariantList == null) {
                    // the service group had an unknown cluster!
                    _log.error("Unknown cluster specified: " + clusterItem);
                    return EXCEPTION_EXIT_CODE;
                }
                // for each colo variant of the listed cluster, add the services recorded
                // in those coloVariants to this service group's list of services.
                for (String coloClusterVariant : coloClusterVariantList) {
                    Map<String, Map<String, Object>> candidateServices = clusterToServiceMapping.get(coloClusterVariant);
                    if (candidateServices == null) {
                        // the service group had an unknown cluster!
                        _log.error("Unknown cluster specified: " + coloClusterVariant);
                        return EXCEPTION_EXIT_CODE;
                    }
                    for (Map.Entry<String, Map<String, Object>> mapEntry : candidateServices.entrySet()) {
                        // put() returns the previous value for the key, so a non-null result means
                        // this service name was already added to the group.
                        Object testValue = servicesGroupConfig.put(mapEntry.getKey(), mapEntry.getValue());
                        if (testValue != null) {
                            // We shouldn't have had conflicting services, two variants of the same cluster
                            // were probably specified in the same service group.
                            _log.error("Service group has variants of the same cluster: " + serviceGroup);
                            return EXCEPTION_EXIT_CODE;
                        }
                    }
                }
            }
            if (PropertyKeys.CLUSTER_VARIANTS_LIST.equals(type)) {
                // start from the full list of services, and then overwrite the services specified by the
                // cluster variants.
                Map<String, Map<String, Object>> fullServiceList = new HashMap<String, Map<String, Object>>(services);
                fullServiceList.putAll(servicesGroupConfig);
                serviceVariants.put(serviceGroup, fullServiceList);
            } else if (PropertyKeys.FULL_CLUSTER_LIST.equals(type)) {
                // The user has explicitly indicated that we should put these and only the services that
                // correspond to the named clusters in the serviceGroup.
                serviceVariants.put(serviceGroup, servicesGroupConfig);
            } else {
                _log.error("unknown serviceVariant type: " + type);
                return EXCEPTION_EXIT_CODE;
            }
        }
    }
    _log.debug("serviceVariants: " + serviceVariants);
    // Everything above only built in-memory maps; from here on the connection is started and the
    // maps are written out. The finally block shuts the connection down on every exit path.
    _zkConnection.start();
    try {
        _log.info("Cluster configuration:\n" + clusters);
        writeConfig(ZKFSUtil.clusterPath(_basePath), new ClusterPropertiesJsonSerializer(), new ClusterPropertiesJsonSerializer(), clusters, _clusterDefaults);
        _log.info("Wrote cluster configuration");
        _log.info("Service configuration:\n" + services);
        writeConfig(ZKFSUtil.servicePath(_basePath), new ServicePropertiesJsonSerializer(), new ServicePropertiesJsonSerializer(), services, _serviceDefaults);
        _log.info("Wrote service configuration");
        writeChildren(regularClusterToServicesMapping);
        _log.info("Wrote service children nodes under clusters");
        if (!serviceVariants.isEmpty()) {
            // each service group is written under its own service path, derived from the group name
            for (Map.Entry<String, Map<String, Map<String, Object>>> entry : serviceVariants.entrySet()) {
                // The debug check only selects how much is printed (whole entry vs. just the key);
                // both branches log at info level.
                // NOTE(review): confirm that logging at info inside the debug branch is intended.
                if (_log.isDebugEnabled()) {
                    _log.info("serviceVariant: " + entry + "\n");
                } else {
                    _log.info("serviceVariant: " + entry.getKey() + "\n");
                }
                writeConfig(ZKFSUtil.servicePath(_basePath, entry.getKey()), new ServicePropertiesJsonSerializer(), new ServicePropertiesJsonSerializer(), entry.getValue(), _serviceDefaults);
            }
            _log.info("Wrote service variant configurations");
        }
        _log.info("Configuration complete");
        return NO_ERROR_EXIT_CODE;
    } finally {
        try {
            _zkConnection.shutdown();
        } catch (InterruptedException e) {
            // restore the interrupt flag so callers can still observe the interruption, and log
            // instead of rethrowing so the return value / exception from the try block is kept.
            Thread.currentThread().interrupt();
            _log.warn("ZooKeeper shutdown interrupted", e);
        }
    }
}
Also used : HashMap(java.util.HashMap) ServicePropertiesJsonSerializer(com.linkedin.d2.balancer.properties.ServicePropertiesJsonSerializer) ClusterPropertiesJsonSerializer(com.linkedin.d2.balancer.properties.ClusterPropertiesJsonSerializer) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

TrackerClient (com.linkedin.d2.balancer.clients.TrackerClient): 14, URI (java.net.URI): 11, HashMap (java.util.HashMap): 9, ArrayList (java.util.ArrayList): 7, Test (org.testng.annotations.Test): 6, TrackerClientTest (com.linkedin.d2.balancer.clients.TrackerClientTest): 3, URIRequest (com.linkedin.d2.balancer.util.URIRequest): 3, Point (com.linkedin.d2.balancer.util.hashing.ConsistentHashRing.Point): 3, RequestContext (com.linkedin.r2.message.RequestContext): 3, DegraderControl (com.linkedin.util.degrader.DegraderControl): 3, AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 3, AtomicLong (java.util.concurrent.atomic.AtomicLong): 3, None (com.linkedin.common.util.None): 2, D2Client (com.linkedin.d2.balancer.D2Client): 2, D2ClientBuilder (com.linkedin.d2.balancer.D2ClientBuilder): 2, RestRequest (com.linkedin.r2.message.rest.RestRequest): 2, RestRequestBuilder (com.linkedin.r2.message.rest.RestRequestBuilder): 2, RestResponse (com.linkedin.r2.message.rest.RestResponse): 2, CallCompletion (com.linkedin.util.degrader.CallCompletion): 2, HashSet (java.util.HashSet): 2