/**
 * Chooses a {@link TrackerClient} for the given request.
 *
 * <p>First a partition id is resolved: when the request context carries no target-host hint, it is
 * obtained from the {@code PartitionAccessor} for the service/cluster applied to the request URI;
 * when a hint is present, a random index into the partitions of that host's partition data is drawn.
 * Then each scheme/strategy pair in {@code orderedStrategies} is asked for a tracker client for that
 * partition. If no client is obtained, {@code die(...)} is called with a PEGA_1015 message (no
 * candidate clients) or a PEGA_1016 message (candidates exist but none was returned).
 *
 * <p>NOTE(review): this copy of the method appears truncated (several closing braces, the loop
 * {@code break}, and parts of two statements are missing) — confirm details against the full file.
 *
 * @param request the request being routed; its URI is used for partition lookup and error messages
 * @param requestContext the request context, inspected for a target-host hint and passed to the strategy
 * @param serviceName the service name, used for lookups and error messages
 * @param clusterName the cluster name, used for lookups and error messages
 * @param cluster the cluster properties, forwarded to {@code getPotentialClients}
 * @param uriItem the versioned URI properties item; its version is passed to the strategy
 * @param uris the URI properties of the cluster, used for per-host partition data and host counts
 * @param orderedStrategies the scheme/strategy pairs to try, in iteration order
 * @param serviceProperties the service properties, forwarded to {@code getPotentialClients}
 * @return the chosen tracker client
 * @throws ServiceUnavailableException declared by this method; presumably raised through
 *         {@code die(...)} — confirm against that helper
 */
private TrackerClient chooseTrackerClient(Request request, RequestContext requestContext, String serviceName, String clusterName, ClusterProperties cluster, LoadBalancerStateItem<UriProperties> uriItem, UriProperties uris, List<LoadBalancerState.SchemeStrategyPair> orderedStrategies, ServiceProperties serviceProperties) throws ServiceUnavailableException {
// now try and find a tracker client for the uri
TrackerClient trackerClient = null;
// Target-host hint from the request context; may be null, which selects the URI-based branch below.
URI targetHost = KeyMapper.TargetHostHints.getRequestContextTargetHost(requestContext);
// -1 is only the initial placeholder until one of the branches below assigns a partition id.
int partitionId = -1;
URI requestUri = request.getURI();
if (targetHost == null) {
// No hint: derive the partition id from the request URI.
PartitionAccessor accessor = getPartitionAccessor(serviceName, clusterName);
try {
partitionId = accessor.getPartitionId(requestUri);
} catch (PartitionAccessException e) {
die(serviceName, "PEGA_1013. Error in finding the partition for URI: " + requestUri + ", " + "in cluster: " + clusterName + ", " + e.getMessage());
// NOTE(review): the braces closing this catch and the enclosing if are not visible here — looks truncated.
} else {
// This is the case of scatter/gather or search, where the target host may be chosen to be responsible for
// more than one partition (the target host was picked from a consistent hash ring, so load balancing is already in effect).
// We randomly pick one partition to check for call dropping.
// This is done for two reasons:
// 1. Currently there is no way to know for which subset of partitions the target host was chosen
// if it is serving more than one partition. This can be added, but it requires a change of public interfaces (KeyMapper) so that
// more hints can be added to the request context for the partitions concerned.
// 2. More importantly, there is no good way to check for call dropping even if the above problem is solved.
// For example, if a target host is chosen for partitions 1, 5, 7, with call drop rates of 0, 0.2, 0.4 respectively,
// a reasonable way to proceed would be to use the highest drop rate and do the check once for the target host,
// but currently the check can only be done for each partition and only with a boolean result (no access to the drop rate).
// The partition to check is picked at random to be conservative.
// E.g. in the above example, we don't want to always use the drop rate of partition 1.
Map<Integer, PartitionData> partitionDataMap = uris.getPartitionDataMap(targetHost);
if (partitionDataMap == null || partitionDataMap.isEmpty()) {
die(serviceName, "PEGA_1014. There is no partition data for server host: " + targetHost + ". URI: " + requestUri);
// NOTE(review): closing brace of the if above is not visible here.
Set<Integer> partitions = partitionDataMap.keySet();
Iterator<Integer> iterator = partitions.iterator();
// Draw a random position in [0, partitions.size()) and walk the iterator up to it.
int index = _random.nextInt(partitions.size());
for (int i = 0; i <= index; i++) {
// NOTE(review): the right-hand side is missing; presumably the iterator's next element — confirm.
partitionId =;
// Remembers the candidate clients of the last strategy tried so the failure branch below can report on them.
Map<URI, TrackerClient> clientsToLoadBalance = null;
for (LoadBalancerState.SchemeStrategyPair pair : orderedStrategies) {
LoadBalancerStrategy strategy = pair.getStrategy();
String scheme = pair.getScheme();
// Candidate clients for this scheme and partition, then let the strategy pick one of them.
TrackerClientSubsetItem subsetItem = getPotentialClients(serviceName, serviceProperties, cluster, uris, scheme, partitionId, uriItem.getVersion());
clientsToLoadBalance = subsetItem.getWeightedSubset();
trackerClient = strategy.getTrackerClient(request, requestContext, uriItem.getVersion(), partitionId, clientsToLoadBalance, subsetItem.shouldForceUpdate());
debug(_log, "load balancer strategy for ", serviceName, " returned: ", trackerClient);
// break as soon as we find an available cluster client
if (trackerClient != null) {
// NOTE(review): the break statement and the braces closing the if and the for loop are not visible here.
if (trackerClient == null) {
// No client was returned: report either "no hosts to choose from" or "hosts exist but none was returned".
if (clientsToLoadBalance == null || clientsToLoadBalance.isEmpty()) {
// NOTE(review): the expression is truncated; presumably the schemes of orderedStrategies joined with "," — confirm.
String requestedSchemes =","));
die(serviceName, "PEGA_1015. Service: " + serviceName + " unable to find a host to route the request" + " in partition: " + partitionId + " cluster: " + clusterName + " scheme: [" + requestedSchemes + "]," + " total hosts in cluster: " + uris.Uris().size() + "." + " Check what cluster and scheme your servers are announcing to.");
} else {
die(serviceName, "PEGA_1016. Service: " + serviceName + " is in a bad state (high latency/high error). " + "Dropping request. Cluster: " + clusterName + ", partitionId:" + partitionId + " (choosable: " + clientsToLoadBalance.size() + " hosts, total in cluster: " + uris.Uris().size() + ")");
// NOTE(review): closing braces for the else, both ifs and the method are not visible in this view.
return trackerClient;
/**
 * Registers one service, one cluster and two server URIs (both on the default partition, weight 1.0)
 * in ZooKeeper-backed stores, then obtains the balancer's {@link KeyMapper} and asserts that
 * {@code mapKeysV2} splits the key set into exactly two batches.
 *
 * <p>NOTE(review): this copy appears truncated (loop bodies, the callback start calls, the
 * per-batch checks and the {@code finally} body are missing) — confirm against the full file.
 *
 * @throws Exception on any failure while setting up the stores or waiting on the callbacks
 */
public void testKeyMapper() throws Exception {
final String TEST_SERVICE_NAME = "test-service";
final String TEST_CLUSTER_NAME = "test-cluster";
final URI TEST_SERVER_URI1 = URI.create("http://test-host-1/");
final URI TEST_SERVER_URI2 = URI.create("http://test-host-2/");
final int NUM_ITERATIONS = 5;
try {
ZKFSLoadBalancer balancer = getBalancer();
// NOTE(review): no call that hands this callback to the balancer is visible before the wait — presumably lost.
FutureCallback<None> callback = new FutureCallback<>();
callback.get(30, TimeUnit.SECONDS);
ZKConnection conn = balancer.zkConnection();
// Publish the service definition: "test-service" lives in "test-cluster" under path "/test".
ZooKeeperPermanentStore<ServiceProperties> serviceStore = new ZooKeeperPermanentStore<>(conn, new ServicePropertiesJsonSerializer(), ZKFSUtil.servicePath(BASE_PATH));
ServiceProperties props = new ServiceProperties(TEST_SERVICE_NAME, TEST_CLUSTER_NAME, "/test", Arrays.asList("degrader"), Collections.<String, Object>emptyMap(), null, null, Arrays.asList("http"), null);
serviceStore.put(TEST_SERVICE_NAME, props);
// Publish the cluster definition.
ClusterProperties clusterProperties = new ClusterProperties(TEST_CLUSTER_NAME);
ZooKeeperPermanentStore<ClusterProperties> clusterStore = new ZooKeeperPermanentStore<>(conn, new ClusterPropertiesJsonSerializer(), ZKFSUtil.clusterPath(BASE_PATH));
clusterStore.put(TEST_CLUSTER_NAME, clusterProperties);
// Publish both server URIs; they share the same partition data map (default partition, weight 1.0).
ZooKeeperEphemeralStore<UriProperties> uriStore = new ZooKeeperEphemeralStore<>(conn, new UriPropertiesJsonSerializer(), new UriPropertiesMerger(), ZKFSUtil.uriPath(BASE_PATH), false, true);
Map<URI, Map<Integer, PartitionData>> uriData = new HashMap<>();
Map<Integer, PartitionData> partitionData = new HashMap<>(1);
partitionData.put(DefaultPartitionAccessor.DEFAULT_PARTITION_ID, new PartitionData(1.0d));
uriData.put(TEST_SERVER_URI1, partitionData);
uriData.put(TEST_SERVER_URI2, partitionData);
UriProperties uriProps = new UriProperties(TEST_CLUSTER_NAME, uriData);
// NOTE(review): as above, the call that would complete this callback is not visible — presumably lost.
callback = new FutureCallback<>();
callback.get(30, TimeUnit.SECONDS);
uriStore.put(TEST_CLUSTER_NAME, uriProps);
Set<Integer> keys = new HashSet<>();
// NOTE(review): the body and closing brace of this loop are missing; presumably it fills `keys` with 0..99 — confirm.
for (int ii = 0; ii < 100; ++ii) {
for (int ii = 0; ii < NUM_ITERATIONS; ++ii) {
KeyMapper mapper = balancer.getKeyMapper();
MapKeyResult<URI, Integer> batches = mapper.mapKeysV2(URI.create("d2://" + TEST_SERVICE_NAME), keys);
// Two servers were announced, and the keys are expected to be spread over both of them.
Assert.assertEquals(batches.getMapResult().size(), 2);
// NOTE(review): the per-batch checks inside this loop are not visible here.
for (Map.Entry<URI, Collection<Integer>> oneBatch : batches.getMapResult().entrySet()) {
// NOTE(review): the finally body (presumably cleanup) is not visible here.
} finally {
/**
 * Builds scatter/gather key-value GET requests for 20 generated ids using a
 * {@link ConsistentHashKeyMapper} backed by a mock load balancer, after first exercising
 * {@code mapKeysV2} with a non-D2 URI, for which an {@link IllegalArgumentException} is expected.
 *
 * <p>NOTE(review): this copy appears truncated; nothing that sends or verifies the built requests
 * is visible, and {@code builders} is not used in the visible lines.
 *
 * @param builders request builder wrapper supplied by the "requestBuilderDataProvider" data provider
 * @throws Exception on any unexpected failure
 */
@Test(dataProvider = "requestBuilderDataProvider")
public static void testScatterGatherKVLoadBalancerIntegration(RootBuilderWrapper<Long, Greeting> builders) throws Exception {
SimpleLoadBalancer loadBalancer = MockLBFactory.createLoadBalancer();
KeyMapper keyMapper = new ConsistentHashKeyMapper(loadBalancer, new TestPartitionInfoProvider());
try {
// NOTE(review): the text after the first ';' is garbled; presumably a fail(...) call carrying this message — confirm.
keyMapper.mapKeysV2(URI.create("http://badurischeme/"), new HashSet<String>());"keyMapper should reject non-D2 URI scheme");
} catch (IllegalArgumentException e) {
// expected
// NOTE(review): the brace closing this catch is not visible here.
ScatterGatherBuilder<Greeting> sg = new ScatterGatherBuilder<>(keyMapper);
final int NUM_IDS = 20;
Long[] requestIds = generateIds(NUM_IDS);
Collection<ScatterGatherBuilder.KVRequestInfo<Long, Greeting>> scatterGatherRequests = buildScatterGatherGetKVRequests(sg, requestIds);
/**
 * Builds scatter/gather GET requests for 20 generated ids using a {@link ConsistentHashKeyMapper}
 * backed by a mock load balancer, after first exercising {@code mapKeysV2} with a non-D2 URI,
 * for which an {@link IllegalArgumentException} is expected.
 *
 * <p>NOTE(review): this copy appears truncated; nothing that sends or verifies the built requests
 * is visible, and {@code builders} is not used in the visible lines.
 *
 * @param builders request builder wrapper supplied by the "requestBuilderDataProvider" data provider
 * @throws Exception on any unexpected failure
 */
@Test(dataProvider = "requestBuilderDataProvider")
public static void testScatterGatherLoadBalancerIntegration(RootBuilderWrapper<Long, Greeting> builders) throws Exception {
SimpleLoadBalancer loadBalancer = MockLBFactory.createLoadBalancer();
KeyMapper keyMapper = new ConsistentHashKeyMapper(loadBalancer, new TestPartitionInfoProvider());
try {
// NOTE(review): the text after the first ';' is garbled; presumably a fail(...) call carrying this message — confirm.
keyMapper.mapKeysV2(URI.create("http://badurischeme/"), new HashSet<String>());"keyMapper should reject non-D2 URI scheme");
} catch (IllegalArgumentException e) {
// expected
// NOTE(review): the brace closing this catch is not visible here.
ScatterGatherBuilder<Greeting> sg = new ScatterGatherBuilder<>(keyMapper);
final int NUM_IDS = 20;
Long[] requestIds = generateIds(NUM_IDS);
Collection<ScatterGatherBuilder.RequestInfo<Greeting>> scatterGatherRequests = buildScatterGatherGetRequests(sg, requestIds);
// load balancer working with partitioned cluster
/**
 * Smoke test of {@link SimpleLoadBalancer} against a partitioned cluster.
 *
 * <p>Runs 12 rounds, each with a freshly built load balancer, cycling through four partition
 * configurations selected by {@code tryAgain % 4}: range-based with 2 partitions, hash-based MODULO,
 * hash-based MD5, and range-based with 4 partitions (the "gap" case where no server serves
 * partition 2). Three servers are announced: uri1 on partitions 0 and 1, uri2 on partition 0 only,
 * uri3 on partition 1 only. The test checks which server each request id is routed to, that a
 * target-host hint always wins, and the rings returned by {@code getRings}.
 *
 * <p>NOTE(review): this copy appears heavily truncated (closing braces, switch {@code break}s,
 * URI literals, key-set updates and the shutdown sequence are missing) — confirm against the full file.
 */
@Test(groups = { "small", "back-end" })
public void testLoadBalancerWithPartitionsSmoke() throws URISyntaxException, ServiceUnavailableException, InterruptedException, ExecutionException {
// Each round builds its own registries, state and load balancer.
for (int tryAgain = 0; tryAgain < 12; ++tryAgain) {
Map<String, LoadBalancerStrategyFactory<? extends LoadBalancerStrategy>> loadBalancerStrategyFactories = new HashMap<>();
Map<String, TransportClientFactory> clientFactories = new HashMap<>();
List<String> prioritizedSchemes = new ArrayList<>();
MockStore<ServiceProperties> serviceRegistry = new MockStore<>();
MockStore<ClusterProperties> clusterRegistry = new MockStore<>();
MockStore<UriProperties> uriRegistry = new MockStore<>();
ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor();
loadBalancerStrategyFactories.put("degrader", new DegraderLoadBalancerStrategyFactoryV3());
clientFactories.put(PropertyKeys.HTTP_SCHEME, new DoNothingClientFactory());
SimpleLoadBalancerState state = new SimpleLoadBalancerState(executorService, uriRegistry, clusterRegistry, serviceRegistry, clientFactories, loadBalancerStrategyFactories);
SimpleLoadBalancer loadBalancer = new SimpleLoadBalancer(state, 5, TimeUnit.SECONDS, executorService);
// NOTE(review): no use of balancerCallback is visible; presumably the balancer start call was lost.
FutureCallback<None> balancerCallback = new FutureCallback<>();
// NOTE(review): the three URI literals are empty strings in this copy; presumably distinct server URLs were lost — confirm.
URI uri1 = URI.create("");
URI uri2 = URI.create("");
URI uri3 = URI.create("");
Map<URI, Double> uris = new HashMap<>();
uris.put(uri1, 1d);
uris.put(uri2, 1d);
uris.put(uri3, 1d);
// Partition layout: server1 (uri1) -> partitions 0 and 1, server2 (uri2) -> 0, server3 (uri3) -> 1.
Map<URI, Map<Integer, PartitionData>> partitionDesc = new HashMap<>();
Map<Integer, PartitionData> server1 = new HashMap<>();
server1.put(0, new PartitionData(1d));
server1.put(1, new PartitionData(1d));
Map<Integer, PartitionData> server2 = new HashMap<>();
server2.put(0, new PartitionData(1d));
Map<Integer, PartitionData> server3 = new HashMap<>();
server3.put(1, new PartitionData(1d));
partitionDesc.put(uri1, server1);
partitionDesc.put(uri2, server2);
partitionDesc.put(uri3, server3);
// 0: range-based, 1: hash-based MODULO, 2: hash-based MD5, 3: range-based with a gap at partition 2.
int partitionMethod = tryAgain % 4;
// NOTE(review): no break statements or closing brace are visible in this switch; as written every case
// would fall through to case 3 — presumably the breaks were lost in this copy, confirm.
switch(partitionMethod) {
case 0:
clusterRegistry.put("cluster-1", new ClusterProperties("cluster-1", null, new HashMap<>(), new HashSet<>(), new RangeBasedPartitionProperties("id=(\\d+)", 0, 50, 2)));
case 1:
clusterRegistry.put("cluster-1", new ClusterProperties("cluster-1", null, new HashMap<>(), new HashSet<>(), new HashBasedPartitionProperties("id=(\\d+)", 2, HashBasedPartitionProperties.HashAlgorithm.valueOf("MODULO"))));
case 2:
clusterRegistry.put("cluster-1", new ClusterProperties("cluster-1", null, new HashMap<>(), new HashSet<>(), new HashBasedPartitionProperties("id=(\\d+)", 2, HashBasedPartitionProperties.HashAlgorithm.valueOf("MD5"))));
case 3:
// test getRings with gap. here, no server serves partition 2
clusterRegistry.put("cluster-1", new ClusterProperties("cluster-1", null, new HashMap<>(), new HashSet<>(), new RangeBasedPartitionProperties("id=(\\d+)", 0, 50, 4)));
// server3 additionally serves partition 3, so partitions 0, 1 and 3 have servers and 2 has none.
server3.put(3, new PartitionData(1d));
partitionDesc.put(uri3, server3);
serviceRegistry.put("foo", new ServiceProperties("foo", "cluster-1", "/foo", Arrays.asList("degrader"), Collections.singletonMap(PropertyKeys.HTTP_LB_CONSISTENT_HASH_ALGORITHM, "pointBased"), null, null, prioritizedSchemes, null));
uriRegistry.put("cluster-1", new UriProperties("cluster-1", partitionDesc));
// Gap case: expect four rings, with the ring for partition 2 rendering like an empty ConsistentHashRing.
if (partitionMethod == 3) {
Map<Integer, Ring<URI>> ringMap = loadBalancer.getRings(URI.create("d2://foo"));
assertEquals(ringMap.size(), 4);
// the ring for partition 2 should be empty
assertEquals(ringMap.get(2).toString(), new ConsistentHashRing<>(Collections.emptyList()).toString());
// NOTE(review): these literals are also empty, and nothing visible adds them to expectedUris — presumably lost.
URI expectedUri1 = URI.create("");
URI expectedUri2 = URI.create("");
URI expectedUri3 = URI.create("");
Set<URI> expectedUris = new HashSet<>();
// 1000 requests, cycling the request id through 0..99.
for (int i = 0; i < 1000; ++i) {
int ii = i % 100;
RewriteLoadBalancerClient client = (RewriteLoadBalancerClient) loadBalancer.getClient(new URIRequest("d2://foo/id=" + ii), new RequestContext());
String clientUri = client.getUri().toString();
HashFunction<String[]> hashFunction = null;
String[] str = new String[1];
// test KeyMapper target host hint: request is always to target host regardless of what's in d2 URI and whether it's hash-based or range-based partitions
RequestContext requestContextWithHint = new RequestContext();
KeyMapper.TargetHostHints.setRequestContextTargetHost(requestContextWithHint, uri1);
RewriteLoadBalancerClient hintedClient1 = (RewriteLoadBalancerClient) loadBalancer.getClient(new URIRequest("d2://foo/id=" + ii), requestContextWithHint);
String hintedUri1 = hintedClient1.getUri().toString();
Assert.assertEquals(hintedUri1, uri1.toString() + "/foo");
// Same hint with a URI that carries no id: the hinted host is still expected.
RewriteLoadBalancerClient hintedClient2 = (RewriteLoadBalancerClient) loadBalancer.getClient(new URIRequest("d2://foo/action=purge-all"), requestContextWithHint);
String hintedUri2 = hintedClient2.getUri().toString();
Assert.assertEquals(hintedUri2, uri1.toString() + "/foo");
// Only the MD5 configuration needs a hash function to predict the partition of a key.
if (partitionMethod == 2) {
hashFunction = new MD5Hash();
for (URI uri : expectedUris) {
if (clientUri.contains(uri.toString())) {
// check if only key belonging to partition 0 gets uri2
if (uri.equals(uri2)) {
if (partitionMethod == 0) {
assertTrue(ii < 50);
} else if (partitionMethod == 1) {
assertTrue(ii % 2 == 0);
} else {
str[0] = ii + "";
assertTrue(hashFunction.hash(str) % 2 == 0);
// check if only key belonging to partition 1 gets uri3
if (uri.equals(uri3)) {
if (partitionMethod == 0) {
assertTrue(ii >= 50);
} else if (partitionMethod == 1) {
assertTrue(ii % 2 == 1);
} else {
str[0] = ii + "";
assertTrue(hashFunction.hash(str) % 2 == 1);
// two rings for two partitions
Map<Integer, Ring<URI>> ringMap = loadBalancer.getRings(URI.create("d2://foo"));
assertEquals(ringMap.size(), 2);
if (partitionMethod != 2) {
Set<String> keys = new HashSet<>();
// Range-based: keys 0..49; otherwise (MODULO): the even keys 0..98.
for (int j = 0; j < 50; j++) {
if (partitionMethod == 0) {
keys.add(j + "");
} else {
keys.add(j * 2 + "");
// if it is range based partition, all keys from 0 ~ 49 belong to partition 0 according to the range definition
// if it is modulo based partition, all even keys belong to partition 0 because the partition count is 2
// only from partition 0
MapKeyResult<Ring<URI>, String> mapKeyResult = loadBalancer.getRings(URI.create("d2://foo"), keys);
Map<Ring<URI>, Collection<String>> keyToPartition = mapKeyResult.getMapResult();
assertEquals(keyToPartition.size(), 1);
for (Ring<URI> ring : keyToPartition.keySet()) {
assertEquals(ring, ringMap.get(0));
// NOTE(review): the statements that change `keys` between the following getRings calls are not visible here.
// now also from partition 1
mapKeyResult = loadBalancer.getRings(URI.create("d2://foo"), keys);
assertEquals(mapKeyResult.getMapResult().size(), 2);
assertEquals(mapKeyResult.getUnmappedKeys().size(), 0);
// now only from partition 1
mapKeyResult = loadBalancer.getRings(URI.create("d2://foo"), keys);
keyToPartition = mapKeyResult.getMapResult();
assertEquals(keyToPartition.size(), 1);
assertEquals(mapKeyResult.getUnmappedKeys().size(), 0);
for (Ring<URI> ring : keyToPartition.keySet()) {
assertEquals(ring, ringMap.get(1));
mapKeyResult = loadBalancer.getRings(URI.create("d2://foo"), keys);
if (partitionMethod == 0) {
// key out of range
Collection<MapKeyResult.UnmappedKey<String>> unmappedKeys = mapKeyResult.getUnmappedKeys();
assertEquals(unmappedKeys.size(), 1);
// id=100 is requested directly; for the range-based configuration this is expected to fail.
try {
loadBalancer.getClient(new URIRequest("d2://foo/id=100"), new RequestContext());
if (partitionMethod == 0) {
// key out of range
fail("Should throw ServiceUnavailableException caused by PartitionAccessException");
} catch (ServiceUnavailableException e) {
// NOTE(review): the remainder (catch body, callback body, shutdown call) is truncated in this copy.
final CountDownLatch latch = new CountDownLatch(1);
PropertyEventShutdownCallback callback = new PropertyEventShutdownCallback() {
public void done() {
// Waits up to 60 seconds on the latch and fails the test if it is not released in time.
if (!latch.await(60, TimeUnit.SECONDS)) {
fail("unable to shutdown state");
assertTrue(executorService.isShutdown(), "ExecutorService should have shut down!");