Use of com.linkedin.d2.balancer.LoadBalancerStateItem in project rest.li by linkedin.
The class SimpleLoadBalancer, method chooseTrackerClient.
private TrackerClient chooseTrackerClient(Request request, RequestContext requestContext, String serviceName, String clusterName, ClusterProperties cluster, LoadBalancerStateItem<UriProperties> uriItem, UriProperties uris, List<LoadBalancerState.SchemeStrategyPair> orderedStrategies, ServiceProperties serviceProperties) throws ServiceUnavailableException {
// now try and find a tracker client for the uri
TrackerClient trackerClient = null;
URI targetHost = KeyMapper.TargetHostHints.getRequestContextTargetHost(requestContext);
int partitionId = -1;
URI requestUri = request.getURI();
if (targetHost == null) {
PartitionAccessor accessor = getPartitionAccessor(serviceName, clusterName);
try {
partitionId = accessor.getPartitionId(requestUri);
} catch (PartitionAccessException e) {
die(serviceName, "PEGA_1013. Error in finding the partition for URI: " + requestUri + ", " + "in cluster: " + clusterName + ", " + e.getMessage());
}
} else {
// This is the scatter/gather or search case, where the target host may be chosen to be responsible for
// more than one partition (the target host was picked from a consistent hash ring, so load balancing is already in effect).
// We randomly pick one partition to check for call dropping.
// This is done for two reasons:
// 1. Currently there is no way to know which subset of partitions the target host was chosen for
// if it is serving more than one partition. This could be added, but it would require changing public interfaces (KeyMapper) so that
// more hints can be added to the request context for the partitions concerned.
// 2. More importantly, there is no good way to check for call dropping even if the above problem is solved.
// For example, if a target host is chosen for partitions 1, 5, 7, with call drop rates of 0, 0.2, 0.4 respectively,
// a reasonable way to proceed would be to use the highest drop rate and do the check once for the target host,
// but currently the check can only be done per partition and only with a boolean result (no access to the drop rate).
// The partition to check is picked at random to be conservative:
// e.g. in the above example, we don't want to always use the drop rate of partition 1.
Map<Integer, PartitionData> partitionDataMap = uris.getPartitionDataMap(targetHost);
if (partitionDataMap == null || partitionDataMap.isEmpty()) {
die(serviceName, "PEGA_1014. There is no partition data for server host: " + targetHost + ". URI: " + requestUri);
}
Set<Integer> partitions = partitionDataMap.keySet();
Iterator<Integer> iterator = partitions.iterator();
int index = _random.nextInt(partitions.size());
for (int i = 0; i <= index; i++) {
partitionId = iterator.next();
}
}
Map<URI, TrackerClient> clientsToLoadBalance = null;
for (LoadBalancerState.SchemeStrategyPair pair : orderedStrategies) {
LoadBalancerStrategy strategy = pair.getStrategy();
String scheme = pair.getScheme();
TrackerClientSubsetItem subsetItem = getPotentialClients(serviceName, serviceProperties, cluster, uris, scheme, partitionId, uriItem.getVersion());
clientsToLoadBalance = subsetItem.getWeightedSubset();
trackerClient = strategy.getTrackerClient(request, requestContext, uriItem.getVersion(), partitionId, clientsToLoadBalance, subsetItem.shouldForceUpdate());
debug(_log, "load balancer strategy for ", serviceName, " returned: ", trackerClient);
// break as soon as we find an available cluster client
if (trackerClient != null) {
break;
}
}
if (trackerClient == null) {
if (clientsToLoadBalance == null || clientsToLoadBalance.isEmpty()) {
String requestedSchemes = orderedStrategies.stream().map(LoadBalancerState.SchemeStrategyPair::getScheme).collect(Collectors.joining(","));
die(serviceName, "PEGA_1015. Service: " + serviceName + " unable to find a host to route the request" + " in partition: " + partitionId + " cluster: " + clusterName + " scheme: [" + requestedSchemes + "]," + " total hosts in cluster: " + uris.Uris().size() + "." + " Check what cluster and scheme your servers are announcing to.");
} else {
die(serviceName, "PEGA_1016. Service: " + serviceName + " is in a bad state (high latency/high error). " + "Dropping request. Cluster: " + clusterName + ", partitionId:" + partitionId + " (choosable: " + clientsToLoadBalance.size() + " hosts, total in cluster: " + uris.Uris().size() + ")");
}
}
return trackerClient;
}
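Since a Set has no positional access, the method above advances an iterator a random number of steps to pick the partition to check. A minimal standalone sketch of that pick, using hypothetical partition IDs (1, 5, 7, borrowed from the comment's example):

import java.util.Iterator;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;

public class RandomPartitionPick {
  public static void main(String[] args) {
    // Hypothetical partition IDs a target host might serve
    // (in chooseTrackerClient these come from uris.getPartitionDataMap(targetHost).keySet()).
    Set<Integer> partitions = new TreeSet<>();
    partitions.add(1);
    partitions.add(5);
    partitions.add(7);

    // Sets have no index-based access, so advance an iterator a random number of steps,
    // exactly as the loop in chooseTrackerClient does.
    Random random = new Random();
    int index = random.nextInt(partitions.size());
    Iterator<Integer> iterator = partitions.iterator();
    int partitionId = -1;
    for (int i = 0; i <= index; i++) {
      partitionId = iterator.next();
    }
    System.out.println("picked partition " + partitionId + " for the call-drop check");
  }
}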
Use of com.linkedin.d2.balancer.LoadBalancerStateItem in project rest.li by linkedin.
The class ServiceLoadBalancerSubscriber, method handleRemove.
@Override
protected void handleRemove(final String listenTo) {
_log.warn("Received a service properties event to remove() for service = " + listenTo);
LoadBalancerStateItem<ServiceProperties> serviceItem = _simpleLoadBalancerState.getServiceProperties().remove(listenTo);
if (serviceItem != null && serviceItem.getProperty() != null) {
ServiceProperties serviceProperties = serviceItem.getProperty();
// remove this service from the cluster -> services map
Set<String> serviceNames = _simpleLoadBalancerState.getServicesPerCluster().get(serviceProperties.getClusterName());
if (serviceNames != null) {
serviceNames.remove(serviceProperties.getServiceName());
}
_simpleLoadBalancerState.shutdownClients(listenTo);
}
}
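The remove handler keeps the cluster-to-services reverse index consistent before shutting down the service's clients. A simplified sketch of that bookkeeping, using plain maps and hypothetical names (serviceToCluster, servicesPerCluster, profileService, cluster-1) in place of the real SimpleLoadBalancerState structures:

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class RemoveServiceSketch {
  public static void main(String[] args) {
    // Hypothetical state: service name -> cluster name, standing in for the
    // map of LoadBalancerStateItem<ServiceProperties> returned by getServiceProperties().
    Map<String, String> serviceToCluster = new HashMap<>();
    serviceToCluster.put("profileService", "cluster-1");

    // Reverse index: cluster name -> services, mirroring getServicesPerCluster().
    Map<String, Set<String>> servicesPerCluster = new HashMap<>();
    servicesPerCluster.computeIfAbsent("cluster-1", k -> new HashSet<>()).add("profileService");

    // On a remove event, drop the service and keep the reverse index consistent,
    // just as handleRemove does before shutting down the service's clients.
    String removedCluster = serviceToCluster.remove("profileService");
    if (removedCluster != null) {
      Set<String> services = servicesPerCluster.get(removedCluster);
      if (services != null) {
        services.remove("profileService");
      }
    }
    System.out.println(servicesPerCluster);
  }
}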
Use of com.linkedin.d2.balancer.LoadBalancerStateItem in project rest.li by linkedin.
The class LoadBalancerTestState, method getUriProperties.
@Override
public LoadBalancerStateItem<UriProperties> getUriProperties(String clusterName) {
try {
URI uri1 = URI.create("http://test.qa1.com:1234");
URI uri2 = URI.create("http://test.qa2.com:2345");
URI uri3 = URI.create("http://test.qa3.com:6789");
Map<Integer, PartitionData> partitionData = new HashMap<>(1);
partitionData.put(DefaultPartitionAccessor.DEFAULT_PARTITION_ID, new PartitionData(1d));
Map<URI, Map<Integer, PartitionData>> uriData = new HashMap<>(3);
uriData.put(uri1, partitionData);
uriData.put(uri2, partitionData);
uriData.put(uri3, partitionData);
return (getUriProperties) ? new LoadBalancerStateItem<>(new UriProperties("cluster-1", uriData), 0, 0) : null;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
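To see how such a stubbed item is consumed, here is a small sketch that builds a LoadBalancerStateItem&lt;UriProperties&gt; the same way and reads it back with the accessors used elsewhere on this page (getProperty(), getVersion(), Uris()). The import paths other than LoadBalancerStateItem are assumptions inferred from the snippets, so verify them against your rest.li version:

import java.net.URI;
import java.util.HashMap;
import java.util.Map;

import com.linkedin.d2.balancer.LoadBalancerStateItem;
// Package paths below are assumed from the snippet context; check your rest.li version.
import com.linkedin.d2.balancer.properties.PartitionData;
import com.linkedin.d2.balancer.properties.UriProperties;
import com.linkedin.d2.balancer.util.partitions.DefaultPartitionAccessor;

public class UriPropertiesItemSketch {
  public static void main(String[] args) {
    Map<Integer, PartitionData> partitionData = new HashMap<>();
    partitionData.put(DefaultPartitionAccessor.DEFAULT_PARTITION_ID, new PartitionData(1d));

    Map<URI, Map<Integer, PartitionData>> uriData = new HashMap<>();
    uriData.put(URI.create("http://test.qa1.com:1234"), partitionData);

    // Wrap the properties with a version and last-update value, as the test stub does.
    LoadBalancerStateItem<UriProperties> item =
        new LoadBalancerStateItem<>(new UriProperties("cluster-1", uriData), 0, 0);

    // Consumers read the payload and its version, e.g. SimpleLoadBalancer.chooseTrackerClient above.
    System.out.println(item.getProperty().Uris());
    System.out.println(item.getVersion());
  }
}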
Use of com.linkedin.d2.balancer.LoadBalancerStateItem in project rest.li by linkedin.
The class PartitionedLoadBalancerTestState, method getClusterProperties.
@Override
public LoadBalancerStateItem<ClusterProperties> getClusterProperties(String clusterName) {
List<String> prioritizedSchemes = new ArrayList<>();
prioritizedSchemes.add("http");
ClusterProperties clusterProperties = new ClusterProperties(_cluster, prioritizedSchemes);
return new LoadBalancerStateItem<>(clusterProperties, 1, 1);
}
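A matching sketch for the cluster side, wrapping a ClusterProperties (cluster name plus prioritized schemes, as in the stub above) in a LoadBalancerStateItem and reading the schemes back; the properties package path is again an assumption:

import java.util.ArrayList;
import java.util.List;

import com.linkedin.d2.balancer.LoadBalancerStateItem;
// Package path assumed from the snippet context; check your rest.li version.
import com.linkedin.d2.balancer.properties.ClusterProperties;

public class ClusterPropertiesItemSketch {
  public static void main(String[] args) {
    List<String> prioritizedSchemes = new ArrayList<>();
    prioritizedSchemes.add("http");

    // Same constructor shape as the test stub: cluster name plus prioritized schemes,
    // wrapped with a version and last-update value.
    LoadBalancerStateItem<ClusterProperties> item =
        new LoadBalancerStateItem<>(new ClusterProperties("cluster-1", prioritizedSchemes), 1, 1);

    System.out.println(item.getProperty().getPrioritizedSchemes());
  }
}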
Use of com.linkedin.d2.balancer.LoadBalancerStateItem in project rest.li by linkedin.
The class SimpleLoadBalancerSimulation, method verifyState.
/**
* Compare the simulator's view of reality with the load balancer's. This method should
* be called after every step is performed and all threads have finished.
*/
public void verifyState() {
// verify that we consumed all messages before we do anything
for (int i = 0; i < _queues.length; ++i) {
if (_queues[i].size() > 0) {
fail("there were messages left in the queue. all messages should have been consumed during this simulation step.");
}
}
// verify that all clients have been shut down
for (Map.Entry<String, TransportClientFactory> e : _clientFactories.entrySet()) {
DoNothingClientFactory factory = (DoNothingClientFactory) e.getValue();
if (factory.getRunningClientCount() != 0) {
fail("Not all clients were shut down from factory " + e.getKey());
}
}
try {
final CountDownLatch latch = new CountDownLatch(1);
PropertyEventShutdownCallback callback = new PropertyEventShutdownCallback() {
@Override
public void done() {
latch.countDown();
}
};
_state.shutdown(callback);
if (!latch.await(60, TimeUnit.SECONDS)) {
fail("unable to shutdown state");
}
} catch (InterruptedException e) {
fail("unable to shutdown state in verifyState.");
}
// New load balancer with no timeout; the code below checks for services that don't exist,
// and a load balancer with a non-zero timeout would just time out waiting for them to be
// registered, which will never happen because the PropertyEventThread is shut down.
_loadBalancer = new SimpleLoadBalancer(_state, 0, TimeUnit.SECONDS, _executorService);
// verify services are as we expect
for (String possibleService : _possibleServices) {
// if we don't expect this service, or the state isn't listening to it, the load balancer should not know about it
if (!_expectedServiceProperties.containsKey(possibleService) || !_state.isListeningToService(possibleService)) {
LoadBalancerStateItem<ServiceProperties> serviceItem = _state.getServiceProperties(possibleService);
assertTrue(serviceItem == null || serviceItem.getProperty() == null);
} else {
ServiceProperties serviceProperties = _expectedServiceProperties.get(possibleService);
ClusterProperties clusterProperties = _expectedClusterProperties.get(serviceProperties.getClusterName());
UriProperties uriProperties = _expectedUriProperties.get(serviceProperties.getClusterName());
assertEquals(_state.getServiceProperties(possibleService).getProperty(), serviceProperties);
// verify round robin'ing of the hosts for this service
for (int i = 0; i < 100; ++i) {
try {
// this call will queue up messages if we're not listening to the service, but
// it's ok, because all of the messengers have been stopped.
final TransportClient client = _loadBalancer.getClient(new URIRequest("d2://" + possibleService + random(_possiblePaths)), new RequestContext());
// if we didn't receive service unavailable, we should
// get a client back
assertNotNull(client, "Not found client for: d2://" + possibleService + random(_possiblePaths));
} catch (ServiceUnavailableException e) {
if (uriProperties != null && clusterProperties != null) {
// only way to get here is if the prioritized
// schemes could find no available uris in the
// cluster. let's see if we can find a URI that
// matches a prioritized scheme in the cluster.
Set<String> schemes = new HashSet<>();
for (URI uri : uriProperties.Uris()) {
schemes.add(uri.getScheme());
}
for (String scheme : clusterProperties.getPrioritizedSchemes()) {
// the exception is acceptable only if no prioritized scheme has both a URI in the cluster and a registered client factory; otherwise a client should have been found by the code.
if (schemes.contains(scheme) && _clientFactories.containsKey(scheme)) {
break;
}
assertFalse(schemes.contains(scheme) && _clientFactories.containsKey(scheme), "why couldn't a client be found for schemes " + clusterProperties.getPrioritizedSchemes() + " with URIs: " + uriProperties.Uris());
}
}
}
}
}
}
// verify clusters are as we expect
for (String possibleCluster : _possibleClusters) {
LoadBalancerStateItem<ClusterProperties> clusterItem = _state.getClusterProperties(possibleCluster);
if (!_expectedClusterProperties.containsKey(possibleCluster) || !_state.isListeningToCluster(possibleCluster)) {
assertTrue(clusterItem == null || clusterItem.getProperty() == null, "cluster item for " + possibleCluster + " is not null: " + clusterItem);
} else {
assertNotNull(clusterItem, "Item for cluster " + possibleCluster + " should not be null, listening: " + _state.isListeningToCluster(possibleCluster) + ", keys: " + _expectedClusterProperties.keySet());
assertEquals(clusterItem.getProperty(), _expectedClusterProperties.get(possibleCluster));
}
}
// verify uris are as we expect
for (String possibleCluster : _possibleClusters) {
LoadBalancerStateItem<UriProperties> uriItem = _state.getUriProperties(possibleCluster);
if (!_expectedUriProperties.containsKey(possibleCluster) || !_state.isListeningToCluster(possibleCluster)) {
assertTrue(uriItem == null || uriItem.getProperty() == null);
} else {
assertNotNull(uriItem);
assertEquals(uriItem.getProperty(), _expectedUriProperties.get(possibleCluster));
}
}
}
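The shutdown check in verifyState waits on a CountDownLatch that the shutdown callback counts down, bounded by a 60-second timeout. A self-contained sketch of that pattern, with a hypothetical shutdownAsync method standing in for _state.shutdown(PropertyEventShutdownCallback):

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class ShutdownLatchSketch {
  // Hypothetical stand-in for _state.shutdown(PropertyEventShutdownCallback):
  // runs the callback once the asynchronous shutdown work has finished.
  static void shutdownAsync(ExecutorService worker, Runnable callback) {
    worker.submit(() -> {
      // ... release resources here ...
      callback.run();
    });
  }

  public static void main(String[] args) throws InterruptedException {
    ExecutorService worker = Executors.newSingleThreadExecutor();
    final CountDownLatch latch = new CountDownLatch(1);

    // Same pattern as verifyState: count the latch down from the completion callback,
    // then bound the wait so a hung shutdown fails the check instead of blocking forever.
    shutdownAsync(worker, latch::countDown);
    if (!latch.await(60, TimeUnit.SECONDS)) {
      throw new AssertionError("unable to shutdown state");
    }
    worker.shutdown();
  }
}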