use of com.facebook.presto.raptor.metadata.Distribution in project presto by prestodb.
the class BucketBalancer method computeAssignmentChanges.
private static Multimap<String, BucketAssignment> computeAssignmentChanges(ClusterState clusterState) {
Multimap<String, BucketAssignment> sourceToAllocationChanges = HashMultimap.create();
Map<String, Long> allocationBytes = new HashMap<>(clusterState.getAssignedBytes());
Set<String> activeNodes = clusterState.getActiveNodes();
for (Distribution distribution : clusterState.getDistributionAssignments().keySet()) {
// number of buckets in this distribution assigned to a node
Multiset<String> allocationCounts = HashMultiset.create();
Collection<BucketAssignment> distributionAssignments = clusterState.getDistributionAssignments().get(distribution);
distributionAssignments.stream().map(BucketAssignment::getNodeIdentifier).forEach(allocationCounts::add);
int currentMin = allocationBytes.keySet().stream().mapToInt(allocationCounts::count).min().getAsInt();
int currentMax = allocationBytes.keySet().stream().mapToInt(allocationCounts::count).max().getAsInt();
int numBuckets = distributionAssignments.size();
int targetMin = (int) Math.floor((numBuckets * 1.0) / clusterState.getActiveNodes().size());
int targetMax = (int) Math.ceil((numBuckets * 1.0) / clusterState.getActiveNodes().size());
log.info("Distribution %s: Current bucket skew: min %s, max %s. Target bucket skew: min %s, max %s", distribution.getId(), currentMin, currentMax, targetMin, targetMax);
for (String source : ImmutableSet.copyOf(allocationCounts)) {
List<BucketAssignment> existingAssignments = distributionAssignments.stream().filter(assignment -> assignment.getNodeIdentifier().equals(source)).collect(toList());
for (BucketAssignment existingAssignment : existingAssignments) {
if (activeNodes.contains(source) && allocationCounts.count(source) <= targetMin) {
break;
}
// identify nodes with bucket counts lower than the computed target, and greedily select from this set based on projected disk utilization.
// greediness means that this may produce decidedly non-optimal results if one looks at the global distribution of buckets->nodes.
// also, this assumes that nodes in a cluster have identical storage capacity
String target = activeNodes.stream().filter(candidate -> !candidate.equals(source) && allocationCounts.count(candidate) < targetMax).sorted(comparingInt(allocationCounts::count)).min(Comparator.comparingDouble(allocationBytes::get)).orElseThrow(() -> new VerifyException("unable to find target for rebalancing"));
long bucketSize = clusterState.getDistributionBucketSize().get(distribution);
// only move bucket if it reduces imbalance
if (activeNodes.contains(source) && (allocationCounts.count(source) == targetMax && allocationCounts.count(target) == targetMin)) {
break;
}
allocationCounts.remove(source);
allocationCounts.add(target);
allocationBytes.compute(source, (k, v) -> v - bucketSize);
allocationBytes.compute(target, (k, v) -> v + bucketSize);
sourceToAllocationChanges.put(existingAssignment.getNodeIdentifier(), new BucketAssignment(existingAssignment.getDistributionId(), existingAssignment.getBucketNumber(), target));
}
}
}
return sourceToAllocationChanges;
}
Aggregations