Search in sources :

Example 6 with Distribution

Use of com.facebook.presto.raptor.metadata.Distribution in project presto by prestodb.

The class BucketBalancer, method computeAssignmentChanges.

/**
 * Plans bucket moves that even out the number of buckets per node, one distribution at a time.
 *
 * <p>For each distribution the per-node bucket count is compared against a target range of
 * {@code floor(buckets / activeNodes)} to {@code ceil(buckets / activeNodes)}. Buckets are taken
 * from nodes that are not active, or that are active and hold more than the target minimum, and
 * are handed to the active node below the target maximum with the fewest projected bytes. The
 * candidates are sorted by bucket count first, which serves as the tie-breaker when projected
 * bytes are equal. Projected bytes live in a local copy of the assigned-bytes map and carry over
 * from one distribution to the next.
 *
 * @param clusterState source of the active nodes, assigned bytes, per-distribution assignments
 *        and per-distribution bucket sizes
 * @return planned moves keyed by the node identifier of the existing assignment; each value keeps
 *         the distribution id and bucket number and names the chosen target node
 * @throws VerifyException if a bucket has to move but no active node is eligible to receive it
 */
private static Multimap<String, BucketAssignment> computeAssignmentChanges(ClusterState clusterState) {
    Multimap<String, BucketAssignment> plannedMoves = HashMultimap.create();
    // working copy: sizes are adjusted as moves are planned, so later choices see earlier ones
    Map<String, Long> projectedBytes = new HashMap<>(clusterState.getAssignedBytes());
    Set<String> activeNodes = clusterState.getActiveNodes();

    for (Distribution distribution : clusterState.getDistributionAssignments().keySet()) {
        // how many buckets of this distribution each node currently holds
        Multiset<String> bucketsPerNode = HashMultiset.create();
        Collection<BucketAssignment> assignments = clusterState.getDistributionAssignments().get(distribution);
        for (BucketAssignment current : assignments) {
            bucketsPerNode.add(current.getNodeIdentifier());
        }

        int currentMin = projectedBytes.keySet().stream()
                .mapToInt(bucketsPerNode::count)
                .min()
                .getAsInt();
        int currentMax = projectedBytes.keySet().stream()
                .mapToInt(bucketsPerNode::count)
                .max()
                .getAsInt();

        int numBuckets = assignments.size();
        double bucketsPerActiveNode = (numBuckets * 1.0) / clusterState.getActiveNodes().size();
        int targetMin = (int) Math.floor(bucketsPerActiveNode);
        int targetMax = (int) Math.ceil(bucketsPerActiveNode);
        log.info("Distribution %s: Current bucket skew: min %s, max %s. Target bucket skew: min %s, max %s", distribution.getId(), currentMin, currentMax, targetMin, targetMax);

        // iterate over a snapshot of the distinct nodes because the counts are mutated below
        for (String source : ImmutableSet.copyOf(bucketsPerNode)) {
            List<BucketAssignment> bucketsOnSource = assignments.stream()
                    .filter(held -> held.getNodeIdentifier().equals(source))
                    .collect(toList());
            boolean sourceIsActive = activeNodes.contains(source);

            for (BucketAssignment bucket : bucketsOnSource) {
                // an active node at or below the target minimum keeps whatever it still holds
                if (sourceIsActive && bucketsPerNode.count(source) <= targetMin) {
                    break;
                }

                // Greedy choice: among active nodes still below the target maximum, take the one with
                // the lowest projected disk usage. Being greedy, the overall bucket->node layout may be
                // far from optimal; this also assumes all nodes have identical storage capacity.
                String target = activeNodes.stream()
                        .filter(candidate -> !candidate.equals(source))
                        .filter(candidate -> bucketsPerNode.count(candidate) < targetMax)
                        .sorted(comparingInt(bucketsPerNode::count))
                        .min(Comparator.comparingDouble(projectedBytes::get))
                        .orElseThrow(() -> new VerifyException("unable to find target for rebalancing"));
                long bucketSize = clusterState.getDistributionBucketSize().get(distribution);

                // moving from a node at the maximum to a node at the minimum would not reduce imbalance
                if (sourceIsActive
                        && bucketsPerNode.count(source) == targetMax
                        && bucketsPerNode.count(target) == targetMin) {
                    break;
                }

                // record the move in the running counts and projected sizes
                bucketsPerNode.remove(source);
                bucketsPerNode.add(target);
                projectedBytes.compute(source, (node, bytes) -> bytes - bucketSize);
                projectedBytes.compute(target, (node, bytes) -> bytes + bucketSize);

                BucketAssignment reassigned = new BucketAssignment(bucket.getDistributionId(), bucket.getBucketNumber(), target);
                plannedMoves.put(bucket.getNodeIdentifier(), reassigned);
            }
        }
    }
    return plannedMoves;
}
Also used : Nested(org.weakref.jmx.Nested) Logger(com.facebook.airlift.log.Logger) NodeSupplier(com.facebook.presto.raptor.NodeSupplier) MetadataConfig(com.facebook.presto.raptor.metadata.MetadataConfig) Multiset(com.google.common.collect.Multiset) CounterStat(com.facebook.airlift.stats.CounterStat) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) Multimap(com.google.common.collect.Multimap) Duration(io.airlift.units.Duration) Inject(javax.inject.Inject) PreDestroy(javax.annotation.PreDestroy) Executors.newSingleThreadScheduledExecutor(java.util.concurrent.Executors.newSingleThreadScheduledExecutor) HashMultimap(com.google.common.collect.HashMultimap) Node(com.facebook.presto.spi.Node) Managed(org.weakref.jmx.Managed) Collectors.toMap(java.util.stream.Collectors.toMap) HashMultiset(com.google.common.collect.HashMultiset) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ShardManager(com.facebook.presto.raptor.metadata.ShardManager) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) BackupService(com.facebook.presto.raptor.backup.BackupService) Collectors.toSet(java.util.stream.Collectors.toSet) VerifyException(com.google.common.base.VerifyException) Distribution(com.facebook.presto.raptor.metadata.Distribution) Comparator.comparingInt(java.util.Comparator.comparingInt) ImmutableSet(com.google.common.collect.ImmutableSet) NodeManager(com.facebook.presto.spi.NodeManager) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) Set(java.util.Set) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) String.format(java.lang.String.format) Threads.daemonThreadsNamed(com.facebook.airlift.concurrent.Threads.daemonThreadsNamed) BucketNode(com.facebook.presto.raptor.metadata.BucketNode) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) 
RaptorConnectorId(com.facebook.presto.raptor.RaptorConnectorId) PostConstruct(javax.annotation.PostConstruct) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) HashMap(java.util.HashMap) VerifyException(com.google.common.base.VerifyException) Distribution(com.facebook.presto.raptor.metadata.Distribution)

Aggregations

Distribution (com.facebook.presto.raptor.metadata.Distribution)6 PrestoException (com.facebook.presto.spi.PrestoException)3 BucketNode (com.facebook.presto.raptor.metadata.BucketNode)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 ImmutableMap (com.google.common.collect.ImmutableMap)2 ImmutableMultimap (com.google.common.collect.ImmutableMultimap)2 HashMap (java.util.HashMap)2 Threads.daemonThreadsNamed (com.facebook.airlift.concurrent.Threads.daemonThreadsNamed)1 Logger (com.facebook.airlift.log.Logger)1 CounterStat (com.facebook.airlift.stats.CounterStat)1 Type (com.facebook.presto.common.type.Type)1 NodeSupplier (com.facebook.presto.raptor.NodeSupplier)1 RaptorBucketFunction.validateBucketType (com.facebook.presto.raptor.RaptorBucketFunction.validateBucketType)1 RaptorConnectorId (com.facebook.presto.raptor.RaptorConnectorId)1 BackupService (com.facebook.presto.raptor.backup.BackupService)1 MetadataConfig (com.facebook.presto.raptor.metadata.MetadataConfig)1 ShardManager (com.facebook.presto.raptor.metadata.ShardManager)1 ClusterState (com.facebook.presto.raptor.storage.BucketBalancer.ClusterState)1 Node (com.facebook.presto.spi.Node)1 NodeManager (com.facebook.presto.spi.NodeManager)1