use of io.trino.plugin.raptor.legacy.metadata.Distribution in project trino by trinodb.
the class RaptorMetadata method getOrCreateDistribution.
/**
 * Resolves the bucketing-related table properties into a distribution.
 *
 * <p>Returns an empty result when neither a distribution name nor a bucket count is given.
 * Otherwise a named distribution is looked up (and created if missing), or an unnamed one is
 * inserted, its buckets are created through the shard manager, and the resulting distribution
 * id, bucket count and bucket columns are returned.
 *
 * @param columnHandleMap column handles by name, used to resolve the bucket columns
 * @param properties table properties to read the bucket count, bucket columns and distribution name from
 * @throws TrinoException with {@code INVALID_TABLE_PROPERTY} when the properties are inconsistent
 *         with each other or with an existing distribution
 */
private Optional<DistributionInfo> getOrCreateDistribution(Map<String, RaptorColumnHandle> columnHandleMap, Map<String, Object> properties) {
    OptionalInt requestedBuckets = getBucketCount(properties);
    List<RaptorColumnHandle> bucketColumns = getBucketColumnHandles(getBucketColumns(properties), columnHandleMap);

    // bucket count and bucket columns must be given together
    boolean hasBucketCount = requestedBuckets.isPresent();
    boolean hasBucketColumns = !bucketColumns.isEmpty();
    if (hasBucketCount && !hasBucketColumns) {
        throw new TrinoException(INVALID_TABLE_PROPERTY, format("Must specify '%s' along with '%s'", BUCKETED_ON_PROPERTY, BUCKET_COUNT_PROPERTY));
    }
    if (!hasBucketCount && hasBucketColumns) {
        throw new TrinoException(INVALID_TABLE_PROPERTY, format("Must specify '%s' along with '%s'", BUCKET_COUNT_PROPERTY, BUCKETED_ON_PROPERTY));
    }

    ImmutableList.Builder<Type> typesBuilder = ImmutableList.builder();
    for (RaptorColumnHandle handle : bucketColumns) {
        Type columnType = handle.getColumnType();
        validateBucketType(columnType);
        typesBuilder.add(columnType);
    }
    ImmutableList<Type> bucketTypes = typesBuilder.build();

    String distributionName = getDistributionName(properties);
    long distributionId;
    if (distributionName == null) {
        if (requestedBuckets.isEmpty()) {
            // no bucketing requested at all
            return Optional.empty();
        }
        // unnamed distribution: always insert a fresh row
        String serializedTypes = Distribution.serializeColumnTypes(bucketTypes);
        distributionId = dao.insertDistribution(null, serializedTypes, requestedBuckets.getAsInt());
    }
    else {
        if (!hasBucketColumns) {
            throw new TrinoException(INVALID_TABLE_PROPERTY, format("Must specify '%s' along with '%s'", BUCKETED_ON_PROPERTY, DISTRIBUTION_NAME_PROPERTY));
        }

        Distribution distribution = dao.getDistribution(distributionName);
        if (distribution == null) {
            if (requestedBuckets.isEmpty()) {
                throw new TrinoException(INVALID_TABLE_PROPERTY, "Distribution does not exist and bucket count is not specified");
            }
            distribution = getOrCreateDistribution(distributionName, bucketTypes, requestedBuckets.getAsInt());
        }
        distributionId = distribution.getId();

        // the requested layout has to agree with the stored distribution
        if (requestedBuckets.isPresent() && (distribution.getBucketCount() != requestedBuckets.getAsInt())) {
            throw new TrinoException(INVALID_TABLE_PROPERTY, "Bucket count must match distribution");
        }
        if (!distribution.getColumnTypes().equals(bucketTypes)) {
            throw new TrinoException(INVALID_TABLE_PROPERTY, "Bucket column types must match distribution");
        }
    }

    shardManager.createBuckets(distributionId, requestedBuckets.getAsInt());
    return Optional.of(new DistributionInfo(distributionId, requestedBuckets.getAsInt(), bucketColumns));
}
use of io.trino.plugin.raptor.legacy.metadata.Distribution in project trino by trinodb.
the class BucketBalancer method fetchClusterState.
/**
 * Builds a snapshot of the current bucket placement.
 *
 * <p>For every distribution reported by the shard manager this records its bucket-to-node
 * assignments and a per-bucket size estimate (distribution size divided evenly by its bucket
 * count), and accumulates that estimate per node. Every active worker node starts at zero
 * bytes; nodes that only appear in bucket assignments are added as they are encountered.
 *
 * @return the active nodes, estimated bytes per node, assignments per distribution and
 *         estimated bucket size per distribution
 * @throws ArithmeticException if a distribution reports a bucket count of zero, or if a
 *         node's accumulated size overflows a {@code long}
 */
@VisibleForTesting
ClusterState fetchClusterState() {
    Set<String> activeNodes = nodeSupplier.getWorkerNodes().stream().map(Node::getNodeIdentifier).collect(toSet());

    Map<String, Long> assignedNodeSize = new HashMap<>();
    for (String activeNode : activeNodes) {
        assignedNodeSize.put(activeNode, 0L);
    }

    ImmutableMultimap.Builder<Distribution, BucketAssignment> distributionAssignments = ImmutableMultimap.builder();
    ImmutableMap.Builder<Distribution, Long> distributionBucketSize = ImmutableMap.builder();

    for (Distribution distribution : shardManager.getDistributions()) {
        long distributionSize = shardManager.getDistributionSizeInBytes(distribution.getId());
        // Plain long division. The previous "(long) (1.0 * size) / count" cast the product back
        // to long before dividing, so the double arithmetic contributed nothing except a loss
        // of precision for sizes above 2^53.
        long bucketSize = distributionSize / distribution.getBucketCount();
        distributionBucketSize.put(distribution, bucketSize);

        for (BucketNode bucketNode : shardManager.getBucketNodes(distribution.getId())) {
            String node = bucketNode.getNodeIdentifier();
            distributionAssignments.put(distribution, new BucketAssignment(distribution.getId(), bucketNode.getBucketNumber(), node));
            // addExact: fail loudly rather than wrap around on overflow
            assignedNodeSize.merge(node, bucketSize, Math::addExact);
        }
    }

    return new ClusterState(activeNodes, assignedNodeSize, distributionAssignments.build(), distributionBucketSize.buildOrThrow());
}
use of io.trino.plugin.raptor.legacy.metadata.Distribution in project trino by trinodb.
the class BucketBalancer method computeAssignmentChanges.
/**
 * Plans bucket moves that even out the number of buckets per node within each distribution.
 *
 * <p>Nothing is applied here; the method only works on copies of the given state. For each
 * planned move, the returned multimap holds an entry keyed by the bucket's current (source)
 * node identifier whose value is a new {@code BucketAssignment} for the same distribution and
 * bucket number pointing at the chosen target node.
 *
 * @param clusterState snapshot of active nodes, per-node assigned bytes, per-distribution
 *        assignments and per-distribution bucket size
 * @return planned moves grouped by source node; empty when no move is planned
 * @throws VerifyException if a bucket has to be moved but no eligible target node exists
 */
private static Multimap<String, BucketAssignment> computeAssignmentChanges(ClusterState clusterState) {
Multimap<String, BucketAssignment> sourceToAllocationChanges = HashMultimap.create();
// working copy of bytes per node; updated as moves are planned and shared across all distributions
Map<String, Long> allocationBytes = new HashMap<>(clusterState.getAssignedBytes());
Set<String> activeNodes = clusterState.getActiveNodes();
for (Distribution distribution : clusterState.getDistributionAssignments().keySet()) {
// number of buckets in this distribution assigned to a node
Multiset<String> allocationCounts = HashMultiset.create();
Collection<BucketAssignment> distributionAssignments = clusterState.getDistributionAssignments().get(distribution);
distributionAssignments.stream().map(BucketAssignment::getNodeIdentifier).forEach(allocationCounts::add);
// current min/max bucket count over every node known to allocationBytes; only used for the log line below
int currentMin = allocationBytes.keySet().stream().mapToInt(allocationCounts::count).min().getAsInt();
int currentMax = allocationBytes.keySet().stream().mapToInt(allocationCounts::count).max().getAsInt();
int numBuckets = distributionAssignments.size();
// per-node target range: floor and ceiling of (buckets / active nodes)
int targetMin = (int) Math.floor((numBuckets * 1.0) / clusterState.getActiveNodes().size());
int targetMax = (int) Math.ceil((numBuckets * 1.0) / clusterState.getActiveNodes().size());
log.info("Distribution %s: Current bucket skew: min %s, max %s. Target bucket skew: min %s, max %s", distribution.getId(), currentMin, currentMax, targetMin, targetMax);
// iterate over a snapshot of the distinct nodes, because allocationCounts is modified inside the loop
for (String source : ImmutableSet.copyOf(allocationCounts)) {
List<BucketAssignment> existingAssignments = distributionAssignments.stream().filter(assignment -> assignment.getNodeIdentifier().equals(source)).collect(toList());
for (BucketAssignment existingAssignment : existingAssignments) {
// an active source stops giving up buckets once it is down to targetMin;
// a source that is not active never stops here, so its buckets keep being moved away
if (activeNodes.contains(source) && allocationCounts.count(source) <= targetMin) {
break;
}
// identify nodes with bucket counts lower than the computed target, and greedily select from this set based on projected disk utilization.
// greediness means that this may produce decidedly non-optimal results if one looks at the global distribution of buckets->nodes.
// also, this assumes that nodes in a cluster have identical storage capacity
// NOTE(review): the winner is the candidate with the smallest planned byte total; the preceding sort by
// bucket count appears to matter only as a tie-break between equal byte totals - confirm this is intended
String target = activeNodes.stream().filter(candidate -> !candidate.equals(source) && allocationCounts.count(candidate) < targetMax).sorted(comparingInt(allocationCounts::count)).min(Comparator.comparingDouble(allocationBytes::get)).orElseThrow(() -> new VerifyException("unable to find target for rebalancing"));
long bucketSize = clusterState.getDistributionBucketSize().get(distribution);
// only move bucket if it reduces imbalance
// (an active source at targetMax giving a bucket to a target at targetMin is skipped)
if (activeNodes.contains(source) && (allocationCounts.count(source) == targetMax && allocationCounts.count(target) == targetMin)) {
break;
}
// record the planned move in the working counts and byte totals so later choices see it
allocationCounts.remove(source);
allocationCounts.add(target);
allocationBytes.compute(source, (k, v) -> v - bucketSize);
allocationBytes.compute(target, (k, v) -> v + bucketSize);
sourceToAllocationChanges.put(existingAssignment.getNodeIdentifier(), new BucketAssignment(existingAssignment.getDistributionId(), existingAssignment.getBucketNumber(), target));
}
}
}
return sourceToAllocationChanges;
}
use of io.trino.plugin.raptor.legacy.metadata.Distribution in project trino by trinodb.
the class TestBucketBalancer method assertBalancing.
/**
 * Runs one balancing pass and verifies its outcome.
 *
 * <p>Checks that the pass performed exactly {@code expectedMoves} moves, that afterwards every
 * node holding buckets of a distribution holds between the floor and the ceiling of the
 * per-active-node average, and that a second pass performs no further moves.
 */
private static void assertBalancing(BucketBalancer balancer, int expectedMoves) {
    assertEquals(balancer.balance(), expectedMoves);

    // check that number of buckets per node is within bounds
    ClusterState state = balancer.fetchClusterState();
    for (Distribution distribution : state.getDistributionAssignments().keySet()) {
        Multiset<String> bucketsByNode = HashMultiset.create();
        for (BucketAssignment assignment : state.getDistributionAssignments().get(distribution)) {
            bucketsByNode.add(assignment.getNodeIdentifier());
        }

        double averagePerNode = (1.0 * bucketsByNode.size()) / state.getActiveNodes().size();
        int lowerBound = (int) Math.floor(averagePerNode);
        int upperBound = (int) Math.ceil(averagePerNode);

        for (String node : bucketsByNode) {
            int held = bucketsByNode.count(node);
            assertGreaterThanOrEqual(held, lowerBound, node + " has fewer buckets than expected");
            assertLessThanOrEqual(held, upperBound, node + " has more buckets than expected");
        }
    }

    // check stability: a balanced cluster must not trigger further moves
    assertEquals(balancer.balance(), 0);
}
use of io.trino.plugin.raptor.legacy.metadata.Distribution in project trino by trinodb.
the class RaptorMetadata method getOrCreateDistribution.
/**
 * Inserts a named distribution and returns the row stored under that name.
 *
 * <p>The insert is wrapped in {@code runIgnoringConstraintViolation}, and the distribution is
 * then read back by name, so the returned row is whatever is stored under {@code name} after
 * the attempt.
 *
 * @param name distribution name
 * @param columnTypes bucket column types, stored in serialized form
 * @param bucketCount number of buckets
 * @throws TrinoException with {@code RAPTOR_ERROR} if no distribution is found after the insert
 */
private Distribution getOrCreateDistribution(String name, List<Type> columnTypes, int bucketCount) {
    String serializedTypes = Distribution.serializeColumnTypes(columnTypes);
    runIgnoringConstraintViolation(() -> dao.insertDistribution(name, serializedTypes, bucketCount));

    Distribution stored = dao.getDistribution(name);
    if (stored != null) {
        return stored;
    }
    throw new TrinoException(RAPTOR_ERROR, "Distribution does not exist after insert");
}
Aggregations