Usage example of io.trino.spi.Node in the Trino project (trinodb):
class RaptorNodePartitioningProvider, method getBucketNodeMap.
@Override
public ConnectorBucketNodeMap getBucketNodeMap(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorPartitioningHandle partitioning) {
    RaptorPartitioningHandle raptorHandle = (RaptorPartitioningHandle) partitioning;
    // Index the currently available workers by identifier so each bucket's
    // recorded node can be resolved in constant time.
    Map<String, Node> workersById = uniqueIndex(nodeSupplier.getWorkerNodes(), Node::getNodeIdentifier);
    ImmutableList.Builder<Node> assignments = ImmutableList.builder();
    raptorHandle.getBucketToNode().forEach(identifier -> {
        Node worker = workersById.get(identifier);
        // A bucket whose assigned node is not among the live workers cannot be scheduled.
        if (worker == null) {
            throw new TrinoException(NO_NODES_AVAILABLE, "Node for bucket is offline: " + identifier);
        }
        assignments.add(worker);
    });
    return createBucketNodeMap(assignments.build());
}
Usage example of io.trino.spi.Node in the Trino project (trinodb):
class TestRubixCaching, method initializeRubix.
// Sets up a Rubix cache instance for the test: creates cache directories under a
// fresh temp root, configures master-only mode, starts the server, and waits
// until it reports itself up.
private void initializeRubix(RubixConfig rubixConfig, List<Node> nodes) throws Exception {
    tempDirectory = createTempDirectory(getClass().getSimpleName());

    // create cache directories
    List<java.nio.file.Path> cacheDirs = ImmutableList.of(
            tempDirectory.resolve("cache1"),
            tempDirectory.resolve("cache2"));
    for (java.nio.file.Path cacheDir : cacheDirs) {
        createDirectories(cacheDir);
    }

    // initialize rubix in master-only mode
    rubixConfig.setStartServerOnCoordinator(true);
    rubixConfig.setCacheLocation(Joiner.on(",").join(
            cacheDirs.stream().map(java.nio.file.Path::toString).iterator()));

    HdfsConfigurationInitializer configurationInitializer = new HdfsConfigurationInitializer(
            config,
            // fetch data immediately in async mode
            ImmutableSet.of(config -> setRemoteFetchProcessInterval(config, 0)));
    TestingNodeManager nodeManager = new TestingNodeManager(nodes);
    rubixInitializer = new RubixInitializer(
            rubixConfig,
            nodeManager,
            new CatalogName("catalog"),
            configurationInitializer,
            new DefaultRubixHdfsInitializer(new HdfsAuthenticationConfig()));
    rubixConfigInitializer = new RubixConfigurationInitializer(rubixInitializer);
    rubixInitializer.initializeRubix();

    // Block until the server answers health checks; retry() re-runs on exception.
    retry().run("wait for rubix to startup", () -> {
        if (!rubixInitializer.isServerUp()) {
            throw new IllegalStateException("Rubix server has not started");
        }
        return null;
    });
}
Usage example of io.trino.spi.Node in the Trino project (trinodb):
class MemoryMetadata, method beginCreateTable.
@Override
public synchronized MemoryOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorTableLayout> layout, RetryMode retryMode) {
    // Validate the target schema exists and the table does not, before allocating an id.
    checkSchemaExists(tableMetadata.getTable().getSchemaName());
    checkTableNotExists(tableMetadata.getTable());
    long tableId = nextTableId.getAndIncrement();

    // The memory connector needs at least one worker to host table data.
    Set<Node> workers = nodeManager.getRequiredWorkerNodes();
    checkState(!workers.isEmpty(), "No Memory nodes available");

    // Build column metadata; each column handle carries its ordinal position.
    ImmutableList.Builder<ColumnInfo> columnInfos = ImmutableList.builder();
    int ordinal = 0;
    for (ColumnMetadata column : tableMetadata.getColumns()) {
        columnInfos.add(new ColumnInfo(new MemoryColumnHandle(ordinal), column.getName(), column.getType()));
        ordinal++;
    }

    tableIds.put(tableMetadata.getTable(), tableId);
    tables.put(tableId, new TableInfo(tableId, tableMetadata.getTable().getSchemaName(), tableMetadata.getTable().getTableName(), columnInfos.build(), new HashMap<>()));
    // The handle snapshots all known table ids so stale writes can be detected.
    return new MemoryOutputTableHandle(tableId, ImmutableSet.copyOf(tableIds.values()));
}
Usage example of io.trino.spi.Node in the Trino project (trinodb):
class BucketBalancer, method computeAssignmentChanges.
// Computes the bucket moves needed to rebalance each distribution's buckets across
// the active nodes. Buckets are drained greedily from over-allocated source nodes
// to under-allocated targets, preferring targets with the fewest buckets and, on
// ties, the lowest projected byte allocation.
// Returns a multimap from source node identifier to the new BucketAssignment(s)
// describing where each moved bucket should go.
private static Multimap<String, BucketAssignment> computeAssignmentChanges(ClusterState clusterState) {
Multimap<String, BucketAssignment> sourceToAllocationChanges = HashMultimap.create();
// Projected bytes per node; mutated below as moves are planned so later
// decisions account for earlier ones.
Map<String, Long> allocationBytes = new HashMap<>(clusterState.getAssignedBytes());
Set<String> activeNodes = clusterState.getActiveNodes();
for (Distribution distribution : clusterState.getDistributionAssignments().keySet()) {
// number of buckets in this distribution assigned to a node
Multiset<String> allocationCounts = HashMultiset.create();
Collection<BucketAssignment> distributionAssignments = clusterState.getDistributionAssignments().get(distribution);
distributionAssignments.stream().map(BucketAssignment::getNodeIdentifier).forEach(allocationCounts::add);
// NOTE(review): getAsInt() throws NoSuchElementException if allocationBytes is
// empty — presumably ClusterState guarantees at least one node entry; confirm.
int currentMin = allocationBytes.keySet().stream().mapToInt(allocationCounts::count).min().getAsInt();
int currentMax = allocationBytes.keySet().stream().mapToInt(allocationCounts::count).max().getAsInt();
int numBuckets = distributionAssignments.size();
// Ideal per-node bucket counts if this distribution's buckets were spread
// evenly over the active nodes (floor/ceil of the exact average).
int targetMin = (int) Math.floor((numBuckets * 1.0) / clusterState.getActiveNodes().size());
int targetMax = (int) Math.ceil((numBuckets * 1.0) / clusterState.getActiveNodes().size());
log.info("Distribution %s: Current bucket skew: min %s, max %s. Target bucket skew: min %s, max %s", distribution.getId(), currentMin, currentMax, targetMin, targetMax);
for (String source : ImmutableSet.copyOf(allocationCounts)) {
List<BucketAssignment> existingAssignments = distributionAssignments.stream().filter(assignment -> assignment.getNodeIdentifier().equals(source)).collect(toList());
for (BucketAssignment existingAssignment : existingAssignments) {
// Stop draining this source once it is active and at/below the minimum target.
if (activeNodes.contains(source) && allocationCounts.count(source) <= targetMin) {
break;
}
// identify nodes with bucket counts lower than the computed target, and greedily select from this set based on projected disk utilization.
// greediness means that this may produce decidedly non-optimal results if one looks at the global distribution of buckets->nodes.
// also, this assumes that nodes in a cluster have identical storage capacity
String target = activeNodes.stream().filter(candidate -> !candidate.equals(source) && allocationCounts.count(candidate) < targetMax).sorted(comparingInt(allocationCounts::count)).min(Comparator.comparingDouble(allocationBytes::get)).orElseThrow(() -> new VerifyException("unable to find target for rebalancing"));
long bucketSize = clusterState.getDistributionBucketSize().get(distribution);
// only move bucket if it reduces imbalance
if (activeNodes.contains(source) && (allocationCounts.count(source) == targetMax && allocationCounts.count(target) == targetMin)) {
break;
}
// Apply the planned move to the projected bucket counts and byte totals
// so subsequent iterations see the updated cluster picture.
allocationCounts.remove(source);
allocationCounts.add(target);
allocationBytes.compute(source, (k, v) -> v - bucketSize);
allocationBytes.compute(target, (k, v) -> v + bucketSize);
// Record the change keyed by the node the bucket is moving off of.
sourceToAllocationChanges.put(existingAssignment.getNodeIdentifier(), new BucketAssignment(existingAssignment.getDistributionId(), existingAssignment.getBucketNumber(), target));
}
}
}
return sourceToAllocationChanges;
}
Usage example of io.trino.spi.Node in the Trino project (trinodb):
class RubixInitializer, method getRubixServerConfiguration.
// Builds the Hadoop Configuration used by the Rubix cache server: records the
// coordinator's address, applies standard HDFS initialization, and layers the
// Rubix-specific settings on top.
private Configuration getRubixServerConfiguration() {
    // Fix: avoid unchecked Optional.get(), which throws an uninformative
    // NoSuchElementException when no coordinator is registered; fail with a
    // descriptive message instead.
    Node master = nodeManager.getAllNodes().stream()
            .filter(Node::isCoordinator)
            .findFirst()
            .orElseThrow(() -> new IllegalStateException("No coordinator node available"));
    masterAddress = master.getHostAndPort();
    Configuration configuration = getInitialConfiguration();
    // Perform standard HDFS configuration initialization.
    hdfsConfigurationInitializer.initializeConfiguration(configuration);
    updateRubixConfiguration(configuration, RUBIX);
    setCacheKey(configuration, "rubix_internal");
    return configuration;
}
Aggregations