Example 1 with Node

use of io.trino.spi.Node in project trino by trinodb.

the class RaptorNodePartitioningProvider method getBucketNodeMap.

@Override
public ConnectorBucketNodeMap getBucketNodeMap(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorPartitioningHandle partitioning) {
    RaptorPartitioningHandle handle = (RaptorPartitioningHandle) partitioning;
    Map<String, Node> nodesById = uniqueIndex(nodeSupplier.getWorkerNodes(), Node::getNodeIdentifier);
    ImmutableList.Builder<Node> bucketToNode = ImmutableList.builder();
    for (String nodeIdentifier : handle.getBucketToNode()) {
        Node node = nodesById.get(nodeIdentifier);
        if (node == null) {
            throw new TrinoException(NO_NODES_AVAILABLE, "Node for bucket is offline: " + nodeIdentifier);
        }
        bucketToNode.add(node);
    }
    return createBucketNodeMap(bucketToNode.build());
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) Node(io.trino.spi.Node) TrinoException(io.trino.spi.TrinoException)
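
For context, here is a minimal standalone sketch of the same lookup pattern: index the known worker nodes by identifier with Guava's Maps.uniqueIndex and fail fast when a bucket references a node that is no longer online. The WorkerNode record and the sample identifiers are hypothetical stand-ins for io.trino.spi.Node and the partitioning handle's bucket-to-node list, not part of the Raptor connector.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;

import java.util.List;
import java.util.Map;

public class BucketNodeLookupSketch {
    // Hypothetical stand-in for io.trino.spi.Node
    record WorkerNode(String nodeIdentifier) {}

    public static List<WorkerNode> resolveBucketNodes(List<WorkerNode> workers, List<String> bucketToNodeIds) {
        // Index the currently registered workers by their node identifier
        Map<String, WorkerNode> nodesById = Maps.uniqueIndex(workers, WorkerNode::nodeIdentifier);
        ImmutableList.Builder<WorkerNode> bucketToNode = ImmutableList.builder();
        for (String nodeIdentifier : bucketToNodeIds) {
            WorkerNode node = nodesById.get(nodeIdentifier);
            if (node == null) {
                // The connector raises TrinoException(NO_NODES_AVAILABLE, ...) at this point
                throw new IllegalStateException("Node for bucket is offline: " + nodeIdentifier);
            }
            bucketToNode.add(node);
        }
        return bucketToNode.build();
    }

    public static void main(String[] args) {
        List<WorkerNode> workers = ImmutableList.of(new WorkerNode("worker-1"), new WorkerNode("worker-2"));
        // Three buckets mapped onto two live workers
        System.out.println(resolveBucketNodes(workers, ImmutableList.of("worker-2", "worker-1", "worker-2")));
    }
}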

Example 2 with Node

use of io.trino.spi.Node in project trino by trinodb.

the class TestRubixCaching method initializeRubix.

private void initializeRubix(RubixConfig rubixConfig, List<Node> nodes) throws Exception {
    tempDirectory = createTempDirectory(getClass().getSimpleName());
    // create cache directories
    List<java.nio.file.Path> cacheDirectories = ImmutableList.of(tempDirectory.resolve("cache1"), tempDirectory.resolve("cache2"));
    for (java.nio.file.Path directory : cacheDirectories) {
        createDirectories(directory);
    }
    // initialize rubix in master-only mode
    rubixConfig.setStartServerOnCoordinator(true);
    rubixConfig.setCacheLocation(Joiner.on(",").join(cacheDirectories.stream().map(java.nio.file.Path::toString).collect(toImmutableList())));
    HdfsConfigurationInitializer configurationInitializer = new HdfsConfigurationInitializer(config, ImmutableSet.of(
            // fetch data immediately in async mode
            config -> setRemoteFetchProcessInterval(config, 0)));
    TestingNodeManager nodeManager = new TestingNodeManager(nodes);
    rubixInitializer = new RubixInitializer(rubixConfig, nodeManager, new CatalogName("catalog"), configurationInitializer, new DefaultRubixHdfsInitializer(new HdfsAuthenticationConfig()));
    rubixConfigInitializer = new RubixConfigurationInitializer(rubixInitializer);
    rubixInitializer.initializeRubix();
    retry().run("wait for rubix to startup", () -> {
        if (!rubixInitializer.isServerUp()) {
            throw new IllegalStateException("Rubix server has not started");
        }
        return null;
    });
}
Also used : Path(org.apache.hadoop.fs.Path) Arrays(java.util.Arrays) Assertions.assertInstanceOf(io.airlift.testing.Assertions.assertInstanceOf) BlockLocation(org.apache.hadoop.fs.BlockLocation) FileSystem(org.apache.hadoop.fs.FileSystem) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) Assertions.assertGreaterThan(io.airlift.testing.Assertions.assertGreaterThan) Test(org.testng.annotations.Test) Random(java.util.Random) ReadMode(io.trino.plugin.hive.rubix.RubixConfig.ReadMode) FileStatus(org.apache.hadoop.fs.FileStatus) AfterMethod(org.testng.annotations.AfterMethod) Duration(io.airlift.units.Duration) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) ASYNC(io.trino.plugin.hive.rubix.RubixConfig.ReadMode.ASYNC) Future(java.util.concurrent.Future) InetAddress.getLocalHost(java.net.InetAddress.getLocalHost) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Path(org.apache.hadoop.fs.Path) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) URI(java.net.URI) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ImmutableSet(com.google.common.collect.ImmutableSet) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) TestingNodeManager(io.trino.testing.TestingNodeManager) Collections.nCopies(java.util.Collections.nCopies) BeforeClass(org.testng.annotations.BeforeClass) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) BeforeMethod(org.testng.annotations.BeforeMethod) PropertyMetadata(io.trino.spi.session.PropertyMetadata) ObjectName(javax.management.ObjectName) Files.createDirectories(java.nio.file.Files.createDirectories) String.format(java.lang.String.format) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) CachingPrestoAzureBlobFileSystem(com.qubole.rubix.prestosql.CachingPrestoAzureBlobFileSystem) HdfsAuthenticationConfig(io.trino.plugin.hive.authentication.HdfsAuthenticationConfig) OrcReaderConfig(io.trino.plugin.hive.orc.OrcReaderConfig) UNKNOWN(io.trino.client.NodeVersion.UNKNOWN) HdfsConfig(io.trino.plugin.hive.HdfsConfig) ByteStreams(com.google.common.io.ByteStreams) HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) Joiner(com.google.common.base.Joiner) DataProvider(org.testng.annotations.DataProvider) READ_THROUGH(io.trino.plugin.hive.rubix.RubixConfig.ReadMode.READ_THROUGH) CachingPrestoDistributedFileSystem(com.qubole.rubix.prestosql.CachingPrestoDistributedFileSystem) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) Assert.assertEquals(org.testng.Assert.assertEquals) Callable(java.util.concurrent.Callable) CachingPrestoGoogleHadoopFileSystem(com.qubole.rubix.prestosql.CachingPrestoGoogleHadoopFileSystem) CachingPrestoSecureAzureBlobFileSystem(com.qubole.rubix.prestosql.CachingPrestoSecureAzureBlobFileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) ImmutableList(com.google.common.collect.ImmutableList) FilterFileSystem(org.apache.hadoop.fs.FilterFileSystem) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Closer(com.google.common.io.Closer) CachingPrestoAdlFileSystem(com.qubole.rubix.prestosql.CachingPrestoAdlFileSystem) MBeanServer(javax.management.MBeanServer) ManagementFactory(java.lang.management.ManagementFactory) ExecutorService(java.util.concurrent.ExecutorService) Node(io.trino.spi.Node) 
AfterClass(org.testng.annotations.AfterClass) RetryDriver.retry(io.trino.plugin.hive.util.RetryDriver.retry) CachingFileSystem(com.qubole.rubix.core.CachingFileSystem) UTF_8(java.nio.charset.StandardCharsets.UTF_8) DefaultRubixHdfsInitializer(io.trino.plugin.hive.rubix.RubixModule.DefaultRubixHdfsInitializer) IOException(java.io.IOException) HiveTestUtils.getHiveSessionProperties(io.trino.plugin.hive.HiveTestUtils.getHiveSessionProperties) CatalogName(io.trino.plugin.base.CatalogName) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) TestingConnectorSession(io.trino.testing.TestingConnectorSession) InternalNode(io.trino.metadata.InternalNode) Assert.assertEventually(io.trino.testing.assertions.Assert.assertEventually) CacheConfig.setRemoteFetchProcessInterval(com.qubole.rubix.spi.CacheConfig.setRemoteFetchProcessInterval) Assert.assertTrue(org.testng.Assert.assertTrue) HiveConfig(io.trino.plugin.hive.HiveConfig) SECONDS(java.util.concurrent.TimeUnit.SECONDS)
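
For reference, a hedged sketch of how the nodes argument to initializeRubix might be built in a test: a single coordinator wrapped in an immutable list. It assumes the InternalNode(identifier, URI, NodeVersion, coordinator) constructor implied by the imports above; the class name, identifier, host, and port below are illustrative only.

import com.google.common.collect.ImmutableList;
import io.trino.client.NodeVersion;
import io.trino.metadata.InternalNode;
import io.trino.spi.Node;

import java.net.URI;
import java.util.List;

public class RubixTestNodesSketch {
    public static void main(String[] args) {
        // Hypothetical test fixture: one coordinator node with an illustrative identifier and URI
        List<Node> nodes = ImmutableList.of(
                new InternalNode("coordinator", URI.create("http://localhost:8080"), NodeVersion.UNKNOWN, true));
        nodes.forEach(node -> System.out.println(node.getNodeIdentifier() + " coordinator=" + node.isCoordinator()));
    }
}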

Example 3 with Node

use of io.trino.spi.Node in project trino by trinodb.

the class MemoryMetadata method beginCreateTable.

@Override
public synchronized MemoryOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorTableLayout> layout, RetryMode retryMode) {
    checkSchemaExists(tableMetadata.getTable().getSchemaName());
    checkTableNotExists(tableMetadata.getTable());
    long tableId = nextTableId.getAndIncrement();
    Set<Node> nodes = nodeManager.getRequiredWorkerNodes();
    checkState(!nodes.isEmpty(), "No Memory nodes available");
    ImmutableList.Builder<ColumnInfo> columns = ImmutableList.builder();
    for (int i = 0; i < tableMetadata.getColumns().size(); i++) {
        ColumnMetadata column = tableMetadata.getColumns().get(i);
        columns.add(new ColumnInfo(new MemoryColumnHandle(i), column.getName(), column.getType()));
    }
    tableIds.put(tableMetadata.getTable(), tableId);
    tables.put(tableId, new TableInfo(tableId, tableMetadata.getTable().getSchemaName(), tableMetadata.getTable().getTableName(), columns.build(), new HashMap<>()));
    return new MemoryOutputTableHandle(tableId, ImmutableSet.copyOf(tableIds.values()));
}
Also used : ColumnMetadata(io.trino.spi.connector.ColumnMetadata) HashMap(java.util.HashMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) Node(io.trino.spi.Node) Constraint(io.trino.spi.connector.Constraint)
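
The Node-related step here is the worker check: the connector asks its NodeManager for the current workers and refuses to create the table when none are available. Below is a minimal sketch of that pattern together with a hypothetical round-robin helper; assignToWorkers is not part of the memory connector, and only NodeManager and Node come from the Trino SPI.

import com.google.common.collect.ImmutableList;
import io.trino.spi.Node;
import io.trino.spi.NodeManager;

import java.util.List;
import java.util.Set;

import static com.google.common.base.Preconditions.checkState;

final class WorkerAssignmentSketch {
    private WorkerAssignmentSketch() {}

    // Hypothetical helper: spread bucketCount buckets over the available workers round-robin
    static List<Node> assignToWorkers(NodeManager nodeManager, int bucketCount) {
        Set<Node> workers = nodeManager.getWorkerNodes();
        checkState(!workers.isEmpty(), "No worker nodes available");
        List<Node> workerList = ImmutableList.copyOf(workers);
        ImmutableList.Builder<Node> assignment = ImmutableList.builder();
        for (int bucket = 0; bucket < bucketCount; bucket++) {
            assignment.add(workerList.get(bucket % workerList.size()));
        }
        return assignment.build();
    }
}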

Example 4 with Node

use of io.trino.spi.Node in project trino by trinodb.

the class BucketBalancer method computeAssignmentChanges.

private static Multimap<String, BucketAssignment> computeAssignmentChanges(ClusterState clusterState) {
    Multimap<String, BucketAssignment> sourceToAllocationChanges = HashMultimap.create();
    Map<String, Long> allocationBytes = new HashMap<>(clusterState.getAssignedBytes());
    Set<String> activeNodes = clusterState.getActiveNodes();
    for (Distribution distribution : clusterState.getDistributionAssignments().keySet()) {
        // number of buckets in this distribution assigned to a node
        Multiset<String> allocationCounts = HashMultiset.create();
        Collection<BucketAssignment> distributionAssignments = clusterState.getDistributionAssignments().get(distribution);
        distributionAssignments.stream().map(BucketAssignment::getNodeIdentifier).forEach(allocationCounts::add);
        int currentMin = allocationBytes.keySet().stream().mapToInt(allocationCounts::count).min().getAsInt();
        int currentMax = allocationBytes.keySet().stream().mapToInt(allocationCounts::count).max().getAsInt();
        int numBuckets = distributionAssignments.size();
        int targetMin = (int) Math.floor((numBuckets * 1.0) / clusterState.getActiveNodes().size());
        int targetMax = (int) Math.ceil((numBuckets * 1.0) / clusterState.getActiveNodes().size());
        log.info("Distribution %s: Current bucket skew: min %s, max %s. Target bucket skew: min %s, max %s", distribution.getId(), currentMin, currentMax, targetMin, targetMax);
        for (String source : ImmutableSet.copyOf(allocationCounts)) {
            List<BucketAssignment> existingAssignments = distributionAssignments.stream()
                    .filter(assignment -> assignment.getNodeIdentifier().equals(source))
                    .collect(toList());
            for (BucketAssignment existingAssignment : existingAssignments) {
                if (activeNodes.contains(source) && allocationCounts.count(source) <= targetMin) {
                    break;
                }
                // identify nodes with bucket counts lower than the computed target, and greedily select from this set based on projected disk utilization.
                // greediness means that this may produce decidedly non-optimal results if one looks at the global distribution of buckets->nodes.
                // also, this assumes that nodes in a cluster have identical storage capacity
                String target = activeNodes.stream()
                        .filter(candidate -> !candidate.equals(source) && allocationCounts.count(candidate) < targetMax)
                        .sorted(comparingInt(allocationCounts::count))
                        .min(Comparator.comparingDouble(allocationBytes::get))
                        .orElseThrow(() -> new VerifyException("unable to find target for rebalancing"));
                long bucketSize = clusterState.getDistributionBucketSize().get(distribution);
                // only move bucket if it reduces imbalance
                if (activeNodes.contains(source) && (allocationCounts.count(source) == targetMax && allocationCounts.count(target) == targetMin)) {
                    break;
                }
                allocationCounts.remove(source);
                allocationCounts.add(target);
                allocationBytes.compute(source, (k, v) -> v - bucketSize);
                allocationBytes.compute(target, (k, v) -> v + bucketSize);
                sourceToAllocationChanges.put(existingAssignment.getNodeIdentifier(), new BucketAssignment(existingAssignment.getDistributionId(), existingAssignment.getBucketNumber(), target));
            }
        }
    }
    return sourceToAllocationChanges;
}
Also used : BucketNode(io.trino.plugin.raptor.legacy.metadata.BucketNode) Nested(org.weakref.jmx.Nested) Logger(io.airlift.log.Logger) Multiset(com.google.common.collect.Multiset) NodeManager(io.trino.spi.NodeManager) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) CounterStat(io.airlift.stats.CounterStat) HashMap(java.util.HashMap) Multimap(com.google.common.collect.Multimap) Duration(io.airlift.units.Duration) Inject(javax.inject.Inject) PreDestroy(javax.annotation.PreDestroy) Executors.newSingleThreadScheduledExecutor(java.util.concurrent.Executors.newSingleThreadScheduledExecutor) HashMultimap(com.google.common.collect.HashMultimap) Distribution(io.trino.plugin.raptor.legacy.metadata.Distribution) Managed(org.weakref.jmx.Managed) Collectors.toMap(java.util.stream.Collectors.toMap) HashMultiset(com.google.common.collect.HashMultiset) Threads.daemonThreadsNamed(io.airlift.concurrent.Threads.daemonThreadsNamed) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) Collectors.toSet(java.util.stream.Collectors.toSet) Node(io.trino.spi.Node) VerifyException(com.google.common.base.VerifyException) Comparator.comparingInt(java.util.Comparator.comparingInt) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) Set(java.util.Set) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) CatalogName(io.trino.plugin.base.CatalogName) NodeSupplier(io.trino.plugin.raptor.legacy.NodeSupplier) BackupService(io.trino.plugin.raptor.legacy.backup.BackupService) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) ShardManager(io.trino.plugin.raptor.legacy.metadata.ShardManager) PostConstruct(javax.annotation.PostConstruct) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) HashMap(java.util.HashMap) VerifyException(com.google.common.base.VerifyException) Distribution(io.trino.plugin.raptor.legacy.metadata.Distribution)
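
The target skew computed above is simply the floor and ceiling of buckets per active node. A tiny worked illustration with made-up numbers: 10 buckets spread over 3 active nodes gives targetMin = 3 and targetMax = 4, so any node holding more than 4 buckets of that distribution becomes a rebalancing source.

public static void main(String[] args) {
    int numBuckets = 10;       // buckets in one distribution (illustrative)
    int activeNodeCount = 3;   // active nodes in the cluster (illustrative)
    int targetMin = (int) Math.floor((numBuckets * 1.0) / activeNodeCount); // 3
    int targetMax = (int) Math.ceil((numBuckets * 1.0) / activeNodeCount);  // 4
    System.out.printf("Target bucket skew: min %s, max %s%n", targetMin, targetMax);
}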

Example 5 with Node

use of io.trino.spi.Node in project trino by trinodb.

the class RubixInitializer method getRubixServerConfiguration.

private Configuration getRubixServerConfiguration() {
    Node master = nodeManager.getAllNodes().stream().filter(Node::isCoordinator).findFirst().get();
    masterAddress = master.getHostAndPort();
    Configuration configuration = getInitialConfiguration();
    // Perform standard HDFS configuration initialization.
    hdfsConfigurationInitializer.initializeConfiguration(configuration);
    updateRubixConfiguration(configuration, RUBIX);
    setCacheKey(configuration, "rubix_internal");
    return configuration;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ConfigurationUtils.getInitialConfiguration(io.trino.plugin.hive.util.ConfigurationUtils.getInitialConfiguration) Node(io.trino.spi.Node)
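
Note that findFirst().get() above throws a bare NoSuchElementException if no coordinator has registered yet. A hedged variation of the same Node filtering with an explicit error message, shown purely as an illustration rather than what RubixInitializer actually does:

import io.trino.spi.Node;
import io.trino.spi.NodeManager;

import java.util.NoSuchElementException;

static Node requireCoordinator(NodeManager nodeManager) {
    // Pick any node that reports itself as the coordinator; fail with a descriptive message otherwise
    return nodeManager.getAllNodes().stream()
            .filter(Node::isCoordinator)
            .findFirst()
            .orElseThrow(() -> new NoSuchElementException("No coordinator node registered with the node manager"));
}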

Aggregations

Node (io.trino.spi.Node) 11
ImmutableList (com.google.common.collect.ImmutableList) 5
InternalNode (io.trino.metadata.InternalNode) 4
ConnectorSplit (io.trino.spi.connector.ConnectorSplit) 4
FixedSplitSource (io.trino.spi.connector.FixedSplitSource) 3
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList) 2
ImmutableSet (com.google.common.collect.ImmutableSet) 2
Duration (io.airlift.units.Duration) 2
CatalogName (io.trino.plugin.base.CatalogName) 2
HashMap (java.util.HashMap) 2
List (java.util.List) 2
Test (org.testng.annotations.Test) 2
VisibleForTesting (com.google.common.annotations.VisibleForTesting) 1
Joiner (com.google.common.base.Joiner) 1
VerifyException (com.google.common.base.VerifyException) 1
HashMultimap (com.google.common.collect.HashMultimap) 1
HashMultiset (com.google.common.collect.HashMultiset) 1
ImmutableMap (com.google.common.collect.ImmutableMap) 1
ImmutableMultimap (com.google.common.collect.ImmutableMultimap) 1
Multimap (com.google.common.collect.Multimap) 1