Example 1 with ConnectorBucketNodeMap

Use of io.trino.spi.connector.ConnectorBucketNodeMap in project trino by trinodb.

In class NodePartitioningManager, method getConnectorBucketNodeMap:

public ConnectorBucketNodeMap getConnectorBucketNodeMap(Session session, PartitioningHandle partitioningHandle) {
    CatalogName catalogName = partitioningHandle.getConnectorId()
            .orElseThrow(() -> new IllegalArgumentException("No connector ID for partitioning handle: " + partitioningHandle));
    ConnectorNodePartitioningProvider partitioningProvider = getPartitioningProvider(catalogName);
    ConnectorBucketNodeMap connectorBucketNodeMap = partitioningProvider.getBucketNodeMap(
            partitioningHandle.getTransactionHandle()
                    .orElseThrow(() -> new IllegalArgumentException("No transactionHandle for partitioning handle: " + partitioningHandle)),
            session.toConnectorSession(catalogName),
            partitioningHandle.getConnectorHandle());
    checkArgument(connectorBucketNodeMap != null, "No partition map %s", partitioningHandle);
    return connectorBucketNodeMap;
}
Also used : ConnectorNodePartitioningProvider(io.trino.spi.connector.ConnectorNodePartitioningProvider) ConnectorBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap) CatalogName(io.trino.connector.CatalogName)
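
For context, a minimal connector-side sketch of the provider that getConnectorBucketNodeMap calls into. This is not from the Trino codebase: the class name ExampleNodePartitioningProvider is hypothetical, the sketch assumes the SPI factory ConnectorBucketNodeMap.createBucketNodeMap(int), and the interface's other methods are omitted, so the class is declared abstract.

import io.trino.spi.connector.ConnectorBucketNodeMap;
import io.trino.spi.connector.ConnectorNodePartitioningProvider;
import io.trino.spi.connector.ConnectorPartitioningHandle;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorTransactionHandle;

public abstract class ExampleNodePartitioningProvider implements ConnectorNodePartitioningProvider {
    @Override
    public ConnectorBucketNodeMap getBucketNodeMap(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorPartitioningHandle partitioningHandle) {
        // Assumed factory: createBucketNodeMap(int) builds a count-only map, so
        // hasFixedMapping() is false and the engine assigns buckets to nodes itself.
        return ConnectorBucketNodeMap.createBucketNodeMap(10);
    }
}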

Example 2 with ConnectorBucketNodeMap

Use of io.trino.spi.connector.ConnectorBucketNodeMap in project trino by trinodb.

In class NodePartitioningManager, method getBucketNodeMap:

public BucketNodeMap getBucketNodeMap(Session session, PartitioningHandle partitioningHandle, boolean preferDynamic) {
    ConnectorBucketNodeMap connectorBucketNodeMap = getConnectorBucketNodeMap(session, partitioningHandle);
    if (connectorBucketNodeMap.hasFixedMapping()) {
        return new FixedBucketNodeMap(getSplitToBucket(session, partitioningHandle), getFixedMapping(connectorBucketNodeMap));
    }
    if (preferDynamic) {
        return new DynamicBucketNodeMap(getSplitToBucket(session, partitioningHandle), connectorBucketNodeMap.getBucketCount());
    }
    Optional<CatalogName> catalogName = partitioningHandle.getConnectorId();
    checkArgument(catalogName.isPresent(), "No connector ID for partitioning handle: %s", partitioningHandle);
    return new FixedBucketNodeMap(
            getSplitToBucket(session, partitioningHandle),
            createArbitraryBucketToNode(
                    new ArrayList<>(nodeScheduler.createNodeSelector(session, catalogName).allNodes()),
                    connectorBucketNodeMap.getBucketCount()));
}
Also used : DynamicBucketNodeMap(io.trino.execution.scheduler.group.DynamicBucketNodeMap) ConnectorBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap) ArrayList(java.util.ArrayList) CatalogName(io.trino.connector.CatalogName) FixedBucketNodeMap(io.trino.execution.scheduler.FixedBucketNodeMap)
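
The branch structure above is driven entirely by which shape of ConnectorBucketNodeMap the connector returned. A minimal sketch of the two shapes, assuming the SPI exposes both a count-only factory createBucketNodeMap(int) and a fixed-mapping overload createBucketNodeMap(List<Node>); the class name BucketNodeMapShapes is hypothetical.

import java.util.List;

import io.trino.spi.Node;
import io.trino.spi.connector.ConnectorBucketNodeMap;

public final class BucketNodeMapShapes {
    private BucketNodeMapShapes() {}

    // Count-only map: hasFixedMapping() is false, so getBucketNodeMap above can
    // return a DynamicBucketNodeMap when preferDynamic is true.
    public static ConnectorBucketNodeMap dynamicCapable(int bucketCount) {
        return ConnectorBucketNodeMap.createBucketNodeMap(bucketCount);
    }

    // Fixed map: bucket i is pinned to bucketToNode.get(i), so the engine always
    // wraps it in a FixedBucketNodeMap, regardless of preferDynamic.
    public static ConnectorBucketNodeMap fixed(List<Node> bucketToNode) {
        return ConnectorBucketNodeMap.createBucketNodeMap(bucketToNode);
    }
}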

Example 3 with ConnectorBucketNodeMap

Use of io.trino.spi.connector.ConnectorBucketNodeMap in project trino by trinodb.

In class NodePartitioningManager, method getNodePartitioningMap:

public NodePartitionMap getNodePartitioningMap(Session session, PartitioningHandle partitioningHandle) {
    requireNonNull(session, "session is null");
    requireNonNull(partitioningHandle, "partitioningHandle is null");
    if (partitioningHandle.getConnectorHandle() instanceof SystemPartitioningHandle) {
        return ((SystemPartitioningHandle) partitioningHandle.getConnectorHandle()).getNodePartitionMap(session, nodeScheduler);
    }
    ConnectorBucketNodeMap connectorBucketNodeMap = getConnectorBucketNodeMap(session, partitioningHandle);
    // safety check for crazy partitioning
    checkArgument(connectorBucketNodeMap.getBucketCount() < 1_000_000, "Too many buckets in partitioning: %s", connectorBucketNodeMap.getBucketCount());
    List<InternalNode> bucketToNode;
    if (connectorBucketNodeMap.hasFixedMapping()) {
        bucketToNode = getFixedMapping(connectorBucketNodeMap);
    } else {
        CatalogName catalogName = partitioningHandle.getConnectorId()
                .orElseThrow(() -> new IllegalArgumentException("No connector ID for partitioning handle: " + partitioningHandle));
        bucketToNode = createArbitraryBucketToNode(
                nodeScheduler.createNodeSelector(session, Optional.of(catalogName)).allNodes(),
                connectorBucketNodeMap.getBucketCount());
    }
    int[] bucketToPartition = new int[connectorBucketNodeMap.getBucketCount()];
    BiMap<InternalNode, Integer> nodeToPartition = HashBiMap.create();
    int nextPartitionId = 0;
    for (int bucket = 0; bucket < bucketToNode.size(); bucket++) {
        InternalNode node = bucketToNode.get(bucket);
        Integer partitionId = nodeToPartition.get(node);
        if (partitionId == null) {
            partitionId = nextPartitionId++;
            nodeToPartition.put(node, partitionId);
        }
        bucketToPartition[bucket] = partitionId;
    }
    List<InternalNode> partitionToNode = IntStream.range(0, nodeToPartition.size())
            .mapToObj(partitionId -> nodeToPartition.inverse().get(partitionId))
            .collect(toImmutableList());
    return new NodePartitionMap(partitionToNode, bucketToPartition, getSplitToBucket(session, partitioningHandle));
}
Also used : IntStream(java.util.stream.IntStream) BucketFunction(io.trino.spi.connector.BucketFunction) BucketNodeMap(io.trino.execution.scheduler.BucketNodeMap) Type(io.trino.spi.type.Type) ArrayList(java.util.ArrayList) ConcurrentMap(java.util.concurrent.ConcurrentMap) Inject(javax.inject.Inject) FixedBucketNodeMap(io.trino.execution.scheduler.FixedBucketNodeMap) CatalogName(io.trino.connector.CatalogName) NodeScheduler(io.trino.execution.scheduler.NodeScheduler) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ConnectorPartitionHandle(io.trino.spi.connector.ConnectorPartitionHandle) Objects.requireNonNull(java.util.Objects.requireNonNull) DynamicBucketNodeMap(io.trino.execution.scheduler.group.DynamicBucketNodeMap) BiMap(com.google.common.collect.BiMap) EmptySplit(io.trino.split.EmptySplit) PartitionFunction(io.trino.operator.PartitionFunction) ConnectorNodePartitioningProvider(io.trino.spi.connector.ConnectorNodePartitioningProvider) BlockTypeOperators(io.trino.type.BlockTypeOperators) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ToIntFunction(java.util.function.ToIntFunction) BucketPartitionFunction(io.trino.operator.BucketPartitionFunction) ConnectorBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap) HashBiMap(com.google.common.collect.HashBiMap) InternalNode(io.trino.metadata.InternalNode) List(java.util.List) Stream(java.util.stream.Stream) Split(io.trino.metadata.Split) Optional(java.util.Optional) Collections(java.util.Collections) Session(io.trino.Session)
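
The loop above compacts the bucket-to-node assignment into dense partition ids: each distinct node receives the next id in order of first appearance. A self-contained sketch of just that step, using plain strings in place of InternalNode (class and method names are illustrative only):

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public final class BucketToPartitionSketch {
    public static int[] bucketToPartition(List<String> bucketToNode) {
        Map<String, Integer> nodeToPartition = new HashMap<>();
        int[] bucketToPartition = new int[bucketToNode.size()];
        for (int bucket = 0; bucket < bucketToNode.size(); bucket++) {
            String node = bucketToNode.get(bucket);
            Integer partitionId = nodeToPartition.get(node);
            if (partitionId == null) {
                // first time this node is seen: give it the next dense partition id
                partitionId = nodeToPartition.size();
                nodeToPartition.put(node, partitionId);
            }
            bucketToPartition[bucket] = partitionId;
        }
        return bucketToPartition;
    }

    public static void main(String[] args) {
        // buckets 0..4 on nodes A, B, A, C, B -> partitions [0, 1, 0, 2, 1]
        System.out.println(java.util.Arrays.toString(bucketToPartition(List.of("A", "B", "A", "C", "B"))));
    }
}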

Example 4 with ConnectorBucketNodeMap

Use of io.trino.spi.connector.ConnectorBucketNodeMap in project trino by trinodb.

In class DetermineTableScanNodePartitioning, method apply:

@Override
public Result apply(TableScanNode node, Captures captures, Context context) {
    TableProperties properties = metadata.getTableProperties(context.getSession(), node.getTable());
    if (properties.getTablePartitioning().isEmpty()) {
        return Result.ofPlanNode(node.withUseConnectorNodePartitioning(false));
    }
    TablePartitioning partitioning = properties.getTablePartitioning().get();
    ConnectorBucketNodeMap bucketNodeMap = nodePartitioningManager.getConnectorBucketNodeMap(context.getSession(), partitioning.getPartitioningHandle());
    if (bucketNodeMap.hasFixedMapping()) {
        // use connector table scan node partitioning when bucket to node assignments are fixed
        return Result.ofPlanNode(node.withUseConnectorNodePartitioning(true));
    }
    if (!isUseTableScanNodePartitioning(context.getSession())) {
        return Result.ofPlanNode(node.withUseConnectorNodePartitioning(false));
    }
    int numberOfBuckets = bucketNodeMap.getBucketCount();
    int numberOfTasks = max(taskCountEstimator.estimateSourceDistributedTaskCount(context.getSession()), 1);
    return Result.ofPlanNode(node.withUseConnectorNodePartitioning(
            (double) numberOfBuckets / numberOfTasks >= getTableScanNodePartitioningMinBucketToTaskRatio(context.getSession())));
}
Also used : TablePartitioning(io.trino.metadata.TableProperties.TablePartitioning) ConnectorBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap) TableProperties(io.trino.metadata.TableProperties)
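
The final branch keeps connector-driven partitioning only when there are enough buckets per estimated task. A minimal standalone sketch of that heuristic; the threshold parameter stands in for the session property read by getTableScanNodePartitioningMinBucketToTaskRatio, and the names are illustrative.

public final class BucketToTaskRatio {
    public static boolean useConnectorPartitioning(int bucketCount, int estimatedTasks, double minBucketToTaskRatio) {
        int tasks = Math.max(estimatedTasks, 1); // guard against a zero task estimate, as the rule does
        return (double) bucketCount / tasks >= minBucketToTaskRatio;
    }

    public static void main(String[] args) {
        // 10 buckets over 8 tasks against a 2.0 threshold: 1.25 < 2.0, so partitioning is dropped
        System.out.println(useConnectorPartitioning(10, 8, 2.0)); // false
    }
}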

Example 5 with ConnectorBucketNodeMap

Use of io.trino.spi.connector.ConnectorBucketNodeMap in project trino by trinodb.

In class AbstractTestHive, method testCreateBucketedTableLayout:

@Test
public void testCreateBucketedTableLayout() {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        Optional<ConnectorTableLayout> newTableLayout = metadata.getNewTableLayout(session, new ConnectorTableMetadata(
                new SchemaTableName("schema", "table"),
                ImmutableList.of(
                        new ColumnMetadata("column1", BIGINT),
                        new ColumnMetadata("column2", BIGINT)),
                ImmutableMap.of(
                        PARTITIONED_BY_PROPERTY, ImmutableList.of(),
                        BUCKETED_BY_PROPERTY, ImmutableList.of("column1"),
                        BUCKET_COUNT_PROPERTY, 10,
                        SORTED_BY_PROPERTY, ImmutableList.of())));
        assertTrue(newTableLayout.isPresent());
        ConnectorPartitioningHandle partitioningHandle = new HivePartitioningHandle(BUCKETING_V1, 10, ImmutableList.of(HIVE_LONG), OptionalInt.empty(), false);
        assertEquals(newTableLayout.get().getPartitioning(), Optional.of(partitioningHandle));
        assertEquals(newTableLayout.get().getPartitionColumns(), ImmutableList.of("column1"));
        ConnectorBucketNodeMap connectorBucketNodeMap = nodePartitioningProvider.getBucketNodeMap(transaction.getTransactionHandle(), session, partitioningHandle);
        assertEquals(connectorBucketNodeMap.getBucketCount(), 10);
        assertFalse(connectorBucketNodeMap.hasFixedMapping());
    }
}
Also used : ColumnMetadata(io.trino.spi.connector.ColumnMetadata) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Test(org.testng.annotations.Test)
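
The last two assertions exercise only ConnectorBucketNodeMap itself. The same two properties can be checked in isolation, assuming the count-only factory createBucketNodeMap(int); this is a sketch, not part of the test above.

import io.trino.spi.connector.ConnectorBucketNodeMap;

public final class BucketNodeMapCheck {
    public static void main(String[] args) {
        // A count-only map reports its bucket count but pins no buckets to nodes.
        ConnectorBucketNodeMap map = ConnectorBucketNodeMap.createBucketNodeMap(10);
        if (map.getBucketCount() != 10 || map.hasFixedMapping()) {
            throw new AssertionError("expected a 10-bucket map with no fixed mapping");
        }
    }
}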

Aggregations

ConnectorBucketNodeMap (io.trino.spi.connector.ConnectorBucketNodeMap): 10
ConnectorPartitioningHandle (io.trino.spi.connector.ConnectorPartitioningHandle): 5
ConnectorSession (io.trino.spi.connector.ConnectorSession): 5
CatalogName (io.trino.connector.CatalogName): 4
CatalogSchemaTableName (io.trino.spi.connector.CatalogSchemaTableName): 4
ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata): 4
ConnectorTableLayout (io.trino.spi.connector.ConnectorTableLayout): 4
SchemaTableName (io.trino.spi.connector.SchemaTableName): 4
TestingConnectorSession (io.trino.testing.TestingConnectorSession): 4
Test (org.testng.annotations.Test): 3
Session (io.trino.Session): 2
FixedBucketNodeMap (io.trino.execution.scheduler.FixedBucketNodeMap): 2
NodeScheduler (io.trino.execution.scheduler.NodeScheduler): 2
DynamicBucketNodeMap (io.trino.execution.scheduler.group.DynamicBucketNodeMap): 2
BucketPartitionFunction (io.trino.operator.BucketPartitionFunction): 2
InterpretedHashGenerator (io.trino.operator.InterpretedHashGenerator): 2
HiveColumnHandle.createBaseColumn (io.trino.plugin.hive.HiveColumnHandle.createBaseColumn): 2
Column (io.trino.plugin.hive.metastore.Column): 2
SortingColumn (io.trino.plugin.hive.metastore.SortingColumn): 2
ColumnMetadata (io.trino.spi.connector.ColumnMetadata): 2