Search in sources:

Example 6 with ConnectorBucketNodeMap

use of io.trino.spi.connector.ConnectorBucketNodeMap in project trino by trinodb.

From class AbstractTestHive, method insertBucketedTableLayout:

// Verifies the insert layout reported for a bucketed, non-partitioned table:
// the partitioning handle mirrors the table's bucketing definition, the bucket
// count stays at the declared value, and no fixed bucket-to-node mapping is made.
protected void insertBucketedTableLayout(boolean transactional) throws Exception {
    SchemaTableName table = temporaryTable("empty_bucketed_table");
    try {
        HiveBucketProperty bucketing = new HiveBucketProperty(ImmutableList.of("column1"), BUCKETING_V1, 4, ImmutableList.of());
        List<Column> tableColumns = ImmutableList.of(
                new Column("column1", HIVE_STRING, Optional.empty()),
                new Column("column2", HIVE_LONG, Optional.empty()));
        createEmptyTable(table, ORC, tableColumns, ImmutableList.of(), Optional.of(bucketing), transactional);
        try (Transaction txn = newTransaction()) {
            ConnectorMetadata metadata = txn.getMetadata();
            ConnectorSession session = newSession();
            ConnectorTableHandle handle = getTableHandle(metadata, table);
            Optional<ConnectorTableLayout> layout = metadata.getInsertLayout(session, handle);
            assertTrue(layout.isPresent());
            ConnectorPartitioningHandle expectedPartitioning = new HivePartitioningHandle(
                    bucketing.getBucketingVersion(),
                    bucketing.getBucketCount(),
                    ImmutableList.of(HIVE_STRING),
                    OptionalInt.empty(),
                    false);
            assertEquals(layout.get().getPartitioning(), Optional.of(expectedPartitioning));
            assertEquals(layout.get().getPartitionColumns(), ImmutableList.of("column1"));
            // The connector exposes the table's own bucket count and leaves
            // bucket-to-node assignment to the engine (no fixed mapping).
            ConnectorBucketNodeMap bucketNodeMap = nodePartitioningProvider.getBucketNodeMap(txn.getTransactionHandle(), session, expectedPartitioning);
            assertEquals(bucketNodeMap.getBucketCount(), 4);
            assertFalse(bucketNodeMap.hasFixedMapping());
        }
    } finally {
        dropTable(table);
    }
}
Also used : CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) Column(io.trino.plugin.hive.metastore.Column) ViewColumn(io.trino.spi.connector.ConnectorViewDefinition.ViewColumn) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata)

Example 7 with ConnectorBucketNodeMap

use of io.trino.spi.connector.ConnectorBucketNodeMap in project trino by trinodb.

From class AbstractTestHive, method insertPartitionedBucketedTableLayout:

// Verifies the insert layout for a table that is both partitioned and bucketed:
// the reported partition columns cover the bucketing and the partitioning columns,
// and the connector reports a fixed 32-entry bucket-to-node mapping.
protected void insertPartitionedBucketedTableLayout(boolean transactional) throws Exception {
    SchemaTableName table = temporaryTable("empty_partitioned_table");
    try {
        Column partitionColumn = new Column("column2", HIVE_LONG, Optional.empty());
        HiveBucketProperty bucketing = new HiveBucketProperty(ImmutableList.of("column1"), BUCKETING_V1, 4, ImmutableList.of());
        List<Column> tableColumns = ImmutableList.of(
                new Column("column1", HIVE_STRING, Optional.empty()),
                partitionColumn);
        createEmptyTable(table, ORC, tableColumns, ImmutableList.of(partitionColumn), Optional.of(bucketing), transactional);
        try (Transaction txn = newTransaction()) {
            ConnectorMetadata metadata = txn.getMetadata();
            ConnectorSession session = newSession();
            ConnectorTableHandle handle = getTableHandle(metadata, table);
            Optional<ConnectorTableLayout> layout = metadata.getInsertLayout(session, handle);
            assertTrue(layout.isPresent());
            ConnectorPartitioningHandle expectedPartitioning = new HivePartitioningHandle(
                    bucketing.getBucketingVersion(),
                    bucketing.getBucketCount(),
                    ImmutableList.of(HIVE_STRING),
                    OptionalInt.empty(),
                    true);
            assertEquals(layout.get().getPartitioning(), Optional.of(expectedPartitioning));
            assertEquals(layout.get().getPartitionColumns(), ImmutableList.of("column1", "column2"));
            // For a partitioned + bucketed table the node map is fixed at 32 entries,
            // larger than the table's declared bucket count of 4.
            ConnectorBucketNodeMap bucketNodeMap = nodePartitioningProvider.getBucketNodeMap(txn.getTransactionHandle(), session, expectedPartitioning);
            assertEquals(bucketNodeMap.getBucketCount(), 32);
            assertTrue(bucketNodeMap.hasFixedMapping());
            assertEquals(bucketNodeMap.getFixedMapping().size(), 32);
        }
    } finally {
        dropTable(table);
    }
}
Also used : CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) Column(io.trino.plugin.hive.metastore.Column) ViewColumn(io.trino.spi.connector.ConnectorViewDefinition.ViewColumn) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata)

Example 8 with ConnectorBucketNodeMap

use of io.trino.spi.connector.ConnectorBucketNodeMap in project trino by trinodb.

From class AbstractTestHive, method testCreatePartitionedBucketedTableLayout:

// Verifies the new-table layout for a table declared both partitioned and bucketed:
// partitioning reflects the requested bucketing, while the node map is fixed at 32.
@Test
public void testCreatePartitionedBucketedTableLayout() {
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(
                new SchemaTableName("schema", "table"),
                ImmutableList.of(new ColumnMetadata("column1", BIGINT), new ColumnMetadata("column2", BIGINT)),
                ImmutableMap.of(
                        PARTITIONED_BY_PROPERTY, ImmutableList.of("column2"),
                        BUCKETED_BY_PROPERTY, ImmutableList.of("column1"),
                        BUCKET_COUNT_PROPERTY, 10,
                        SORTED_BY_PROPERTY, ImmutableList.of()));
        Optional<ConnectorTableLayout> layout = metadata.getNewTableLayout(session, tableMetadata);
        assertTrue(layout.isPresent());
        ConnectorPartitioningHandle expectedPartitioning = new HivePartitioningHandle(BUCKETING_V1, 10, ImmutableList.of(HIVE_LONG), OptionalInt.empty(), true);
        assertEquals(layout.get().getPartitioning(), Optional.of(expectedPartitioning));
        assertEquals(layout.get().getPartitionColumns(), ImmutableList.of("column1", "column2"));
        // The fixed node map has 32 entries even though 10 buckets were requested.
        ConnectorBucketNodeMap bucketNodeMap = nodePartitioningProvider.getBucketNodeMap(transaction.getTransactionHandle(), session, expectedPartitioning);
        assertEquals(bucketNodeMap.getBucketCount(), 32);
        assertTrue(bucketNodeMap.hasFixedMapping());
        assertEquals(bucketNodeMap.getFixedMapping().size(), 32);
    }
}
Also used : ColumnMetadata(io.trino.spi.connector.ColumnMetadata) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Test(org.testng.annotations.Test)

Example 9 with ConnectorBucketNodeMap

use of io.trino.spi.connector.ConnectorBucketNodeMap in project trino by trinodb.

From class TestLocalExchange, method testPartitionCustomPartitioning:

// Exercises LocalExchange with a connector-supplied partitioning: a custom bucket
// function routes rows whose BIGINT partition value is 42 to bucket 0 and all
// other rows to bucket 1, and the test checks each page lands at one source only.
@Test(dataProvider = "executionStrategy")
public void testPartitionCustomPartitioning(PipelineExecutionStrategy executionStrategy) {
    ConnectorPartitioningHandle customPartitioningHandle = new ConnectorPartitioningHandle() {
    };
    ConnectorNodePartitioningProvider partitioningProvider = new ConnectorNodePartitioningProvider() {

        @Override
        public ConnectorBucketNodeMap getBucketNodeMap(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorPartitioningHandle partitioningHandle) {
            // Two buckets, no fixed node assignment
            return createBucketNodeMap(2);
        }

        @Override
        public ToIntFunction<ConnectorSplit> getSplitBucketFunction(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorPartitioningHandle partitioningHandle) {
            throw new UnsupportedOperationException();
        }

        @Override
        public BucketFunction getBucketFunction(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorPartitioningHandle partitioningHandle, List<Type> partitionChannelTypes, int bucketCount) {
            // Value 42 maps to bucket 0; everything else maps to bucket 1
            return (page, position) -> BIGINT.getLong(page.getBlock(0), position) == 42 ? 0 : 1;
        }
    };
    nodePartitioningManager.addPartitioningProvider(new CatalogName("foo"), partitioningProvider);
    List<Type> types = ImmutableList.of(VARCHAR, BIGINT);
    PartitioningHandle partitioningHandle = new PartitioningHandle(Optional.of(new CatalogName("foo")), Optional.of(TestingTransactionHandle.create()), customPartitioningHandle);
    LocalExchangeFactory localExchangeFactory = new LocalExchangeFactory(nodePartitioningManager, SESSION, partitioningHandle, 2, types, ImmutableList.of(1), Optional.empty(), executionStrategy, LOCAL_EXCHANGE_MAX_BUFFERED_BYTES, TYPE_OPERATOR_FACTORY);
    LocalExchangeSinkFactoryId sinkFactoryId = localExchangeFactory.newSinkFactoryId();
    localExchangeFactory.noMoreSinkFactories();
    run(localExchangeFactory, executionStrategy, exchange -> {
        assertEquals(exchange.getBufferCount(), 2);
        assertExchangeTotalBufferedBytes(exchange, 0);
        LocalExchangeSinkFactory sinkFactory = exchange.getSinkFactory(sinkFactoryId);
        LocalExchangeSink sink = sinkFactory.createSink();
        assertSinkCanWrite(sink);
        sinkFactory.close();
        sinkFactory.noMoreSinkFactories();
        LocalExchangeSource firstSource = exchange.getSource(1);
        assertSource(firstSource, 0);
        LocalExchangeSource secondSource = exchange.getSource(0);
        assertSource(secondSource, 0);
        // Partition value 42: the whole page arrives at the first source only
        Page pageWith42 = SequencePageBuilder.createSequencePage(types, 1, 100, 42);
        sink.addPage(pageWith42);
        assertSource(firstSource, 1);
        assertSource(secondSource, 0);
        assertRemovePage(types, firstSource, pageWith42);
        assertSource(firstSource, 0);
        // Partition value 43: the whole page arrives at the second source only
        Page pageWith43 = SequencePageBuilder.createSequencePage(types, 100, 100, 43);
        sink.addPage(pageWith43);
        assertSource(firstSource, 0);
        assertSource(secondSource, 1);
        assertRemovePage(types, secondSource, pageWith43);
        assertSource(secondSource, 0);
    });
}
Also used : BucketFunction(io.trino.spi.connector.BucketFunction) NodeTaskMap(io.trino.execution.NodeTaskMap) FIXED_HASH_DISTRIBUTION(io.trino.sql.planner.SystemPartitioningHandle.FIXED_HASH_DISTRIBUTION) Test(org.testng.annotations.Test) TypeOperators(io.trino.spi.type.TypeOperators) SequencePageBuilder(io.trino.SequencePageBuilder) CatalogName(io.trino.connector.CatalogName) NodeScheduler(io.trino.execution.scheduler.NodeScheduler) FinalizerService(io.trino.util.FinalizerService) SINGLE_DISTRIBUTION(io.trino.sql.planner.SystemPartitioningHandle.SINGLE_DISTRIBUTION) Assert.assertFalse(org.testng.Assert.assertFalse) ConnectorNodePartitioningProvider(io.trino.spi.connector.ConnectorNodePartitioningProvider) FIXED_BROADCAST_DISTRIBUTION(io.trino.sql.planner.SystemPartitioningHandle.FIXED_BROADCAST_DISTRIBUTION) NodeSchedulerConfig(io.trino.execution.scheduler.NodeSchedulerConfig) BeforeMethod(org.testng.annotations.BeforeMethod) PartitioningHandle(io.trino.sql.planner.PartitioningHandle) LocalExchangeSinkFactory(io.trino.operator.exchange.LocalExchange.LocalExchangeSinkFactory) Assert.assertNotNull(org.testng.Assert.assertNotNull) UniformNodeSelectorFactory(io.trino.execution.scheduler.UniformNodeSelectorFactory) ConnectorBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap) FIXED_ARBITRARY_DISTRIBUTION(io.trino.sql.planner.SystemPartitioningHandle.FIXED_ARBITRARY_DISTRIBUTION) DataSize(io.airlift.units.DataSize) List(java.util.List) TestingSession.testSessionBuilder(io.trino.testing.TestingSession.testSessionBuilder) BIGINT(io.trino.spi.type.BigintType.BIGINT) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) InMemoryNodeManager(io.trino.metadata.InMemoryNodeManager) FIXED_PASSTHROUGH_DISTRIBUTION(io.trino.sql.planner.SystemPartitioningHandle.FIXED_PASSTHROUGH_DISTRIBUTION) Optional(java.util.Optional) Session(io.trino.Session) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) 
DataProvider(org.testng.annotations.DataProvider) Assert.assertNull(org.testng.Assert.assertNull) Type(io.trino.spi.type.Type) ConnectorBucketNodeMap.createBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap.createBucketNodeMap) Page(io.trino.spi.Page) Assert.assertEquals(org.testng.Assert.assertEquals) NodePartitioningManager(io.trino.sql.planner.NodePartitioningManager) LocalExchangeFactory(io.trino.operator.exchange.LocalExchange.LocalExchangeFactory) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) ImmutableList(com.google.common.collect.ImmutableList) GROUPED_EXECUTION(io.trino.operator.PipelineExecutionStrategy.GROUPED_EXECUTION) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) PipelineExecutionStrategy(io.trino.operator.PipelineExecutionStrategy) Lifespan(io.trino.execution.Lifespan) InterpretedHashGenerator(io.trino.operator.InterpretedHashGenerator) BlockTypeOperators(io.trino.type.BlockTypeOperators) UNGROUPED_EXECUTION(io.trino.operator.PipelineExecutionStrategy.UNGROUPED_EXECUTION) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) PageAssertions(io.trino.operator.PageAssertions) ToIntFunction(java.util.function.ToIntFunction) LocalExchangeSinkFactoryId(io.trino.operator.exchange.LocalExchange.LocalExchangeSinkFactoryId) TestingTransactionHandle(io.trino.testing.TestingTransactionHandle) ConnectorSession(io.trino.spi.connector.ConnectorSession) Consumer(java.util.function.Consumer) Assert.assertTrue(org.testng.Assert.assertTrue) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) Page(io.trino.spi.Page) LocalExchangeSinkFactory(io.trino.operator.exchange.LocalExchange.LocalExchangeSinkFactory) Type(io.trino.spi.type.Type) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) ConnectorNodePartitioningProvider(io.trino.spi.connector.ConnectorNodePartitioningProvider) 
ConnectorSession(io.trino.spi.connector.ConnectorSession) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) CatalogName(io.trino.connector.CatalogName) LocalExchangeFactory(io.trino.operator.exchange.LocalExchange.LocalExchangeFactory) PartitioningHandle(io.trino.sql.planner.PartitioningHandle) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) LocalExchangeSinkFactoryId(io.trino.operator.exchange.LocalExchange.LocalExchangeSinkFactoryId) Test(org.testng.annotations.Test)

Example 10 with ConnectorBucketNodeMap

use of io.trino.spi.connector.ConnectorBucketNodeMap in project trino by trinodb.

From class LocalExchange, method createPartitionFunction:

/**
 * Builds the function that spreads rows across this node's local exchange buffers.
 * System partitionings hash the partition channels directly; connector partitionings
 * reuse the connector's bucket function and fold buckets onto local partitions.
 * partitionCount must be a power of two so bucket mixing can use a bit mask.
 */
private static PartitionFunction createPartitionFunction(NodePartitioningManager nodePartitioningManager, Session session, BlockTypeOperators blockTypeOperators, PartitioningHandle partitioning, int partitionCount, List<Integer> partitionChannels, List<Type> partitionChannelTypes, Optional<Integer> partitionHashChannel) {
    checkArgument(Integer.bitCount(partitionCount) == 1, "partitionCount must be a power of 2");
    if (isSystemPartitioning(partitioning)) {
        // Reuse a precomputed hash column when available; otherwise hash the raw channels.
        HashGenerator hashGenerator = partitionHashChannel.isPresent()
                ? new PrecomputedHashGenerator(partitionHashChannel.get())
                : new InterpretedHashGenerator(partitionChannelTypes, Ints.toArray(partitionChannels), blockTypeOperators);
        return new LocalPartitionGenerator(hashGenerator, partitionCount);
    }
    // Distribute the buckets assigned to this node among local threads. The same
    // bucket function (with the same bucket count) as for node partitioning must be
    // used so that all rows of one bucket stay on a single thread.
    ConnectorBucketNodeMap bucketNodeMap = nodePartitioningManager.getConnectorBucketNodeMap(session, partitioning);
    int bucketCount = bucketNodeMap.getBucketCount();
    int[] bucketToPartition = new int[bucketCount];
    for (int bucket = 0; bucket < bucketCount; bucket++) {
        // Mix the bucket bits so we don't reuse the same bucket number that already
        // distributed rows between stages.
        int mixedBucket = (int) XxHash64.hash(Long.reverse(bucket));
        bucketToPartition[bucket] = mixedBucket & (partitionCount - 1);
    }
    return new BucketPartitionFunction(nodePartitioningManager.getBucketFunction(session, partitioning, partitionChannelTypes, bucketCount), bucketToPartition);
}
Also used : BucketPartitionFunction(io.trino.operator.BucketPartitionFunction) InterpretedHashGenerator(io.trino.operator.InterpretedHashGenerator) ConnectorBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap) PrecomputedHashGenerator(io.trino.operator.PrecomputedHashGenerator) PrecomputedHashGenerator(io.trino.operator.PrecomputedHashGenerator) InterpretedHashGenerator(io.trino.operator.InterpretedHashGenerator) HashGenerator(io.trino.operator.HashGenerator)

Aggregations

ConnectorBucketNodeMap (io.trino.spi.connector.ConnectorBucketNodeMap)10 ConnectorPartitioningHandle (io.trino.spi.connector.ConnectorPartitioningHandle)5 ConnectorSession (io.trino.spi.connector.ConnectorSession)5 CatalogName (io.trino.connector.CatalogName)4 CatalogSchemaTableName (io.trino.spi.connector.CatalogSchemaTableName)4 ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata)4 ConnectorTableLayout (io.trino.spi.connector.ConnectorTableLayout)4 SchemaTableName (io.trino.spi.connector.SchemaTableName)4 TestingConnectorSession (io.trino.testing.TestingConnectorSession)4 Test (org.testng.annotations.Test)3 Session (io.trino.Session)2 FixedBucketNodeMap (io.trino.execution.scheduler.FixedBucketNodeMap)2 NodeScheduler (io.trino.execution.scheduler.NodeScheduler)2 DynamicBucketNodeMap (io.trino.execution.scheduler.group.DynamicBucketNodeMap)2 BucketPartitionFunction (io.trino.operator.BucketPartitionFunction)2 InterpretedHashGenerator (io.trino.operator.InterpretedHashGenerator)2 HiveColumnHandle.createBaseColumn (io.trino.plugin.hive.HiveColumnHandle.createBaseColumn)2 Column (io.trino.plugin.hive.metastore.Column)2 SortingColumn (io.trino.plugin.hive.metastore.SortingColumn)2 ColumnMetadata (io.trino.spi.connector.ColumnMetadata)2