
Example 6 with BucketFunction

Use of io.trino.spi.connector.BucketFunction in project trino by trinodb.

From the class TestHivePartitionedBucketFunction, method testMultiplePartitions.

@Test(dataProvider = "hiveBucketingVersion")
public void testMultiplePartitions(BucketingVersion hiveBucketingVersion) {
    int numValues = 1024;
    int numBuckets = 10;
    Block bucketColumn = createLongSequenceBlockWithNull(numValues);
    Page bucketedColumnPage = new Page(bucketColumn);
    BucketFunction hiveBucketFunction = bucketFunction(hiveBucketingVersion, numBuckets, ImmutableList.of(HIVE_LONG));
    int numPartitions = 8;
    List<Long> partitionValues = new ArrayList<>();
    for (int i = 0; i < numPartitions - 1; i++) {
        partitionValues.addAll(Collections.nCopies(numValues / numPartitions, i * 348349L));
    }
    partitionValues.addAll(Collections.nCopies(numValues / numPartitions, null));
    Block partitionColumn = createLongsBlock(partitionValues);
    Page page = new Page(bucketColumn, partitionColumn);
    Map<Long, HashMultimap<Integer, Integer>> partitionedBucketPositions = new HashMap<>();
    for (int i = 0; i < numValues; i++) {
        int hiveBucket = hiveBucketFunction.getBucket(bucketedColumnPage, i);
        Long hivePartition = partitionValues.get(i);
        // record list of positions for each combination of hive partition and bucket
        partitionedBucketPositions.computeIfAbsent(hivePartition, ignored -> HashMultimap.create()).put(hiveBucket, i);
    }
    BucketFunction hivePartitionedBucketFunction = partitionedBucketFunction(hiveBucketingVersion, numBuckets, ImmutableList.of(HIVE_LONG), ImmutableList.of(BIGINT), 4000);
    // All positions of a hive partition and bucket should hash to the same partitioned bucket
    for (Map.Entry<Long, HashMultimap<Integer, Integer>> partitionEntry : partitionedBucketPositions.entrySet()) {
        for (Map.Entry<Integer, Collection<Integer>> entry : partitionEntry.getValue().asMap().entrySet()) {
            assertBucketCount(hivePartitionedBucketFunction, page, entry.getValue(), 1);
        }
    }
    assertBucketCount(hivePartitionedBucketFunction, page, IntStream.range(0, numValues).boxed().collect(toImmutableList()), numBuckets * numPartitions);
}
Also used : IntStream(java.util.stream.IntStream) BucketFunction(io.trino.spi.connector.BucketFunction) DataProvider(org.testng.annotations.DataProvider) Collections.min(java.util.Collections.min) Type(io.trino.spi.type.Type) BUCKETING_V2(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V2) Page(io.trino.spi.Page) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) HashMap(java.util.HashMap) TypeOperators(io.trino.spi.type.TypeOperators) Multimap(com.google.common.collect.Multimap) ArrayList(java.util.ArrayList) BUCKETING_V1(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1) HashMultimap(com.google.common.collect.HashMultimap) ImmutableList(com.google.common.collect.ImmutableList) Block(io.trino.spi.block.Block) Map(java.util.Map) HIVE_LONG(io.trino.plugin.hive.HiveType.HIVE_LONG) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) BucketingVersion(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion) BIGINT(io.trino.spi.type.BigintType.BIGINT) Collections.max(java.util.Collections.max) BlockAssertions.createLongsBlock(io.trino.block.BlockAssertions.createLongsBlock) BlockAssertions.createLongRepeatBlock(io.trino.block.BlockAssertions.createLongRepeatBlock) BlockBuilder(io.trino.spi.block.BlockBuilder) Collections(java.util.Collections)
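
The test above calls two helpers, createLongSequenceBlockWithNull and assertBucketCount, that live elsewhere in TestHivePartitionedBucketFunction and are not shown in this snippet. Below is a rough sketch of what they plausibly do, assuming java.util.Set and HashSet for collecting buckets; the real helpers may differ in detail, for example in where the null position sits:

private static Block createLongSequenceBlockWithNull(int positionCount) {
    // BIGINT block with one null position followed by an increasing long sequence
    BlockBuilder blockBuilder = BIGINT.createBlockBuilder(null, positionCount);
    blockBuilder.appendNull();
    for (int i = 1; i < positionCount; i++) {
        BIGINT.writeLong(blockBuilder, i);
    }
    return blockBuilder.build();
}

private static void assertBucketCount(BucketFunction bucketFunction, Page page, Collection<Integer> positions, int expectedBucketCount) {
    // collect the distinct buckets produced for the given positions and compare their number
    Set<Integer> buckets = new HashSet<>();
    for (int position : positions) {
        buckets.add(bucketFunction.getBucket(page, position));
    }
    assertEquals(buckets.size(), expectedBucketCount);
}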

Example 7 with BucketFunction

Use of io.trino.spi.connector.BucketFunction in project trino by trinodb.

From the class TestHivePartitionedBucketFunction, method testSinglePartition.

@Test(dataProvider = "hiveBucketingVersion")
public void testSinglePartition(BucketingVersion hiveBucketingVersion) {
    int numValues = 1024;
    int numBuckets = 10;
    Block bucketColumn = createLongSequenceBlockWithNull(numValues);
    Page bucketedColumnPage = new Page(bucketColumn);
    Block partitionColumn = createLongRepeatBlock(78758, numValues);
    Page page = new Page(bucketColumn, partitionColumn);
    BucketFunction hiveBucketFunction = bucketFunction(hiveBucketingVersion, numBuckets, ImmutableList.of(HIVE_LONG));
    Multimap<Integer, Integer> bucketPositions = HashMultimap.create();
    for (int i = 0; i < numValues; i++) {
        int hiveBucket = hiveBucketFunction.getBucket(bucketedColumnPage, i);
        // record list of positions for each hive bucket
        bucketPositions.put(hiveBucket, i);
    }
    BucketFunction hivePartitionedBucketFunction = partitionedBucketFunction(hiveBucketingVersion, numBuckets, ImmutableList.of(HIVE_LONG), ImmutableList.of(BIGINT), 100);
    // All positions of a bucket should hash to the same partitioned bucket
    for (Map.Entry<Integer, Collection<Integer>> entry : bucketPositions.asMap().entrySet()) {
        assertBucketCount(hivePartitionedBucketFunction, page, entry.getValue(), 1);
    }
    assertBucketCount(hivePartitionedBucketFunction, page, IntStream.range(0, numValues).boxed().collect(toImmutableList()), numBuckets);
}
Also used : Block(io.trino.spi.block.Block) BlockAssertions.createLongsBlock(io.trino.block.BlockAssertions.createLongsBlock) BlockAssertions.createLongRepeatBlock(io.trino.block.BlockAssertions.createLongRepeatBlock) Collection(java.util.Collection) Page(io.trino.spi.Page) BucketFunction(io.trino.spi.connector.BucketFunction) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.testng.annotations.Test)
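
Both tests depend on the partitioned bucket function being a pure function of the partition columns and the underlying hive bucket, which is why every position that shares a (partition, hive bucket) pair must land in a single output bucket. Since BucketFunction has a single getBucket(Page, int) method, such a function can be written as a lambda. The combining scheme below is an assumed illustration only, not the logic of Trino's actual partitioned bucket function:

// Illustrative sketch: hash the partition column, mix in the hive bucket, fold into the output range.
int outputBucketCount = 100;
BucketFunction partitionAwareBucketFunction = (page, position) -> {
    Block partitionBlock = page.getBlock(1);
    long partitionHash = partitionBlock.isNull(position) ? 0 : Long.hashCode(BIGINT.getLong(partitionBlock, position));
    int hiveBucket = hiveBucketFunction.getBucket(new Page(page.getBlock(0)), position);
    return Math.floorMod((int) (partitionHash * 31) + hiveBucket, outputBucketCount);
};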

Example 8 with BucketFunction

Use of io.trino.spi.connector.BucketFunction in project trino by trinodb.

From the class TestLocalExchange, method testPartitionCustomPartitioning.

@Test(dataProvider = "executionStrategy")
public void testPartitionCustomPartitioning(PipelineExecutionStrategy executionStrategy) {
    ConnectorPartitioningHandle connectorPartitioningHandle = new ConnectorPartitioningHandle() {
    };
    ConnectorNodePartitioningProvider connectorNodePartitioningProvider = new ConnectorNodePartitioningProvider() {

        @Override
        public ConnectorBucketNodeMap getBucketNodeMap(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorPartitioningHandle partitioningHandle) {
            return createBucketNodeMap(2);
        }

        @Override
        public ToIntFunction<ConnectorSplit> getSplitBucketFunction(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorPartitioningHandle partitioningHandle) {
            throw new UnsupportedOperationException();
        }

        @Override
        public BucketFunction getBucketFunction(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorPartitioningHandle partitioningHandle, List<Type> partitionChannelTypes, int bucketCount) {
            return (page, position) -> {
                // route rows whose bucketing column equals 42 to bucket 0 and all other rows to bucket 1
                long rowValue = BIGINT.getLong(page.getBlock(0), position);
                if (rowValue == 42) {
                    return 0;
                }
                return 1;
            };
        }
    };
    List<Type> types = ImmutableList.of(VARCHAR, BIGINT);
    nodePartitioningManager.addPartitioningProvider(new CatalogName("foo"), connectorNodePartitioningProvider);
    PartitioningHandle partitioningHandle = new PartitioningHandle(Optional.of(new CatalogName("foo")), Optional.of(TestingTransactionHandle.create()), connectorPartitioningHandle);
    LocalExchangeFactory localExchangeFactory = new LocalExchangeFactory(nodePartitioningManager, SESSION, partitioningHandle, 2, types, ImmutableList.of(1), Optional.empty(), executionStrategy, LOCAL_EXCHANGE_MAX_BUFFERED_BYTES, TYPE_OPERATOR_FACTORY);
    LocalExchangeSinkFactoryId localExchangeSinkFactoryId = localExchangeFactory.newSinkFactoryId();
    localExchangeFactory.noMoreSinkFactories();
    run(localExchangeFactory, executionStrategy, exchange -> {
        assertEquals(exchange.getBufferCount(), 2);
        assertExchangeTotalBufferedBytes(exchange, 0);
        LocalExchangeSinkFactory sinkFactory = exchange.getSinkFactory(localExchangeSinkFactoryId);
        LocalExchangeSink sink = sinkFactory.createSink();
        assertSinkCanWrite(sink);
        sinkFactory.close();
        sinkFactory.noMoreSinkFactories();
        LocalExchangeSource sourceA = exchange.getSource(1);
        assertSource(sourceA, 0);
        LocalExchangeSource sourceB = exchange.getSource(0);
        assertSource(sourceB, 0);
        Page pageA = SequencePageBuilder.createSequencePage(types, 1, 100, 42);
        sink.addPage(pageA);
        assertSource(sourceA, 1);
        assertSource(sourceB, 0);
        assertRemovePage(types, sourceA, pageA);
        assertSource(sourceA, 0);
        Page pageB = SequencePageBuilder.createSequencePage(types, 100, 100, 43);
        sink.addPage(pageB);
        assertSource(sourceA, 0);
        assertSource(sourceB, 1);
        assertRemovePage(types, sourceB, pageB);
        assertSource(sourceB, 0);
    });
}
Also used : BucketFunction(io.trino.spi.connector.BucketFunction) NodeTaskMap(io.trino.execution.NodeTaskMap) FIXED_HASH_DISTRIBUTION(io.trino.sql.planner.SystemPartitioningHandle.FIXED_HASH_DISTRIBUTION) Test(org.testng.annotations.Test) TypeOperators(io.trino.spi.type.TypeOperators) SequencePageBuilder(io.trino.SequencePageBuilder) CatalogName(io.trino.connector.CatalogName) NodeScheduler(io.trino.execution.scheduler.NodeScheduler) FinalizerService(io.trino.util.FinalizerService) SINGLE_DISTRIBUTION(io.trino.sql.planner.SystemPartitioningHandle.SINGLE_DISTRIBUTION) Assert.assertFalse(org.testng.Assert.assertFalse) ConnectorNodePartitioningProvider(io.trino.spi.connector.ConnectorNodePartitioningProvider) FIXED_BROADCAST_DISTRIBUTION(io.trino.sql.planner.SystemPartitioningHandle.FIXED_BROADCAST_DISTRIBUTION) NodeSchedulerConfig(io.trino.execution.scheduler.NodeSchedulerConfig) BeforeMethod(org.testng.annotations.BeforeMethod) PartitioningHandle(io.trino.sql.planner.PartitioningHandle) LocalExchangeSinkFactory(io.trino.operator.exchange.LocalExchange.LocalExchangeSinkFactory) Assert.assertNotNull(org.testng.Assert.assertNotNull) UniformNodeSelectorFactory(io.trino.execution.scheduler.UniformNodeSelectorFactory) ConnectorBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap) FIXED_ARBITRARY_DISTRIBUTION(io.trino.sql.planner.SystemPartitioningHandle.FIXED_ARBITRARY_DISTRIBUTION) DataSize(io.airlift.units.DataSize) List(java.util.List) TestingSession.testSessionBuilder(io.trino.testing.TestingSession.testSessionBuilder) BIGINT(io.trino.spi.type.BigintType.BIGINT) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) InMemoryNodeManager(io.trino.metadata.InMemoryNodeManager) FIXED_PASSTHROUGH_DISTRIBUTION(io.trino.sql.planner.SystemPartitioningHandle.FIXED_PASSTHROUGH_DISTRIBUTION) Optional(java.util.Optional) Session(io.trino.Session) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) DataProvider(org.testng.annotations.DataProvider) Assert.assertNull(org.testng.Assert.assertNull) Type(io.trino.spi.type.Type) ConnectorBucketNodeMap.createBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap.createBucketNodeMap) Page(io.trino.spi.Page) Assert.assertEquals(org.testng.Assert.assertEquals) NodePartitioningManager(io.trino.sql.planner.NodePartitioningManager) LocalExchangeFactory(io.trino.operator.exchange.LocalExchange.LocalExchangeFactory) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) ImmutableList(com.google.common.collect.ImmutableList) GROUPED_EXECUTION(io.trino.operator.PipelineExecutionStrategy.GROUPED_EXECUTION) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) PipelineExecutionStrategy(io.trino.operator.PipelineExecutionStrategy) Lifespan(io.trino.execution.Lifespan) InterpretedHashGenerator(io.trino.operator.InterpretedHashGenerator) BlockTypeOperators(io.trino.type.BlockTypeOperators) UNGROUPED_EXECUTION(io.trino.operator.PipelineExecutionStrategy.UNGROUPED_EXECUTION) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) PageAssertions(io.trino.operator.PageAssertions) ToIntFunction(java.util.function.ToIntFunction) LocalExchangeSinkFactoryId(io.trino.operator.exchange.LocalExchange.LocalExchangeSinkFactoryId) TestingTransactionHandle(io.trino.testing.TestingTransactionHandle) ConnectorSession(io.trino.spi.connector.ConnectorSession) Consumer(java.util.function.Consumer) Assert.assertTrue(org.testng.Assert.assertTrue) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) 
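
The anonymous provider above returns its BucketFunction as a lambda, which also makes the routing rule easy to exercise on its own. A minimal sketch, assuming it runs inside the same test class; the page built here has a single BIGINT channel purely for illustration:

// Same routing rule as getBucketFunction above: value 42 goes to bucket 0, everything else to bucket 1.
BucketFunction bucketFunction = (page, position) -> BIGINT.getLong(page.getBlock(0), position) == 42 ? 0 : 1;

BlockBuilder blockBuilder = BIGINT.createBlockBuilder(null, 3);
BIGINT.writeLong(blockBuilder, 42);
BIGINT.writeLong(blockBuilder, 7);
BIGINT.writeLong(blockBuilder, 42);
Page singleChannelPage = new Page(blockBuilder.build());

assertEquals(bucketFunction.getBucket(singleChannelPage, 0), 0);
assertEquals(bucketFunction.getBucket(singleChannelPage, 1), 1);
assertEquals(bucketFunction.getBucket(singleChannelPage, 2), 0);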

Example 9 with BucketFunction

Use of io.trino.spi.connector.BucketFunction in project trino by trinodb.

From the class NodePartitioningManager, method getBucketFunction.

public BucketFunction getBucketFunction(Session session, PartitioningHandle partitioningHandle, List<Type> partitionChannelTypes, int bucketCount) {
    CatalogName catalogName = partitioningHandle.getConnectorId().orElseThrow(() -> new IllegalArgumentException("No connector ID for partitioning handle: " + partitioningHandle));
    ConnectorNodePartitioningProvider partitioningProvider = getPartitioningProvider(catalogName);
    BucketFunction bucketFunction = partitioningProvider.getBucketFunction(partitioningHandle.getTransactionHandle().orElseThrow(() -> new IllegalArgumentException("No transactionHandle for partitioning handle: " + partitioningHandle)), session.toConnectorSession(), partitioningHandle.getConnectorHandle(), partitionChannelTypes, bucketCount);
    checkArgument(bucketFunction != null, "No bucket function for partitioning: %s", partitioningHandle);
    return bucketFunction;
}
Also used : ConnectorNodePartitioningProvider(io.trino.spi.connector.ConnectorNodePartitioningProvider) CatalogName(io.trino.connector.CatalogName) BucketFunction(io.trino.spi.connector.BucketFunction)
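
A hedged usage sketch of the method above; session, partitioningHandle, and page are placeholder variables supplied by the caller, and the bucket count of 16 is arbitrary. The resolved BucketFunction is then simply applied per position of a page:

BucketFunction bucketFunction = nodePartitioningManager.getBucketFunction(
        session, partitioningHandle, ImmutableList.of(BIGINT), 16);
int[] rowBuckets = new int[page.getPositionCount()];
for (int position = 0; position < page.getPositionCount(); position++) {
    // every position is assigned a bucket in the range [0, 16)
    rowBuckets[position] = bucketFunction.getBucket(page, position);
}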

Example 10 with BucketFunction

Use of io.trino.spi.connector.BucketFunction in project trino by trinodb.

From the class NodePartitioningManager, method getPartitionFunction.

public PartitionFunction getPartitionFunction(Session session, PartitioningScheme partitioningScheme, List<Type> partitionChannelTypes) {
    int[] bucketToPartition = partitioningScheme.getBucketToPartition().orElseThrow(() -> new IllegalArgumentException("Bucket to partition must be set before a partition function can be created"));
    PartitioningHandle partitioningHandle = partitioningScheme.getPartitioning().getHandle();
    if (partitioningHandle.getConnectorHandle() instanceof SystemPartitioningHandle) {
        return ((SystemPartitioningHandle) partitioningHandle.getConnectorHandle()).getPartitionFunction(partitionChannelTypes, partitioningScheme.getHashColumn().isPresent(), bucketToPartition, blockTypeOperators);
    }
    BucketFunction bucketFunction = getBucketFunction(session, partitioningHandle, partitionChannelTypes, bucketToPartition.length);
    return new BucketPartitionFunction(bucketFunction, bucketToPartition);
}
Also used : BucketPartitionFunction(io.trino.operator.BucketPartitionFunction) BucketFunction(io.trino.spi.connector.BucketFunction)
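
The returned BucketPartitionFunction composes the two pieces. Conceptually (a sketch of the idea, not the source of io.trino.operator.BucketPartitionFunction), it buckets each row with the connector's BucketFunction and then maps that bucket to an output partition through the bucketToPartition array:

// Conceptual sketch of the composition performed by BucketPartitionFunction.
static int partitionForRow(BucketFunction bucketFunction, int[] bucketToPartition, Page page, int position) {
    int bucket = bucketFunction.getBucket(page, position);   // connector-specific bucket
    return bucketToPartition[bucket];                        // bucket mapped to an output partition
}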

Aggregations

Counts of how often each class appears across the indexed examples:

BucketFunction (io.trino.spi.connector.BucketFunction): 11
Test (org.testng.annotations.Test): 8
Page (io.trino.spi.Page): 4
ImmutableList (com.google.common.collect.ImmutableList): 2
BlockAssertions.createLongRepeatBlock (io.trino.block.BlockAssertions.createLongRepeatBlock): 2
BlockAssertions.createLongsBlock (io.trino.block.BlockAssertions.createLongsBlock): 2
CatalogName (io.trino.connector.CatalogName): 2
BucketPartitionFunction (io.trino.operator.BucketPartitionFunction): 2
Block (io.trino.spi.block.Block): 2
BlockBuilder (io.trino.spi.block.BlockBuilder): 2
ConnectorNodePartitioningProvider (io.trino.spi.connector.ConnectorNodePartitioningProvider): 2
BIGINT (io.trino.spi.type.BigintType.BIGINT): 2
Type (io.trino.spi.type.Type): 2
TypeOperators (io.trino.spi.type.TypeOperators): 2
ArrayList (java.util.ArrayList): 2
Collection (java.util.Collection): 2
HashMap (java.util.HashMap): 2
List (java.util.List): 2
Map (java.util.Map): 2
Assert.assertEquals (org.testng.Assert.assertEquals): 2