Use of io.trino.spi.connector.BucketFunction in project trino by trinodb.
From the class TestHivePartitionedBucketFunction, method testMultiplePartitions.
@Test(dataProvider = "hiveBucketingVersion")
public void testMultiplePartitions(BucketingVersion hiveBucketingVersion) {
int numValues = 1024;
int numBuckets = 10;
Block bucketColumn = createLongSequenceBlockWithNull(numValues);
Page bucketedColumnPage = new Page(bucketColumn);
BucketFunction hiveBucketFunction = bucketFunction(hiveBucketingVersion, numBuckets, ImmutableList.of(HIVE_LONG));
int numPartitions = 8;
List<Long> partitionValues = new ArrayList<>();
for (int i = 0; i < numPartitions - 1; i++) {
partitionValues.addAll(Collections.nCopies(numValues / numPartitions, i * 348349L));
}
partitionValues.addAll(Collections.nCopies(numValues / numPartitions, null));
Block partitionColumn = createLongsBlock(partitionValues);
Page page = new Page(bucketColumn, partitionColumn);
Map<Long, HashMultimap<Integer, Integer>> partitionedBucketPositions = new HashMap<>();
for (int i = 0; i < numValues; i++) {
int hiveBucket = hiveBucketFunction.getBucket(bucketedColumnPage, i);
Long hivePartition = partitionValues.get(i);
// record list of positions for each combination of hive partition and bucket
partitionedBucketPositions.computeIfAbsent(hivePartition, ignored -> HashMultimap.create()).put(hiveBucket, i);
}
BucketFunction hivePartitionedBucketFunction = partitionedBucketFunction(hiveBucketingVersion, numBuckets, ImmutableList.of(HIVE_LONG), ImmutableList.of(BIGINT), 4000);
// All positions of a hive partition and bucket should hash to the same partitioned bucket
for (Map.Entry<Long, HashMultimap<Integer, Integer>> partitionEntry : partitionedBucketPositions.entrySet()) {
for (Map.Entry<Integer, Collection<Integer>> entry : partitionEntry.getValue().asMap().entrySet()) {
assertBucketCount(hivePartitionedBucketFunction, page, entry.getValue(), 1);
}
}
assertBucketCount(hivePartitionedBucketFunction, page, IntStream.range(0, numValues).boxed().collect(toImmutableList()), numBuckets * numPartitions);
}
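The assertBucketCount helper is not shown on this page. A minimal sketch of what such a helper could look like, inferred only from how it is called above (it applies the bucket function to each listed position and checks how many distinct buckets result); the body below is an assumption, not the Trino source:

    // Hypothetical sketch of the assertBucketCount helper used in these tests.
    private static void assertBucketCount(BucketFunction bucketFunction, Page page, Collection<Integer> positions, int expectedBucketCount)
    {
        Set<Integer> buckets = new HashSet<>();
        for (int position : positions) {
            buckets.add(bucketFunction.getBucket(page, position));
        }
        assertEquals(buckets.size(), expectedBucketCount);
    }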
Use of io.trino.spi.connector.BucketFunction in project trino by trinodb.
From the class TestHivePartitionedBucketFunction, method testSinglePartition.
@Test(dataProvider = "hiveBucketingVersion")
public void testSinglePartition(BucketingVersion hiveBucketingVersion) {
int numValues = 1024;
int numBuckets = 10;
Block bucketColumn = createLongSequenceBlockWithNull(numValues);
Page bucketedColumnPage = new Page(bucketColumn);
Block partitionColumn = createLongRepeatBlock(78758, numValues);
Page page = new Page(bucketColumn, partitionColumn);
BucketFunction hiveBucketFunction = bucketFunction(hiveBucketingVersion, numBuckets, ImmutableList.of(HIVE_LONG));
Multimap<Integer, Integer> bucketPositions = HashMultimap.create();
for (int i = 0; i < numValues; i++) {
int hiveBucket = hiveBucketFunction.getBucket(bucketedColumnPage, i);
// record list of positions for each hive bucket
bucketPositions.put(hiveBucket, i);
}
BucketFunction hivePartitionedBucketFunction = partitionedBucketFunction(hiveBucketingVersion, numBuckets, ImmutableList.of(HIVE_LONG), ImmutableList.of(BIGINT), 100);
// All positions of a bucket should hash to the same partitioned bucket
for (Map.Entry<Integer, Collection<Integer>> entry : bucketPositions.asMap().entrySet()) {
assertBucketCount(hivePartitionedBucketFunction, page, entry.getValue(), 1);
}
assertBucketCount(hivePartitionedBucketFunction, page, IntStream.range(0, numValues).boxed().collect(toImmutableList()), numBuckets);
}
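What both tests exercise is the same invariant: the partition-aware bucket function must be a pure function of (partition values, hive bucket), and distinct (partition, bucket) pairs must spread over the larger bucket space. The sketch below illustrates that invariant only; it is an assumed composition written inside this test's scope, not Trino's actual HivePartitionedBucketFunction, and partitionHashFunction is a hypothetical helper hashing the partition column (channel 1 of page):

    // Illustrative only: a partition-aware bucket function as a composition of
    // a partition hash and the plain hive bucket.
    int partitionedBucketCount = 100; // matches the last argument to partitionedBucketFunction above
    BucketFunction partitionHashFunction = (p, position) ->
            p.getBlock(1).isNull(position) ? 0 : Long.hashCode(BIGINT.getLong(p.getBlock(1), position));
    BucketFunction sketch = (p, position) -> {
        int hiveBucket = hiveBucketFunction.getBucket(p, position);
        int partitionHash = partitionHashFunction.getBucket(p, position);
        // rows sharing (partition, hive bucket) always land in the same output bucket
        return Math.floorMod(partitionHash * numBuckets + hiveBucket, partitionedBucketCount);
    };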
Use of io.trino.spi.connector.BucketFunction in project trino by trinodb.
From the class TestLocalExchange, method testPartitionCustomPartitioning.
@Test(dataProvider = "executionStrategy")
public void testPartitionCustomPartitioning(PipelineExecutionStrategy executionStrategy) {
ConnectorPartitioningHandle connectorPartitioningHandle = new ConnectorPartitioningHandle() {
};
ConnectorNodePartitioningProvider connectorNodePartitioningProvider = new ConnectorNodePartitioningProvider() {
@Override
public ConnectorBucketNodeMap getBucketNodeMap(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorPartitioningHandle partitioningHandle) {
return createBucketNodeMap(2);
}
@Override
public ToIntFunction<ConnectorSplit> getSplitBucketFunction(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorPartitioningHandle partitioningHandle) {
throw new UnsupportedOperationException();
}
@Override
public BucketFunction getBucketFunction(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorPartitioningHandle partitioningHandle, List<Type> partitionChannelTypes, int bucketCount) {
return (page, position) -> {
long rowValue = BIGINT.getLong(page.getBlock(0), position);
if (rowValue == 42) {
return 0;
}
return 1;
};
}
};
List<Type> types = ImmutableList.of(VARCHAR, BIGINT);
nodePartitioningManager.addPartitioningProvider(new CatalogName("foo"), connectorNodePartitioningProvider);
PartitioningHandle partitioningHandle = new PartitioningHandle(Optional.of(new CatalogName("foo")), Optional.of(TestingTransactionHandle.create()), connectorPartitioningHandle);
LocalExchangeFactory localExchangeFactory = new LocalExchangeFactory(nodePartitioningManager, SESSION, partitioningHandle, 2, types, ImmutableList.of(1), Optional.empty(), executionStrategy, LOCAL_EXCHANGE_MAX_BUFFERED_BYTES, TYPE_OPERATOR_FACTORY);
LocalExchangeSinkFactoryId localExchangeSinkFactoryId = localExchangeFactory.newSinkFactoryId();
localExchangeFactory.noMoreSinkFactories();
run(localExchangeFactory, executionStrategy, exchange -> {
assertEquals(exchange.getBufferCount(), 2);
assertExchangeTotalBufferedBytes(exchange, 0);
LocalExchangeSinkFactory sinkFactory = exchange.getSinkFactory(localExchangeSinkFactoryId);
LocalExchangeSink sink = sinkFactory.createSink();
assertSinkCanWrite(sink);
sinkFactory.close();
sinkFactory.noMoreSinkFactories();
LocalExchangeSource sourceA = exchange.getSource(1);
assertSource(sourceA, 0);
LocalExchangeSource sourceB = exchange.getSource(0);
assertSource(sourceB, 0);
Page pageA = SequencePageBuilder.createSequencePage(types, 1, 100, 42);
sink.addPage(pageA);
assertSource(sourceA, 1);
assertSource(sourceB, 0);
assertRemovePage(types, sourceA, pageA);
assertSource(sourceA, 0);
Page pageB = SequencePageBuilder.createSequencePage(types, 100, 100, 43);
sink.addPage(pageB);
assertSource(sourceA, 0);
assertSource(sourceB, 1);
assertRemovePage(types, sourceB, pageB);
assertSource(sourceB, 0);
});
}
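The anonymous provider above returns its BucketFunction as a lambda. For reference, the same SPI can also be implemented as a named class; the sketch below is a generic hash-modulo bucket function over a single BIGINT channel, where the class name and channel choice are illustrative and not part of Trino:

    // Hypothetical example: a BucketFunction that hashes one BIGINT channel into bucketCount buckets.
    class LongHashBucketFunction
            implements BucketFunction
    {
        private final int channel;
        private final int bucketCount;

        LongHashBucketFunction(int channel, int bucketCount)
        {
            this.channel = channel;
            this.bucketCount = bucketCount;
        }

        @Override
        public int getBucket(Page page, int position)
        {
            long value = BIGINT.getLong(page.getBlock(channel), position);
            return Math.floorMod(Long.hashCode(value), bucketCount);
        }
    }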
Use of io.trino.spi.connector.BucketFunction in project trino by trinodb.
From the class NodePartitioningManager, method getBucketFunction.
public BucketFunction getBucketFunction(Session session, PartitioningHandle partitioningHandle, List<Type> partitionChannelTypes, int bucketCount) {
    CatalogName catalogName = partitioningHandle.getConnectorId()
            .orElseThrow(() -> new IllegalArgumentException("No connector ID for partitioning handle: " + partitioningHandle));
    ConnectorNodePartitioningProvider partitioningProvider = getPartitioningProvider(catalogName);

    BucketFunction bucketFunction = partitioningProvider.getBucketFunction(
            partitioningHandle.getTransactionHandle().orElseThrow(() -> new IllegalArgumentException("No transactionHandle for partitioning handle: " + partitioningHandle)),
            session.toConnectorSession(),
            partitioningHandle.getConnectorHandle(),
            partitionChannelTypes,
            bucketCount);
    checkArgument(bucketFunction != null, "No bucket function for partitioning: %s", partitioningHandle);
    return bucketFunction;
}
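A usage sketch of the method above, assuming a session, a connector-backed partitioning handle, and a page whose channels match partitionChannelTypes are already in hand; the variable names are illustrative:

    // Illustrative usage only: classify each row of a page by its bucket.
    BucketFunction bucketFunction = nodePartitioningManager.getBucketFunction(session, partitioningHandle, partitionChannelTypes, bucketCount);
    for (int position = 0; position < page.getPositionCount(); position++) {
        int bucket = bucketFunction.getBucket(page, position);
        // route the row at this position to whatever owns this bucket
    }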
Use of io.trino.spi.connector.BucketFunction in project trino by trinodb.
From the class NodePartitioningManager, method getPartitionFunction.
public PartitionFunction getPartitionFunction(Session session, PartitioningScheme partitioningScheme, List<Type> partitionChannelTypes) {
    int[] bucketToPartition = partitioningScheme.getBucketToPartition()
            .orElseThrow(() -> new IllegalArgumentException("Bucket to partition must be set before a partition function can be created"));

    PartitioningHandle partitioningHandle = partitioningScheme.getPartitioning().getHandle();
    if (partitioningHandle.getConnectorHandle() instanceof SystemPartitioningHandle) {
        return ((SystemPartitioningHandle) partitioningHandle.getConnectorHandle()).getPartitionFunction(partitionChannelTypes, partitioningScheme.getHashColumn().isPresent(), bucketToPartition, blockTypeOperators);
    }

    BucketFunction bucketFunction = getBucketFunction(session, partitioningHandle, partitionChannelTypes, bucketToPartition.length);
    return new BucketPartitionFunction(bucketFunction, bucketToPartition);
}
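BucketPartitionFunction itself is not shown on this page. Conceptually it composes the connector's bucket function with the bucketToPartition mapping established by the scheduler. A minimal sketch of that composition, following only the constructor call above and not the verbatim Trino class:

    // Assumed sketch: the BucketFunction picks a bucket, bucketToPartition maps it to a partition.
    class BucketPartitionFunctionSketch
    {
        private final BucketFunction bucketFunction;
        private final int[] bucketToPartition;

        BucketPartitionFunctionSketch(BucketFunction bucketFunction, int[] bucketToPartition)
        {
            this.bucketFunction = bucketFunction;
            this.bucketToPartition = bucketToPartition.clone();
        }

        public int getPartition(Page page, int position)
        {
            return bucketToPartition[bucketFunction.getBucket(page, position)];
        }
    }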