Search in sources :

Example 1 with HashBucketSchema

use of org.apache.kudu.client.PartitionSchema.HashBucketSchema in project trino by trinodb.

In class KuduMetadata, method getTableHandle.

/**
 * Opens the Kudu table named by {@code schemaTableName} and wraps it in a {@link KuduTableHandle}.
 *
 * <p>When the table has at least one hash bucket schema, the handle carries the product of the
 * per-schema bucket counts; otherwise the bucket count is left empty. The product is computed
 * with {@link Math#multiplyExact(int, int)}, so an overflow raises {@link ArithmeticException}.
 *
 * @param session the connector session (not read by this method)
 * @param schemaTableName the schema-qualified name of the table to open
 * @return a handle for the table, or {@code null} when a {@link NotFoundException} is thrown
 */
@Override
public KuduTableHandle getTableHandle(ConnectorSession session, SchemaTableName schemaTableName) {
    try {
        KuduTable kuduTable = clientSession.openTable(schemaTableName);
        List<HashBucketSchema> hashSchemas = kuduTable.getPartitionSchema().getHashBucketSchemas();

        OptionalInt totalBuckets;
        if (hashSchemas.isEmpty()) {
            totalBuckets = OptionalInt.empty();
        } else {
            // Multiply the bucket counts of every hash level, failing loudly on int overflow.
            int product = 1;
            for (HashBucketSchema hashSchema : hashSchemas) {
                product = Math.multiplyExact(product, hashSchema.getNumBuckets());
            }
            totalBuckets = OptionalInt.of(product);
        }

        return new KuduTableHandle(schemaTableName, kuduTable, TupleDomain.all(), Optional.empty(), false, totalBuckets, OptionalLong.empty());
    } catch (NotFoundException ignored) {
        // A missing table is reported to the caller as null rather than as an exception.
        return null;
    }
}
Also used : HashBucketSchema(org.apache.kudu.client.PartitionSchema.HashBucketSchema) NotFoundException(io.trino.spi.connector.NotFoundException) KuduTable(org.apache.kudu.client.KuduTable) OptionalInt(java.util.OptionalInt)

Example 2 with HashBucketSchema

use of org.apache.kudu.client.PartitionSchema.HashBucketSchema in project trino by trinodb.

In class KuduBucketFunction, method getBucket.

/**
 * Computes the bucket for the row at {@code position} of {@code page}.
 *
 * <p>One bucket id is calculated per hash bucket schema, in schema order, reusing a single
 * {@link PartialRow}; the ids are then combined by the static {@code getBucket} overload.
 *
 * @param page the page holding the bucketing channels; its channel count must equal the number
 *     of bucket channel types
 * @param position the row position within {@code page}
 * @return the combined bucket number
 * @throws IllegalArgumentException if the page's channel count does not match
 */
@Override
public int getBucket(Page page, int position) {
    checkArgument(this.bucketChannelTypes.size() == page.getChannelCount());

    // The same row object is handed to every per-schema calculation.
    PartialRow row = new PartialRow(this.schema);

    ImmutableList.Builder<Integer> perSchemaBuckets = ImmutableList.builder();
    for (HashBucketSchema bucketSchema : this.hashBucketSchemas) {
        perSchemaBuckets.add(this.calculateSchemaLevelBucketId(page, row, bucketSchema, position));
    }
    List<Integer> bucketIds = perSchemaBuckets.build();

    return getBucket(bucketIds, hashBucketSchemas);
}
Also used : BucketFunction(io.trino.spi.connector.BucketFunction) Type(io.trino.spi.type.Type) Page(io.trino.spi.Page) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) Shorts(com.google.common.primitives.Shorts) Schema(org.apache.kudu.Schema) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) ImmutableList(com.google.common.collect.ImmutableList) Block(io.trino.spi.block.Block) Objects.requireNonNull(java.util.Objects.requireNonNull) Math.toIntExact(java.lang.Math.toIntExact) INTEGER(io.trino.spi.type.IntegerType.INTEGER) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) PartialRow(org.apache.kudu.client.PartialRow) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) SignedBytes(com.google.common.primitives.SignedBytes) KuduTable(org.apache.kudu.client.KuduTable) KeyEncoderAccessor(org.apache.kudu.client.KeyEncoderAccessor) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) HashBucketSchema(org.apache.kudu.client.PartitionSchema.HashBucketSchema) TINYINT(io.trino.spi.type.TinyintType.TINYINT) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper)

Example 3 with HashBucketSchema

use of org.apache.kudu.client.PartitionSchema.HashBucketSchema in project trino by trinodb.

In class KuduClientSession, method buildKuduSplits.

/**
 * Builds one {@link KuduSplit} per Kudu scan token for the given table handle.
 *
 * <p>The scan is restricted by the handle's constraint intersected with the current dynamic
 * filter predicate (simplified with a threshold of 100). If that intersection is "none", no
 * splits are produced. The projected columns are chosen as follows:
 * <ul>
 *   <li>delete handle with desired columns: indexes {@code 0..primaryKeyColumnCount-1} followed
 *       by every desired column whose ordinal is at or beyond the primary key count;</li>
 *   <li>delete handle without desired columns: every column of the table schema;</li>
 *   <li>non-delete handle with desired columns: exactly the desired columns, in order;</li>
 *   <li>non-delete handle without desired columns: every column except a key column named
 *       {@code KuduColumnHandle.ROW_ID}.</li>
 * </ul>
 *
 * @param tableHandle the table handle describing the constraint, desired columns and limit
 * @param dynamicFilter the dynamic filter whose current predicate narrows the scan
 * @return the splits for the scan; empty when the effective constraint matches nothing
 */
public List<KuduSplit> buildKuduSplits(KuduTableHandle tableHandle, DynamicFilter dynamicFilter) {
    KuduTable table = tableHandle.getTable(this);
    Schema schema = table.getSchema();
    int primaryKeyColumnCount = schema.getPrimaryKeyColumnCount();
    KuduScanToken.KuduScanTokenBuilder builder = client.newScanTokenBuilder(table);
    // TODO: remove when kudu client bug is fixed: https://gerrit.cloudera.org/#/c/18166/
    builder.includeTabletMetadata(false);

    TupleDomain<ColumnHandle> constraint = tableHandle.getConstraint().intersect(dynamicFilter.getCurrentPredicate().simplify(100));
    if (constraint.isNone()) {
        return ImmutableList.of();
    }
    addConstraintPredicates(table, builder, constraint);

    // All four projection cases feed one immutable builder. The previous code appended to the
    // result of Collectors.toList(), whose mutability is not guaranteed by the JDK.
    Optional<List<ColumnHandle>> desiredColumns = tableHandle.getDesiredColumns();
    ImmutableList.Builder<Integer> columnIndexesBuilder = ImmutableList.builder();
    if (tableHandle.isDeleteHandle()) {
        if (desiredColumns.isPresent()) {
            // Leading primary-key positions are always projected for deletes.
            for (int ordinal = 0; ordinal < primaryKeyColumnCount; ordinal++) {
                columnIndexesBuilder.add(ordinal);
            }
            // Append the requested columns that are not already covered by the range above.
            for (ColumnHandle column : desiredColumns.get()) {
                int index = ((KuduColumnHandle) column).getOrdinalPosition();
                if (index >= primaryKeyColumnCount) {
                    columnIndexesBuilder.add(index);
                }
            }
        } else {
            for (int ordinal = 0; ordinal < schema.getColumnCount(); ordinal++) {
                columnIndexesBuilder.add(ordinal);
            }
        }
    } else if (desiredColumns.isPresent()) {
        for (ColumnHandle column : desiredColumns.get()) {
            columnIndexesBuilder.add(((KuduColumnHandle) column).getOrdinalPosition());
        }
    } else {
        for (int ordinal = 0; ordinal < schema.getColumnCount(); ordinal++) {
            ColumnSchema column = schema.getColumnByIndex(ordinal);
            // Skip hidden "row_uuid" column
            if (!column.isKey() || !column.getName().equals(KuduColumnHandle.ROW_ID)) {
                columnIndexesBuilder.add(ordinal);
            }
        }
    }
    List<Integer> columnIndexes = columnIndexesBuilder.build();

    builder.setProjectedColumnIndexes(columnIndexes);
    tableHandle.getLimit().ifPresent(builder::limit);
    List<KuduScanToken> tokens = builder.build();

    // Tag every split with the bucket derived from its tablet's hash partition.
    List<HashBucketSchema> hashBucketSchemas = table.getPartitionSchema().getHashBucketSchemas();
    ImmutableList.Builder<KuduSplit> tokenBuilder = ImmutableList.builder();
    for (KuduScanToken token : tokens) {
        List<Integer> hashBuckets = token.getTablet().getPartition().getHashBuckets();
        int bucket = KuduBucketFunction.getBucket(hashBuckets, hashBucketSchemas);
        tokenBuilder.add(toKuduSplit(tableHandle, token, primaryKeyColumnCount, bucket));
    }
    return tokenBuilder.build();
}
Also used : ColumnHandle(io.trino.spi.connector.ColumnHandle) KuduScanToken(org.apache.kudu.client.KuduScanToken) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) Schema(org.apache.kudu.Schema) ColumnSchema(org.apache.kudu.ColumnSchema) HashBucketSchema(org.apache.kudu.client.PartitionSchema.HashBucketSchema) KuduTable(org.apache.kudu.client.KuduTable) List(java.util.List) ArrayList(java.util.ArrayList) Collectors.toList(java.util.stream.Collectors.toList)

Aggregations

KuduTable (org.apache.kudu.client.KuduTable)3 HashBucketSchema (org.apache.kudu.client.PartitionSchema.HashBucketSchema)3 ImmutableList (com.google.common.collect.ImmutableList)2 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)2 List (java.util.List)2 Schema (org.apache.kudu.Schema)2 MoreObjects.toStringHelper (com.google.common.base.MoreObjects.toStringHelper)1 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)1 Shorts (com.google.common.primitives.Shorts)1 SignedBytes (com.google.common.primitives.SignedBytes)1 Page (io.trino.spi.Page)1 Block (io.trino.spi.block.Block)1 BucketFunction (io.trino.spi.connector.BucketFunction)1 ColumnHandle (io.trino.spi.connector.ColumnHandle)1 NotFoundException (io.trino.spi.connector.NotFoundException)1 BIGINT (io.trino.spi.type.BigintType.BIGINT)1 BOOLEAN (io.trino.spi.type.BooleanType.BOOLEAN)1 DOUBLE (io.trino.spi.type.DoubleType.DOUBLE)1 INTEGER (io.trino.spi.type.IntegerType.INTEGER)1 SMALLINT (io.trino.spi.type.SmallintType.SMALLINT)1