Example 6 with SortingColumn

Use of io.trino.plugin.hive.metastore.SortingColumn in project trino by trinodb.

In class HiveBucketing, the method getHiveBucketHandle:

public static Optional<HiveBucketHandle> getHiveBucketHandle(ConnectorSession session, Table table, TypeManager typeManager) {
    // Tables written by Spark declare bucketing that Trino cannot use; treat them as unbucketed
    if (table.getParameters().containsKey(SPARK_TABLE_PROVIDER_KEY)) {
        return Optional.empty();
    }
    Optional<HiveBucketProperty> hiveBucketProperty = table.getStorage().getBucketProperty();
    if (hiveBucketProperty.isEmpty()) {
        return Optional.empty();
    }
    if (!isSupportedBucketing(table)) {
        return Optional.empty();
    }
    HiveTimestampPrecision timestampPrecision = getTimestampPrecision(session);
    Map<String, HiveColumnHandle> map = getRegularColumnHandles(table, typeManager, timestampPrecision).stream()
            .collect(Collectors.toMap(HiveColumnHandle::getName, identity()));
    ImmutableList.Builder<HiveColumnHandle> bucketColumns = ImmutableList.builder();
    for (String bucketColumnName : hiveBucketProperty.get().getBucketedBy()) {
        HiveColumnHandle bucketColumnHandle = map.get(bucketColumnName);
        if (bucketColumnHandle == null) {
            throw new TrinoException(HIVE_INVALID_METADATA, format("Table '%s.%s' is bucketed on non-existent column '%s'", table.getDatabaseName(), table.getTableName(), bucketColumnName));
        }
        bucketColumns.add(bucketColumnHandle);
    }
    BucketingVersion bucketingVersion = hiveBucketProperty.get().getBucketingVersion();
    int bucketCount = hiveBucketProperty.get().getBucketCount();
    List<SortingColumn> sortedBy = hiveBucketProperty.get().getSortedBy();
    // The read bucket count starts out equal to the table bucket count
    return Optional.of(new HiveBucketHandle(bucketColumns.build(), bucketingVersion, bucketCount, bucketCount, sortedBy));
}
Also used: SortingColumn (io.trino.plugin.hive.metastore.SortingColumn), HiveTimestampPrecision (io.trino.plugin.hive.HiveTimestampPrecision), ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList), ImmutableList (com.google.common.collect.ImmutableList), HiveBucketProperty (io.trino.plugin.hive.HiveBucketProperty), HiveBucketHandle (io.trino.plugin.hive.HiveBucketHandle), TrinoException (io.trino.spi.TrinoException), HiveColumnHandle (io.trino.plugin.hive.HiveColumnHandle)
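
For orientation, a hypothetical call site for this method might look like the sketch below. Here session, table, and typeManager are assumed to come from the connector context, and planUnbucketedScan/planBucketedScan are made-up helper names standing in for whatever the caller does with the handle.

// Hypothetical call site: fall back to an unbucketed plan when no handle is produced.
Optional<HiveBucketHandle> bucketHandle = HiveBucketing.getHiveBucketHandle(session, table, typeManager);
if (bucketHandle.isEmpty()) {
    // Spark-written, unbucketed, or unsupported bucketing: plan without bucket pruning
    return planUnbucketedScan(table);
}
return planBucketedScan(table, bucketHandle.get());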

Example 7 with SortingColumn

Use of io.trino.plugin.hive.metastore.SortingColumn in project trino by trinodb.

In class ProtoUtils, the method fromProto:

static Optional<HiveBucketProperty> fromProto(Map<String, String> tableParameters, alluxio.grpc.table.layout.hive.HiveBucketProperty property) {
    // A missing or non-positive bucket count means the layout is not bucketed
    if (!property.hasBucketCount() || property.getBucketCount() <= 0) {
        return Optional.empty();
    }
    List<SortingColumn> sortedBy = property.getSortedByList().stream().map(ProtoUtils::fromProto).collect(toImmutableList());
    HiveBucketing.BucketingVersion bucketingVersion = HiveBucketing.getBucketingVersion(tableParameters);
    return Optional.of(new HiveBucketProperty(property.getBucketedByList(), bucketingVersion, (int) property.getBucketCount(), sortedBy));
}
Also used: HiveBucketProperty (io.trino.plugin.hive.HiveBucketProperty), SortingColumn (io.trino.plugin.hive.metastore.SortingColumn), HiveBucketing (io.trino.plugin.hive.util.HiveBucketing)
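
As a quick illustration of the guard above, a layout without a positive bucket count converts to an empty Optional. The builder calls follow the standard protobuf-generated API; treat this as a sketch rather than project code.

// A layout with a non-positive bucket count converts to Optional.empty()
alluxio.grpc.table.layout.hive.HiveBucketProperty unbucketed =
        alluxio.grpc.table.layout.hive.HiveBucketProperty.newBuilder()
                .setBucketCount(0) // non-positive count: not bucketed
                .build();
// No table parameters are needed to hit the unbucketed short-circuit
assert ProtoUtils.fromProto(ImmutableMap.of(), unbucketed).isEmpty();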

Example 8 with SortingColumn

Use of io.trino.plugin.hive.metastore.SortingColumn in project trino by trinodb.

In class TestProtoUtils, the method testSortingColumn:

@Test
public void testSortingColumn() {
    alluxio.grpc.table.layout.hive.SortingColumn.Builder column = TestingAlluxioMetastoreObjects.getTestingSortingColumn();
    SortingColumn sortingColumn = ProtoUtils.fromProto(column.build());
    // Conversion must preserve both the column name and the sort order
    assertEquals(column.getColumnName(), sortingColumn.getColumnName());
    assertEquals(SortingColumn.Order.valueOf(column.getOrder().toString()), sortingColumn.getOrder());
}
Also used: SortingColumn (io.trino.plugin.hive.metastore.SortingColumn), Test (org.testng.annotations.Test)
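
The assertions above pin down the behavior of the SortingColumn overload of fromProto: it must carry the column name across unchanged and map the proto order enum to the metastore enum by name. A minimal sketch consistent with the test (assumed, not necessarily the verbatim Trino code):

static SortingColumn fromProto(alluxio.grpc.table.layout.hive.SortingColumn column) {
    // Map the proto enum to the metastore enum by name, matching the test's valueOf round-trip
    SortingColumn.Order order = SortingColumn.Order.valueOf(column.getOrder().toString());
    return new SortingColumn(column.getColumnName(), order);
}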

Example 9 with SortingColumn

Use of io.trino.plugin.hive.metastore.SortingColumn in project trino by trinodb.

In class HivePageSinkProvider, the method createPageSink:

private ConnectorPageSink createPageSink(HiveWritableTableHandle handle, boolean isCreateTable, ConnectorSession session, Map<String, String> additionalTableParameters) {
    // Propagate the table's bucketing and sort order to the writers when present
    OptionalInt bucketCount = OptionalInt.empty();
    List<SortingColumn> sortedBy = ImmutableList.of();
    if (handle.getBucketProperty().isPresent()) {
        bucketCount = OptionalInt.of(handle.getBucketProperty().get().getBucketCount());
        sortedBy = handle.getBucketProperty().get().getSortedBy();
    }
    HiveWriterFactory writerFactory = new HiveWriterFactory(
            fileWriterFactories, handle.getSchemaName(), handle.getTableName(), isCreateTable,
            handle.getTransaction(), handle.getInputColumns(), handle.getTableStorageFormat(),
            handle.getPartitionStorageFormat(), additionalTableParameters, bucketCount, sortedBy,
            handle.getLocationHandle(), locationService, session.getQueryId(),
            new HivePageSinkMetadataProvider(
                    handle.getPageSinkMetadata(),
                    new HiveMetastoreClosure(memoizeMetastore(
                            metastoreFactory.createMetastore(Optional.of(session.getIdentity())),
                            perTransactionMetastoreCacheMaximumSize))),
            typeManager, hdfsEnvironment, pageSorter, writerSortBufferSize, maxOpenSortFiles,
            parquetTimeZone, session, nodeManager, eventClient, hiveSessionProperties, hiveWriterStats);
    return new HivePageSink(
            writerFactory, handle.getInputColumns(), handle.isTransactional(), handle.getBucketProperty(),
            pageIndexerFactory, hdfsEnvironment, maxOpenPartitions, writeVerificationExecutor,
            partitionUpdateCodec, session);
}
Also used: HivePageSinkMetadataProvider (io.trino.plugin.hive.metastore.HivePageSinkMetadataProvider), SortingColumn (io.trino.plugin.hive.metastore.SortingColumn), OptionalInt (java.util.OptionalInt)
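
The imperative unpacking of the bucket property at the top of the method can equivalently be written with Optional mapping. This is a stylistic sketch (handle is the method's HiveWritableTableHandle parameter), not the project's code:

// Equivalent extraction via Optional mapping
OptionalInt bucketCount = handle.getBucketProperty()
        .map(property -> OptionalInt.of(property.getBucketCount()))
        .orElse(OptionalInt.empty());
List<SortingColumn> sortedBy = handle.getBucketProperty()
        .map(HiveBucketProperty::getSortedBy)
        .orElse(ImmutableList.of());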

Example 10 with SortingColumn

Use of io.trino.plugin.hive.metastore.SortingColumn in project trino by trinodb.

In class HiveSplitManager, the method getPartitionMetadata:

private Iterable<HivePartitionMetadata> getPartitionMetadata(ConnectorSession session, SemiTransactionalHiveMetastore metastore, Table table, SchemaTableName tableName, List<HivePartition> hivePartitions, Optional<HiveBucketProperty> bucketProperty) {
    if (hivePartitions.isEmpty()) {
        return ImmutableList.of();
    }
    if (hivePartitions.size() == 1) {
        HivePartition firstPartition = getOnlyElement(hivePartitions);
        if (firstPartition.getPartitionId().equals(UNPARTITIONED_ID)) {
            return ImmutableList.of(new HivePartitionMetadata(firstPartition, Optional.empty(), TableToPartitionMapping.empty()));
        }
    }
    Optional<HiveStorageFormat> storageFormat = getHiveStorageFormat(table.getStorage().getStorageFormat());
    Iterable<List<HivePartition>> partitionNameBatches = partitionExponentially(hivePartitions, minPartitionBatchSize, maxPartitionBatchSize);
    Iterable<List<HivePartitionMetadata>> partitionBatches = transform(partitionNameBatches, partitionBatch -> {
        Map<String, Optional<Partition>> batch = metastore.getPartitionsByNames(
                tableName.getSchemaName(), tableName.getTableName(),
                Lists.transform(partitionBatch, HivePartition::getPartitionId));
        ImmutableMap.Builder<String, Partition> partitionBuilder = ImmutableMap.builder();
        for (Map.Entry<String, Optional<Partition>> entry : batch.entrySet()) {
            if (entry.getValue().isEmpty()) {
                throw new TrinoException(HIVE_PARTITION_DROPPED_DURING_QUERY, "Partition no longer exists: " + entry.getKey());
            }
            partitionBuilder.put(entry.getKey(), entry.getValue().get());
        }
        Map<String, Partition> partitions = partitionBuilder.buildOrThrow();
        if (partitionBatch.size() != partitions.size()) {
            throw new TrinoException(GENERIC_INTERNAL_ERROR, format("Expected %s partitions but found %s", partitionBatch.size(), partitions.size()));
        }
        ImmutableList.Builder<HivePartitionMetadata> results = ImmutableList.builder();
        for (HivePartition hivePartition : partitionBatch) {
            Partition partition = partitions.get(hivePartition.getPartitionId());
            if (partition == null) {
                throw new TrinoException(GENERIC_INTERNAL_ERROR, "Partition not loaded: " + hivePartition);
            }
            String partName = makePartitionName(table, partition);
            // verify partition is online
            verifyOnline(tableName, Optional.of(partName), getProtectMode(partition), partition.getParameters());
            // verify partition is not marked as non-readable
            String partitionNotReadable = partition.getParameters().get(OBJECT_NOT_READABLE);
            if (!isNullOrEmpty(partitionNotReadable)) {
                throw new HiveNotReadableException(tableName, Optional.of(partName), partitionNotReadable);
            }
            // Verify that the partition schema matches the table schema.
            // Either adding or dropping columns from the end of the table
            // without modifying existing partitions is allowed, but every
            // column that exists in both the table and partition must have
            // the same type.
            List<Column> tableColumns = table.getDataColumns();
            List<Column> partitionColumns = partition.getColumns();
            if ((tableColumns == null) || (partitionColumns == null)) {
                throw new TrinoException(HIVE_INVALID_METADATA, format("Table '%s' or partition '%s' has null columns", tableName, partName));
            }
            TableToPartitionMapping tableToPartitionMapping = getTableToPartitionMapping(session, storageFormat, tableName, partName, tableColumns, partitionColumns);
            if (bucketProperty.isPresent()) {
                Optional<HiveBucketProperty> partitionBucketProperty = partition.getStorage().getBucketProperty();
                if (partitionBucketProperty.isEmpty()) {
                    throw new TrinoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("Hive table (%s) is bucketed but partition (%s) is not bucketed", hivePartition.getTableName(), hivePartition.getPartitionId()));
                }
                int tableBucketCount = bucketProperty.get().getBucketCount();
                int partitionBucketCount = partitionBucketProperty.get().getBucketCount();
                List<String> tableBucketColumns = bucketProperty.get().getBucketedBy();
                List<String> partitionBucketColumns = partitionBucketProperty.get().getBucketedBy();
                if (!tableBucketColumns.equals(partitionBucketColumns) || !isBucketCountCompatible(tableBucketCount, partitionBucketCount)) {
                    throw new TrinoException(HIVE_PARTITION_SCHEMA_MISMATCH, format(
                            "Hive table (%s) bucketing (columns=%s, buckets=%s) is not compatible with partition (%s) bucketing (columns=%s, buckets=%s)",
                            hivePartition.getTableName(), tableBucketColumns, tableBucketCount,
                            hivePartition.getPartitionId(), partitionBucketColumns, partitionBucketCount));
                }
                if (isPropagateTableScanSortingProperties(session)) {
                    List<SortingColumn> tableSortedColumns = bucketProperty.get().getSortedBy();
                    List<SortingColumn> partitionSortedColumns = partitionBucketProperty.get().getSortedBy();
                    if (!isSortingCompatible(tableSortedColumns, partitionSortedColumns)) {
                        throw new TrinoException(HIVE_PARTITION_SCHEMA_MISMATCH, format(
                                "Hive table (%s) sorting by %s is not compatible with partition (%s) sorting by %s. This restriction can be avoided by disabling propagate_table_scan_sorting_properties.",
                                hivePartition.getTableName(),
                                tableSortedColumns.stream().map(HiveUtil::sortingColumnToString).collect(toImmutableList()),
                                hivePartition.getPartitionId(),
                                partitionSortedColumns.stream().map(HiveUtil::sortingColumnToString).collect(toImmutableList())));
                    }
                }
            }
            results.add(new HivePartitionMetadata(hivePartition, Optional.of(partition), tableToPartitionMapping));
        }
        return results.build();
    });
    return concat(partitionBatches);
}
Also used: ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList), ImmutableList (com.google.common.collect.ImmutableList), HiveStorageFormat.getHiveStorageFormat (io.trino.plugin.hive.HiveStorageFormat.getHiveStorageFormat), Column (io.trino.plugin.hive.metastore.Column), SortingColumn (io.trino.plugin.hive.metastore.SortingColumn), List (java.util.List), Partition (io.trino.plugin.hive.metastore.Partition), Optional (java.util.Optional), ImmutableMap (com.google.common.collect.ImmutableMap), TrinoException (io.trino.spi.TrinoException), Map (java.util.Map)
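
The isBucketCountCompatible check used above is why a table and its partitions may disagree on bucket count at all: counts whose ratio is a power of two can still be read together. A sketch of that rule, stated here as an assumption rather than copied from the project:

// Hypothetical re-statement of the compatibility rule: counts are compatible
// when the larger count is a power-of-two multiple of the smaller (assumed)
static boolean isBucketCountCompatible(int tableBucketCount, int partitionBucketCount) {
    int larger = Math.max(tableBucketCount, partitionBucketCount);
    int smaller = Math.min(tableBucketCount, partitionBucketCount);
    return larger % smaller == 0 && Integer.bitCount(larger / smaller) == 1;
}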

Aggregations

SortingColumn (io.trino.plugin.hive.metastore.SortingColumn) 13
TrinoException (io.trino.spi.TrinoException) 7
Column (io.trino.plugin.hive.metastore.Column) 5
ImmutableList (com.google.common.collect.ImmutableList) 4
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList) 4
ConnectorSession (io.trino.spi.connector.ConnectorSession) 4
ImmutableMap (com.google.common.collect.ImmutableMap) 3
HdfsContext (io.trino.plugin.hive.HdfsEnvironment.HdfsContext) 3
Path (org.apache.hadoop.fs.Path) 3
Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument) 2
ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap) 2
ImmutableSet (com.google.common.collect.ImmutableSet) 2
Slice (io.airlift.slice.Slice) 2
HiveBucketProperty (io.trino.plugin.hive.HiveBucketProperty) 2
HiveType.toHiveType (io.trino.plugin.hive.HiveType.toHiveType) 2
HivePageSinkMetadataProvider (io.trino.plugin.hive.metastore.HivePageSinkMetadataProvider) 2
Partition (io.trino.plugin.hive.metastore.Partition) 2
Block (io.trino.spi.block.Block) 2
ColumnHandle (io.trino.spi.connector.ColumnHandle) 2
ColumnMetadata (io.trino.spi.connector.ColumnMetadata) 2