Use of io.trino.plugin.hive.metastore.SortingColumn in project trino by trinodb.
The class HiveBucketing, method getHiveBucketHandle:
public static Optional<HiveBucketHandle> getHiveBucketHandle(ConnectorSession session, Table table, TypeManager typeManager)
{
    // Tables written by Spark use a bucketing scheme that is not Hive-compatible, so expose no bucket handle
    if (table.getParameters().containsKey(SPARK_TABLE_PROVIDER_KEY)) {
        return Optional.empty();
    }
    Optional<HiveBucketProperty> hiveBucketProperty = table.getStorage().getBucketProperty();
    if (hiveBucketProperty.isEmpty()) {
        return Optional.empty();
    }
    if (!isSupportedBucketing(table)) {
        return Optional.empty();
    }
    HiveTimestampPrecision timestampPrecision = getTimestampPrecision(session);
    // index the table's regular columns by name so the bucketed-by names can be resolved
    Map<String, HiveColumnHandle> map = getRegularColumnHandles(table, typeManager, timestampPrecision).stream()
            .collect(Collectors.toMap(HiveColumnHandle::getName, identity()));
    ImmutableList.Builder<HiveColumnHandle> bucketColumns = ImmutableList.builder();
    for (String bucketColumnName : hiveBucketProperty.get().getBucketedBy()) {
        HiveColumnHandle bucketColumnHandle = map.get(bucketColumnName);
        if (bucketColumnHandle == null) {
            throw new TrinoException(HIVE_INVALID_METADATA, format("Table '%s.%s' is bucketed on non-existent column '%s'", table.getDatabaseName(), table.getTableName(), bucketColumnName));
        }
        bucketColumns.add(bucketColumnHandle);
    }
    BucketingVersion bucketingVersion = hiveBucketProperty.get().getBucketingVersion();
    int bucketCount = hiveBucketProperty.get().getBucketCount();
    List<SortingColumn> sortedBy = hiveBucketProperty.get().getSortedBy();
    // the bucket count is passed twice: as the table's bucket count and as the bucket count used for reads
    return Optional.of(new HiveBucketHandle(bucketColumns.build(), bucketingVersion, bucketCount, bucketCount, sortedBy));
}
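The essential move here is indexing the available columns by name and failing loudly when a bucketed-by name does not resolve. A minimal standalone sketch of that lookup-and-validate pattern, using plain strings in place of the Trino handle types (class and method names here are illustrative):

import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

public class BucketColumnResolution
{
    // Resolve each bucketed-by name against the table's regular columns,
    // throwing on any name that does not exist.
    static List<String> resolveBucketColumns(List<String> regularColumns, List<String> bucketedBy)
    {
        Map<String, String> byName = regularColumns.stream()
                .collect(Collectors.toMap(Function.identity(), Function.identity()));
        return bucketedBy.stream()
                .map(name -> {
                    String column = byName.get(name);
                    if (column == null) {
                        throw new IllegalStateException("Table is bucketed on non-existent column '" + name + "'");
                    }
                    return column;
                })
                .collect(Collectors.toList());
    }

    public static void main(String[] args)
    {
        System.out.println(resolveBucketColumns(List.of("id", "name", "ts"), List.of("id"))); // [id]
    }
}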
Use of io.trino.plugin.hive.metastore.SortingColumn in project trino by trinodb.
The class ProtoUtils, method fromProto:
static Optional<HiveBucketProperty> fromProto(Map<String, String> tableParameters, alluxio.grpc.table.layout.hive.HiveBucketProperty property)
{
    // must return empty if buckets <= 0
    if (!property.hasBucketCount() || property.getBucketCount() <= 0) {
        return Optional.empty();
    }
    List<SortingColumn> sortedBy = property.getSortedByList().stream()
            .map(ProtoUtils::fromProto)
            .collect(toImmutableList());
    HiveBucketing.BucketingVersion bucketingVersion = HiveBucketing.getBucketingVersion(tableParameters);
    return Optional.of(new HiveBucketProperty(property.getBucketedByList(), bucketingVersion, (int) property.getBucketCount(), sortedBy));
}
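The guard generalizes to any translation layer: normalize "missing or non-positive bucket count" to "not bucketed" instead of constructing a property with a nonsense count. A standalone sketch of that shape; the record types are illustrative stand-ins, not Alluxio's generated API:

import java.util.List;
import java.util.Optional;

public class BucketPropertyTranslation
{
    // Illustrative stand-ins for the proto message and the domain type.
    record ProtoBucketProperty(long bucketCount, List<String> bucketedBy) {}
    record BucketProperty(int bucketCount, List<String> bucketedBy) {}

    static Optional<BucketProperty> fromProto(ProtoBucketProperty property)
    {
        // Hive treats a bucket count of zero or less as "not bucketed"
        if (property.bucketCount() <= 0) {
            return Optional.empty();
        }
        return Optional.of(new BucketProperty((int) property.bucketCount(), property.bucketedBy()));
    }

    public static void main(String[] args)
    {
        System.out.println(fromProto(new ProtoBucketProperty(0, List.of())));     // Optional.empty
        System.out.println(fromProto(new ProtoBucketProperty(8, List.of("id")))); // present
    }
}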
Use of io.trino.plugin.hive.metastore.SortingColumn in project trino by trinodb.
The class TestProtoUtils, method testSortingColumn:
@Test
public void testSortingColumn()
{
    alluxio.grpc.table.layout.hive.SortingColumn.Builder column = TestingAlluxioMetastoreObjects.getTestingSortingColumn();
    SortingColumn c = ProtoUtils.fromProto(column.build());
    assertEquals(column.getColumnName(), c.getColumnName());
    assertEquals(SortingColumn.Order.valueOf(column.getOrder().toString()), c.getOrder());
}
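The second assertion relies on the proto enum and SortingColumn.Order sharing constant names, so valueOf on the string form doubles as a compatibility check: it throws if the names ever diverge. A standalone sketch of that name-based enum mapping (both enums here are illustrative):

public class EnumNameMapping
{
    enum ProtoOrder { ASCENDING, DESCENDING }
    enum Order { ASCENDING, DESCENDING }

    // Map by constant name; throws IllegalArgumentException if a name diverges,
    // which is exactly the failure a test like testSortingColumn would surface.
    static Order fromProto(ProtoOrder order)
    {
        return Order.valueOf(order.name());
    }

    public static void main(String[] args)
    {
        System.out.println(fromProto(ProtoOrder.DESCENDING)); // DESCENDING
    }
}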
Use of io.trino.plugin.hive.metastore.SortingColumn in project trino by trinodb.
The class HivePageSinkProvider, method createPageSink:
private ConnectorPageSink createPageSink(HiveWritableTableHandle handle, boolean isCreateTable, ConnectorSession session, Map<String, String> additionalTableParameters)
{
    OptionalInt bucketCount = OptionalInt.empty();
    List<SortingColumn> sortedBy = ImmutableList.of();
    if (handle.getBucketProperty().isPresent()) {
        bucketCount = OptionalInt.of(handle.getBucketProperty().get().getBucketCount());
        sortedBy = handle.getBucketProperty().get().getSortedBy();
    }
    HiveWriterFactory writerFactory = new HiveWriterFactory(
            fileWriterFactories, handle.getSchemaName(), handle.getTableName(), isCreateTable,
            handle.getTransaction(), handle.getInputColumns(), handle.getTableStorageFormat(),
            handle.getPartitionStorageFormat(), additionalTableParameters, bucketCount, sortedBy,
            handle.getLocationHandle(), locationService, session.getQueryId(),
            new HivePageSinkMetadataProvider(handle.getPageSinkMetadata(), new HiveMetastoreClosure(
                    memoizeMetastore(metastoreFactory.createMetastore(Optional.of(session.getIdentity())), perTransactionMetastoreCacheMaximumSize))),
            typeManager, hdfsEnvironment, pageSorter, writerSortBufferSize, maxOpenSortFiles,
            parquetTimeZone, session, nodeManager, eventClient, hiveSessionProperties, hiveWriterStats);
    return new HivePageSink(
            writerFactory, handle.getInputColumns(), handle.isTransactional(), handle.getBucketProperty(),
            pageIndexerFactory, hdfsEnvironment, maxOpenPartitions, writeVerificationExecutor,
            partitionUpdateCodec, session);
}
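The bucketing inputs to the writer factory default to "unbucketed, unsorted" and are overwritten only when the handle carries a bucket property; OptionalInt cannot be mapped directly from an Optional, hence the explicit isPresent branch. The same shape in isolation (the BucketProperty record is an illustrative stand-in):

import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;

public class WriterBucketingInputs
{
    record BucketProperty(int bucketCount, List<String> sortedBy) {}

    public static void main(String[] args)
    {
        Optional<BucketProperty> bucketProperty = Optional.of(new BucketProperty(16, List.of("ts")));

        // Defaults describe an unbucketed, unsorted table...
        OptionalInt bucketCount = OptionalInt.empty();
        List<String> sortedBy = List.of();
        // ...and are replaced wholesale when a bucket property exists.
        if (bucketProperty.isPresent()) {
            bucketCount = OptionalInt.of(bucketProperty.get().bucketCount());
            sortedBy = bucketProperty.get().sortedBy();
        }
        System.out.println(bucketCount + " " + sortedBy); // OptionalInt[16] [ts]
    }
}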
Use of io.trino.plugin.hive.metastore.SortingColumn in project trino by trinodb.
The class HiveSplitManager, method getPartitionMetadata:
private Iterable<HivePartitionMetadata> getPartitionMetadata(ConnectorSession session, SemiTransactionalHiveMetastore metastore, Table table, SchemaTableName tableName, List<HivePartition> hivePartitions, Optional<HiveBucketProperty> bucketProperty)
{
    if (hivePartitions.isEmpty()) {
        return ImmutableList.of();
    }
    if (hivePartitions.size() == 1) {
        HivePartition firstPartition = getOnlyElement(hivePartitions);
        if (firstPartition.getPartitionId().equals(UNPARTITIONED_ID)) {
            return ImmutableList.of(new HivePartitionMetadata(firstPartition, Optional.empty(), TableToPartitionMapping.empty()));
        }
    }
    Optional<HiveStorageFormat> storageFormat = getHiveStorageFormat(table.getStorage().getStorageFormat());
    Iterable<List<HivePartition>> partitionNameBatches = partitionExponentially(hivePartitions, minPartitionBatchSize, maxPartitionBatchSize);
    Iterable<List<HivePartitionMetadata>> partitionBatches = transform(partitionNameBatches, partitionBatch -> {
        Map<String, Optional<Partition>> batch = metastore.getPartitionsByNames(tableName.getSchemaName(), tableName.getTableName(), Lists.transform(partitionBatch, HivePartition::getPartitionId));
        ImmutableMap.Builder<String, Partition> partitionBuilder = ImmutableMap.builder();
        for (Map.Entry<String, Optional<Partition>> entry : batch.entrySet()) {
            if (entry.getValue().isEmpty()) {
                throw new TrinoException(HIVE_PARTITION_DROPPED_DURING_QUERY, "Partition no longer exists: " + entry.getKey());
            }
            partitionBuilder.put(entry.getKey(), entry.getValue().get());
        }
        Map<String, Partition> partitions = partitionBuilder.buildOrThrow();
        if (partitionBatch.size() != partitions.size()) {
            throw new TrinoException(GENERIC_INTERNAL_ERROR, format("Expected %s partitions but found %s", partitionBatch.size(), partitions.size()));
        }
        ImmutableList.Builder<HivePartitionMetadata> results = ImmutableList.builder();
        for (HivePartition hivePartition : partitionBatch) {
            Partition partition = partitions.get(hivePartition.getPartitionId());
            if (partition == null) {
                throw new TrinoException(GENERIC_INTERNAL_ERROR, "Partition not loaded: " + hivePartition);
            }
            String partName = makePartitionName(table, partition);
            // verify partition is online
            verifyOnline(tableName, Optional.of(partName), getProtectMode(partition), partition.getParameters());
            // verify partition is not marked as non-readable
            String partitionNotReadable = partition.getParameters().get(OBJECT_NOT_READABLE);
            if (!isNullOrEmpty(partitionNotReadable)) {
                throw new HiveNotReadableException(tableName, Optional.of(partName), partitionNotReadable);
            }
            // Verify that the partition schema matches the table schema.
            // Either adding or dropping columns from the end of the table
            // without modifying existing partitions is allowed, but every
            // column that exists in both the table and partition must have
            // the same type.
            List<Column> tableColumns = table.getDataColumns();
            List<Column> partitionColumns = partition.getColumns();
            if ((tableColumns == null) || (partitionColumns == null)) {
                throw new TrinoException(HIVE_INVALID_METADATA, format("Table '%s' or partition '%s' has null columns", tableName, partName));
            }
            TableToPartitionMapping tableToPartitionMapping = getTableToPartitionMapping(session, storageFormat, tableName, partName, tableColumns, partitionColumns);
            if (bucketProperty.isPresent()) {
                Optional<HiveBucketProperty> partitionBucketProperty = partition.getStorage().getBucketProperty();
                if (partitionBucketProperty.isEmpty()) {
                    throw new TrinoException(HIVE_PARTITION_SCHEMA_MISMATCH, format(
                            "Hive table (%s) is bucketed but partition (%s) is not bucketed",
                            hivePartition.getTableName(),
                            hivePartition.getPartitionId()));
                }
                int tableBucketCount = bucketProperty.get().getBucketCount();
                int partitionBucketCount = partitionBucketProperty.get().getBucketCount();
                List<String> tableBucketColumns = bucketProperty.get().getBucketedBy();
                List<String> partitionBucketColumns = partitionBucketProperty.get().getBucketedBy();
                if (!tableBucketColumns.equals(partitionBucketColumns) || !isBucketCountCompatible(tableBucketCount, partitionBucketCount)) {
                    throw new TrinoException(HIVE_PARTITION_SCHEMA_MISMATCH, format(
                            "Hive table (%s) bucketing (columns=%s, buckets=%s) is not compatible with partition (%s) bucketing (columns=%s, buckets=%s)",
                            hivePartition.getTableName(),
                            tableBucketColumns,
                            tableBucketCount,
                            hivePartition.getPartitionId(),
                            partitionBucketColumns,
                            partitionBucketCount));
                }
                if (isPropagateTableScanSortingProperties(session)) {
                    List<SortingColumn> tableSortedColumns = bucketProperty.get().getSortedBy();
                    List<SortingColumn> partitionSortedColumns = partitionBucketProperty.get().getSortedBy();
                    if (!isSortingCompatible(tableSortedColumns, partitionSortedColumns)) {
                        throw new TrinoException(HIVE_PARTITION_SCHEMA_MISMATCH, format(
                                "Hive table (%s) sorting by %s is not compatible with partition (%s) sorting by %s. This restriction can be avoided by disabling propagate_table_scan_sorting_properties.",
                                hivePartition.getTableName(),
                                tableSortedColumns.stream().map(HiveUtil::sortingColumnToString).collect(toImmutableList()),
                                hivePartition.getPartitionId(),
                                partitionSortedColumns.stream().map(HiveUtil::sortingColumnToString).collect(toImmutableList())));
                    }
                }
            }
            results.add(new HivePartitionMetadata(hivePartition, Optional.of(partition), tableToPartitionMapping));
        }
        return results.build();
    });
    return concat(partitionBatches);
}
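Two helpers are referenced but not shown: partitionExponentially, which (by its name and parameters) appears to batch metastore lookups with batch sizes growing from minPartitionBatchSize toward maxPartitionBatchSize, and isBucketCountCompatible. Judging from the mismatch error message, a plausible compatibility rule is that the larger bucket count must be a power-of-two multiple of the smaller one, so buckets of one layout can be merged or split to match the other. The sketch below implements that rule under that assumption; it is not necessarily Trino's exact code:

public class BucketCountCompatibility
{
    // Compatible when one count evenly divides the other and the ratio is a
    // power of two (Integer.bitCount(ratio) == 1).
    static boolean isBucketCountCompatible(int tableBucketCount, int partitionBucketCount)
    {
        if (tableBucketCount <= 0 || partitionBucketCount <= 0) {
            throw new IllegalArgumentException("bucket counts must be positive");
        }
        int larger = Math.max(tableBucketCount, partitionBucketCount);
        int smaller = Math.min(tableBucketCount, partitionBucketCount);
        return larger % smaller == 0 && Integer.bitCount(larger / smaller) == 1;
    }

    public static void main(String[] args)
    {
        System.out.println(isBucketCountCompatible(32, 8));  // true  (ratio 4)
        System.out.println(isBucketCountCompatible(32, 12)); // false (not divisible)
        System.out.println(isBucketCountCompatible(24, 8));  // false (ratio 3, not a power of two)
    }
}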