Use of com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA in project presto by prestodb.
From the class HiveMaterializedViewUtils, method getMaterializedDataPredicates:
public static MaterializedDataPredicates getMaterializedDataPredicates(
        SemiTransactionalHiveMetastore metastore,
        MetastoreContext metastoreContext,
        TypeManager typeManager,
        Table table,
        DateTimeZone timeZone)
{
    List<Column> partitionColumns = table.getPartitionColumns();
    for (Column partitionColumn : partitionColumns) {
        HiveType hiveType = partitionColumn.getType();
        if (!hiveType.isSupportedType()) {
            throw new PrestoException(NOT_SUPPORTED, String.format(
                    "Unsupported Hive type %s found in partition keys of table %s.%s",
                    hiveType, table.getDatabaseName(), table.getTableName()));
        }
    }
    List<HiveColumnHandle> partitionKeyColumnHandles = getPartitionKeyColumnHandles(table);
    Map<String, Type> partitionTypes = partitionKeyColumnHandles.stream()
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> typeManager.getType(column.getTypeSignature())));
    List<String> partitionNames = metastore.getPartitionNames(metastoreContext, table.getDatabaseName(), table.getTableName())
            .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(table.getDatabaseName(), table.getTableName())));
    ImmutableList.Builder<TupleDomain<String>> partitionNamesAndValues = ImmutableList.builder();
    for (String partitionName : partitionNames) {
        ImmutableMap.Builder<String, NullableValue> partitionNameAndValuesMap = ImmutableMap.builder();
        Map<String, String> partitions = toPartitionNamesAndValues(partitionName);
        if (partitionColumns.size() != partitions.size()) {
            throw new PrestoException(HIVE_INVALID_METADATA, String.format(
                    "Expected %d partition key values, but got %d",
                    partitionColumns.size(), partitions.size()));
        }
        partitionTypes.forEach((name, type) -> {
            String value = partitions.get(name);
            if (value == null) {
                throw new PrestoException(HIVE_INVALID_PARTITION_VALUE, String.format(
                        "partition key value cannot be null for field: %s", name));
            }
            partitionNameAndValuesMap.put(name, parsePartitionValue(name, value, type, timeZone));
        });
        TupleDomain<String> tupleDomain = TupleDomain.fromFixedValues(partitionNameAndValuesMap.build());
        partitionNamesAndValues.add(tupleDomain);
    }
    return new MaterializedDataPredicates(
            partitionNamesAndValues.build(),
            partitionColumns.stream().map(Column::getName).collect(toImmutableList()));
}
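
The HIVE_INVALID_METADATA check above fires when the number of key=value pairs parsed out of a metastore partition name does not match the table's declared partition columns. A minimal, self-contained sketch of that decomposition follows; parsePartitionName is a hypothetical stand-in for toPartitionNamesAndValues (not shown in the snippet), and it ignores the character escaping real Hive partition names use:

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class PartitionNameSketch
{
    // Hypothetical stand-in for toPartitionNamesAndValues: splits a Hive
    // partition name such as "ds=2021-07-01/country=US" into an ordered map.
    // Real partition names escape special characters; this sketch does not.
    static Map<String, String> parsePartitionName(String partitionName)
    {
        Map<String, String> values = new LinkedHashMap<>();
        for (String piece : partitionName.split("/")) {
            String[] parts = piece.split("=", 2);
            values.put(parts[0], parts.length == 2 ? parts[1] : null);
        }
        return values;
    }

    public static void main(String[] args)
    {
        List<String> partitionColumns = List.of("ds", "country");
        Map<String, String> parsed = parsePartitionName("ds=2021-07-01/country=US");
        // Mirrors the HIVE_INVALID_METADATA guard: declared partition columns
        // and parsed key/value pairs must agree in number.
        if (partitionColumns.size() != parsed.size()) {
            throw new IllegalStateException(String.format(
                    "Expected %d partition key values, but got %d",
                    partitionColumns.size(), parsed.size()));
        }
        System.out.println(parsed); // {ds=2021-07-01, country=US}
    }
}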
Use of com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA in project presto by prestodb.
From the class HiveMetadata, method columnMetadataGetter:
private static Function<HiveColumnHandle, ColumnMetadata> columnMetadataGetter(Table table, TypeManager typeManager)
{
    ImmutableList.Builder<String> columnNames = ImmutableList.builder();
    table.getPartitionColumns().stream().map(Column::getName).forEach(columnNames::add);
    table.getDataColumns().stream().map(Column::getName).forEach(columnNames::add);
    List<String> allColumnNames = columnNames.build();
    if (allColumnNames.size() > Sets.newHashSet(allColumnNames).size()) {
        throw new PrestoException(HIVE_INVALID_METADATA, format(
                "Hive metadata for table %s is invalid: Table descriptor contains duplicate columns",
                table.getTableName()));
    }
    List<Column> tableColumns = table.getDataColumns();
    ImmutableMap.Builder<String, Optional<String>> builder = ImmutableMap.builder();
    for (Column field : concat(tableColumns, table.getPartitionColumns())) {
        if ((field.getComment() != null) && !Optional.of("from deserializer").equals(field.getComment())) {
            builder.put(field.getName(), field.getComment());
        }
        else {
            builder.put(field.getName(), Optional.empty());
        }
    }
    // add hidden columns
    builder.put(PATH_COLUMN_NAME, Optional.empty());
    if (table.getStorage().getBucketProperty().isPresent()) {
        builder.put(BUCKET_COLUMN_NAME, Optional.empty());
    }
    Map<String, Optional<String>> columnComment = builder.build();
    return handle -> new ColumnMetadata(
            handle.getName(),
            typeManager.getType(handle.getTypeSignature()),
            columnComment.get(handle.getName()).orElse(null),
            columnExtraInfo(handle.isPartitionKey()),
            handle.isHidden());
}
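
The duplicate-column guard compares the size of the combined name list against the size of its de-duplicated set; any shrinkage proves at least one name occurs twice, which is what triggers HIVE_INVALID_METADATA here. A minimal sketch of the same idea using only the JDK (the Presto code uses Guava's Sets.newHashSet, but a plain HashSet behaves identically for this purpose):

import java.util.HashSet;
import java.util.List;

public class DuplicateColumnCheck
{
    public static void main(String[] args)
    {
        // Partition columns first, then data columns, as in columnMetadataGetter.
        List<String> allColumnNames = List.of("ds", "user_id", "ds");
        // A set drops duplicates, so a size mismatch proves a repeated name.
        if (allColumnNames.size() > new HashSet<>(allColumnNames).size()) {
            throw new IllegalStateException("Table descriptor contains duplicate columns");
        }
    }
}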
Use of com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA in project presto by prestodb.
From the class HiveMetadata, method columnMetadataGetter (variant taking a ColumnConverter):
@VisibleForTesting
static Function<HiveColumnHandle, ColumnMetadata> columnMetadataGetter(Table table, TypeManager typeManager, ColumnConverter columnConverter)
{
    ImmutableList.Builder<String> columnNames = ImmutableList.builder();
    table.getPartitionColumns().stream().map(Column::getName).forEach(columnNames::add);
    table.getDataColumns().stream().map(Column::getName).forEach(columnNames::add);
    List<String> allColumnNames = columnNames.build();
    if (allColumnNames.size() > Sets.newHashSet(allColumnNames).size()) {
        throw new PrestoException(HIVE_INVALID_METADATA, format(
                "Hive metadata for table %s is invalid: Table descriptor contains duplicate columns",
                table.getTableName()));
    }
    List<Column> tableColumns = table.getDataColumns();
    ImmutableMap.Builder<String, Optional<String>> builder = ImmutableMap.builder();
    ImmutableMap.Builder<String, Optional<String>> typeMetadataBuilder = ImmutableMap.builder();
    for (Column field : concat(tableColumns, table.getPartitionColumns())) {
        if (field.getComment().isPresent() && !field.getComment().get().equals("from deserializer")) {
            builder.put(field.getName(), field.getComment());
        }
        else {
            builder.put(field.getName(), Optional.empty());
        }
        typeMetadataBuilder.put(field.getName(), field.getTypeMetadata());
    }
    // add hidden columns
    builder.put(PATH_COLUMN_NAME, Optional.empty());
    if (table.getStorage().getBucketProperty().isPresent()) {
        builder.put(BUCKET_COLUMN_NAME, Optional.empty());
    }
    builder.put(FILE_SIZE_COLUMN_NAME, Optional.empty());
    builder.put(FILE_MODIFIED_TIME_COLUMN_NAME, Optional.empty());
    Map<String, Optional<String>> columnComment = builder.build();
    Map<String, Optional<String>> typeMetadata = typeMetadataBuilder.build();
    return handle -> new ColumnMetadata(
            handle.getName(),
            typeManager.getType(columnConverter.getTypeSignature(handle.getHiveType(), typeMetadata.getOrDefault(handle.getName(), Optional.empty()))),
            columnComment.get(handle.getName()).orElse(null),
            columnExtraInfo(handle.isPartitionKey()),
            handle.isHidden());
}
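
Both variants return a Function rather than eagerly building ColumnMetadata: the comment and type-metadata lookup maps are computed once per table, and the returned lambda closes over them for every column handle. A small pattern sketch of that build-once, apply-many shape; ColumnMeta and metadataGetter here are hypothetical names for illustration, not Presto APIs:

import java.util.List;
import java.util.Map;
import java.util.function.Function;

public class GetterPatternSketch
{
    record ColumnMeta(String name, String comment) {}

    // Mirrors the columnMetadataGetter shape: precompute lookup maps once,
    // then return a function that closes over them.
    static Function<String, ColumnMeta> metadataGetter(Map<String, String> comments)
    {
        return name -> new ColumnMeta(name, comments.get(name));
    }

    public static void main(String[] args)
    {
        Function<String, ColumnMeta> getter = metadataGetter(Map.of("user_id", "primary key"));
        // The one-time setup cost is amortized across many per-column lookups.
        List.of("user_id", "ds").forEach(name -> System.out.println(getter.apply(name)));
    }
}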
Use of com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA in project presto by prestodb.
From the class HiveSplitManager, method getPartitionMetadata:
private Iterable<HivePartitionMetadata> getPartitionMetadata(
        SemiTransactionalHiveMetastore metastore,
        Table table,
        SchemaTableName tableName,
        List<HivePartition> hivePartitions,
        Optional<HiveBucketHandle> hiveBucketHandle,
        ConnectorSession session,
        WarningCollector warningCollector,
        Optional<Set<HiveColumnHandle>> requestedColumns,
        Map<String, HiveColumnHandle> predicateColumns,
        Optional<Map<Subfield, Domain>> domains)
{
    if (hivePartitions.isEmpty()) {
        return ImmutableList.of();
    }
    Optional<Set<HiveColumnHandle>> allRequestedColumns = mergeRequestedAndPredicateColumns(requestedColumns, ImmutableSet.copyOf(predicateColumns.values()));
    if (hivePartitions.size() == 1) {
        HivePartition firstPartition = getOnlyElement(hivePartitions);
        if (firstPartition.getPartitionId().equals(UNPARTITIONED_ID)) {
            return ImmutableList.of(new HivePartitionMetadata(
                    firstPartition,
                    Optional.empty(),
                    TableToPartitionMapping.empty(),
                    encryptionInformationProvider.getReadEncryptionInformation(session, table, allRequestedColumns),
                    ImmutableSet.of()));
        }
    }
    StorageFormat storageFormat = table.getStorage().getStorageFormat();
    Optional<HiveStorageFormat> hiveStorageFormat = getHiveStorageFormat(storageFormat);
    Optional<HiveStorageFormat> resolvedHiveStorageFormat;
    if (isUseParquetColumnNames(session)) {
        // Use Hive Storage Format as Parquet if table is of HUDI format
        resolvedHiveStorageFormat = (!hiveStorageFormat.isPresent() && isHudiFormat(storageFormat)) ? Optional.of(PARQUET) : hiveStorageFormat;
    }
    else {
        resolvedHiveStorageFormat = hiveStorageFormat;
    }
    Iterable<List<HivePartition>> partitionNameBatches = partitionExponentially(hivePartitions, minPartitionBatchSize, maxPartitionBatchSize);
    Iterable<List<HivePartitionMetadata>> partitionBatches = transform(partitionNameBatches, partitionBatch -> {
        Map<String, PartitionSplitInfo> partitionSplitInfo = getPartitionSplitInfo(session, metastore, tableName, partitionBatch, predicateColumns, domains);
        if (partitionBatch.size() != partitionSplitInfo.size()) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Expected %s partitions but found %s", partitionBatch.size(), partitionSplitInfo.size()));
        }
        Map<String, Partition> partitions = partitionSplitInfo.entrySet().stream()
                .collect(toImmutableMap(Entry::getKey, entry -> entry.getValue().getPartition()));
        Optional<Map<String, EncryptionInformation>> encryptionInformationForPartitions = encryptionInformationProvider.getReadEncryptionInformation(session, table, allRequestedColumns, partitions);
        ImmutableList.Builder<HivePartitionMetadata> results = ImmutableList.builder();
        Map<String, Set<String>> partitionsNotReadable = new HashMap<>();
        int unreadablePartitionsSkipped = 0;
        for (HivePartition hivePartition : partitionBatch) {
            Partition partition = partitions.get(hivePartition.getPartitionId());
            if (partitionSplitInfo.get(hivePartition.getPartitionId()).isPruned()) {
                continue;
            }
            if (partition == null) {
                throw new PrestoException(GENERIC_INTERNAL_ERROR, "Partition not loaded: " + hivePartition);
            }
            String partitionName = makePartName(table.getPartitionColumns(), partition.getValues());
            Optional<EncryptionInformation> encryptionInformation = encryptionInformationForPartitions.map(metadata -> metadata.get(hivePartition.getPartitionId()));
            if (!isOfflineDataDebugModeEnabled(session)) {
                // verify partition is online
                verifyOnline(tableName, Optional.of(partitionName), getProtectMode(partition), partition.getParameters());
                // verify partition is not marked as non-readable
                String reason = partition.getParameters().get(OBJECT_NOT_READABLE);
                if (!isNullOrEmpty(reason)) {
                    if (!shouldIgnoreUnreadablePartition(session) || !partition.isEligibleToIgnore()) {
                        throw new HiveNotReadableException(tableName, Optional.of(partitionName), reason);
                    }
                    unreadablePartitionsSkipped++;
                    if (partitionsNotReadable.size() <= 3) {
                        partitionsNotReadable.putIfAbsent(reason, new HashSet<>(ImmutableSet.of(partitionName)));
                        if (partitionsNotReadable.get(reason).size() <= 3) {
                            partitionsNotReadable.get(reason).add(partitionName);
                        }
                    }
                    continue;
                }
            }
            // Verify that the partition schema matches the table schema.
            // Either adding or dropping columns from the end of the table
            // without modifying existing partitions is allowed, but every
            // column that exists in both the table and partition must have
            // the same type.
            List<Column> tableColumns = table.getDataColumns();
            List<Column> partitionColumns = partition.getColumns();
            if ((tableColumns == null) || (partitionColumns == null)) {
                throw new PrestoException(HIVE_INVALID_METADATA, format("Table '%s' or partition '%s' has null columns", tableName, partitionName));
            }
            TableToPartitionMapping tableToPartitionMapping = getTableToPartitionMapping(session, resolvedHiveStorageFormat, tableName, partitionName, tableColumns, partitionColumns);
            if (hiveBucketHandle.isPresent() && !hiveBucketHandle.get().isVirtuallyBucketed()) {
                Optional<HiveBucketProperty> partitionBucketProperty = partition.getStorage().getBucketProperty();
                if (!partitionBucketProperty.isPresent()) {
                    throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format(
                            "Hive table (%s) is bucketed but partition (%s) is not bucketed",
                            hivePartition.getTableName(),
                            hivePartition.getPartitionId()));
                }
                int tableBucketCount = hiveBucketHandle.get().getTableBucketCount();
                int partitionBucketCount = partitionBucketProperty.get().getBucketCount();
                List<String> tableBucketColumns = hiveBucketHandle.get().getColumns().stream()
                        .map(HiveColumnHandle::getName)
                        .collect(toImmutableList());
                List<String> partitionBucketColumns = partitionBucketProperty.get().getBucketedBy();
                if (!tableBucketColumns.equals(partitionBucketColumns) || !isBucketCountCompatible(tableBucketCount, partitionBucketCount)) {
                    throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format(
                            "Hive table (%s) bucketing (columns=%s, buckets=%s) is not compatible with partition (%s) bucketing (columns=%s, buckets=%s)",
                            hivePartition.getTableName(),
                            tableBucketColumns,
                            tableBucketCount,
                            hivePartition.getPartitionId(),
                            partitionBucketColumns,
                            partitionBucketCount));
                }
            }
            results.add(new HivePartitionMetadata(
                    hivePartition,
                    Optional.of(partition),
                    tableToPartitionMapping,
                    encryptionInformation,
                    partitionSplitInfo.get(hivePartition.getPartitionId()).getRedundantColumnDomains()));
        }
        if (unreadablePartitionsSkipped > 0) {
            StringBuilder warningMessage = new StringBuilder(format("Table '%s' has %s out of %s partitions unreadable: ", tableName, unreadablePartitionsSkipped, partitionBatch.size()));
            for (Entry<String, Set<String>> entry : partitionsNotReadable.entrySet()) {
                warningMessage.append(String.join(", ", entry.getValue())).append("... are due to ").append(entry.getKey()).append(". ");
            }
            warningCollector.add(new PrestoWarning(PARTITION_NOT_READABLE, warningMessage.toString()));
        }
        return results.build();
    });
    return concat(partitionBatches);
}
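
partitionExponentially (not shown in this snippet) yields batches whose size grows from minPartitionBatchSize toward maxPartitionBatchSize, so queries touching few partitions pay for a small first metastore round trip while large scans amortize the per-batch overhead. A minimal sketch of that batching strategy; the doubling growth factor is an assumption about the actual policy in HiveSplitManager:

import java.util.ArrayList;
import java.util.List;

public class ExponentialBatchSketch
{
    // Sketch: the first batch holds minBatchSize elements and each
    // subsequent batch doubles in size, capped at maxBatchSize.
    // The doubling factor is an assumption, not taken from the source.
    static <T> List<List<T>> partitionExponentially(List<T> items, int minBatchSize, int maxBatchSize)
    {
        List<List<T>> batches = new ArrayList<>();
        int batchSize = minBatchSize;
        for (int start = 0; start < items.size(); ) {
            int end = Math.min(items.size(), start + batchSize);
            batches.add(items.subList(start, end));
            start = end;
            batchSize = Math.min(maxBatchSize, batchSize * 2);
        }
        return batches;
    }

    public static void main(String[] args)
    {
        List<Integer> partitions = new ArrayList<>();
        for (int i = 0; i < 20; i++) {
            partitions.add(i);
        }
        // With min=2 and max=8, batch sizes come out as 2, 4, 8, 6.
        partitionExponentially(partitions, 2, 8).forEach(batch -> System.out.println(batch.size()));
    }
}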