Use of com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore in project presto by prestodb.
Class HiveSplitManager, method getPartitionMetadata.
/**
 * Builds the {@link HivePartitionMetadata} entries for the given partitions of a table.
 *
 * <p>An empty partition list yields an empty result, and a single partition whose id equals
 * {@code UNPARTITIONED_ID} is answered directly without consulting {@code getPartitionSplitInfo}.
 * Otherwise the partitions are split into batches; for each batch the partition information is
 * obtained through {@code getPartitionSplitInfo}, pruned partitions are skipped, and every remaining
 * partition is validated (online / readable unless offline-data debug mode is enabled, column
 * mapping, bucketing compatibility) before its metadata is emitted.
 *
 * <p>NOTE(review): {@code transform} and {@code concat} are unqualified static imports here; they are
 * presumably Guava's {@code Iterables.transform} / {@code Iterables.concat}, in which case a batch is
 * only resolved when the returned iterable is consumed - confirm against the file's imports.
 *
 * @param metastore metastore handed to {@code getPartitionSplitInfo}
 * @param table table whose partitions are being resolved
 * @param tableName passed to {@code getPartitionSplitInfo} and used in error and warning messages
 * @param hivePartitions partitions to resolve
 * @param hiveBucketHandle bucketing of the table, if any; when present and not virtually bucketed,
 *        each partition's bucketing must be compatible with it
 * @param session session from which the feature toggles used below are read
 * @param warningCollector receives a warning when unreadable partitions were skipped in a batch
 * @param requestedColumns requested columns, if known; merged with the predicate columns for the
 *        encryption-information lookups
 * @param predicateColumns predicate columns; the values are merged into the requested columns and
 *        the map itself is passed to {@code getPartitionSplitInfo}
 * @param domains passed through to {@code getPartitionSplitInfo}
 * @return metadata for every partition that was neither pruned nor skipped as unreadable
 * @throws PrestoException if a batch resolves to a different number of partitions than requested,
 *         a non-pruned partition was not loaded, columns are null, or bucketing is incompatible
 * @throws HiveNotReadableException if a partition is marked not readable and may not be ignored
 */
private Iterable<HivePartitionMetadata> getPartitionMetadata(SemiTransactionalHiveMetastore metastore, Table table, SchemaTableName tableName, List<HivePartition> hivePartitions, Optional<HiveBucketHandle> hiveBucketHandle, ConnectorSession session, WarningCollector warningCollector, Optional<Set<HiveColumnHandle>> requestedColumns, Map<String, HiveColumnHandle> predicateColumns, Optional<Map<Subfield, Domain>> domains) {
// Nothing to resolve.
if (hivePartitions.isEmpty()) {
return ImmutableList.of();
}
Optional<Set<HiveColumnHandle>> allRequestedColumns = mergeRequestedAndPredicateColumns(requestedColumns, ImmutableSet.copyOf(predicateColumns.values()));
// Shortcut for the unpartitioned case: no Partition object, an empty table-to-partition mapping,
// table-level encryption information and no redundant column domains.
if (hivePartitions.size() == 1) {
HivePartition firstPartition = getOnlyElement(hivePartitions);
if (firstPartition.getPartitionId().equals(UNPARTITIONED_ID)) {
return ImmutableList.of(new HivePartitionMetadata(firstPartition, Optional.empty(), TableToPartitionMapping.empty(), encryptionInformationProvider.getReadEncryptionInformation(session, table, allRequestedColumns), ImmutableSet.of()));
}
}
StorageFormat storageFormat = table.getStorage().getStorageFormat();
Optional<HiveStorageFormat> hiveStorageFormat = getHiveStorageFormat(storageFormat);
Optional<HiveStorageFormat> resolvedHiveStorageFormat;
if (isUseParquetColumnNames(session)) {
// Use Hive Storage Format as Parquet if table is of HUDI format
// (only applies when no Hive storage format could be derived from the table's storage format)
resolvedHiveStorageFormat = (!hiveStorageFormat.isPresent() && isHudiFormat(storageFormat)) ? Optional.of(PARQUET) : hiveStorageFormat;
} else {
resolvedHiveStorageFormat = hiveStorageFormat;
}
// Split the partitions into batches bounded by minPartitionBatchSize / maxPartitionBatchSize.
Iterable<List<HivePartition>> partitionNameBatches = partitionExponentially(hivePartitions, minPartitionBatchSize, maxPartitionBatchSize);
Iterable<List<HivePartitionMetadata>> partitionBatches = transform(partitionNameBatches, partitionBatch -> {
Map<String, PartitionSplitInfo> partitionSplitInfo = getPartitionSplitInfo(session, metastore, tableName, partitionBatch, predicateColumns, domains);
// Every requested partition must have produced exactly one entry.
if (partitionBatch.size() != partitionSplitInfo.size()) {
throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Expected %s partitions but found %s", partitionBatch.size(), partitionSplitInfo.size()));
}
// Same keys as partitionSplitInfo, reduced to the Partition object of each entry.
Map<String, Partition> partitions = partitionSplitInfo.entrySet().stream().collect(toImmutableMap(Entry::getKey, entry -> entry.getValue().getPartition()));
// Encryption information is requested once for the whole batch.
Optional<Map<String, EncryptionInformation>> encryptionInformationForPartitions = encryptionInformationProvider.getReadEncryptionInformation(session, table, allRequestedColumns, partitions);
ImmutableList.Builder<HivePartitionMetadata> results = ImmutableList.builder();
// Sample of skipped partition names grouped by the reason they are unreadable; only used for the warning text.
Map<String, Set<String>> partitionsNotReadable = new HashMap<>();
int unreadablePartitionsSkipped = 0;
for (HivePartition hivePartition : partitionBatch) {
Partition partition = partitions.get(hivePartition.getPartitionId());
// Pruned partitions produce no metadata; this check runs before the "not loaded" check below.
if (partitionSplitInfo.get(hivePartition.getPartitionId()).isPruned()) {
continue;
}
if (partition == null) {
throw new PrestoException(GENERIC_INTERNAL_ERROR, "Partition not loaded: " + hivePartition);
}
String partitionName = makePartName(table.getPartitionColumns(), partition.getValues());
// Empty when there is no encryption information for the batch; a missing map entry for this
// partition also yields empty, assuming this is java.util.Optional (map() of a null result).
Optional<EncryptionInformation> encryptionInformation = encryptionInformationForPartitions.map(metadata -> metadata.get(hivePartition.getPartitionId()));
// Online / readability checks are bypassed entirely in offline-data debug mode.
if (!isOfflineDataDebugModeEnabled(session)) {
// verify partition is online
verifyOnline(tableName, Optional.of(partitionName), getProtectMode(partition), partition.getParameters());
// verify partition is not marked as non-readable
String reason = partition.getParameters().get(OBJECT_NOT_READABLE);
if (!isNullOrEmpty(reason)) {
// Fail unless the session allows ignoring unreadable partitions AND this partition is eligible.
if (!shouldIgnoreUnreadablePartition(session) || !partition.isEligibleToIgnore()) {
throw new HiveNotReadableException(tableName, Optional.of(partitionName), reason);
}
unreadablePartitionsSkipped++;
// Keep only a bounded sample of reasons and names for the warning message.
// NOTE(review): both checks use "<= 3" before inserting, so up to four reasons and up to four
// names per reason can be retained; once four reasons exist no further names are recorded for
// any reason - confirm whether a cap of three was intended.
if (partitionsNotReadable.size() <= 3) {
partitionsNotReadable.putIfAbsent(reason, new HashSet<>(ImmutableSet.of(partitionName)));
if (partitionsNotReadable.get(reason).size() <= 3) {
partitionsNotReadable.get(reason).add(partitionName);
}
}
// Skipped partitions are counted but contribute no metadata.
continue;
}
}
// Verify that the partition schema matches the table schema.
// Either adding or dropping columns from the end of the table
// without modifying existing partitions is allowed, but every
// column that exists in both the table and partition must have
// the same type.
List<Column> tableColumns = table.getDataColumns();
List<Column> partitionColumns = partition.getColumns();
if ((tableColumns == null) || (partitionColumns == null)) {
throw new PrestoException(HIVE_INVALID_METADATA, format("Table '%s' or partition '%s' has null columns", tableName, partitionName));
}
TableToPartitionMapping tableToPartitionMapping = getTableToPartitionMapping(session, resolvedHiveStorageFormat, tableName, partitionName, tableColumns, partitionColumns);
// Bucketing is only validated for a present, non-virtual bucket handle: the partition must be
// bucketed on the same columns with a bucket count accepted by isBucketCountCompatible.
if (hiveBucketHandle.isPresent() && !hiveBucketHandle.get().isVirtuallyBucketed()) {
Optional<HiveBucketProperty> partitionBucketProperty = partition.getStorage().getBucketProperty();
if (!partitionBucketProperty.isPresent()) {
throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("Hive table (%s) is bucketed but partition (%s) is not bucketed", hivePartition.getTableName(), hivePartition.getPartitionId()));
}
int tableBucketCount = hiveBucketHandle.get().getTableBucketCount();
int partitionBucketCount = partitionBucketProperty.get().getBucketCount();
List<String> tableBucketColumns = hiveBucketHandle.get().getColumns().stream().map(HiveColumnHandle::getName).collect(toImmutableList());
List<String> partitionBucketColumns = partitionBucketProperty.get().getBucketedBy();
if (!tableBucketColumns.equals(partitionBucketColumns) || !isBucketCountCompatible(tableBucketCount, partitionBucketCount)) {
throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("Hive table (%s) bucketing (columns=%s, buckets=%s) is not compatible with partition (%s) bucketing (columns=%s, buckets=%s)", hivePartition.getTableName(), tableBucketColumns, tableBucketCount, hivePartition.getPartitionId(), partitionBucketColumns, partitionBucketCount));
}
}
results.add(new HivePartitionMetadata(hivePartition, Optional.of(partition), tableToPartitionMapping, encryptionInformation, partitionSplitInfo.get(hivePartition.getPartitionId()).getRedundantColumnDomains()));
}
// One warning per batch; the "out of" figure is the size of this batch, not of the whole table.
if (unreadablePartitionsSkipped > 0) {
StringBuilder warningMessage = new StringBuilder(format("Table '%s' has %s out of %s partitions unreadable: ", tableName, unreadablePartitionsSkipped, partitionBatch.size()));
for (Entry<String, Set<String>> entry : partitionsNotReadable.entrySet()) {
warningMessage.append(String.join(", ", entry.getValue())).append("... are due to ").append(entry.getKey()).append(". ");
}
warningCollector.add(new PrestoWarning(PARTITION_NOT_READABLE, warningMessage.toString()));
}
return results.build();
});
// Flatten the per-batch lists into a single iterable.
return concat(partitionBatches);
}
Use of com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore in project presto by prestodb.
Class SyncPartitionMetadataProcedure, method doSyncPartitionMetadata.
/**
 * Compares the partitions registered in the metastore with the partition directories found under
 * the table location and hands both differences to {@code syncPartitions}.
 *
 * @param session session used for the metastore context, the HDFS context and {@code syncPartitions}
 * @param schemaName schema of the table
 * @param tableName name of the table
 * @param mode textual sync mode, converted with {@code toSyncMode}
 * @param caseSensitive forwarded to {@code listDirectory}
 * @throws TableNotFoundException if the table or its partition names cannot be found
 * @throws PrestoException if the table has no partition columns, or if a file system call
 *         fails with an {@link IOException}
 */
private void doSyncPartitionMetadata(ConnectorSession session, String schemaName, String tableName, String mode, boolean caseSensitive) {
    SyncMode requestedMode = toSyncMode(mode);
    SemiTransactionalHiveMetastore hiveMetastore = hiveMetadataFactory.get().getMetastore();
    SchemaTableName qualifiedName = new SchemaTableName(schemaName, tableName);

    // One context serves both metastore calls below.
    MetastoreContext metastoreContext = new MetastoreContext(
            session.getIdentity(),
            session.getQueryId(),
            session.getClientInfo(),
            session.getSource(),
            getMetastoreHeaders(session),
            isUserDefinedTypeEncodingEnabled(session),
            hiveMetastore.getColumnConverterProvider());

    Table targetTable = hiveMetastore.getTable(metastoreContext, schemaName, tableName)
            .orElseThrow(() -> new TableNotFoundException(qualifiedName));
    if (targetTable.getPartitionColumns().isEmpty()) {
        throw new PrestoException(INVALID_PROCEDURE_ARGUMENT, "Table is not partitioned: " + qualifiedName);
    }

    String rawLocation = targetTable.getStorage().getLocation();
    Path tableRoot = new Path(rawLocation);
    HdfsContext hdfsContext = new HdfsContext(session, schemaName, tableName, rawLocation, false);

    Set<String> onlyInFileSystem;
    Set<String> onlyInMetastore;
    try {
        FileSystem tableFileSystem = hdfsEnvironment.getFileSystem(hdfsContext, tableRoot);
        List<String> metastorePartitions = hiveMetastore.getPartitionNames(metastoreContext, schemaName, tableName)
                .orElseThrow(() -> new TableNotFoundException(qualifiedName));
        // Partition directories are expressed relative to the table location.
        List<String> fileSystemPartitions = listDirectory(
                tableFileSystem,
                tableFileSystem.getFileStatus(tableRoot),
                targetTable.getPartitionColumns(),
                targetTable.getPartitionColumns().size(),
                caseSensitive)
                .stream()
                .map(status -> tableRoot.toUri().relativize(status.getPath().toUri()).getPath())
                .collect(toImmutableList());

        // Present on the file system but unknown to the metastore.
        onlyInFileSystem = difference(fileSystemPartitions, metastorePartitions);
        // Known to the metastore but absent from the file system.
        onlyInMetastore = difference(metastorePartitions, fileSystemPartitions);
    }
    catch (IOException e) {
        throw new PrestoException(HIVE_FILESYSTEM_ERROR, e);
    }

    syncPartitions(onlyInFileSystem, onlyInMetastore, requestedMode, hiveMetastore, session, targetTable);
}
Use of com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore in project presto by prestodb.
Class AbstractTestHiveClient, method listAllDataPaths.
/**
 * Lists the storage locations belonging to a table: the table location itself (when it is not
 * null) followed by the location of every partition that does not start with the table location.
 *
 * @param metastoreContext context passed to every metastore call
 * @param metastore metastore to query
 * @param schemaName schema of the table
 * @param tableName name of the table
 * @return the collected locations, table location first
 */
public static List<String> listAllDataPaths(MetastoreContext metastoreContext, SemiTransactionalHiveMetastore metastore, String schemaName, String tableName) {
    Table table = metastore.getTable(metastoreContext, schemaName, tableName).get();
    String tableLocation = table.getStorage().getLocation();

    ImmutableList.Builder<String> dataPaths = ImmutableList.builder();
    if (tableLocation != null) {
        // A partitioned table should have nothing directly under its own directory, but listing
        // it anyway lets callers assert on more of the directory content.
        dataPaths.add(tableLocation);
    }

    metastore.getPartitionNames(metastoreContext, schemaName, tableName).ifPresent(names ->
            metastore.getPartitionsByNames(metastoreContext, schemaName, tableName, names).values().forEach(loadedPartition -> {
                String partitionLocation = loadedPartition.get().getStorage().getLocation();
                // Locations that start with the table location are not listed separately.
                if (!partitionLocation.startsWith(tableLocation)) {
                    dataPaths.add(partitionLocation);
                }
            }));

    return dataPaths.build();
}
Use of com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore in project presto by prestodb.
Class AbstractTestHiveClient, method partitionTargetPath.
/**
 * Resolves the target path reported by the location service for a partition of an existing table.
 *
 * @param schemaTableName table that owns the partition
 * @param partitionName name of the partition
 * @return string form of the target path from the partition write info
 */
protected String partitionTargetPath(SchemaTableName schemaTableName, String partitionName) {
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        SemiTransactionalHiveMetastore metastore = transaction.getMetastore();
        LocationService locations = getLocationService();

        MetastoreContext metastoreContext = new MetastoreContext(
                session.getIdentity(),
                session.getQueryId(),
                session.getClientInfo(),
                session.getSource(),
                getMetastoreHeaders(session),
                false,
                DEFAULT_COLUMN_CONVERTER_PROVIDER);
        String schema = schemaTableName.getSchemaName();
        String name = schemaTableName.getTableName();
        Table existingTable = metastore.getTable(metastoreContext, schema, name).get();

        LocationHandle tableHandle = locations.forExistingTable(metastore, session, existingTable, false);
        return locations.getPartitionWriteInfo(tableHandle, Optional.empty(), partitionName)
                .getTargetPath()
                .toString();
    }
}
Aggregations