use of com.facebook.presto.hive.metastore.Partition in project presto by prestodb.
the class HiveSplitManager method getPartitionMetadata.
private Iterable<HivePartitionMetadata> getPartitionMetadata(SemiTransactionalHiveMetastore metastore, Table table, SchemaTableName tableName, List<HivePartition> hivePartitions, Optional<HiveBucketProperty> bucketProperty) {
if (hivePartitions.isEmpty()) {
return ImmutableList.of();
}
if (hivePartitions.size() == 1) {
HivePartition firstPartition = getOnlyElement(hivePartitions);
if (firstPartition.getPartitionId().equals(UNPARTITIONED_ID)) {
return ImmutableList.of(new HivePartitionMetadata(firstPartition, Optional.empty(), ImmutableMap.of()));
}
}
Iterable<List<HivePartition>> partitionNameBatches = partitionExponentially(hivePartitions, minPartitionBatchSize, maxPartitionBatchSize);
Iterable<List<HivePartitionMetadata>> partitionBatches = transform(partitionNameBatches, partitionBatch -> {
Map<String, Optional<Partition>> batch = metastore.getPartitionsByNames(tableName.getSchemaName(), tableName.getTableName(), Lists.transform(partitionBatch, HivePartition::getPartitionId));
ImmutableMap.Builder<String, Partition> partitionBuilder = ImmutableMap.builder();
for (Map.Entry<String, Optional<Partition>> entry : batch.entrySet()) {
if (!entry.getValue().isPresent()) {
throw new PrestoException(HIVE_METASTORE_ERROR, "Partition metadata not available");
}
partitionBuilder.put(entry.getKey(), entry.getValue().get());
}
Map<String, Partition> partitions = partitionBuilder.build();
Verify.verify(partitions.size() == partitionBatch.size());
if (partitionBatch.size() != partitions.size()) {
throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Expected %s partitions but found %s", partitionBatch.size(), partitions.size()));
}
ImmutableList.Builder<HivePartitionMetadata> results = ImmutableList.builder();
for (HivePartition hivePartition : partitionBatch) {
Partition partition = partitions.get(hivePartition.getPartitionId());
if (partition == null) {
throw new PrestoException(GENERIC_INTERNAL_ERROR, "Partition not loaded: " + hivePartition);
}
String protectMode = partition.getParameters().get(ProtectMode.PARAMETER_NAME);
String partName = makePartName(table.getPartitionColumns(), partition.getValues());
if (protectMode != null && getProtectModeFromString(protectMode).offline) {
throw new PartitionOfflineException(tableName, partName, false, null);
}
String prestoOffline = partition.getParameters().get(PRESTO_OFFLINE);
if (!isNullOrEmpty(prestoOffline)) {
throw new PartitionOfflineException(tableName, partName, true, prestoOffline);
}
List<Column> tableColumns = table.getDataColumns();
List<Column> partitionColumns = partition.getColumns();
if ((tableColumns == null) || (partitionColumns == null)) {
throw new PrestoException(HIVE_INVALID_METADATA, format("Table '%s' or partition '%s' has null columns", tableName, partName));
}
ImmutableMap.Builder<Integer, HiveType> columnCoercions = ImmutableMap.builder();
for (int i = 0; i < min(partitionColumns.size(), tableColumns.size()); i++) {
HiveType tableType = tableColumns.get(i).getType();
HiveType partitionType = partitionColumns.get(i).getType();
if (!tableType.equals(partitionType)) {
if (!coercionPolicy.canCoerce(partitionType, tableType)) {
throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("" + "There is a mismatch between the table and partition schemas. " + "The types are incompatible and cannot be coerced. " + "The column '%s' in table '%s' is declared as type '%s', " + "but partition '%s' declared column '%s' as type '%s'.", tableColumns.get(i).getName(), tableName, tableType, partName, partitionColumns.get(i).getName(), partitionType));
}
columnCoercions.put(i, partitionType);
}
}
Optional<HiveBucketProperty> partitionBucketProperty = partition.getStorage().getBucketProperty();
checkCondition(partitionBucketProperty.equals(bucketProperty), HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH, "Hive table (%s) bucketing property (%s) does not match partition (%s) bucketing property (%s)", hivePartition.getTableName(), bucketProperty, hivePartition.getPartitionId(), partitionBucketProperty);
results.add(new HivePartitionMetadata(hivePartition, Optional.of(partition), columnCoercions.build()));
}
return results.build();
});
return concat(partitionBatches);
}
use of com.facebook.presto.hive.metastore.Partition in project presto by prestodb.
the class AbstractTestHiveClient method listAllDataPaths.
public static List<String> listAllDataPaths(SemiTransactionalHiveMetastore metastore, String schemaName, String tableName) {
ImmutableList.Builder<String> locations = ImmutableList.builder();
Table table = metastore.getTable(schemaName, tableName).get();
if (table.getStorage().getLocation() != null) {
// For partitioned table, there should be nothing directly under this directory.
// But including this location in the set makes the directory content assert more
// extensive, which is desirable.
locations.add(table.getStorage().getLocation());
}
Optional<List<String>> partitionNames = metastore.getPartitionNames(schemaName, tableName);
if (partitionNames.isPresent()) {
metastore.getPartitionsByNames(schemaName, tableName, partitionNames.get()).values().stream().map(Optional::get).map(partition -> partition.getStorage().getLocation()).filter(location -> !location.startsWith(table.getStorage().getLocation())).forEach(locations::add);
}
return locations.build();
}
use of com.facebook.presto.hive.metastore.Partition in project presto by prestodb.
the class FileHiveMetastore method addPartitions.
@Override
public synchronized void addPartitions(String databaseName, String tableName, List<Partition> partitions) {
requireNonNull(databaseName, "databaseName is null");
requireNonNull(tableName, "tableName is null");
requireNonNull(partitions, "partitions is null");
Table table = getRequiredTable(databaseName, tableName);
TableType tableType = TableType.valueOf(table.getTableType());
checkArgument(EnumSet.of(MANAGED_TABLE, EXTERNAL_TABLE).contains(tableType), "Invalid table type: %s", tableType);
try {
Map<Path, byte[]> schemaFiles = new LinkedHashMap<>();
for (Partition partition : partitions) {
verifiedPartition(table, partition);
Path partitionMetadataDirectory = getPartitionMetadataDirectory(table, partition.getValues());
Path schemaPath = new Path(partitionMetadataDirectory, PRESTO_SCHEMA_FILE_NAME);
if (metadataFileSystem.exists(schemaPath)) {
throw new PrestoException(HIVE_METASTORE_ERROR, "Partition already exists");
}
byte[] schemaJson = partitionCodec.toJsonBytes(new PartitionMetadata(table, partition));
schemaFiles.put(schemaPath, schemaJson);
}
Set<Path> createdFiles = new LinkedHashSet<>();
try {
for (Entry<Path, byte[]> entry : schemaFiles.entrySet()) {
try (OutputStream outputStream = metadataFileSystem.create(entry.getKey())) {
createdFiles.add(entry.getKey());
outputStream.write(entry.getValue());
} catch (IOException e) {
throw new PrestoException(HIVE_METASTORE_ERROR, "Could not write partition schema", e);
}
}
} catch (Throwable e) {
for (Path createdFile : createdFiles) {
try {
metadataFileSystem.delete(createdFile, false);
} catch (IOException ignored) {
}
}
throw e;
}
} catch (IOException e) {
throw new PrestoException(HIVE_METASTORE_ERROR, e);
}
}
use of com.facebook.presto.hive.metastore.Partition in project presto by prestodb.
the class HiveMetadata method beginInsertInternal.
private HiveInsertTableHandle beginInsertInternal(ConnectorSession session, ConnectorTableHandle tableHandle) {
verifyJvmTimeZone();
MetastoreContext metastoreContext = getMetastoreContext(session);
SchemaTableName tableName = ((HiveTableHandle) tableHandle).getSchemaTableName();
Table table = metastore.getTable(metastoreContext, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
checkTableIsWritable(table, writesToNonManagedTablesEnabled);
for (Column column : table.getDataColumns()) {
if (!isWritableType(column.getType())) {
throw new PrestoException(NOT_SUPPORTED, format("Inserting into Hive table %s with column type %s not supported", tableName, column.getType()));
}
}
List<HiveColumnHandle> handles = hiveColumnHandles(table).stream().filter(columnHandle -> !columnHandle.isHidden()).collect(toList());
HiveStorageFormat tableStorageFormat = extractHiveStorageFormat(table);
LocationHandle locationHandle;
boolean isTemporaryTable = table.getTableType().equals(TEMPORARY_TABLE);
boolean tempPathRequired = isTempPathRequired(session, table.getStorage().getBucketProperty(), decodePreferredOrderingColumnsFromStorage(table.getStorage()));
if (isTemporaryTable) {
locationHandle = locationService.forTemporaryTable(metastore, session, table, tempPathRequired);
} else {
locationHandle = locationService.forExistingTable(metastore, session, table, tempPathRequired);
}
Optional<? extends TableEncryptionProperties> tableEncryptionProperties = getTableEncryptionPropertiesFromHiveProperties(table.getParameters(), tableStorageFormat);
HiveStorageFormat partitionStorageFormat = isRespectTableFormat(session) ? tableStorageFormat : getHiveStorageFormat(session);
HiveStorageFormat actualStorageFormat = table.getPartitionColumns().isEmpty() ? tableStorageFormat : partitionStorageFormat;
if (tableEncryptionProperties.isPresent() && actualStorageFormat != tableStorageFormat) {
throw new PrestoException(INVALID_TABLE_PROPERTY, format("For encrypted tables, partition format (%s) should match table format (%s). Using the session property %s or appropriately setting %s can help with ensuring this", actualStorageFormat.name(), tableStorageFormat.name(), RESPECT_TABLE_FORMAT, HIVE_STORAGE_FORMAT));
}
HiveInsertTableHandle result = new HiveInsertTableHandle(tableName.getSchemaName(), tableName.getTableName(), handles, metastore.generatePageSinkMetadata(metastoreContext, tableName), locationHandle, table.getStorage().getBucketProperty(), decodePreferredOrderingColumnsFromStorage(table.getStorage()), tableStorageFormat, partitionStorageFormat, actualStorageFormat, getHiveCompressionCodec(session, isTemporaryTable, actualStorageFormat), encryptionInformationProvider.getWriteEncryptionInformation(session, tableEncryptionProperties.map(identity()), tableName.getSchemaName(), tableName.getTableName()));
WriteInfo writeInfo = locationService.getQueryWriteInfo(locationHandle);
metastore.declareIntentionToWrite(new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName(), table.getStorage().getLocation(), false), metastoreContext, writeInfo.getWriteMode(), writeInfo.getWritePath(), writeInfo.getTempPath(), tableName, isTemporaryTable);
return result;
}
use of com.facebook.presto.hive.metastore.Partition in project presto by prestodb.
the class HiveMetadata method finishInsertInternal.
private Optional<ConnectorOutputMetadata> finishInsertInternal(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics) {
HiveInsertTableHandle handle = (HiveInsertTableHandle) insertHandle;
List<PartitionUpdate> partitionUpdates = getPartitionUpdates(session, fragments);
HiveStorageFormat tableStorageFormat = handle.getTableStorageFormat();
partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
MetastoreContext metastoreContext = getMetastoreContext(session);
Table table = metastore.getTable(metastoreContext, handle.getSchemaName(), handle.getTableName()).orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
if (!table.getStorage().getStorageFormat().getInputFormat().equals(tableStorageFormat.getInputFormat()) && isRespectTableFormat(session)) {
throw new PrestoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
}
if (handle.getBucketProperty().isPresent() && isCreateEmptyBucketFiles(session)) {
List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, partitionUpdates);
// replace partitionUpdates before creating the zero-row files so that those files will be cleaned up if we end up rollback
partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
HdfsContext hdfsContext = new HdfsContext(session, table.getDatabaseName(), table.getTableName(), table.getStorage().getLocation(), false);
for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(partitionObjectBuilder.buildPartitionObject(session, table, partitionUpdate, prestoVersion, handle.getEncryptionInformation().map(encryptionInfo -> encryptionInfo.getDwrfEncryptionMetadata().map(DwrfEncryptionMetadata::getExtraMetadata).orElseGet(ImmutableMap::of)).orElseGet(ImmutableMap::of)));
zeroRowFileCreator.createFiles(session, hdfsContext, partitionUpdate.getWritePath(), getTargetFileNames(partitionUpdate.getFileWriteInfos()), getStorageFormat(partition, table), handle.getCompressionCodec(), getSchema(partition, table));
}
}
List<String> partitionedBy = table.getPartitionColumns().stream().map(Column::getName).collect(toImmutableList());
Map<String, Type> columnTypes = handle.getInputColumns().stream().collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
Map<List<String>, ComputedStatistics> partitionComputedStatistics = createComputedStatisticsToPartitionMap(computedStatistics, partitionedBy, columnTypes);
Set<String> existingPartitions = getExistingPartitionNames(session.getIdentity(), metastoreContext, handle.getSchemaName(), handle.getTableName(), partitionUpdates);
for (PartitionUpdate partitionUpdate : partitionUpdates) {
if (partitionUpdate.getName().isEmpty()) {
if (handle.getEncryptionInformation().isPresent()) {
throw new PrestoException(HIVE_UNSUPPORTED_ENCRYPTION_OPERATION, "Inserting into an existing table with encryption enabled is not supported yet");
}
// insert into unpartitioned table
PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
metastore.finishInsertIntoExistingTable(session, handle.getSchemaName(), handle.getTableName(), partitionUpdate.getWritePath(), getTargetFileNames(partitionUpdate.getFileWriteInfos()), partitionStatistics);
} else if (partitionUpdate.getUpdateMode() == APPEND) {
if (handle.getEncryptionInformation().isPresent()) {
throw new PrestoException(HIVE_UNSUPPORTED_ENCRYPTION_OPERATION, "Inserting into an existing partition with encryption enabled is not supported yet");
}
// insert into existing partition
List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partitionValues));
metastore.finishInsertIntoExistingPartition(session, handle.getSchemaName(), handle.getTableName(), handle.getLocationHandle().getTargetPath().toString(), partitionValues, partitionUpdate.getWritePath(), getTargetFileNames(partitionUpdate.getFileWriteInfos()), partitionStatistics);
} else if (partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode() == OVERWRITE) {
Map<String, String> extraPartitionMetadata = handle.getEncryptionInformation().map(encryptionInfo -> encryptionInfo.getDwrfEncryptionMetadata().map(DwrfEncryptionMetadata::getExtraMetadata).orElseGet(ImmutableMap::of)).orElseGet(ImmutableMap::of);
if (isPreferManifestsToListFiles(session) && isFileRenamingEnabled(session)) {
// Store list of file names and sizes in partition metadata when prefer_manifests_to_list_files and file_renaming_enabled are set to true
extraPartitionMetadata = updatePartitionMetadataWithFileNamesAndSizes(partitionUpdate, extraPartitionMetadata);
}
// Track the manifest blob size
getManifestSizeInBytes(session, partitionUpdate, extraPartitionMetadata).ifPresent(hivePartitionStats::addManifestSizeInBytes);
// insert into new partition or overwrite existing partition
Partition partition = partitionObjectBuilder.buildPartitionObject(session, table, partitionUpdate, prestoVersion, extraPartitionMetadata);
if (!partition.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && isRespectTableFormat(session)) {
throw new PrestoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Partition format changed during insert");
}
if (existingPartitions.contains(partitionUpdate.getName())) {
if (partitionUpdate.getUpdateMode() == OVERWRITE) {
metastore.dropPartition(session, handle.getSchemaName(), handle.getTableName(), handle.getLocationHandle().getTargetPath().toString(), partition.getValues());
} else {
throw new PrestoException(HIVE_PARTITION_READ_ONLY, "Cannot insert into an existing partition of Hive table: " + partitionUpdate.getName());
}
}
PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues()));
metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), table.getStorage().getLocation(), false, partition, partitionUpdate.getWritePath(), partitionStatistics);
} else {
throw new IllegalArgumentException(format("Unsupported update mode: %s", partitionUpdate.getUpdateMode()));
}
}
return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream().map(PartitionUpdate::getName).map(name -> name.isEmpty() ? UNPARTITIONED_ID : name).collect(toList())));
}
Aggregations