use of com.facebook.presto.hive.PartitionUpdate.UpdateMode.APPEND in project presto by prestodb.
the class HiveMetadata method finishInsertInternal.
private Optional<ConnectorOutputMetadata> finishInsertInternal(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics) {
HiveInsertTableHandle handle = (HiveInsertTableHandle) insertHandle;
List<PartitionUpdate> partitionUpdates = getPartitionUpdates(session, fragments);
HiveStorageFormat tableStorageFormat = handle.getTableStorageFormat();
partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
MetastoreContext metastoreContext = getMetastoreContext(session);
Table table = metastore.getTable(metastoreContext, handle.getSchemaName(), handle.getTableName()).orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
if (!table.getStorage().getStorageFormat().getInputFormat().equals(tableStorageFormat.getInputFormat()) && isRespectTableFormat(session)) {
throw new PrestoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
}
if (handle.getBucketProperty().isPresent() && isCreateEmptyBucketFiles(session)) {
List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, partitionUpdates);
// replace partitionUpdates before creating the zero-row files so that those files will be cleaned up if we end up rollback
partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
HdfsContext hdfsContext = new HdfsContext(session, table.getDatabaseName(), table.getTableName(), table.getStorage().getLocation(), false);
for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(partitionObjectBuilder.buildPartitionObject(session, table, partitionUpdate, prestoVersion, handle.getEncryptionInformation().map(encryptionInfo -> encryptionInfo.getDwrfEncryptionMetadata().map(DwrfEncryptionMetadata::getExtraMetadata).orElseGet(ImmutableMap::of)).orElseGet(ImmutableMap::of)));
zeroRowFileCreator.createFiles(session, hdfsContext, partitionUpdate.getWritePath(), getTargetFileNames(partitionUpdate.getFileWriteInfos()), getStorageFormat(partition, table), handle.getCompressionCodec(), getSchema(partition, table));
}
}
List<String> partitionedBy = table.getPartitionColumns().stream().map(Column::getName).collect(toImmutableList());
Map<String, Type> columnTypes = handle.getInputColumns().stream().collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
Map<List<String>, ComputedStatistics> partitionComputedStatistics = createComputedStatisticsToPartitionMap(computedStatistics, partitionedBy, columnTypes);
Set<String> existingPartitions = getExistingPartitionNames(session.getIdentity(), metastoreContext, handle.getSchemaName(), handle.getTableName(), partitionUpdates);
for (PartitionUpdate partitionUpdate : partitionUpdates) {
if (partitionUpdate.getName().isEmpty()) {
if (handle.getEncryptionInformation().isPresent()) {
throw new PrestoException(HIVE_UNSUPPORTED_ENCRYPTION_OPERATION, "Inserting into an existing table with encryption enabled is not supported yet");
}
// insert into unpartitioned table
PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
metastore.finishInsertIntoExistingTable(session, handle.getSchemaName(), handle.getTableName(), partitionUpdate.getWritePath(), getTargetFileNames(partitionUpdate.getFileWriteInfos()), partitionStatistics);
} else if (partitionUpdate.getUpdateMode() == APPEND) {
if (handle.getEncryptionInformation().isPresent()) {
throw new PrestoException(HIVE_UNSUPPORTED_ENCRYPTION_OPERATION, "Inserting into an existing partition with encryption enabled is not supported yet");
}
// insert into existing partition
List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partitionValues));
metastore.finishInsertIntoExistingPartition(session, handle.getSchemaName(), handle.getTableName(), handle.getLocationHandle().getTargetPath().toString(), partitionValues, partitionUpdate.getWritePath(), getTargetFileNames(partitionUpdate.getFileWriteInfos()), partitionStatistics);
} else if (partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode() == OVERWRITE) {
Map<String, String> extraPartitionMetadata = handle.getEncryptionInformation().map(encryptionInfo -> encryptionInfo.getDwrfEncryptionMetadata().map(DwrfEncryptionMetadata::getExtraMetadata).orElseGet(ImmutableMap::of)).orElseGet(ImmutableMap::of);
if (isPreferManifestsToListFiles(session) && isFileRenamingEnabled(session)) {
// Store list of file names and sizes in partition metadata when prefer_manifests_to_list_files and file_renaming_enabled are set to true
extraPartitionMetadata = updatePartitionMetadataWithFileNamesAndSizes(partitionUpdate, extraPartitionMetadata);
}
// Track the manifest blob size
getManifestSizeInBytes(session, partitionUpdate, extraPartitionMetadata).ifPresent(hivePartitionStats::addManifestSizeInBytes);
// insert into new partition or overwrite existing partition
Partition partition = partitionObjectBuilder.buildPartitionObject(session, table, partitionUpdate, prestoVersion, extraPartitionMetadata);
if (!partition.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && isRespectTableFormat(session)) {
throw new PrestoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Partition format changed during insert");
}
if (existingPartitions.contains(partitionUpdate.getName())) {
if (partitionUpdate.getUpdateMode() == OVERWRITE) {
metastore.dropPartition(session, handle.getSchemaName(), handle.getTableName(), handle.getLocationHandle().getTargetPath().toString(), partition.getValues());
} else {
throw new PrestoException(HIVE_PARTITION_READ_ONLY, "Cannot insert into an existing partition of Hive table: " + partitionUpdate.getName());
}
}
PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues()));
metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), table.getStorage().getLocation(), false, partition, partitionUpdate.getWritePath(), partitionStatistics);
} else {
throw new IllegalArgumentException(format("Unsupported update mode: %s", partitionUpdate.getUpdateMode()));
}
}
return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream().map(PartitionUpdate::getName).map(name -> name.isEmpty() ? UNPARTITIONED_ID : name).collect(toList())));
}
use of com.facebook.presto.hive.PartitionUpdate.UpdateMode.APPEND in project presto by prestodb.
the class HiveMetadata method computePartitionUpdatesForMissingBuckets.
private List<PartitionUpdate> computePartitionUpdatesForMissingBuckets(ConnectorSession session, HiveWritableTableHandle handle, Table table, List<PartitionUpdate> partitionUpdates) {
// avoid creation of PartitionUpdate with empty list of files
if (!shouldCreateFilesForMissingBuckets(table, session)) {
return ImmutableList.of();
}
HiveStorageFormat storageFormat = table.getPartitionColumns().isEmpty() ? handle.getTableStorageFormat() : handle.getPartitionStorageFormat();
// empty un-partitioned bucketed table
if (table.getPartitionColumns().isEmpty() && partitionUpdates.isEmpty()) {
int bucketCount = handle.getBucketProperty().get().getBucketCount();
LocationHandle locationHandle = handle.getLocationHandle();
List<String> fileNamesForMissingBuckets = computeFileNamesForMissingBuckets(session, storageFormat, handle.getCompressionCodec(), bucketCount, ImmutableSet.of());
return ImmutableList.of(new PartitionUpdate("", (handle instanceof HiveInsertTableHandle) ? APPEND : NEW, locationHandle.getWritePath(), locationHandle.getTargetPath(), fileNamesForMissingBuckets.stream().map(fileName -> new FileWriteInfo(fileName, fileName, Optional.of(0L))).collect(toImmutableList()), 0, 0, 0, isFileRenamingEnabled(session)));
}
ImmutableList.Builder<PartitionUpdate> partitionUpdatesForMissingBucketsBuilder = ImmutableList.builder();
for (PartitionUpdate partitionUpdate : partitionUpdates) {
int bucketCount = handle.getBucketProperty().get().getBucketCount();
List<String> fileNamesForMissingBuckets = computeFileNamesForMissingBuckets(session, storageFormat, handle.getCompressionCodec(), bucketCount, ImmutableSet.copyOf(getTargetFileNames(partitionUpdate.getFileWriteInfos())));
partitionUpdatesForMissingBucketsBuilder.add(new PartitionUpdate(partitionUpdate.getName(), partitionUpdate.getUpdateMode(), partitionUpdate.getWritePath(), partitionUpdate.getTargetPath(), fileNamesForMissingBuckets.stream().map(fileName -> new FileWriteInfo(fileName, fileName, Optional.of(0L))).collect(toImmutableList()), 0, 0, 0, isFileRenamingEnabled(session)));
}
return partitionUpdatesForMissingBucketsBuilder.build();
}
Aggregations