Use of io.trino.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_NEW_DIRECTORY in project trino by trinodb.
The example below is the rollbackShared method of the SemiTransactionalHiveMetastore class.
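Each entry in declaredIntentionsToWrite records where and how a query intended to write. The real DeclaredIntentionToWrite is a nested class of SemiTransactionalHiveMetastore; the simplified sketch below is inferred from the accessors the method uses, and the field names are assumptions for orientation only (WriteMode, HdfsContext, Path, and SchemaTableName are the same Trino and Hadoop types the snippet uses).

// Simplified sketch, not Trino's actual class: fields inferred from the accessors
// used in rollbackShared(). In Trino this is a nested class of the metastore.
class DeclaredIntentionToWrite
{
    private final WriteMode mode;                  // e.g. DIRECT_TO_TARGET_NEW_DIRECTORY
    private final HdfsContext hdfsContext;         // file system access context used for cleanup
    private final String queryId;                  // query ID embedded in file names written by this transaction
    private final Path rootPath;                   // declared staging or target directory
    private final SchemaTableName schemaTableName; // table the write targets

    DeclaredIntentionToWrite(WriteMode mode, HdfsContext hdfsContext, String queryId, Path rootPath, SchemaTableName schemaTableName)
    {
        this.mode = mode;
        this.hdfsContext = hdfsContext;
        this.queryId = queryId;
        this.rootPath = rootPath;
        this.schemaTableName = schemaTableName;
    }

    WriteMode getMode() { return mode; }
    HdfsContext getHdfsContext() { return hdfsContext; }
    String getQueryId() { return queryId; }
    Path getRootPath() { return rootPath; }
    SchemaTableName getSchemaTableName() { return schemaTableName; }
}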
@GuardedBy("this")
private void rollbackShared() {
checkHoldsLock();
for (DeclaredIntentionToWrite declaredIntentionToWrite : declaredIntentionsToWrite) {
switch(declaredIntentionToWrite.getMode()) {
case STAGE_AND_MOVE_TO_TARGET_DIRECTORY:
case DIRECT_TO_TARGET_NEW_DIRECTORY:
// it will only be written to during the commit call and the commit call cleans up after failures.
if ((declaredIntentionToWrite.getMode() == DIRECT_TO_TARGET_NEW_DIRECTORY) && skipTargetCleanupOnRollback) {
break;
}
                Path rootPath = declaredIntentionToWrite.getRootPath();
                // In the case of DIRECT_TO_TARGET_NEW_DIRECTORY, if the directory is not guaranteed to be
                // unique for the query, another query or compute engine may have seen the directory, written
                // data to it, and exposed it through the metastore. Cleanup of staging directories therefore
                // has to be conservative: to be safe, we only delete files that start or end with the query
                // IDs in this transaction.
                recursiveDeleteFilesAndLog(
                        declaredIntentionToWrite.getHdfsContext(),
                        rootPath,
                        ImmutableSet.of(declaredIntentionToWrite.getQueryId()),
                        true,
                        format("staging/target_new directory rollback for table %s", declaredIntentionToWrite.getSchemaTableName()));
                break;
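            // DIRECT_TO_TARGET_EXISTING_DIRECTORY writes straight into directories that may already
            // hold data from other transactions, so rollback collects the table's base directory plus
            // any partition directories outside it and deletes only this transaction's files from them.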
            case DIRECT_TO_TARGET_EXISTING_DIRECTORY:
                Set<Path> pathsToClean = new HashSet<>();
                // Check the base directory of the declared intention
                // * existing partitions may also be in this directory
                // * this is where new partitions are created
                Path baseDirectory = declaredIntentionToWrite.getRootPath();
                pathsToClean.add(baseDirectory);
                SchemaTableName schemaTableName = declaredIntentionToWrite.getSchemaTableName();
                Optional<Table> table = delegate.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName());
                if (table.isPresent()) {
                    // check every existing partition that is outside of the base directory
                    List<Column> partitionColumns = table.get().getPartitionColumns();
                    if (!partitionColumns.isEmpty()) {
                        List<String> partitionColumnNames = partitionColumns.stream()
                                .map(Column::getName)
                                .collect(toImmutableList());
                        List<String> partitionNames = delegate.getPartitionNamesByFilter(
                                schemaTableName.getSchemaName(),
                                schemaTableName.getTableName(),
                                partitionColumnNames,
                                TupleDomain.all())
                                .orElse(ImmutableList.of());
                        for (List<String> partitionNameBatch : Iterables.partition(partitionNames, 10)) {
                            Collection<Optional<Partition>> partitions = delegate.getPartitionsByNames(
                                    schemaTableName.getSchemaName(),
                                    schemaTableName.getTableName(),
                                    partitionNameBatch).values();
                            partitions.stream()
                                    .filter(Optional::isPresent)
                                    .map(Optional::get)
                                    .map(partition -> partition.getStorage().getLocation())
                                    .map(Path::new)
                                    .filter(path -> !isSameOrParent(baseDirectory, path))
                                    .forEach(pathsToClean::add);
                        }
                    }
                }
                else {
                    logCleanupFailure(
                            "Error rolling back write to table %s.%s. Data directory may contain temporary data. Table was dropped in another transaction.",
                            schemaTableName.getSchemaName(),
                            schemaTableName.getTableName());
                }
                // delete any file that starts or ends with the query ID
                for (Path path : pathsToClean) {
                    // TODO: It is a known deficiency that some empty directories do not get cleaned up in S3.
                    // We cannot delete any of the directories here since we do not know who created them.
                    recursiveDeleteFilesAndLog(
                            declaredIntentionToWrite.getHdfsContext(),
                            path,
                            ImmutableSet.of(declaredIntentionToWrite.getQueryId()),
                            false,
                            format("target_existing directory rollback for table %s", schemaTableName));
                }
                break;
            default:
                throw new UnsupportedOperationException("Unknown write mode");
        }
    }
}
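Both branches rely on recursiveDeleteFilesAndLog honoring the rule spelled out in the comments: only files whose names start or end with one of the transaction's query IDs are deleted. A minimal sketch of that name-matching rule is shown below; QueryScopedCleanup and shouldDeleteFile are hypothetical names for illustration, not Trino's actual implementation.

import java.util.Set;

public final class QueryScopedCleanup
{
    private QueryScopedCleanup() {}

    // A file is eligible for deletion during rollback only if its name starts or
    // ends with one of the query IDs that wrote in this transaction. This spares
    // files produced by other queries or engines sharing the same directory.
    public static boolean shouldDeleteFile(String fileName, Set<String> queryIds)
    {
        return queryIds.stream()
                .anyMatch(queryId -> fileName.startsWith(queryId) || fileName.endsWith(queryId));
    }
}

For example, with the illustrative query ID 20231115_170000_00042_abcde, a file named 20231115_170000_00042_abcde_bucket_00000 matches by prefix and is removed, while files written by other queries in the same directory are left untouched.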