Search in sources :

Example 1 with SerializableFunction

Use of org.apache.hudi.common.function.SerializableFunction in the Apache Hudi project.

The method maybeDeleteAndCollectStats of the class BaseRollbackHelper.

/**
 * Optionally deletes the files of interest and collects rollback stats, or collects stats only.
 *
 * <p>Each rollback request is handled by exactly one of three branches:
 * <ul>
 *   <li>If the request lists files to be deleted, they are passed to {@code deleteFiles} together with
 *       {@code doDelete}, and one pair is emitted per returned stat, keyed by that stat's partition path.</li>
 *   <li>Otherwise, if the request lists log blocks to be deleted, a log writer is built for the request's
 *       file id and latest base instant. A {@link HoodieCommandBlock} is appended only when
 *       {@code doDelete} is {@code true}. The emitted stat maps the {@link FileStatus} of the writer's
 *       log file to a count of {@code 1}.</li>
 *   <li>Otherwise a stat carrying only the request's partition path is emitted.</li>
 * </ul>
 *
 * <p>Inside the per-request function, an {@link IOException} or {@link InterruptedException} raised while
 * building the writer or appending the block is rethrown as {@link HoodieRollbackException}; an
 * {@link IOException} raised while closing the writer is rethrown as {@link HoodieIOException}. When the
 * failure was an {@link InterruptedException}, the thread's interrupt flag is restored after the writer
 * has been closed.
 *
 * @param context           instance of {@link HoodieEngineContext} to use.
 * @param instantToRollback {@link HoodieInstant} of interest for which deletion or collect stats is requested.
 * @param rollbackRequests  List of {@link SerializableHoodieRollbackRequest} to be operated on.
 * @param doDelete          {@code true} if deletion has to be done. {@code false} if only stats are to be collected w/o performing any deletes.
 * @param numPartitions     forwarded unchanged as the last argument of {@code context.flatMap};
 *                          presumably the parallelism to use (confirm against {@link HoodieEngineContext}).
 * @return stats collected with or w/o actual deletions, each paired with its partition path.
 */
List<Pair<String, HoodieRollbackStat>> maybeDeleteAndCollectStats(
        HoodieEngineContext context,
        HoodieInstant instantToRollback,
        List<SerializableHoodieRollbackRequest> rollbackRequests,
        boolean doDelete,
        int numPartitions) {
    return context.flatMap(rollbackRequests, (SerializableFunction<SerializableHoodieRollbackRequest, Stream<Pair<String, HoodieRollbackStat>>>) rollbackRequest -> {
        List<String> filesToBeDeleted = rollbackRequest.getFilesToBeDeleted();
        if (!filesToBeDeleted.isEmpty()) {
            // deleteFiles runs eagerly here; only the pairing with the partition path is done by the stream.
            List<HoodieRollbackStat> rollbackStats = deleteFiles(metaClient, filesToBeDeleted, doDelete);
            return rollbackStats.stream().map(stat -> Pair.of(stat.getPartitionPath(), stat));
        } else if (!rollbackRequest.getLogBlocksToBeDeleted().isEmpty()) {
            HoodieLogFormat.Writer writer = null;
            // Remembered so the interrupt flag can be restored once the writer has been closed.
            boolean interrupted = false;
            try {
                String fileId = rollbackRequest.getFileId();
                String latestBaseInstant = rollbackRequest.getLatestBaseInstant();
                writer = HoodieLogFormat.newWriterBuilder()
                        .onParentPath(FSUtils.getPartitionPath(metaClient.getBasePath(), rollbackRequest.getPartitionPath()))
                        .withFileId(fileId)
                        .overBaseCommit(latestBaseInstant)
                        .withFs(metaClient.getFs())
                        .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
                        .build();
                if (doDelete) {
                    Map<HoodieLogBlock.HeaderMetadataType, String> header = generateHeader(instantToRollback.getTimestamp());
                    writer.appendBlock(new HoodieCommandBlock(header));
                }
            } catch (IOException | InterruptedException io) {
                interrupted = io instanceof InterruptedException;
                throw new HoodieRollbackException("Failed to rollback for instant " + instantToRollback, io);
            } finally {
                try {
                    if (writer != null) {
                        writer.close();
                    }
                } catch (IOException io) {
                    throw new HoodieIOException("Error appending rollback block", io);
                } finally {
                    if (interrupted) {
                        // Restore the flag only after close(), so the close itself is not affected by it.
                        Thread.currentThread().interrupt();
                    }
                }
            }
            // The file status is looked up only after the writer has been closed.
            Map<FileStatus, Long> filesToNumBlocksRollback = Collections.singletonMap(
                    metaClient.getFs().getFileStatus(Objects.requireNonNull(writer).getLogFile().getPath()), 1L);
            return Stream.of(Pair.of(
                    rollbackRequest.getPartitionPath(),
                    HoodieRollbackStat.newBuilder()
                            .withPartitionPath(rollbackRequest.getPartitionPath())
                            .withRollbackBlockAppendResults(filesToNumBlocksRollback)
                            .build()));
        } else {
            // Nothing to delete and no log blocks to roll back: emit a stat with the partition path only.
            return Stream.of(Pair.of(
                    rollbackRequest.getPartitionPath(),
                    HoodieRollbackStat.newBuilder()
                            .withPartitionPath(rollbackRequest.getPartitionPath())
                            .build()));
        }
    }, numPartitions);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) PathFilter(org.apache.hadoop.fs.PathFilter) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieCommandBlock(org.apache.hudi.common.table.log.block.HoodieCommandBlock) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) HoodieRollbackException(org.apache.hudi.exception.HoodieRollbackException) SerializableFunction(org.apache.hudi.common.function.SerializableFunction) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieRollbackRequest(org.apache.hudi.avro.model.HoodieRollbackRequest) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) Serializable(java.io.Serializable) Objects(java.util.Objects) List(java.util.List) Stream(java.util.stream.Stream) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieRollbackException(org.apache.hudi.exception.HoodieRollbackException) FileStatus(org.apache.hadoop.fs.FileStatus) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCommandBlock(org.apache.hudi.common.table.log.block.HoodieCommandBlock) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) HoodieIOException(org.apache.hudi.exception.HoodieIOException) 
HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) Stream(java.util.stream.Stream) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 Serializable (java.io.Serializable)1 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1 Collectors (java.util.stream.Collectors)1 Stream (java.util.stream.Stream)1 FileStatus (org.apache.hadoop.fs.FileStatus)1 Path (org.apache.hadoop.fs.Path)1 PathFilter (org.apache.hadoop.fs.PathFilter)1 HoodieRollbackRequest (org.apache.hudi.avro.model.HoodieRollbackRequest)1 HoodieRollbackStat (org.apache.hudi.common.HoodieRollbackStat)1 HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext)1 FSUtils (org.apache.hudi.common.fs.FSUtils)1 SerializableFunction (org.apache.hudi.common.function.SerializableFunction)1 HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile)1