use of org.apache.hudi.common.HoodieCleanStat in project hudi by apache.
the class CleanerUtils method convertCleanMetadata.
public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime,
                                                       Option<Long> durationInMs,
                                                       List<HoodieCleanStat> cleanStats) {
  Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap = new HashMap<>();
  Map<String, HoodieCleanPartitionMetadata> partitionBootstrapMetadataMap = new HashMap<>();
  int totalDeleted = 0;
  String earliestCommitToRetain = null;
  for (HoodieCleanStat stat : cleanStats) {
    HoodieCleanPartitionMetadata metadata = new HoodieCleanPartitionMetadata(
        stat.getPartitionPath(), stat.getPolicy().name(), stat.getDeletePathPatterns(),
        stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles());
    partitionMetadataMap.put(stat.getPartitionPath(), metadata);
    if ((null != stat.getDeleteBootstrapBasePathPatterns())
        && (!stat.getDeleteBootstrapBasePathPatterns().isEmpty())) {
      HoodieCleanPartitionMetadata bootstrapMetadata = new HoodieCleanPartitionMetadata(
          stat.getPartitionPath(), stat.getPolicy().name(), stat.getDeleteBootstrapBasePathPatterns(),
          stat.getSuccessDeleteBootstrapBaseFiles(), stat.getFailedDeleteBootstrapBaseFiles());
      partitionBootstrapMetadataMap.put(stat.getPartitionPath(), bootstrapMetadata);
    }
    totalDeleted += stat.getSuccessDeleteFiles().size();
    if (earliestCommitToRetain == null) {
      // This will be the same for all partitions
      earliestCommitToRetain = stat.getEarliestCommitToRetain();
    }
  }
  return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L), totalDeleted,
      earliestCommitToRetain, partitionMetadataMap, CLEAN_METADATA_VERSION_2, partitionBootstrapMetadataMap);
}
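For orientation, here is a minimal sketch of invoking convertCleanMetadata directly. It reuses the six-argument HoodieCleanStat constructor that appears in the HoodieTestTable examples below; the instant times, partition path and duration are made up for illustration.

  // Hypothetical values, not from the Hudi codebase.
  HoodieCleanStat stat = new HoodieCleanStat(
      HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
      "2020/01/01",                 // hypothetical partition path
      Collections.emptyList(),      // delete path patterns
      Collections.emptyList(),      // successfully deleted files
      Collections.emptyList(),      // failed deletes
      "20230101000000");            // earliest commit to retain
  HoodieCleanMetadata metadata = CleanerUtils.convertCleanMetadata(
      "20230102000000",             // clean instant time
      Option.of(42L),               // duration in ms; Option.empty() would record -1
      Collections.singletonList(stat));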
use of org.apache.hudi.common.HoodieCleanStat in project hudi by apache.
the class HoodieTestTable method addClean.
public HoodieTestTable addClean(String instantTime) throws IOException {
  HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(
      new HoodieActionInstant(EMPTY_STRING, EMPTY_STRING, EMPTY_STRING), EMPTY_STRING,
      new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new HashMap<>());
  HoodieCleanStat cleanStats = new HoodieCleanStat(
      HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
      HoodieTestUtils.DEFAULT_PARTITION_PATHS[RANDOM.nextInt(HoodieTestUtils.DEFAULT_PARTITION_PATHS.length)],
      Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), instantTime);
  HoodieCleanMetadata cleanMetadata = convertCleanMetadata(instantTime, Option.of(0L),
      Collections.singletonList(cleanStats));
  return HoodieTestTable.of(metaClient).addClean(instantTime, cleanerPlan, cleanMetadata);
}
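A hypothetical test-side usage of the helper above; the instant time is made up, and metaClient is assumed to be an already-initialized HoodieTableMetaClient:

  // Sketch only: records a completed clean for instant "001" on the test timeline.
  HoodieTestTable testTable = HoodieTestTable.of(metaClient)
      .addClean("001");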
use of org.apache.hudi.common.HoodieCleanStat in project hudi by apache.
the class CleanActionExecutor method runClean.
private HoodieCleanMetadata runClean(HoodieTable<T, I, K, O> table, HoodieInstant cleanInstant,
                                     HoodieCleanerPlan cleanerPlan) {
  ValidationUtils.checkArgument(cleanInstant.getState().equals(HoodieInstant.State.REQUESTED)
      || cleanInstant.getState().equals(HoodieInstant.State.INFLIGHT));
  try {
    final HoodieInstant inflightInstant;
    final HoodieTimer timer = new HoodieTimer();
    timer.startTimer();
    if (cleanInstant.isRequested()) {
      // Transition the clean action from REQUESTED to INFLIGHT before doing any work
      inflightInstant = table.getActiveTimeline().transitionCleanRequestedToInflight(cleanInstant,
          TimelineMetadataUtils.serializeCleanerPlan(cleanerPlan));
    } else {
      inflightInstant = cleanInstant;
    }
    List<HoodieCleanStat> cleanStats = clean(context, cleanerPlan);
    if (cleanStats.isEmpty()) {
      return HoodieCleanMetadata.newBuilder().build();
    }
    table.getMetaClient().reloadActiveTimeline();
    HoodieCleanMetadata metadata = CleanerUtils.convertCleanMetadata(
        inflightInstant.getTimestamp(),
        Option.of(timer.endTimer()),
        cleanStats);
    if (!skipLocking) {
      this.txnManager.beginTransaction(Option.empty(), Option.empty());
    }
    writeTableMetadata(metadata, inflightInstant.getTimestamp());
    // Transition INFLIGHT to COMPLETED, persisting the clean metadata on the timeline
    table.getActiveTimeline().transitionCleanInflightToComplete(inflightInstant,
        TimelineMetadataUtils.serializeCleanMetadata(metadata));
    LOG.info("Marked clean started on " + inflightInstant.getTimestamp() + " as complete");
    return metadata;
  } catch (IOException e) {
    throw new HoodieIOException("Failed to clean up after commit", e);
  } finally {
    if (!skipLocking) {
      this.txnManager.endTransaction(Option.empty());
    }
  }
}
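Since runClean is private, a caller inside CleanActionExecutor might inspect the result as sketched below. This assumes the Avro-generated getters on HoodieCleanMetadata that mirror the fields populated by convertCleanMetadata above:

  // Sketch only; getter names are an assumption based on the Avro schema fields.
  HoodieCleanMetadata result = runClean(table, cleanInstant, cleanerPlan);
  LOG.info("Cleaned " + result.getTotalFilesDeleted() + " files in "
      + result.getTimeTakenInMillis() + " ms; earliest retained commit: "
      + result.getEarliestCommitToRetain());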
use of org.apache.hudi.common.HoodieCleanStat in project hudi by apache.
the class HoodieTestTable method getHoodieCleanMetadata.
public Pair<HoodieCleanerPlan, HoodieCleanMetadata> getHoodieCleanMetadata(String commitTime,
                                                                           HoodieTestTableState testTableState) {
  HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(
      new HoodieActionInstant(commitTime, CLEAN_ACTION, EMPTY_STRING), EMPTY_STRING,
      new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new HashMap<>());
  List<HoodieCleanStat> cleanStats = new ArrayList<>();
  for (Map.Entry<String, List<String>> entry : testTableState.getPartitionToFileIdMapForCleaner(commitTime).entrySet()) {
    cleanStats.add(new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS, entry.getKey(),
        entry.getValue(), entry.getValue(), Collections.emptyList(), commitTime));
  }
  return Pair.of(cleanerPlan, convertCleanMetadata(commitTime, Option.of(0L), cleanStats));
}
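A hypothetical test usage of the returned pair, assuming a populated HoodieTestTableState and a made-up instant; getKey/getValue follow the Pair accessors already used in CleanActionExecutor.clean below:

  Pair<HoodieCleanerPlan, HoodieCleanMetadata> planAndMetadata =
      testTable.getHoodieCleanMetadata("002", testTableState);
  HoodieCleanerPlan plan = planAndMetadata.getKey();
  HoodieCleanMetadata cleanMetadata = planAndMetadata.getValue();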
use of org.apache.hudi.common.HoodieCleanStat in project hudi by apache.
the class CleanActionExecutor method clean.
/**
 * Performs cleaning of partition paths according to the cleaning policy and returns the clean
 * statistics for each partition. Handles skew across partitions by making individual files,
 * rather than partitions, the unit of task distribution.
 *
 * @throws IllegalArgumentException if an unknown cleaning policy is provided
 */
List<HoodieCleanStat> clean(HoodieEngineContext context, HoodieCleanerPlan cleanerPlan) {
  // Note: count() here yields the number of per-partition lists, not the total file count
  int cleanerParallelism = Math.min(
      (int) (cleanerPlan.getFilePathsToBeDeletedPerPartition().values().stream().mapToInt(List::size).count()),
      config.getCleanerParallelism());
  LOG.info("Using cleanerParallelism: " + cleanerParallelism);
  context.setJobStatus(this.getClass().getSimpleName(), "Perform cleaning of partitions");
  Stream<Pair<String, CleanFileInfo>> filesToBeDeletedPerPartition =
      cleanerPlan.getFilePathsToBeDeletedPerPartition().entrySet().stream()
          .flatMap(x -> x.getValue().stream()
              .map(y -> new ImmutablePair<>(x.getKey(), new CleanFileInfo(y.getFilePath(), y.getIsBootstrapBaseFile()))));
  Stream<ImmutablePair<String, PartitionCleanStat>> partitionCleanStats =
      context.mapPartitionsToPairAndReduceByKey(filesToBeDeletedPerPartition,
          iterator -> deleteFilesFunc(iterator, table), PartitionCleanStat::merge, cleanerParallelism);
  Map<String, PartitionCleanStat> partitionCleanStatsMap =
      partitionCleanStats.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
  // Build a HoodieCleanStat for every partition in the plan, including partitions
  // for which nothing was deleted.
  return cleanerPlan.getFilePathsToBeDeletedPerPartition().keySet().stream().map(partitionPath -> {
    PartitionCleanStat partitionCleanStat = partitionCleanStatsMap.containsKey(partitionPath)
        ? partitionCleanStatsMap.get(partitionPath)
        : new PartitionCleanStat(partitionPath);
    HoodieActionInstant actionInstant = cleanerPlan.getEarliestInstantToRetain();
    return HoodieCleanStat.newBuilder()
        .withPolicy(config.getCleanerPolicy())
        .withPartitionPath(partitionPath)
        .withEarliestCommitRetained(Option.ofNullable(actionInstant != null
            ? new HoodieInstant(HoodieInstant.State.valueOf(actionInstant.getState()),
                actionInstant.getAction(), actionInstant.getTimestamp())
            : null))
        .withDeletePathPattern(partitionCleanStat.deletePathPatterns())
        .withSuccessfulDeletes(partitionCleanStat.successDeleteFiles())
        .withFailedDeletes(partitionCleanStat.failedDeleteFiles())
        .withDeleteBootstrapBasePathPatterns(partitionCleanStat.getDeleteBootstrapBasePathPatterns())
        .withSuccessfulDeleteBootstrapBaseFiles(partitionCleanStat.getSuccessfulDeleteBootstrapBaseFiles())
        .withFailedDeleteBootstrapBaseFiles(partitionCleanStat.getFailedDeleteBootstrapBaseFiles())
        .build();
  }).collect(Collectors.toList());
}
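To make the parallelism computation concrete, a standalone sketch with made-up values. As noted in the code comment, mapToInt(List::size).count() counts the per-partition lists rather than summing their sizes, so the bound is the number of partitions with entries in the plan:

  // Standalone illustration, not Hudi code; partition names and files are hypothetical.
  Map<String, List<String>> filesPerPartition = new HashMap<>();
  filesPerPartition.put("p1", Arrays.asList("f1", "f2", "f3", "f4", "f5"));
  filesPerPartition.put("p2", Arrays.asList("f6", "f7"));
  filesPerPartition.put("p3", Collections.emptyList());
  // count() counts the stream's list entries (3 partitions); it does not sum sizes (7 files)
  long partitionCount = filesPerPartition.values().stream().mapToInt(List::size).count();
  int cleanerParallelism = Math.min((int) partitionCount, 10); // 10 stands in for config.getCleanerParallelism()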