Use of org.apache.hudi.avro.model.HoodieCleanMetadata in project hudi by apache.
The class CleanMetadataV1MigrationHandler, method downgradeFrom.
@Override
public HoodieCleanMetadata downgradeFrom(HoodieCleanMetadata input) {
ValidationUtils.checkArgument(input.getVersion() == 2, "Input version is " + input.getVersion() + ". Must be 2");
final Path basePath = new Path(metaClient.getBasePath());
final Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap =
    input.getPartitionMetadata().entrySet().stream().map(entry -> {
      final String partitionPath = entry.getKey();
      final HoodieCleanPartitionMetadata partitionMetadata = entry.getValue();
      HoodieCleanPartitionMetadata cleanPartitionMetadata = HoodieCleanPartitionMetadata.newBuilder()
          .setDeletePathPatterns(partitionMetadata.getDeletePathPatterns().stream()
              .map(path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path))
              .collect(Collectors.toList()))
          .setSuccessDeleteFiles(partitionMetadata.getSuccessDeleteFiles().stream()
              .map(path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path))
              .collect(Collectors.toList()))
          .setFailedDeleteFiles(partitionMetadata.getFailedDeleteFiles().stream()
              .map(path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path))
              .collect(Collectors.toList()))
          .setPolicy(partitionMetadata.getPolicy())
          .setPartitionPath(partitionPath)
          .build();
      return Pair.of(partitionPath, cleanPartitionMetadata);
    }).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
return HoodieCleanMetadata.newBuilder()
    .setEarliestCommitToRetain(input.getEarliestCommitToRetain())
    .setStartCleanTime(input.getStartCleanTime())
    .setTimeTakenInMillis(input.getTimeTakenInMillis())
    .setTotalFilesDeleted(input.getTotalFilesDeleted())
    .setPartitionMetadata(partitionMetadataMap)
    .setVersion(getManagedVersion())
    .build();
}
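The conversion above leans on a convertToV1Path helper that re-qualifies the file names stored in V2 metadata (relative to their partition) into the fully-qualified paths V1 expects. A minimal sketch of such a helper, assuming V1 recorded absolute paths and that FSUtils.getPartitionPath joins the base path with a (possibly empty) relative partition path:

// Hedged sketch: rebuild an absolute path from base path + partition + file name.
// Blank or null names are passed through untouched, matching the call sites above.
private static String convertToV1Path(Path basePath, String partitionPath, String fileName) {
  if (fileName == null || fileName.trim().isEmpty()) {
    return fileName;
  }
  return new Path(FSUtils.getPartitionPath(basePath, partitionPath), fileName).toString();
}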
Use of org.apache.hudi.avro.model.HoodieCleanMetadata in project hudi by apache.
The class HoodieTestTable, method addClean.
public HoodieTestTable addClean(String instantTime) throws IOException {
HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(
    new HoodieActionInstant(EMPTY_STRING, EMPTY_STRING, EMPTY_STRING),
    EMPTY_STRING, new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new HashMap<>());
HoodieCleanStat cleanStats = new HoodieCleanStat(
    HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
    HoodieTestUtils.DEFAULT_PARTITION_PATHS[RANDOM.nextInt(HoodieTestUtils.DEFAULT_PARTITION_PATHS.length)],
    Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), instantTime);
HoodieCleanMetadata cleanMetadata = convertCleanMetadata(
    instantTime, Option.of(0L), Collections.singletonList(cleanStats));
return HoodieTestTable.of(metaClient).addClean(instantTime, cleanerPlan, cleanMetadata);
}
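Because addClean returns the HoodieTestTable itself, it chains with the other fixture methods. A hypothetical test setup, assuming the companion addCommit fixture and an enclosing test method declared throws Exception (instant times are arbitrary):

// Seed a test table with two commits followed by a clean instant.
HoodieTestTable testTable = HoodieTestTable.of(metaClient)
    .addCommit("001")
    .addCommit("002")
    .addClean("003");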
Use of org.apache.hudi.avro.model.HoodieCleanMetadata in project hudi by apache.
The class ValidateAsyncOperations, method execute.
@Override
public void execute(ExecutionContext executionContext, int curItrCount) throws Exception {
if (config.getIterationCountToExecute() == curItrCount) {
try {
log.warn("Executing ValidateHoodieAsyncOperations node {} with target base path {}",
    this.getName(), executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath);
String basePath = executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath;
int maxCommitsRetained = executionContext.getHoodieTestSuiteWriter().getWriteConfig().getCleanerCommitsRetained() + 1;
FileSystem fs = FSUtils.getFs(basePath, executionContext.getHoodieTestSuiteWriter().getConfiguration());
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
    .setBasePath(executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath)
    .setConf(executionContext.getJsc().hadoopConfiguration())
    .build();
Option<HoodieInstant> latestCleanInstant = metaClient.getActiveTimeline()
    .filter(instant -> instant.getAction().equals(HoodieTimeline.CLEAN_ACTION))
    .lastInstant();
if (latestCleanInstant.isPresent()) {
log.warn("Latest clean commit " + latestCleanInstant.get());
HoodieCleanMetadata cleanMetadata = CleanerUtils.getCleanerMetadata(metaClient, latestCleanInstant.get());
String earliestCommitToRetain = cleanMetadata.getEarliestCommitToRetain();
log.warn("Earliest commit to retain : " + earliestCommitToRetain);
long unCleanedInstants = metaClient.getActiveTimeline().filterCompletedInstants()
    .filter(instant -> HoodieTimeline.compareTimestamps(
        instant.getTimestamp(), HoodieTimeline.GREATER_THAN_OR_EQUALS, earliestCommitToRetain))
    .getInstants().count();
ValidationUtils.checkArgument(unCleanedInstants >= (maxCommitsRetained + 1),
    "Total uncleaned instants " + unCleanedInstants
        + " mismatched with max commits retained " + (maxCommitsRetained + 1));
}
if (config.validateArchival() || config.validateClean()) {
final Pattern ARCHIVE_FILE_PATTERN = Pattern.compile("\\.commits_\\.archive\\..*");
final Pattern CLEAN_FILE_PATTERN = Pattern.compile(".*\\.clean\\..*");
String metadataPath = executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath + "/.hoodie";
FileStatus[] metaFileStatuses = fs.listStatus(new Path(metadataPath));
boolean cleanFound = false;
for (FileStatus fileStatus : metaFileStatuses) {
Matcher cleanFileMatcher = CLEAN_FILE_PATTERN.matcher(fileStatus.getPath().getName());
if (cleanFileMatcher.matches()) {
cleanFound = true;
break;
}
}
String archivalPath = executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath + "/.hoodie/archived";
metaFileStatuses = fs.listStatus(new Path(archivalPath));
boolean archFound = false;
for (FileStatus fileStatus : metaFileStatuses) {
Matcher archFileMatcher = ARCHIVE_FILE_PATTERN.matcher(fileStatus.getPath().getName());
if (archFileMatcher.matches()) {
archFound = true;
}
}
if (config.validateArchival() && !archFound) {
throw new AssertionError("Archival NotFound in " + metadataPath);
}
if (config.validateClean() && !cleanFound) {
throw new AssertionError("Clean commits NotFound in " + metadataPath);
}
}
} catch (Exception e) {
log.warn("Exception thrown in ValidateHoodieAsyncOperations Node :: " + e.getCause() + ", msg :: " + e.getMessage());
throw e;
}
}
}
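The clean-validation core, locating the latest completed clean and reading its earliestCommitToRetain, can be reproduced standalone with the same APIs used above. A minimal sketch (placeholder base path; imports and IOException handling elided, as in the snippets here):

// Inspect the latest completed clean on a table.
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
    .setBasePath("/tmp/hudi_table")  // hypothetical table location
    .setConf(new Configuration())
    .build();
HoodieTimeline cleanTimeline = metaClient.getActiveTimeline().getCleanerTimeline().filterCompletedInstants();
Option<HoodieInstant> latestClean = cleanTimeline.lastInstant();
if (latestClean.isPresent()) {
  // deserializeHoodieCleanMetadata throws IOException; handle or propagate.
  HoodieCleanMetadata metadata = TimelineMetadataUtils.deserializeHoodieCleanMetadata(
      cleanTimeline.getInstantDetails(latestClean.get()).get());
  System.out.println("Earliest commit to retain: " + metadata.getEarliestCommitToRetain());
}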
Use of org.apache.hudi.avro.model.HoodieCleanMetadata in project hudi by apache.
The class CleansCommand, method showCleanPartitions.
@CliCommand(value = "clean showpartitions", help = "Show partition level details of a clean")
public String showCleanPartitions(
    @CliOption(key = {"clean"}, help = "clean to show") final String instantTime,
    @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
    @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
    @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
    @CliOption(key = {"headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
    throws Exception {
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCleanerTimeline().filterCompletedInstants();
HoodieInstant cleanInstant = new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, instantTime);
if (!timeline.containsInstant(cleanInstant)) {
return "Clean " + instantTime + " not found in metadata " + timeline;
}
HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils.deserializeHoodieCleanMetadata(
    timeline.getInstantDetails(cleanInstant).get());
List<Comparable[]> rows = new ArrayList<>();
for (Map.Entry<String, HoodieCleanPartitionMetadata> entry : cleanMetadata.getPartitionMetadata().entrySet()) {
String path = entry.getKey();
HoodieCleanPartitionMetadata stats = entry.getValue();
String policy = stats.getPolicy();
int totalSuccessDeletedFiles = stats.getSuccessDeleteFiles().size();
int totalFailedDeletedFiles = stats.getFailedDeleteFiles().size();
rows.add(new Comparable[] { path, policy, totalSuccessDeletedFiles, totalFailedDeletedFiles });
}
TableHeader header = new TableHeader()
    .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH)
    .addTableHeaderField(HoodieTableHeaderFields.HEADER_CLEANING_POLICY)
    .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_SUCCESSFULLY_DELETED)
    .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FAILED_DELETIONS);
return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
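Exercised from the hudi-cli shell, the @CliOption keys above map directly onto command flags. A hypothetical session (table path and instant time are placeholders):

hudi-> connect --path /tmp/hudi_table
hudi-> clean showpartitions --clean 20220301120000 --limit 10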
Use of org.apache.hudi.avro.model.HoodieCleanMetadata in project hudi by apache.
The class CleanActionExecutor, method runClean.
private HoodieCleanMetadata runClean(HoodieTable<T, I, K, O> table, HoodieInstant cleanInstant, HoodieCleanerPlan cleanerPlan) {
ValidationUtils.checkArgument(cleanInstant.getState().equals(HoodieInstant.State.REQUESTED)
    || cleanInstant.getState().equals(HoodieInstant.State.INFLIGHT));
try {
final HoodieInstant inflightInstant;
final HoodieTimer timer = new HoodieTimer();
timer.startTimer();
if (cleanInstant.isRequested()) {
inflightInstant = table.getActiveTimeline().transitionCleanRequestedToInflight(
    cleanInstant, TimelineMetadataUtils.serializeCleanerPlan(cleanerPlan));
} else {
inflightInstant = cleanInstant;
}
List<HoodieCleanStat> cleanStats = clean(context, cleanerPlan);
if (cleanStats.isEmpty()) {
return HoodieCleanMetadata.newBuilder().build();
}
table.getMetaClient().reloadActiveTimeline();
HoodieCleanMetadata metadata = CleanerUtils.convertCleanMetadata(
    inflightInstant.getTimestamp(), Option.of(timer.endTimer()), cleanStats);
if (!skipLocking) {
this.txnManager.beginTransaction(Option.empty(), Option.empty());
}
writeTableMetadata(metadata, inflightInstant.getTimestamp());
table.getActiveTimeline().transitionCleanInflightToComplete(
    inflightInstant, TimelineMetadataUtils.serializeCleanMetadata(metadata));
LOG.info("Marked clean started on " + inflightInstant.getTimestamp() + " as complete");
return metadata;
} catch (IOException e) {
throw new HoodieIOException("Failed to clean up after commit", e);
} finally {
if (!skipLocking) {
this.txnManager.endTransaction(Option.empty());
}
}
}
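Both runClean and the test fixture earlier funnel per-partition HoodieCleanStats through CleanerUtils.convertCleanMetadata. The following is a plausible sketch of that conversion, not the verbatim Hudi implementation: it folds the stats into the Avro model, assuming every stat carries the same earliestCommitToRetain and that 2 is the latest clean metadata version.

// Hedged sketch: aggregate per-partition clean stats into HoodieCleanMetadata.
public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime,
                                                       Option<Long> durationInMs,
                                                       List<HoodieCleanStat> cleanStats) {
  Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap = new HashMap<>();
  int totalDeleted = 0;
  String earliestCommitToRetain = "";
  for (HoodieCleanStat stat : cleanStats) {
    partitionMetadataMap.put(stat.getPartitionPath(), HoodieCleanPartitionMetadata.newBuilder()
        .setPartitionPath(stat.getPartitionPath())
        .setPolicy(stat.getPolicy().name())
        .setDeletePathPatterns(stat.getDeletePathPatterns())
        .setSuccessDeleteFiles(stat.getSuccessDeleteFiles())
        .setFailedDeleteFiles(stat.getFailedDeleteFiles())
        .build());
    totalDeleted += stat.getSuccessDeleteFiles().size();
    earliestCommitToRetain = stat.getEarliestCommitToRetain(); // assumed identical across stats
  }
  return HoodieCleanMetadata.newBuilder()
      .setVersion(2)  // assumption: latest clean metadata version
      .setStartCleanTime(startCleanTime)
      .setTimeTakenInMillis(durationInMs.orElse(0L))
      .setTotalFilesDeleted(totalDeleted)
      .setEarliestCommitToRetain(earliestCommitToRetain)
      .setPartitionMetadata(partitionMetadataMap)
      .build();
}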