
Example 6 with HoodieCleanMetadata

use of org.apache.hudi.avro.model.HoodieCleanMetadata in project hudi by apache.

the class CleanMetadataV1MigrationHandler method downgradeFrom.

@Override
public HoodieCleanMetadata downgradeFrom(HoodieCleanMetadata input) {
    ValidationUtils.checkArgument(input.getVersion() == 2, "Input version is " + input.getVersion() + ". Must be 2");
    final Path basePath = new Path(metaClient.getBasePath());
    final Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap = input.getPartitionMetadata().entrySet().stream().map(entry -> {
        final String partitionPath = entry.getKey();
        final HoodieCleanPartitionMetadata partitionMetadata = entry.getValue();
        HoodieCleanPartitionMetadata cleanPartitionMetadata = HoodieCleanPartitionMetadata.newBuilder()
            // Each file list is rewritten from relative names to absolute V1-style paths.
            .setDeletePathPatterns(partitionMetadata.getDeletePathPatterns().stream()
                .map(path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path))
                .collect(Collectors.toList()))
            .setSuccessDeleteFiles(partitionMetadata.getSuccessDeleteFiles().stream()
                .map(path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path))
                .collect(Collectors.toList()))
            .setFailedDeleteFiles(partitionMetadata.getFailedDeleteFiles().stream()
                .map(path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path))
                .collect(Collectors.toList()))
            .setPolicy(partitionMetadata.getPolicy())
            .setPartitionPath(partitionPath)
            .build();
        return Pair.of(partitionPath, cleanPartitionMetadata);
    }).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    return HoodieCleanMetadata.newBuilder()
        .setEarliestCommitToRetain(input.getEarliestCommitToRetain())
        .setStartCleanTime(input.getStartCleanTime())
        .setTimeTakenInMillis(input.getTimeTakenInMillis())
        .setTotalFilesDeleted(input.getTotalFilesDeleted())
        .setPartitionMetadata(partitionMetadataMap)
        .setVersion(getManagedVersion())
        .build();
}
Also used : Path(org.apache.hadoop.fs.Path) AbstractMigratorBase(org.apache.hudi.common.table.timeline.versioning.AbstractMigratorBase) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieCleanPartitionMetadata(org.apache.hudi.avro.model.HoodieCleanPartitionMetadata) Collectors(java.util.stream.Collectors) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
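
The downgrade rewrites every relative file name stored in V2 metadata into an absolute path, which is the layout V1 expects. The convertToV1Path helper is not shown above; the sketch below is a plausible stand-in, assuming it simply joins base path, partition path, and file name (the class name and exact behavior are illustrative, inferred from the call sites).

import org.apache.hadoop.fs.Path;

public class V1PathSketch {
    // Hypothetical stand-in for the convertToV1Path helper used above:
    // join base path, partition path, and file name into one absolute path.
    static String convertToV1Path(Path basePath, String partitionPath, String fileName) {
        return new Path(new Path(basePath, partitionPath), fileName).toString();
    }

    public static void main(String[] args) {
        Path base = new Path("/tmp/hoodie_table");
        // Prints /tmp/hoodie_table/2022/01/01/f1.parquet
        System.out.println(convertToV1Path(base, "2022/01/01", "f1.parquet"));
    }
}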

Example 7 with HoodieCleanMetadata

use of org.apache.hudi.avro.model.HoodieCleanMetadata in project hudi by apache.

the class HoodieTestTable method addClean.

public HoodieTestTable addClean(String instantTime) throws IOException {
    HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(
        new HoodieActionInstant(EMPTY_STRING, EMPTY_STRING, EMPTY_STRING),
        EMPTY_STRING, new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new HashMap<>());
    HoodieCleanStat cleanStats = new HoodieCleanStat(
        HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
        HoodieTestUtils.DEFAULT_PARTITION_PATHS[RANDOM.nextInt(HoodieTestUtils.DEFAULT_PARTITION_PATHS.length)],
        Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), instantTime);
    HoodieCleanMetadata cleanMetadata = convertCleanMetadata(instantTime, Option.of(0L), Collections.singletonList(cleanStats));
    return HoodieTestTable.of(metaClient).addClean(instantTime, cleanerPlan, cleanMetadata);
}
Also used : HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) HoodieActionInstant(org.apache.hudi.avro.model.HoodieActionInstant) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan)
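
A hedged usage sketch: in a test, addClean stamps a synthetic clean instant (plan plus metadata) onto the table's timeline. The metaClient is assumed to come from a test fixture for an already initialized table, and the HoodieTestTable import path is an assumption.

import java.io.IOException;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.testutils.HoodieTestTable;

public class SyntheticCleanSketch {
    // Hypothetical helper: stamp two synthetic clean instants onto a test table.
    // The metaClient must point at an already initialized Hudi test table.
    static HoodieTestTable withSyntheticCleans(HoodieTableMetaClient metaClient) throws IOException {
        return HoodieTestTable.of(metaClient)
            .addClean("20220101000000") // Hudi instant times are timestamp strings
            .addClean("20220102000000");
    }
}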

Example 8 with HoodieCleanMetadata

use of org.apache.hudi.avro.model.HoodieCleanMetadata in project hudi by apache.

the class ValidateAsyncOperations method execute.

@Override
public void execute(ExecutionContext executionContext, int curItrCount) throws Exception {
    if (config.getIterationCountToExecute() == curItrCount) {
        try {
            log.warn("Executing ValidateHoodieAsyncOperations node {} with target base path {} ", this.getName(), executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath);
            String basePath = executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath;
            int maxCommitsRetained = executionContext.getHoodieTestSuiteWriter().getWriteConfig().getCleanerCommitsRetained() + 1;
            FileSystem fs = FSUtils.getFs(basePath, executionContext.getHoodieTestSuiteWriter().getConfiguration());
            HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
                .setBasePath(executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath)
                .setConf(executionContext.getJsc().hadoopConfiguration())
                .build();
            Option<HoodieInstant> latestCleanInstant = metaClient.getActiveTimeline()
                .filter(instant -> instant.getAction().equals(HoodieTimeline.CLEAN_ACTION))
                .lastInstant();
            if (latestCleanInstant.isPresent()) {
                log.warn("Latest clean commit " + latestCleanInstant.get());
                HoodieCleanMetadata cleanMetadata = CleanerUtils.getCleanerMetadata(metaClient, latestCleanInstant.get());
                String earliestCommitToRetain = cleanMetadata.getEarliestCommitToRetain();
                log.warn("Earliest commit to retain : " + earliestCommitToRetain);
                long unCleanedInstants = metaClient.getActiveTimeline().filterCompletedInstants()
                    .filter(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(),
                        HoodieTimeline.GREATER_THAN_OR_EQUALS, earliestCommitToRetain))
                    .getInstants().count();
                ValidationUtils.checkArgument(unCleanedInstants >= (maxCommitsRetained + 1), "Total uncleaned instants " + unCleanedInstants + " mismatched with max commits retained " + (maxCommitsRetained + 1));
            }
            if (config.validateArchival() || config.validateClean()) {
                final Pattern ARCHIVE_FILE_PATTERN = Pattern.compile("\\.commits_\\.archive\\..*");
                final Pattern CLEAN_FILE_PATTERN = Pattern.compile(".*\\.clean\\..*");
                String metadataPath = executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath + "/.hoodie";
                FileStatus[] metaFileStatuses = fs.listStatus(new Path(metadataPath));
                boolean cleanFound = false;
                for (FileStatus fileStatus : metaFileStatuses) {
                    Matcher cleanFileMatcher = CLEAN_FILE_PATTERN.matcher(fileStatus.getPath().getName());
                    if (cleanFileMatcher.matches()) {
                        cleanFound = true;
                        break;
                    }
                }
                String archivalPath = executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath + "/.hoodie/archived";
                metaFileStatuses = fs.listStatus(new Path(archivalPath));
                boolean archFound = false;
                for (FileStatus fileStatus : metaFileStatuses) {
                    Matcher archFileMatcher = ARCHIVE_FILE_PATTERN.matcher(fileStatus.getPath().getName());
                    if (archFileMatcher.matches()) {
                        archFound = true;
                    }
                }
                if (config.validateArchival() && !archFound) {
                    throw new AssertionError("Archival NotFound in " + metadataPath);
                }
                if (config.validateClean() && !cleanFound) {
                    throw new AssertionError("Clean commits NotFound in " + metadataPath);
                }
            }
        } catch (Exception e) {
            log.warn("Exception thrown in ValidateHoodieAsyncOperations Node :: " + e.getCause() + ", msg :: " + e.getMessage());
            throw e;
        }
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) ExecutionContext(org.apache.hudi.integ.testsuite.dag.ExecutionContext) Arrays(java.util.Arrays) Logger(org.slf4j.Logger) FileSystem(org.apache.hadoop.fs.FileSystem) LoggerFactory(org.slf4j.LoggerFactory) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) FileStatus(org.apache.hadoop.fs.FileStatus) Collectors(java.util.stream.Collectors) List(java.util.List) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) Matcher(java.util.regex.Matcher) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Config(org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) Pattern(java.util.regex.Pattern) FSUtils(org.apache.hudi.common.fs.FSUtils) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline)
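
The validation above hinges on two file-name patterns scanned under .hoodie and .hoodie/archived. A minimal, self-contained check of what they match; the sample names are illustrative, modeled on the metadata file naming the code inspects.

import java.util.regex.Pattern;

public class MetaFilePatternSketch {
    public static void main(String[] args) {
        Pattern cleanFile = Pattern.compile(".*\\.clean\\..*");
        Pattern archiveFile = Pattern.compile("\\.commits_\\.archive\\..*");
        // true: a clean instant file such as <instant>.clean.requested
        System.out.println(cleanFile.matcher("20220101000000.clean.requested").matches());
        // true: an archived commits file under .hoodie/archived
        System.out.println(archiveFile.matcher(".commits_.archive.1_1-0-1").matches());
        // false: a regular commit file matches neither pattern
        System.out.println(cleanFile.matcher("20220101000000.commit").matches());
    }
}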

Example 9 with HoodieCleanMetadata

use of org.apache.hudi.avro.model.HoodieCleanMetadata in project hudi by apache.

the class CleansCommand method showCleanPartitions.

@CliCommand(value = "clean showpartitions", help = "Show partition level details of a clean")
public String showCleanPartitions(
        @CliOption(key = { "clean" }, help = "clean to show") final String instantTime,
        @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
        @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws Exception {
    HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
    HoodieTimeline timeline = activeTimeline.getCleanerTimeline().filterCompletedInstants();
    HoodieInstant cleanInstant = new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, instantTime);
    if (!timeline.containsInstant(cleanInstant)) {
        return "Clean " + instantTime + " not found in metadata " + timeline;
    }
    HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(cleanInstant).get());
    List<Comparable[]> rows = new ArrayList<>();
    for (Map.Entry<String, HoodieCleanPartitionMetadata> entry : cleanMetadata.getPartitionMetadata().entrySet()) {
        String path = entry.getKey();
        HoodieCleanPartitionMetadata stats = entry.getValue();
        String policy = stats.getPolicy();
        int totalSuccessDeletedFiles = stats.getSuccessDeleteFiles().size();
        int totalFailedDeletedFiles = stats.getFailedDeleteFiles().size();
        rows.add(new Comparable[] { path, policy, totalSuccessDeletedFiles, totalFailedDeletedFiles });
    }
    TableHeader header = new TableHeader()
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_CLEANING_POLICY)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_SUCCESSFULLY_DELETED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FAILED_DELETIONS);
    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) TableHeader(org.apache.hudi.cli.TableHeader) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) ArrayList(java.util.ArrayList) HoodieCleanPartitionMetadata(org.apache.hudi.avro.model.HoodieCleanPartitionMetadata) HashMap(java.util.HashMap) Map(java.util.Map) CliCommand(org.springframework.shell.core.annotation.CliCommand)
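
To see what the command reads, here is a hedged sketch that assembles a one-partition HoodieCleanMetadata with the same Avro builders used in the other examples and then summarizes it the way showCleanPartitions does. Every field value is illustrative, and the sketch assumes the builder accepts exactly this set of fields.

import java.util.Collections;
import java.util.Map;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanPartitionMetadata;

public class CleanSummarySketch {
    public static void main(String[] args) {
        // Illustrative per-partition record: one successful delete, no failures.
        HoodieCleanPartitionMetadata partition = HoodieCleanPartitionMetadata.newBuilder()
            .setPartitionPath("2022/01/01")
            .setPolicy("KEEP_LATEST_COMMITS")
            .setDeletePathPatterns(Collections.emptyList())
            .setSuccessDeleteFiles(Collections.singletonList("f1.parquet"))
            .setFailedDeleteFiles(Collections.emptyList())
            .build();
        HoodieCleanMetadata metadata = HoodieCleanMetadata.newBuilder()
            .setVersion(2)
            .setStartCleanTime("20220101000000")
            .setEarliestCommitToRetain("20211231000000")
            .setTimeTakenInMillis(100L)
            .setTotalFilesDeleted(1)
            .setPartitionMetadata(Collections.singletonMap("2022/01/01", partition))
            .build();
        // Same per-partition tallies the CLI command turns into table rows.
        for (Map.Entry<String, HoodieCleanPartitionMetadata> e : metadata.getPartitionMetadata().entrySet()) {
            System.out.printf("%s policy=%s deleted=%d failed=%d%n",
                e.getKey(), e.getValue().getPolicy(),
                e.getValue().getSuccessDeleteFiles().size(),
                e.getValue().getFailedDeleteFiles().size());
        }
    }
}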

Example 10 with HoodieCleanMetadata

use of org.apache.hudi.avro.model.HoodieCleanMetadata in project hudi by apache.

the class CleanActionExecutor method runClean.

private HoodieCleanMetadata runClean(HoodieTable<T, I, K, O> table, HoodieInstant cleanInstant, HoodieCleanerPlan cleanerPlan) {
    ValidationUtils.checkArgument(cleanInstant.getState().equals(HoodieInstant.State.REQUESTED) || cleanInstant.getState().equals(HoodieInstant.State.INFLIGHT));
    try {
        final HoodieInstant inflightInstant;
        final HoodieTimer timer = new HoodieTimer();
        timer.startTimer();
        if (cleanInstant.isRequested()) {
            inflightInstant = table.getActiveTimeline().transitionCleanRequestedToInflight(cleanInstant, TimelineMetadataUtils.serializeCleanerPlan(cleanerPlan));
        } else {
            inflightInstant = cleanInstant;
        }
        List<HoodieCleanStat> cleanStats = clean(context, cleanerPlan);
        if (cleanStats.isEmpty()) {
            return HoodieCleanMetadata.newBuilder().build();
        }
        table.getMetaClient().reloadActiveTimeline();
        HoodieCleanMetadata metadata = CleanerUtils.convertCleanMetadata(inflightInstant.getTimestamp(), Option.of(timer.endTimer()), cleanStats);
        if (!skipLocking) {
            this.txnManager.beginTransaction(Option.empty(), Option.empty());
        }
        writeTableMetadata(metadata, inflightInstant.getTimestamp());
        table.getActiveTimeline().transitionCleanInflightToComplete(inflightInstant, TimelineMetadataUtils.serializeCleanMetadata(metadata));
        LOG.info("Marked clean started on " + inflightInstant.getTimestamp() + " as complete");
        return metadata;
    } catch (IOException e) {
        throw new HoodieIOException("Failed to clean up after commit", e);
    } finally {
        if (!skipLocking) {
            this.txnManager.endTransaction(Option.empty());
        }
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) IOException(java.io.IOException)
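
The convertCleanMetadata call is the bridge from per-partition HoodieCleanStat objects to the Avro HoodieCleanMetadata that gets serialized onto the timeline. A minimal sketch of that conversion in isolation, reusing the HoodieCleanStat constructor shape from the addClean example above; all stat values are illustrative.

import java.util.Collections;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.common.HoodieCleanStat;
import org.apache.hudi.common.model.HoodieCleaningPolicy;
import org.apache.hudi.common.util.CleanerUtils;
import org.apache.hudi.common.util.Option;

public class ConvertCleanMetadataSketch {
    public static void main(String[] args) {
        HoodieCleanStat stat = new HoodieCleanStat(
            HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
            "2022/01/01",             // partition that was cleaned
            Collections.emptyList(),  // delete path patterns
            Collections.emptyList(),  // successfully deleted files
            Collections.emptyList(),  // failed deletes
            "20220101000000");        // earliest commit to retain (illustrative)
        // startCleanTime and duration are illustrative values.
        HoodieCleanMetadata metadata = CleanerUtils.convertCleanMetadata(
            "20220102000000", Option.of(42L), Collections.singletonList(stat));
        System.out.println("files deleted: " + metadata.getTotalFilesDeleted());
    }
}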

Aggregations

HoodieCleanMetadata (org.apache.hudi.avro.model.HoodieCleanMetadata): 22 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 11 usages
HoodieCleanStat (org.apache.hudi.common.HoodieCleanStat): 8 usages
HashMap (java.util.HashMap): 7 usages
Map (java.util.Map): 7 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 7 usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 7 usages
ArrayList (java.util.ArrayList): 6 usages
List (java.util.List): 6 usages
Collectors (java.util.stream.Collectors): 6 usages
Path (org.apache.hadoop.fs.Path): 6 usages
HoodieCleanPartitionMetadata (org.apache.hudi.avro.model.HoodieCleanPartitionMetadata): 6 usages
HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline): 6 usages
IOException (java.io.IOException): 5 usages
HoodieCleanerPlan (org.apache.hudi.avro.model.HoodieCleanerPlan): 5 usages
FSUtils (org.apache.hudi.common.fs.FSUtils): 5 usages
Pair (org.apache.hudi.common.util.collection.Pair): 5 usages
Test (org.junit.jupiter.api.Test): 5 usages
HoodieRollbackMetadata (org.apache.hudi.avro.model.HoodieRollbackMetadata): 4 usages
CleanerUtils (org.apache.hudi.common.util.CleanerUtils): 4 usages