
Example 1 with HoodieCleanPartitionMetadata

Use of org.apache.hudi.avro.model.HoodieCleanPartitionMetadata in project hudi by apache.

From the class CleanerUtils, method convertCleanMetadata:

public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime, Option<Long> durationInMs, List<HoodieCleanStat> cleanStats) {
    Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap = new HashMap<>();
    Map<String, HoodieCleanPartitionMetadata> partitionBootstrapMetadataMap = new HashMap<>();
    int totalDeleted = 0;
    String earliestCommitToRetain = null;
    for (HoodieCleanStat stat : cleanStats) {
        HoodieCleanPartitionMetadata metadata = new HoodieCleanPartitionMetadata(
            stat.getPartitionPath(), stat.getPolicy().name(), stat.getDeletePathPatterns(),
            stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles());
        partitionMetadataMap.put(stat.getPartitionPath(), metadata);
        if ((null != stat.getDeleteBootstrapBasePathPatterns()) && (!stat.getDeleteBootstrapBasePathPatterns().isEmpty())) {
            HoodieCleanPartitionMetadata bootstrapMetadata = new HoodieCleanPartitionMetadata(
                stat.getPartitionPath(), stat.getPolicy().name(), stat.getDeleteBootstrapBasePathPatterns(),
                stat.getSuccessDeleteBootstrapBaseFiles(), stat.getFailedDeleteBootstrapBaseFiles());
            partitionBootstrapMetadataMap.put(stat.getPartitionPath(), bootstrapMetadata);
        }
        totalDeleted += stat.getSuccessDeleteFiles().size();
        if (earliestCommitToRetain == null) {
            // This will be the same for all partitions
            earliestCommitToRetain = stat.getEarliestCommitToRetain();
        }
    }
    return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L), totalDeleted,
        earliestCommitToRetain, partitionMetadataMap, CLEAN_METADATA_VERSION_2, partitionBootstrapMetadataMap);
}
Also used : HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) HashMap(java.util.HashMap) HoodieCleanPartitionMetadata(org.apache.hudi.avro.model.HoodieCleanPartitionMetadata) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata)
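For context, here is a minimal usage sketch (not taken from the Hudi sources) showing how the metadata produced above might be consumed. The package of CleanerUtils is assumed to be org.apache.hudi.common.util, and the caller, method name, and printed summary are hypothetical; the getters used all appear in the other examples on this page.

import java.util.List;
import java.util.Map;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanPartitionMetadata;
import org.apache.hudi.common.HoodieCleanStat;
import org.apache.hudi.common.util.CleanerUtils;
import org.apache.hudi.common.util.Option;

// Hypothetical caller: summarize what a finished clean deleted, per partition.
public static void printCleanSummary(String cleanInstantTime, List<HoodieCleanStat> cleanStats) {
    // Option.empty() makes convertCleanMetadata fall back to -1 for the duration.
    HoodieCleanMetadata metadata = CleanerUtils.convertCleanMetadata(cleanInstantTime, Option.empty(), cleanStats);
    System.out.println("Earliest commit retained: " + metadata.getEarliestCommitToRetain());
    System.out.println("Total files deleted: " + metadata.getTotalFilesDeleted());
    for (Map.Entry<String, HoodieCleanPartitionMetadata> entry : metadata.getPartitionMetadata().entrySet()) {
        HoodieCleanPartitionMetadata stats = entry.getValue();
        System.out.println(entry.getKey() + ": " + stats.getSuccessDeleteFiles().size()
            + " deleted, " + stats.getFailedDeleteFiles().size() + " failed");
    }
}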

Example 2 with HoodieCleanPartitionMetadata

Use of org.apache.hudi.avro.model.HoodieCleanPartitionMetadata in project hudi by apache.

From the class CleanMetadataV1MigrationHandler, method downgradeFrom:

@Override
public HoodieCleanMetadata downgradeFrom(HoodieCleanMetadata input) {
    ValidationUtils.checkArgument(input.getVersion() == 2, "Input version is " + input.getVersion() + ". Must be 2");
    final Path basePath = new Path(metaClient.getBasePath());
    final Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap = input.getPartitionMetadata().entrySet().stream().map(entry -> {
        final String partitionPath = entry.getKey();
        final HoodieCleanPartitionMetadata partitionMetadata = entry.getValue();
        HoodieCleanPartitionMetadata cleanPartitionMetadata = HoodieCleanPartitionMetadata.newBuilder()
            .setDeletePathPatterns(partitionMetadata.getDeletePathPatterns().stream()
                .map(path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path))
                .collect(Collectors.toList()))
            .setSuccessDeleteFiles(partitionMetadata.getSuccessDeleteFiles().stream()
                .map(path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path))
                .collect(Collectors.toList()))
            .setFailedDeleteFiles(partitionMetadata.getFailedDeleteFiles().stream()
                .map(path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path))
                .collect(Collectors.toList()))
            .setPartitionPath(partitionPath)
            .setPolicy(partitionMetadata.getPolicy())
            .build();
        return Pair.of(partitionPath, cleanPartitionMetadata);
    }).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    return HoodieCleanMetadata.newBuilder()
        .setEarliestCommitToRetain(input.getEarliestCommitToRetain())
        .setStartCleanTime(input.getStartCleanTime())
        .setTimeTakenInMillis(input.getTimeTakenInMillis())
        .setTotalFilesDeleted(input.getTotalFilesDeleted())
        .setPartitionMetadata(partitionMetadataMap)
        .setVersion(getManagedVersion())
        .build();
}
Also used : Path(org.apache.hadoop.fs.Path) AbstractMigratorBase(org.apache.hudi.common.table.timeline.versioning.AbstractMigratorBase) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieCleanPartitionMetadata(org.apache.hudi.avro.model.HoodieCleanPartitionMetadata) Collectors(java.util.stream.Collectors) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
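The convertToV1Path helper called above is private to the class and not shown on this page. Below is a rough sketch of what such a helper presumably does, re-qualifying V2's partition-relative file names under the table base path; the method name and body are illustrative rather than the actual Hudi implementation, though FSUtils and Path are already among the imports of the original class.

// Illustration only: V1 clean metadata stores fully qualified file paths, while V2
// stores names relative to the partition, so the downgrade must re-qualify them.
private static String convertToV1PathSketch(Path basePath, String partitionPath, String fileName) {
    if (fileName == null || fileName.isEmpty()) {
        return fileName;
    }
    return new Path(FSUtils.getPartitionPath(basePath, partitionPath), fileName).toString();
}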

Example 3 with HoodieCleanPartitionMetadata

Use of org.apache.hudi.avro.model.HoodieCleanPartitionMetadata in project hudi by apache.

From the class CleansCommand, method showCleanPartitions:

@CliCommand(value = "clean showpartitions", help = "Show partition level details of a clean")
public String showCleanPartitions(
        @CliOption(key = { "clean" }, help = "clean to show") final String instantTime,
        @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
        @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws Exception {
    HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
    HoodieTimeline timeline = activeTimeline.getCleanerTimeline().filterCompletedInstants();
    HoodieInstant cleanInstant = new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, instantTime);
    if (!timeline.containsInstant(cleanInstant)) {
        return "Clean " + instantTime + " not found in metadata " + timeline;
    }
    HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(cleanInstant).get());
    List<Comparable[]> rows = new ArrayList<>();
    for (Map.Entry<String, HoodieCleanPartitionMetadata> entry : cleanMetadata.getPartitionMetadata().entrySet()) {
        String path = entry.getKey();
        HoodieCleanPartitionMetadata stats = entry.getValue();
        String policy = stats.getPolicy();
        int totalSuccessDeletedFiles = stats.getSuccessDeleteFiles().size();
        int totalFailedDeletedFiles = stats.getFailedDeleteFiles().size();
        rows.add(new Comparable[] { path, policy, totalSuccessDeletedFiles, totalFailedDeletedFiles });
    }
    TableHeader header = new TableHeader()
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_CLEANING_POLICY)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_SUCCESSFULLY_DELETED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FAILED_DELETIONS);
    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) TableHeader(org.apache.hudi.cli.TableHeader) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) ArrayList(java.util.ArrayList) HoodieCleanPartitionMetadata(org.apache.hudi.avro.model.HoodieCleanPartitionMetadata) HashMap(java.util.HashMap) Map(java.util.Map) CliCommand(org.springframework.shell.core.annotation.CliCommand)
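As a usage note, from the hudi-cli shell this surfaces as something like clean showpartitions --clean <instant time> --limit 20, with the option names taken directly from the @CliOption annotations above; the instant time is whichever completed clean you want to inspect.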

Example 4 with HoodieCleanPartitionMetadata

Use of org.apache.hudi.avro.model.HoodieCleanPartitionMetadata in project hudi by apache.

From the class CleanMetadataV2MigrationHandler, method upgradeFrom:

@Override
public HoodieCleanMetadata upgradeFrom(HoodieCleanMetadata input) {
    ValidationUtils.checkArgument(input.getVersion() == 1, "Input version is " + input.getVersion() + ". Must be 1");
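    // Note: the HoodieCleanMetadata instance populated just below is never returned;
    // the method ultimately returns the separately built copy at the end.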
    HoodieCleanMetadata metadata = new HoodieCleanMetadata();
    metadata.setEarliestCommitToRetain(input.getEarliestCommitToRetain());
    metadata.setTimeTakenInMillis(input.getTimeTakenInMillis());
    metadata.setStartCleanTime(input.getStartCleanTime());
    metadata.setTotalFilesDeleted(input.getTotalFilesDeleted());
    metadata.setVersion(getManagedVersion());
    Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap = input.getPartitionMetadata().entrySet().stream().map(entry -> {
        final String partitionPath = entry.getKey();
        final HoodieCleanPartitionMetadata partitionMetadata = entry.getValue();
        final List<String> deletePathPatterns = convertToV2Path(partitionMetadata.getDeletePathPatterns());
        final List<String> successDeleteFiles = convertToV2Path(partitionMetadata.getSuccessDeleteFiles());
        final List<String> failedDeleteFiles = convertToV2Path(partitionMetadata.getFailedDeleteFiles());
        final HoodieCleanPartitionMetadata cleanPartitionMetadata = HoodieCleanPartitionMetadata.newBuilder()
            .setPolicy(partitionMetadata.getPolicy())
            .setPartitionPath(partitionMetadata.getPartitionPath())
            .setDeletePathPatterns(deletePathPatterns)
            .setSuccessDeleteFiles(successDeleteFiles)
            .setFailedDeleteFiles(failedDeleteFiles)
            .build();
        return Pair.of(partitionPath, cleanPartitionMetadata);
    }).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    return HoodieCleanMetadata.newBuilder()
        .setEarliestCommitToRetain(input.getEarliestCommitToRetain())
        .setStartCleanTime(input.getStartCleanTime())
        .setTimeTakenInMillis(input.getTimeTakenInMillis())
        .setTotalFilesDeleted(input.getTotalFilesDeleted())
        .setPartitionMetadata(partitionMetadataMap)
        .setVersion(getManagedVersion())
        .build();
}
Also used : AbstractMigratorBase(org.apache.hudi.common.table.timeline.versioning.AbstractMigratorBase) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) List(java.util.List) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieCleanPartitionMetadata(org.apache.hudi.avro.model.HoodieCleanPartitionMetadata) Collectors(java.util.stream.Collectors) Pair(org.apache.hudi.common.util.collection.Pair)
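Symmetrically to Example 2, the upgrade path relies on a private convertToV2Path helper that is not shown here. A hedged sketch of what it presumably does, keeping only the file name so that V2 metadata stores partition-relative entries; the method name and body are illustrative rather than the actual implementation, and they use only types already imported by the class (List, Path, Collectors).

// Illustration only: strip leading directories so that V2 clean metadata records
// bare file names instead of the fully qualified V1 paths.
private static List<String> convertToV2PathSketch(List<String> paths) {
    return paths.stream()
        .map(path -> new Path(path).getName())
        .collect(Collectors.toList());
}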

Example 5 with HoodieCleanPartitionMetadata

Use of org.apache.hudi.avro.model.HoodieCleanPartitionMetadata in project hudi by apache.

From the class TestCleaner, method assertCleanMetadataPathEquals:

private static void assertCleanMetadataPathEquals(Map<String, Tuple3> expected, HoodieCleanMetadata actual) {
    Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap = actual.getPartitionMetadata();
    for (Map.Entry<String, HoodieCleanPartitionMetadata> entry : partitionMetadataMap.entrySet()) {
        String partitionPath = entry.getKey();
        HoodieCleanPartitionMetadata partitionMetadata = entry.getValue();
        assertEquals(expected.get(partitionPath)._1(), partitionMetadata.getDeletePathPatterns());
        assertEquals(expected.get(partitionPath)._2(), partitionMetadata.getSuccessDeleteFiles());
        assertEquals(expected.get(partitionPath)._3(), partitionMetadata.getFailedDeleteFiles());
    }
}
Also used : HoodieCleanPartitionMetadata(org.apache.hudi.avro.model.HoodieCleanPartitionMetadata) Map(java.util.Map) HashMap(java.util.HashMap)
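A hypothetical sketch of how the expected map for this assertion might be assembled inside the same test class. The raw Tuple3 is assumed to be scala.Tuple3, matching the _1()/_2()/_3() accessors used above; the partition path, file names, and the cleanMetadata variable are made up for illustration.

// cleanMetadata is assumed to be the HoodieCleanMetadata produced by the clean under test.
Map<String, Tuple3> expected = new HashMap<>();
expected.put("2016/03/15", new Tuple3<>(
    Arrays.asList("file-1.parquet"),    // expected delete path patterns
    Arrays.asList("file-1.parquet"),    // expected successfully deleted files
    Collections.emptyList()));          // expected failed deletes
assertCleanMetadataPathEquals(expected, cleanMetadata);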

Aggregations (usages per type):

HoodieCleanPartitionMetadata (org.apache.hudi.avro.model.HoodieCleanPartitionMetadata) 7
HoodieCleanMetadata (org.apache.hudi.avro.model.HoodieCleanMetadata) 6
HashMap (java.util.HashMap) 5
Map (java.util.Map) 5
ArrayList (java.util.ArrayList) 3
Collectors (java.util.stream.Collectors) 3
Path (org.apache.hadoop.fs.Path) 3
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient) 3
Pair (org.apache.hudi.common.util.collection.Pair) 3
List (java.util.List) 2
HoodieCleanStat (org.apache.hudi.common.HoodieCleanStat) 2
File (java.io.File) 1
IOException (java.io.IOException) 1
StandardCharsets (java.nio.charset.StandardCharsets) 1
Files (java.nio.file.Files) 1
Paths (java.nio.file.Paths) 1
Arrays (java.util.Arrays) 1
Collections (java.util.Collections) 1
HashSet (java.util.HashSet) 1
Set (java.util.Set) 1