Search in sources :

Example 1 with HoodieActionInstant

use of org.apache.hudi.avro.model.HoodieActionInstant in project hudi by apache.

the class TestMetadataConversionUtils method createCleanMetadata.

/**
 * Registers a clean action at {@code instantTime} on the test table.
 *
 * <p>The cleaner plan is built from empty strings and empty maps, and the clean metadata
 * holds a single stat for one randomly selected default partition with no deleted files.
 *
 * @param instantTime instant time passed to the clean stat, the clean metadata and the test table
 * @throws IOException declared because the helpers called here may raise it
 */
private void createCleanMetadata(String instantTime) throws IOException {
    HoodieActionInstant blankInstant = new HoodieActionInstant("", "", "");
    HoodieCleanerPlan plan = new HoodieCleanerPlan(
        blankInstant, "", new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new HashMap<>());

    // Choose one of the default test partitions at random.
    String[] partitionPaths = HoodieTestUtils.DEFAULT_PARTITION_PATHS;
    String chosenPartition = partitionPaths[new Random().nextInt(partitionPaths.length)];

    HoodieCleanStat stat = new HoodieCleanStat(
        HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
        chosenPartition,
        Collections.emptyList(),
        Collections.emptyList(),
        Collections.emptyList(),
        instantTime);

    HoodieCleanMetadata metadata =
        convertCleanMetadata(instantTime, Option.of(0L), Collections.singletonList(stat));
    HoodieTestTable.of(metaClient).addClean(instantTime, plan, metadata);
}
Also used : HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) Random(java.util.Random) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) HoodieActionInstant(org.apache.hudi.avro.model.HoodieActionInstant) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan)

Example 2 with HoodieActionInstant

use of org.apache.hudi.avro.model.HoodieActionInstant in project hudi by apache.

the class HoodieTestTable method addClean.

/**
 * Adds a clean action at {@code instantTime} using a placeholder cleaner plan and a single
 * clean stat for one randomly selected default partition (no deleted files recorded).
 *
 * @param instantTime instant time passed to the clean stat, the clean metadata and the test table
 * @return the value returned by {@code addClean(instantTime, cleanerPlan, cleanMetadata)} on a
 *         test table obtained via {@code HoodieTestTable.of(metaClient)}
 * @throws IOException declared because the helpers called here may raise it
 */
public HoodieTestTable addClean(String instantTime) throws IOException {
    HoodieActionInstant blankInstant = new HoodieActionInstant(EMPTY_STRING, EMPTY_STRING, EMPTY_STRING);
    HoodieCleanerPlan plan = new HoodieCleanerPlan(
        blankInstant, EMPTY_STRING, new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new HashMap<>());

    // Choose one of the default test partitions at random.
    String[] partitionPaths = HoodieTestUtils.DEFAULT_PARTITION_PATHS;
    String chosenPartition = partitionPaths[RANDOM.nextInt(partitionPaths.length)];

    HoodieCleanStat stat = new HoodieCleanStat(
        HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
        chosenPartition,
        Collections.emptyList(),
        Collections.emptyList(),
        Collections.emptyList(),
        instantTime);

    HoodieCleanMetadata metadata =
        convertCleanMetadata(instantTime, Option.of(0L), Collections.singletonList(stat));
    return HoodieTestTable.of(metaClient).addClean(instantTime, plan, metadata);
}
Also used : HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) HoodieActionInstant(org.apache.hudi.avro.model.HoodieActionInstant) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan)

Example 3 with HoodieActionInstant

use of org.apache.hudi.avro.model.HoodieActionInstant in project hudi by apache.

the class CleanPlanActionExecutor method requestClean.

/**
 * Builds the cleaner plan: for every partition the planner reports as needing cleaning,
 * collects the files to delete.
 *
 * @param context HoodieEngineContext used to set the job status and to map over the partitions
 * @return Cleaner Plan; when no partition needs cleaning, a plan built with only the
 *         {@code KEEP_LATEST_COMMITS} policy name set
 * @throws HoodieIOException wrapping any {@link IOException} raised while planning
 */
HoodieCleanerPlan requestClean(HoodieEngineContext context) {
    final String jobName = this.getClass().getSimpleName();
    try {
        CleanPlanner<T, I, K, O> cleanPlanner = new CleanPlanner<>(context, table, config);
        Option<HoodieInstant> earliestToRetain = cleanPlanner.getEarliestCommitToRetain();

        context.setJobStatus(jobName, "Obtaining list of partitions to be cleaned");
        List<String> partitions = cleanPlanner.getPartitionPathsToClean(earliestToRetain);
        if (partitions.isEmpty()) {
            LOG.info("Nothing to clean here. It is already clean");
            return HoodieCleanerPlan.newBuilder()
                .setPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name())
                .build();
        }
        LOG.info("Total Partitions to clean : " + partitions.size() + ", with policy " + config.getCleanerPolicy());

        // Never use more tasks than there are partitions to inspect.
        int parallelism = Math.min(partitions.size(), config.getCleanerParallelism());
        LOG.info("Using cleanerParallelism: " + parallelism);

        context.setJobStatus(jobName, "Generating list of file slices to be cleaned");
        Map<String, List<HoodieCleanFileInfo>> filesToDeleteByPartition = context
            .map(partitions, partition -> Pair.of(partition, cleanPlanner.getDeletePaths(partition)), parallelism)
            .stream()
            .collect(Collectors.toMap(
                Pair::getKey,
                deletePaths -> CleanerUtils.convertToHoodieCleanFileInfoList(deletePaths.getValue())));

        // The plan stores null when the planner found no earliest commit to retain.
        HoodieActionInstant earliestActionInstant = earliestToRetain
            .map(instant -> new HoodieActionInstant(instant.getTimestamp(), instant.getAction(), instant.getState().name()))
            .orElse(null);

        return new HoodieCleanerPlan(
            earliestActionInstant,
            config.getCleanerPolicy().name(),
            CollectionUtils.createImmutableMap(),
            CleanPlanner.LATEST_CLEAN_PLAN_VERSION,
            filesToDeleteByPartition);
    } catch (IOException ioe) {
        throw new HoodieIOException("Failed to schedule clean operation", ioe);
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieCleaningPolicy(org.apache.hudi.common.model.HoodieCleaningPolicy) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) BaseActionExecutor(org.apache.hudi.table.action.BaseActionExecutor) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieCleanFileInfo(org.apache.hudi.avro.model.HoodieCleanFileInfo) Collectors(java.util.stream.Collectors) HoodieActionInstant(org.apache.hudi.avro.model.HoodieActionInstant) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) Logger(org.apache.log4j.Logger) List(java.util.List) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) Map(java.util.Map) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) Pair(org.apache.hudi.common.util.collection.Pair) HoodieActionInstant(org.apache.hudi.avro.model.HoodieActionInstant) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) List(java.util.List) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan)

Example 4 with HoodieActionInstant

use of org.apache.hudi.avro.model.HoodieActionInstant in project hudi by apache.

the class HoodieTestTable method getHoodieCleanMetadata.

/**
 * Builds a cleaner plan and matching clean metadata for {@code commitTime}.
 *
 * <p>One clean stat is created per entry of
 * {@code testTableState.getPartitionToFileIdMapForCleaner(commitTime)}; the entry's value is
 * passed for both the delete-path-pattern and successful-delete arguments, with an empty
 * list for the failed deletes.
 *
 * @param commitTime     instant time used for the plan's action instant, the stats and the metadata
 * @param testTableState source of the partition-to-file-id mapping for the cleaner
 * @return pair of (cleaner plan, converted clean metadata)
 */
public Pair<HoodieCleanerPlan, HoodieCleanMetadata> getHoodieCleanMetadata(String commitTime, HoodieTestTableState testTableState) {
    HoodieActionInstant cleanInstant = new HoodieActionInstant(commitTime, CLEAN_ACTION, EMPTY_STRING);
    HoodieCleanerPlan plan = new HoodieCleanerPlan(
        cleanInstant, EMPTY_STRING, new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new HashMap<>());

    List<HoodieCleanStat> stats = new ArrayList<>();
    testTableState.getPartitionToFileIdMapForCleaner(commitTime).forEach((partition, fileIds) ->
        stats.add(new HoodieCleanStat(
            HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
            partition,
            fileIds,
            fileIds,
            Collections.emptyList(),
            commitTime)));

    HoodieCleanMetadata metadata = convertCleanMetadata(commitTime, Option.of(0L), stats);
    return Pair.of(plan, metadata);
}
Also used : HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) ArrayList(java.util.ArrayList) HoodieActionInstant(org.apache.hudi.avro.model.HoodieActionInstant) List(java.util.List) ArrayList(java.util.ArrayList) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan) Map(java.util.Map) CollectionUtils.createImmutableMap(org.apache.hudi.common.util.CollectionUtils.createImmutableMap) HashMap(java.util.HashMap)

Example 5 with HoodieActionInstant

use of org.apache.hudi.avro.model.HoodieActionInstant in project hudi by apache.

the class CleanActionExecutor method clean.

/**
 * Performs cleaning of partition paths according to the cleaner plan and returns one
 * {@link HoodieCleanStat} per partition listed in the plan. Handles skews in partitions to
 * clean by making files to clean as the unit of task distribution.
 *
 * @param context     engine context used to set the job status and to run the delete tasks
 * @param cleanerPlan plan holding the files to delete per partition and, optionally, the
 *                    earliest instant to retain
 * @return clean stats, one entry for every partition key in the plan; partitions for which no
 *         delete result came back get a fresh, empty {@code PartitionCleanStat}
 * @throws IllegalArgumentException if unknown cleaning policy is provided
 */
List<HoodieCleanStat> clean(HoodieEngineContext context, HoodieCleanerPlan cleanerPlan) {
    // Files, not partitions, are the unit of work: sum() adds up the per-partition list sizes,
    // whereas count() would only yield the number of partitions and under-parallelise skewed plans.
    int totalFilesToDelete = cleanerPlan.getFilePathsToBeDeletedPerPartition().values().stream()
        .mapToInt(List::size)
        .sum();
    // Keep the parallelism positive even when the plan lists partitions without any files.
    int cleanerParallelism = Math.max(1, Math.min(totalFilesToDelete, config.getCleanerParallelism()));
    LOG.info("Using cleanerParallelism: " + cleanerParallelism);

    context.setJobStatus(this.getClass().getSimpleName(), "Perform cleaning of partitions");

    // Flatten the plan into (partition path, file to delete) pairs so each file is its own work item.
    Stream<Pair<String, CleanFileInfo>> filesToBeDeletedPerPartition = cleanerPlan.getFilePathsToBeDeletedPerPartition().entrySet().stream().flatMap(x -> x.getValue().stream().map(y -> new ImmutablePair<>(x.getKey(), new CleanFileInfo(y.getFilePath(), y.getIsBootstrapBaseFile()))));

    // Delete the files and merge the per-file results back into one stat per partition.
    Stream<ImmutablePair<String, PartitionCleanStat>> partitionCleanStats = context.mapPartitionsToPairAndReduceByKey(filesToBeDeletedPerPartition, iterator -> deleteFilesFunc(iterator, table), PartitionCleanStat::merge, cleanerParallelism);
    Map<String, PartitionCleanStat> partitionCleanStatsMap = partitionCleanStats.collect(Collectors.toMap(Pair::getKey, Pair::getValue));

    // The earliest instant to retain is the same for every partition; read it once.
    final HoodieActionInstant actionInstant = cleanerPlan.getEarliestInstantToRetain();

    // Return PartitionCleanStat for each partition passed.
    return cleanerPlan.getFilePathsToBeDeletedPerPartition().keySet().stream().map(partitionPath -> {
        PartitionCleanStat partitionCleanStat = partitionCleanStatsMap.containsKey(partitionPath)
            ? partitionCleanStatsMap.get(partitionPath)
            : new PartitionCleanStat(partitionPath);
        return HoodieCleanStat.newBuilder()
            .withPolicy(config.getCleanerPolicy())
            .withPartitionPath(partitionPath)
            .withEarliestCommitRetained(Option.ofNullable(actionInstant != null ? new HoodieInstant(HoodieInstant.State.valueOf(actionInstant.getState()), actionInstant.getAction(), actionInstant.getTimestamp()) : null))
            .withDeletePathPattern(partitionCleanStat.deletePathPatterns())
            .withSuccessfulDeletes(partitionCleanStat.successDeleteFiles())
            .withFailedDeletes(partitionCleanStat.failedDeleteFiles())
            .withDeleteBootstrapBasePathPatterns(partitionCleanStat.getDeleteBootstrapBasePathPatterns())
            .withSuccessfulDeleteBootstrapBaseFiles(partitionCleanStat.getSuccessfulDeleteBootstrapBaseFiles())
            .withFailedDeleteBootstrapBaseFiles(partitionCleanStat.getFailedDeleteBootstrapBaseFiles())
            .build();
    }).collect(Collectors.toList());
}
Also used : ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) HoodieTable(org.apache.hudi.table.HoodieTable) BaseActionExecutor(org.apache.hudi.table.action.BaseActionExecutor) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) TransactionManager(org.apache.hudi.client.transaction.TransactionManager) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Iterator(java.util.Iterator) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) CleanFileInfo(org.apache.hudi.common.model.CleanFileInfo) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) HoodieActionInstant(org.apache.hudi.avro.model.HoodieActionInstant) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) List(java.util.List) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) Stream(java.util.stream.Stream) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) Pair(org.apache.hudi.common.util.collection.Pair) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) CleanFileInfo(org.apache.hudi.common.model.CleanFileInfo) HoodieActionInstant(org.apache.hudi.avro.model.HoodieActionInstant) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) ArrayList(java.util.ArrayList) 
List(java.util.List) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) Pair(org.apache.hudi.common.util.collection.Pair)

Aggregations

HoodieActionInstant (org.apache.hudi.avro.model.HoodieActionInstant)6 HoodieCleanerPlan (org.apache.hudi.avro.model.HoodieCleanerPlan)6 HoodieCleanStat (org.apache.hudi.common.HoodieCleanStat)5 HoodieCleanMetadata (org.apache.hudi.avro.model.HoodieCleanMetadata)4 List (java.util.List)3 Map (java.util.Map)3 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 Random (java.util.Random)2 Collectors (java.util.stream.Collectors)2 HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext)2 HoodieRecordPayload (org.apache.hudi.common.model.HoodieRecordPayload)2 TimelineMetadataUtils (org.apache.hudi.common.table.timeline.TimelineMetadataUtils)2 CleanerUtils (org.apache.hudi.common.util.CleanerUtils)2 Option (org.apache.hudi.common.util.Option)2 Pair (org.apache.hudi.common.util.collection.Pair)2 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)2 HoodieIOException (org.apache.hudi.exception.HoodieIOException)2