Search in sources :

Example 1 with HoodieCleanerPlan

use of org.apache.hudi.avro.model.HoodieCleanerPlan in project hudi by apache.

the class TestMetadataConversionUtils method createCleanMetadata.

/**
 * Adds a clean action for the given instant to a test table built from {@code metaClient}.
 *
 * <p>The clean is described by a cleaner plan whose instant and policy are empty strings and whose
 * maps are empty, plus a single clean stat for one randomly chosen default partition.
 *
 * @param instantTime instant time of the clean action; also passed as the last argument of the clean stat
 * @throws IOException propagated from the calls made while building or adding the clean metadata
 */
private void createCleanMetadata(String instantTime) throws IOException {
    // Pick one of the default partitions at random for the single clean stat.
    String[] partitions = HoodieTestUtils.DEFAULT_PARTITION_PATHS;
    String chosenPartition = partitions[new Random().nextInt(partitions.length)];
    HoodieCleanStat stat = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS, chosenPartition, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), instantTime);

    HoodieActionInstant blankInstant = new HoodieActionInstant("", "", "");
    HoodieCleanerPlan emptyPlan = new HoodieCleanerPlan(blankInstant, "", new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new HashMap<>());

    HoodieCleanMetadata metadata = convertCleanMetadata(instantTime, Option.of(0L), Collections.singletonList(stat));
    HoodieTestTable testTable = HoodieTestTable.of(metaClient);
    testTable.addClean(instantTime, emptyPlan, metadata);
}
Also used : HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) Random(java.util.Random) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) HoodieActionInstant(org.apache.hudi.avro.model.HoodieActionInstant) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan)

Example 2 with HoodieCleanerPlan

use of org.apache.hudi.avro.model.HoodieCleanerPlan in project hudi by apache.

the class TestCleaner method testCleanPlanUpgradeDowngrade.

/**
 * Verifies that a version-1 cleaner plan can be upgraded to the latest plan version and then
 * migrated back to version 1 without losing the retained instant, the policy, or the files to delete.
 */
@Test
public void testCleanPlanUpgradeDowngrade() {
    String instantTime = "000";
    String partition1 = DEFAULT_PARTITION_PATHS[0];
    String partition2 = DEFAULT_PARTITION_PATHS[1];
    String extension = metaClient.getTableConfig().getBaseFileFormat().getFileExtension();
    String fileName1 = "data1_1_000" + extension;
    String fileName2 = "data2_1_000" + extension;
    // Version-1 plans list bare file names per partition.
    Map<String, List<String>> filesToBeCleanedPerPartition = new HashMap<>();
    filesToBeCleanedPerPartition.put(partition1, Arrays.asList(fileName1));
    filesToBeCleanedPerPartition.put(partition2, Arrays.asList(fileName2));
    HoodieCleanerPlan version1Plan = HoodieCleanerPlan.newBuilder().setEarliestInstantToRetain(HoodieActionInstant.newBuilder().setAction(HoodieTimeline.COMMIT_ACTION).setTimestamp(instantTime).setState(State.COMPLETED.name()).build()).setPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name()).setFilesToBeDeletedPerPartition(filesToBeCleanedPerPartition).setVersion(CleanPlanV1MigrationHandler.VERSION).build();
    // Upgrade and Verify version 2 plan
    HoodieCleanerPlan version2Plan = new CleanPlanMigrator(metaClient).upgradeToLatest(version1Plan, version1Plan.getVersion());
    assertEquals(version1Plan.getEarliestInstantToRetain(), version2Plan.getEarliestInstantToRetain());
    assertEquals(version1Plan.getPolicy(), version2Plan.getPolicy());
    assertEquals(CleanPlanner.LATEST_CLEAN_PLAN_VERSION, version2Plan.getVersion());
    // Deprecated Field is not used.
    assertEquals(0, version2Plan.getFilesToBeDeletedPerPartition().size());
    assertEquals(version1Plan.getFilesToBeDeletedPerPartition().size(), version2Plan.getFilePathsToBeDeletedPerPartition().size());
    assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition1).size(), version2Plan.getFilePathsToBeDeletedPerPartition().get(partition1).size());
    assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition2).size(), version2Plan.getFilePathsToBeDeletedPerPartition().get(partition2).size());
    // The upgraded plan is expected to carry the file path resolved under the partition path.
    assertEquals(new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), partition1), fileName1).toString(), version2Plan.getFilePathsToBeDeletedPerPartition().get(partition1).get(0).getFilePath());
    assertEquals(new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), partition2), fileName2).toString(), version2Plan.getFilePathsToBeDeletedPerPartition().get(partition2).get(0).getFilePath());
    // Downgrade and verify version 1 plan
    HoodieCleanerPlan gotVersion1Plan = new CleanPlanMigrator(metaClient).migrateToVersion(version2Plan, version2Plan.getVersion(), version1Plan.getVersion());
    assertEquals(version1Plan.getEarliestInstantToRetain(), gotVersion1Plan.getEarliestInstantToRetain());
    // Check the downgraded plan here; the upgraded plan's policy was already verified above.
    assertEquals(version1Plan.getPolicy(), gotVersion1Plan.getPolicy());
    assertEquals(version1Plan.getVersion(), gotVersion1Plan.getVersion());
    assertEquals(version1Plan.getFilesToBeDeletedPerPartition().size(), gotVersion1Plan.getFilesToBeDeletedPerPartition().size());
    assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition1).size(), gotVersion1Plan.getFilesToBeDeletedPerPartition().get(partition1).size());
    assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition2).size(), gotVersion1Plan.getFilesToBeDeletedPerPartition().get(partition2).size());
    assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition1).get(0), gotVersion1Plan.getFilesToBeDeletedPerPartition().get(partition1).get(0));
    assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition2).get(0), gotVersion1Plan.getFilesToBeDeletedPerPartition().get(partition2).get(0));
    assertTrue(gotVersion1Plan.getFilePathsToBeDeletedPerPartition().isEmpty());
    assertNull(version1Plan.getFilePathsToBeDeletedPerPartition());
}
Also used : Path(org.apache.hadoop.fs.Path) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan) CleanPlanMigrator(org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanMigrator) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)

Example 3 with HoodieCleanerPlan

use of org.apache.hudi.avro.model.HoodieCleanerPlan in project hudi by apache.

the class CleanerUtils method getCleanerPlan.

/**
 * Reads the cleaner plan recorded for a clean instant and returns it upgraded to the latest plan version.
 *
 * @param metaClient   Hoodie table meta client whose active timeline holds the clean instant
 * @param cleanInstant instant referring to the clean action
 * @return the cleaner plan for {@code cleanInstant}, upgraded from its stored version to the latest one
 * @throws IOException propagated from reading or deserializing the stored plan
 */
public static HoodieCleanerPlan getCleanerPlan(HoodieTableMetaClient metaClient, HoodieInstant cleanInstant) throws IOException {
    CleanPlanMigrator migrator = new CleanPlanMigrator(metaClient);
    // Fetch the serialized plan from the active timeline, then decode it from Avro.
    byte[] serializedPlan = metaClient.getActiveTimeline().readCleanerInfoAsBytes(cleanInstant).get();
    HoodieCleanerPlan storedPlan = TimelineMetadataUtils.deserializeAvroMetadata(serializedPlan, HoodieCleanerPlan.class);
    // The stored plan carries its own version, which tells the migrator where to upgrade from.
    return migrator.upgradeToLatest(storedPlan, storedPlan.getVersion());
}
Also used : HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan) CleanPlanMigrator(org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanMigrator)

Example 4 with HoodieCleanerPlan

use of org.apache.hudi.avro.model.HoodieCleanerPlan in project hudi by apache.

the class HoodieTestTable method addClean.

/**
 * Adds a clean action for the given instant using placeholder metadata.
 *
 * <p>Builds a cleaner plan with empty-string fields and empty maps, and a single clean stat for one
 * randomly chosen default partition, then delegates to the three-argument {@code addClean} on a
 * test table created from {@code metaClient}.
 *
 * @param instantTime instant time of the clean action; also passed as the last argument of the clean stat
 * @return the test table returned by the delegated {@code addClean} call
 * @throws IOException propagated from the calls made while building or adding the clean metadata
 */
public HoodieTestTable addClean(String instantTime) throws IOException {
    HoodieActionInstant blankInstant = new HoodieActionInstant(EMPTY_STRING, EMPTY_STRING, EMPTY_STRING);
    HoodieCleanerPlan emptyPlan = new HoodieCleanerPlan(blankInstant, EMPTY_STRING, new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new HashMap<>());

    // Pick one of the default partitions at random for the single clean stat.
    String[] partitions = HoodieTestUtils.DEFAULT_PARTITION_PATHS;
    String chosenPartition = partitions[RANDOM.nextInt(partitions.length)];
    HoodieCleanStat stat = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS, chosenPartition, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), instantTime);

    HoodieCleanMetadata metadata = convertCleanMetadata(instantTime, Option.of(0L), Collections.singletonList(stat));
    HoodieTestTable testTable = HoodieTestTable.of(metaClient);
    return testTable.addClean(instantTime, emptyPlan, metadata);
}
Also used : HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) HoodieActionInstant(org.apache.hudi.avro.model.HoodieActionInstant) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan)

Example 5 with HoodieCleanerPlan

use of org.apache.hudi.avro.model.HoodieCleanerPlan in project hudi by apache.

the class CleanPlanActionExecutor method requestClean.

/**
 * Builds the cleaner plan listing the files to be cleaned in each partition.
 *
 * <p>When the planner reports no partitions to clean, a plan with only the
 * {@code KEEP_LATEST_COMMITS} policy set is returned. Otherwise the delete paths of every
 * partition are computed through the engine context, with parallelism capped by the number of
 * partitions, and returned in a plan of the latest plan version.
 *
 * @param context HoodieEngineContext used to report job status and to map over the partitions
 * @return Cleaner Plan
 * @throws HoodieIOException if an {@link IOException} is raised while the plan is being built
 */
HoodieCleanerPlan requestClean(HoodieEngineContext context) {
    try {
        final String jobName = this.getClass().getSimpleName();
        CleanPlanner<T, I, K, O> planner = new CleanPlanner<>(context, table, config);
        Option<HoodieInstant> earliestInstant = planner.getEarliestCommitToRetain();

        context.setJobStatus(jobName, "Obtaining list of partitions to be cleaned");
        List<String> partitionsToClean = planner.getPartitionPathsToClean(earliestInstant);
        if (partitionsToClean.isEmpty()) {
            LOG.info("Nothing to clean here. It is already clean");
            return HoodieCleanerPlan.newBuilder()
                .setPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name())
                .build();
        }

        LOG.info("Total Partitions to clean : " + partitionsToClean.size() + ", with policy " + config.getCleanerPolicy());
        // Never use more parallelism than there are partitions to process.
        int parallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism());
        LOG.info("Using cleanerParallelism: " + parallelism);

        context.setJobStatus(jobName, "Generating list of file slices to be cleaned");
        Map<String, List<HoodieCleanFileInfo>> cleanOps = context
            .map(partitionsToClean, partition -> Pair.of(partition, planner.getDeletePaths(partition)), parallelism)
            .stream()
            .collect(Collectors.toMap(Pair::getKey, entry -> CleanerUtils.convertToHoodieCleanFileInfoList(entry.getValue())));

        // The plan's earliest-instant field is left null when the planner found no commit to retain.
        HoodieActionInstant retainedInstant = earliestInstant
            .map(instant -> new HoodieActionInstant(instant.getTimestamp(), instant.getAction(), instant.getState().name()))
            .orElse(null);
        return new HoodieCleanerPlan(retainedInstant, config.getCleanerPolicy().name(), CollectionUtils.createImmutableMap(), CleanPlanner.LATEST_CLEAN_PLAN_VERSION, cleanOps);
    } catch (IOException e) {
        throw new HoodieIOException("Failed to schedule clean operation", e);
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieCleaningPolicy(org.apache.hudi.common.model.HoodieCleaningPolicy) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) BaseActionExecutor(org.apache.hudi.table.action.BaseActionExecutor) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieCleanFileInfo(org.apache.hudi.avro.model.HoodieCleanFileInfo) Collectors(java.util.stream.Collectors) HoodieActionInstant(org.apache.hudi.avro.model.HoodieActionInstant) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) Logger(org.apache.log4j.Logger) List(java.util.List) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) Map(java.util.Map) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) Pair(org.apache.hudi.common.util.collection.Pair)

Aggregations

HoodieCleanerPlan (org.apache.hudi.avro.model.HoodieCleanerPlan)11 HoodieActionInstant (org.apache.hudi.avro.model.HoodieActionInstant)6 List (java.util.List)5 HoodieCleanMetadata (org.apache.hudi.avro.model.HoodieCleanMetadata)5 HoodieCleanStat (org.apache.hudi.common.HoodieCleanStat)5 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)5 IOException (java.io.IOException)4 ArrayList (java.util.ArrayList)4 HoodieIOException (org.apache.hudi.exception.HoodieIOException)4 HashMap (java.util.HashMap)3 Map (java.util.Map)3 Collectors (java.util.stream.Collectors)3 Path (org.apache.hadoop.fs.Path)3 HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext)3 CleanerUtils (org.apache.hudi.common.util.CleanerUtils)3 Option (org.apache.hudi.common.util.Option)3 Pair (org.apache.hudi.common.util.collection.Pair)3 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)3 LogManager (org.apache.log4j.LogManager)3 Logger (org.apache.log4j.Logger)3