Use of org.apache.hudi.avro.model.HoodieCleanerPlan in project hudi by apache.
From the class TestMetadataConversionUtils, method createCleanMetadata:
private void createCleanMetadata(String instantTime) throws IOException {
  HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(new HoodieActionInstant("", "", ""),
      "", new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new HashMap<>());
  HoodieCleanStat cleanStats = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
      HoodieTestUtils.DEFAULT_PARTITION_PATHS[new Random().nextInt(HoodieTestUtils.DEFAULT_PARTITION_PATHS.length)],
      Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), instantTime);
  HoodieCleanMetadata cleanMetadata = convertCleanMetadata(instantTime, Option.of(0L),
      Collections.singletonList(cleanStats));
  HoodieTestTable.of(metaClient).addClean(instantTime, cleanerPlan, cleanMetadata);
}
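Below is a minimal sketch of how this helper might be exercised, assuming the surrounding test class provides metaClient and the usual JUnit imports; the instant times and the expectation that addClean materializes completed clean instants are assumptions, not part of the repository:

@Test
public void testCleanMetadataOnTimeline() throws IOException {
  // Seed a few clean instants, then check they appear as completed cleans on the reloaded timeline.
  for (String instantTime : Arrays.asList("001", "002", "003")) {
    createCleanMetadata(instantTime);
  }
  assertEquals(3, metaClient.reloadActiveTimeline().getCleanerTimeline()
      .filterCompletedInstants().countInstants());
}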
Use of org.apache.hudi.avro.model.HoodieCleanerPlan in project hudi by apache.
From the class TestCleaner, method testCleanPlanUpgradeDowngrade:
@Test
public void testCleanPlanUpgradeDowngrade() {
  String instantTime = "000";
  String partition1 = DEFAULT_PARTITION_PATHS[0];
  String partition2 = DEFAULT_PARTITION_PATHS[1];
  String extension = metaClient.getTableConfig().getBaseFileFormat().getFileExtension();
  String fileName1 = "data1_1_000" + extension;
  String fileName2 = "data2_1_000" + extension;
  Map<String, List<String>> filesToBeCleanedPerPartition = new HashMap<>();
  filesToBeCleanedPerPartition.put(partition1, Arrays.asList(fileName1));
  filesToBeCleanedPerPartition.put(partition2, Arrays.asList(fileName2));
  HoodieCleanerPlan version1Plan = HoodieCleanerPlan.newBuilder()
      .setEarliestInstantToRetain(HoodieActionInstant.newBuilder()
          .setAction(HoodieTimeline.COMMIT_ACTION)
          .setTimestamp(instantTime)
          .setState(State.COMPLETED.name())
          .build())
      .setPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name())
      .setFilesToBeDeletedPerPartition(filesToBeCleanedPerPartition)
      .setVersion(CleanPlanV1MigrationHandler.VERSION)
      .build();
  // Upgrade and verify the version 2 plan
  HoodieCleanerPlan version2Plan = new CleanPlanMigrator(metaClient)
      .upgradeToLatest(version1Plan, version1Plan.getVersion());
  assertEquals(version1Plan.getEarliestInstantToRetain(), version2Plan.getEarliestInstantToRetain());
  assertEquals(version1Plan.getPolicy(), version2Plan.getPolicy());
  assertEquals(CleanPlanner.LATEST_CLEAN_PLAN_VERSION, version2Plan.getVersion());
  // Deprecated field is not used.
  assertEquals(0, version2Plan.getFilesToBeDeletedPerPartition().size());
  assertEquals(version1Plan.getFilesToBeDeletedPerPartition().size(),
      version2Plan.getFilePathsToBeDeletedPerPartition().size());
  assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition1).size(),
      version2Plan.getFilePathsToBeDeletedPerPartition().get(partition1).size());
  assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition2).size(),
      version2Plan.getFilePathsToBeDeletedPerPartition().get(partition2).size());
  assertEquals(new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), partition1), fileName1).toString(),
      version2Plan.getFilePathsToBeDeletedPerPartition().get(partition1).get(0).getFilePath());
  assertEquals(new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), partition2), fileName2).toString(),
      version2Plan.getFilePathsToBeDeletedPerPartition().get(partition2).get(0).getFilePath());
  // Downgrade and verify the version 1 plan
  HoodieCleanerPlan gotVersion1Plan = new CleanPlanMigrator(metaClient)
      .migrateToVersion(version2Plan, version2Plan.getVersion(), version1Plan.getVersion());
  assertEquals(version1Plan.getEarliestInstantToRetain(), gotVersion1Plan.getEarliestInstantToRetain());
  assertEquals(version1Plan.getPolicy(), gotVersion1Plan.getPolicy());
  assertEquals(version1Plan.getVersion(), gotVersion1Plan.getVersion());
  assertEquals(version1Plan.getFilesToBeDeletedPerPartition().size(),
      gotVersion1Plan.getFilesToBeDeletedPerPartition().size());
  assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition1).size(),
      gotVersion1Plan.getFilesToBeDeletedPerPartition().get(partition1).size());
  assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition2).size(),
      gotVersion1Plan.getFilesToBeDeletedPerPartition().get(partition2).size());
  assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition1).get(0),
      gotVersion1Plan.getFilesToBeDeletedPerPartition().get(partition1).get(0));
  assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition2).get(0),
      gotVersion1Plan.getFilesToBeDeletedPerPartition().get(partition2).get(0));
  assertTrue(gotVersion1Plan.getFilePathsToBeDeletedPerPartition().isEmpty());
  assertNull(version1Plan.getFilePathsToBeDeletedPerPartition());
}
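A minimal round-trip sketch built only from the migrator calls exercised above; the helper name roundTripCleanPlan and the idea of downgrading back for an older reader are illustrative, not part of the test:

private HoodieCleanerPlan roundTripCleanPlan(HoodieTableMetaClient metaClient, HoodieCleanerPlan plan) {
  CleanPlanMigrator migrator = new CleanPlanMigrator(metaClient);
  // Upgrade whatever version was read from the timeline to the latest plan version ...
  HoodieCleanerPlan latest = migrator.upgradeToLatest(plan, plan.getVersion());
  // ... and bring it back down to the original version for a reader that still expects it.
  return migrator.migrateToVersion(latest, latest.getVersion(), plan.getVersion());
}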
Use of org.apache.hudi.avro.model.HoodieCleanerPlan in project hudi by apache.
From the class CleanerUtils, method getCleanerPlan:
/**
 * Gets the latest version of the cleaner plan corresponding to a clean instant.
 *
 * @param metaClient Hoodie table meta client
 * @param cleanInstant Instant referring to the clean action
 * @return Cleaner plan corresponding to the clean instant
 * @throws IOException if the cleaner plan cannot be read or deserialized
 */
public static HoodieCleanerPlan getCleanerPlan(HoodieTableMetaClient metaClient, HoodieInstant cleanInstant)
    throws IOException {
  CleanPlanMigrator cleanPlanMigrator = new CleanPlanMigrator(metaClient);
  HoodieCleanerPlan cleanerPlan = TimelineMetadataUtils.deserializeAvroMetadata(
      metaClient.getActiveTimeline().readCleanerInfoAsBytes(cleanInstant).get(), HoodieCleanerPlan.class);
  return cleanPlanMigrator.upgradeToLatest(cleanerPlan, cleanerPlan.getVersion());
}
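A hedged caller sketch: the instant time below is a placeholder, and it assumes the plan was serialized into the <instantTime>.clean.requested file when the clean was scheduled:

String instantTime = "20220101000000"; // placeholder clean instant time
HoodieInstant requestedClean =
    new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.CLEAN_ACTION, instantTime);
HoodieCleanerPlan plan = CleanerUtils.getCleanerPlan(metaClient, requestedClean);
// The plan comes back upgraded to the latest version, so the V2 field holds the file paths.
plan.getFilePathsToBeDeletedPerPartition()
    .forEach((partition, files) -> System.out.println(partition + " -> " + files.size() + " files"));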
Use of org.apache.hudi.avro.model.HoodieCleanerPlan in project hudi by apache.
From the class HoodieTestTable, method addClean:
public HoodieTestTable addClean(String instantTime) throws IOException {
  HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(
      new HoodieActionInstant(EMPTY_STRING, EMPTY_STRING, EMPTY_STRING), EMPTY_STRING,
      new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new HashMap<>());
  HoodieCleanStat cleanStats = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
      HoodieTestUtils.DEFAULT_PARTITION_PATHS[RANDOM.nextInt(HoodieTestUtils.DEFAULT_PARTITION_PATHS.length)],
      Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), instantTime);
  HoodieCleanMetadata cleanMetadata = convertCleanMetadata(instantTime, Option.of(0L),
      Collections.singletonList(cleanStats));
  return HoodieTestTable.of(metaClient).addClean(instantTime, cleanerPlan, cleanMetadata);
}
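A small sketch of how a test might chain this overload with the other fluent helpers; the addCommit call and instant times are assumptions about the surrounding test, not part of this method:

// Lay down a commit, then a clean that trails it, on the test timeline.
HoodieTestTable.of(metaClient)
    .addCommit("001")
    .addClean("002");
// The clean should now be the last completed instant on the cleaner timeline.
assertEquals("002", metaClient.reloadActiveTimeline().getCleanerTimeline()
    .filterCompletedInstants().lastInstant().get().getTimestamp());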
Use of org.apache.hudi.avro.model.HoodieCleanerPlan in project hudi by apache.
From the class CleanPlanActionExecutor, method requestClean:
/**
* Generates the list of files to be cleaned.
*
* @param context HoodieEngineContext
* @return Cleaner Plan
*/
HoodieCleanerPlan requestClean(HoodieEngineContext context) {
  try {
    CleanPlanner<T, I, K, O> planner = new CleanPlanner<>(context, table, config);
    Option<HoodieInstant> earliestInstant = planner.getEarliestCommitToRetain();
    context.setJobStatus(this.getClass().getSimpleName(), "Obtaining list of partitions to be cleaned");
    List<String> partitionsToClean = planner.getPartitionPathsToClean(earliestInstant);
    if (partitionsToClean.isEmpty()) {
      LOG.info("Nothing to clean here. It is already clean");
      return HoodieCleanerPlan.newBuilder().setPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name()).build();
    }
    LOG.info("Total Partitions to clean : " + partitionsToClean.size() + ", with policy " + config.getCleanerPolicy());
    int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism());
    LOG.info("Using cleanerParallelism: " + cleanerParallelism);
    context.setJobStatus(this.getClass().getSimpleName(), "Generating list of file slices to be cleaned");
    Map<String, List<HoodieCleanFileInfo>> cleanOps = context
        .map(partitionsToClean,
            partitionPathToClean -> Pair.of(partitionPathToClean, planner.getDeletePaths(partitionPathToClean)),
            cleanerParallelism)
        .stream()
        .collect(Collectors.toMap(Pair::getKey, y -> CleanerUtils.convertToHoodieCleanFileInfoList(y.getValue())));
    return new HoodieCleanerPlan(
        earliestInstant.map(x -> new HoodieActionInstant(x.getTimestamp(), x.getAction(), x.getState().name())).orElse(null),
        config.getCleanerPolicy().name(),
        CollectionUtils.createImmutableMap(),
        CleanPlanner.LATEST_CLEAN_PLAN_VERSION,
        cleanOps);
  } catch (IOException e) {
    throw new HoodieIOException("Failed to schedule clean operation", e);
  }
}
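For context, a sketch of how the generated plan is typically persisted as a requested clean instant, mirroring the executor's surrounding flow; the instantTime variable and the exact instant handling are assumptions that may differ across Hudi versions:

HoodieCleanerPlan cleanerPlan = requestClean(context);
if (cleanerPlan.getFilePathsToBeDeletedPerPartition() != null
    && !cleanerPlan.getFilePathsToBeDeletedPerPartition().isEmpty()) {
  try {
    // instantTime is assumed to be the clean instant this executor was scheduled with.
    HoodieInstant cleanInstant =
        new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.CLEAN_ACTION, instantTime);
    // Serialize the Avro plan and write it into <instantTime>.clean.requested on the timeline.
    table.getActiveTimeline().saveToCleanRequested(cleanInstant, TimelineMetadataUtils.serializeCleanerPlan(cleanerPlan));
  } catch (IOException e) {
    throw new HoodieIOException("Failed to save the requested clean plan", e);
  }
}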