use of org.apache.hudi.avro.model.HoodieCleanMetadata in project hudi by apache.
the class TestCleaner method runCleaner.
/**
* Helper to run cleaner and collect Clean Stats.
*
* @param config HoodieWriteConfig
* @param simulateRetryFailure whether to simulate a failed clean and re-run it to verify idempotency
* @param firstCommitSequence sequence number used to generate the clean instant time
*/
private List<HoodieCleanStat> runCleaner(HoodieWriteConfig config, boolean simulateRetryFailure, int firstCommitSequence) throws IOException {
  SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(config);
  String cleanInstantTs = makeNewCommitTime(firstCommitSequence);
  HoodieCleanMetadata cleanMetadata1 = writeClient.clean(cleanInstantTs);
  if (null == cleanMetadata1) {
    return new ArrayList<>();
  }
  if (simulateRetryFailure) {
    HoodieInstant completedCleanInstant = new HoodieInstant(State.COMPLETED, HoodieTimeline.CLEAN_ACTION, cleanInstantTs);
    HoodieCleanMetadata metadata = CleanerUtils.getCleanerMetadata(metaClient, completedCleanInstant);
    metadata.getPartitionMetadata().values().forEach(p -> {
      String dirPath = metaClient.getBasePath() + "/" + p.getPartitionPath();
      p.getSuccessDeleteFiles().forEach(p2 -> {
        try {
          metaClient.getFs().create(new Path(dirPath, p2), true);
        } catch (IOException e) {
          throw new HoodieIOException(e.getMessage(), e);
        }
      });
    });
    metaClient.reloadActiveTimeline().revertToInflight(completedCleanInstant);
    // retry clean operation again
    writeClient.clean();
    final HoodieCleanMetadata retriedCleanMetadata =
        CleanerUtils.getCleanerMetadata(HoodieTableMetaClient.reload(metaClient), completedCleanInstant);
    cleanMetadata1.getPartitionMetadata().keySet().forEach(k -> {
      HoodieCleanPartitionMetadata p1 = cleanMetadata1.getPartitionMetadata().get(k);
      HoodieCleanPartitionMetadata p2 = retriedCleanMetadata.getPartitionMetadata().get(k);
      assertEquals(p1.getDeletePathPatterns(), p2.getDeletePathPatterns());
      assertEquals(p1.getSuccessDeleteFiles(), p2.getSuccessDeleteFiles());
      assertEquals(p1.getFailedDeleteFiles(), p2.getFailedDeleteFiles());
      assertEquals(p1.getPartitionPath(), p2.getPartitionPath());
      assertEquals(k, p1.getPartitionPath());
    });
  }
  Map<String, HoodieCleanStat> cleanStatMap = cleanMetadata1.getPartitionMetadata().values().stream()
      .map(x -> new HoodieCleanStat.Builder()
          .withPartitionPath(x.getPartitionPath())
          .withFailedDeletes(x.getFailedDeleteFiles())
          .withSuccessfulDeletes(x.getSuccessDeleteFiles())
          .withPolicy(HoodieCleaningPolicy.valueOf(x.getPolicy()))
          .withDeletePathPattern(x.getDeletePathPatterns())
          .withEarliestCommitRetained(Option.ofNullable(cleanMetadata1.getEarliestCommitToRetain() != null
              ? new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "000")
              : null))
          .build())
      .collect(Collectors.toMap(HoodieCleanStat::getPartitionPath, x -> x));
  cleanMetadata1.getBootstrapPartitionMetadata().values().forEach(x -> {
    HoodieCleanStat s = cleanStatMap.get(x.getPartitionPath());
    cleanStatMap.put(x.getPartitionPath(), new HoodieCleanStat.Builder()
        .withPartitionPath(x.getPartitionPath())
        .withFailedDeletes(s.getFailedDeleteFiles())
        .withSuccessfulDeletes(s.getSuccessDeleteFiles())
        .withPolicy(HoodieCleaningPolicy.valueOf(x.getPolicy()))
        .withDeletePathPattern(s.getDeletePathPatterns())
        .withEarliestCommitRetained(Option.ofNullable(s.getEarliestCommitToRetain())
            .map(y -> new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, y)))
        .withSuccessfulDeleteBootstrapBaseFiles(x.getSuccessDeleteFiles())
        .withFailedDeleteBootstrapBaseFiles(x.getFailedDeleteFiles())
        .withDeleteBootstrapBasePathPatterns(x.getDeletePathPatterns())
        .build());
  });
  return new ArrayList<>(cleanStatMap.values());
}
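The stream pipeline above boils down to a per-partition mapping from HoodieCleanPartitionMetadata to HoodieCleanStat. The following sketch factors that mapping out on its own, assuming the same imports as the snippet above; the helper name toCleanStat is hypothetical, and the earliest-retained instant is simplified to Option.empty() for brevity.

// Minimal sketch: build a HoodieCleanStat from one partition's clean metadata,
// mirroring the map() step inside runCleaner above.
private static HoodieCleanStat toCleanStat(HoodieCleanPartitionMetadata partitionMetadata) {
  return new HoodieCleanStat.Builder()
      .withPartitionPath(partitionMetadata.getPartitionPath())
      .withFailedDeletes(partitionMetadata.getFailedDeleteFiles())
      .withSuccessfulDeletes(partitionMetadata.getSuccessDeleteFiles())
      .withPolicy(HoodieCleaningPolicy.valueOf(partitionMetadata.getPolicy()))
      .withDeletePathPattern(partitionMetadata.getDeletePathPatterns())
      .withEarliestCommitRetained(Option.empty()) // simplified; runCleaner derives this from the clean metadata
      .build();
}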
use of org.apache.hudi.avro.model.HoodieCleanMetadata in project hudi by apache.
the class HoodieClientTestHarness method createCleanMetadata.
public HoodieInstant createCleanMetadata(String instantTime, boolean inflightOnly, boolean isEmpty) throws IOException {
  HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(new HoodieActionInstant("", "", ""), "",
      new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new HashMap<>());
  if (inflightOnly) {
    HoodieTestTable.of(metaClient).addInflightClean(instantTime, cleanerPlan);
  } else {
    HoodieCleanStat cleanStats = new HoodieCleanStat(
        HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
        HoodieTestUtils.DEFAULT_PARTITION_PATHS[new Random().nextInt(HoodieTestUtils.DEFAULT_PARTITION_PATHS.length)],
        Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), instantTime);
    HoodieCleanMetadata cleanMetadata = convertCleanMetadata(instantTime, Option.of(0L), Collections.singletonList(cleanStats));
    HoodieTestTable.of(metaClient).addClean(instantTime, cleanerPlan, cleanMetadata, isEmpty);
  }
  return new HoodieInstant(inflightOnly, "clean", instantTime);
}
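A hypothetical call site inside a test body might look as follows; the instant times are illustrative.

// Hypothetical usage: one completed clean instant and one that is left inflight on the test table.
HoodieInstant completedClean = createCleanMetadata("001", false, false);
HoodieInstant inflightClean = createCleanMetadata("002", true, false);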
use of org.apache.hudi.avro.model.HoodieCleanMetadata in project hudi by apache.
the class TestIncrementalFSViewSync method performClean.
/**
* Simulate a Cleaner operation cleaning up an instant.
*
* @param instant Instant to be cleaned
* @param files List of files to be deleted
* @param cleanInstant Cleaner Instant
*/
private void performClean(String instant, List<String> files, String cleanInstant) throws IOException {
  Map<String, List<String>> partititonToFiles = deleteFiles(files);
  List<HoodieCleanStat> cleanStats = partititonToFiles.entrySet().stream()
      .map(e -> new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_COMMITS, e.getKey(), e.getValue(),
          e.getValue(), new ArrayList<>(), Integer.toString(Integer.parseInt(instant) + 1)))
      .collect(Collectors.toList());
  HoodieInstant cleanInflightInstant = new HoodieInstant(true, HoodieTimeline.CLEAN_ACTION, cleanInstant);
  metaClient.getActiveTimeline().createNewInstant(cleanInflightInstant);
  HoodieCleanMetadata cleanMetadata = CleanerUtils.convertCleanMetadata(cleanInstant, Option.empty(), cleanStats);
  metaClient.getActiveTimeline().saveAsComplete(cleanInflightInstant, TimelineMetadataUtils.serializeCleanMetadata(cleanMetadata));
}
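For reference, the conversion step in isolation: a single HoodieCleanStat is packaged into a HoodieCleanMetadata via CleanerUtils.convertCleanMetadata, the same call used above. A minimal sketch, e.g. inside a test body; the partition path and instant times are illustrative, and the constructor arguments follow the positional form used in performClean above.

// Sketch: convert one clean stat into HoodieCleanMetadata.
HoodieCleanStat stat = new HoodieCleanStat(
    HoodieCleaningPolicy.KEEP_LATEST_COMMITS,
    "2016/03/15",                           // partition path (illustrative)
    Collections.singletonList("file-1"),    // delete path patterns
    Collections.singletonList("file-1"),    // successfully deleted files
    Collections.emptyList(),                // failed deletes
    "001");                                 // earliest commit to retain
HoodieCleanMetadata cleanMetadata =
    CleanerUtils.convertCleanMetadata("002", Option.empty(), Collections.singletonList(stat));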
use of org.apache.hudi.avro.model.HoodieCleanMetadata in project hudi by apache.
the class TestTimelineUtils method getCleanMetadata.
private Option<byte[]> getCleanMetadata(String partition, String time) throws IOException {
  Map<String, HoodieCleanPartitionMetadata> partitionToFilesCleaned = new HashMap<>();
  List<String> filesDeleted = new ArrayList<>();
  filesDeleted.add("file-" + partition + "-" + time + "1");
  filesDeleted.add("file-" + partition + "-" + time + "2");
  HoodieCleanPartitionMetadata partitionMetadata = HoodieCleanPartitionMetadata.newBuilder()
      .setPartitionPath(partition)
      .setPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name())
      .setFailedDeleteFiles(Collections.emptyList())
      .setDeletePathPatterns(Collections.emptyList())
      .setSuccessDeleteFiles(filesDeleted)
      .build();
  partitionToFilesCleaned.putIfAbsent(partition, partitionMetadata);
  HoodieCleanMetadata cleanMetadata = HoodieCleanMetadata.newBuilder()
      .setVersion(1)
      .setTimeTakenInMillis(100)
      .setTotalFilesDeleted(1)
      .setStartCleanTime(time)
      .setEarliestCommitToRetain(time)
      .setPartitionMetadata(partitionToFilesCleaned)
      .build();
  return TimelineMetadataUtils.serializeCleanMetadata(cleanMetadata);
}
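Since the bytes returned above are the serialized Avro record, they can be read back with the deserializer used by ExportCommand further down. A minimal round-trip sketch, e.g. inside a test body, assuming a JUnit assertion and the illustrative arguments shown:

// Round-trip sketch: serialize via getCleanMetadata, then deserialize and inspect a field.
byte[] bytes = getCleanMetadata("2016/03/15", "001").get();
HoodieCleanMetadata roundTripped = TimelineMetadataUtils.deserializeHoodieCleanMetadata(bytes);
assertEquals("001", roundTripped.getStartCleanTime());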
use of org.apache.hudi.avro.model.HoodieCleanMetadata in project hudi by apache.
the class ExportCommand method copyNonArchivedInstants.
private int copyNonArchivedInstants(List<HoodieInstant> instants, int limit, String localFolder) throws Exception {
  int copyCount = 0;
  if (instants.isEmpty()) {
    return limit;
  }
  final Logger LOG = LogManager.getLogger(ExportCommand.class);
  final HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
  final HoodieActiveTimeline timeline = metaClient.getActiveTimeline();
  for (HoodieInstant instant : instants) {
    String localPath = localFolder + Path.SEPARATOR + instant.getFileName();
    byte[] data = null;
    switch (instant.getAction()) {
      case HoodieTimeline.CLEAN_ACTION: {
        HoodieCleanMetadata metadata = TimelineMetadataUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(instant).get());
        data = HoodieAvroUtils.avroToJson(metadata, true);
        break;
      }
      case HoodieTimeline.DELTA_COMMIT_ACTION:
      case HoodieTimeline.COMMIT_ACTION:
      case HoodieTimeline.COMPACTION_ACTION: {
        // Already in json format
        data = timeline.getInstantDetails(instant).get();
        break;
      }
      case HoodieTimeline.ROLLBACK_ACTION: {
        HoodieRollbackMetadata metadata = TimelineMetadataUtils.deserializeHoodieRollbackMetadata(timeline.getInstantDetails(instant).get());
        data = HoodieAvroUtils.avroToJson(metadata, true);
        break;
      }
      case HoodieTimeline.SAVEPOINT_ACTION: {
        HoodieSavepointMetadata metadata = TimelineMetadataUtils.deserializeHoodieSavepointMetadata(timeline.getInstantDetails(instant).get());
        data = HoodieAvroUtils.avroToJson(metadata, true);
        break;
      }
      default:
        throw new HoodieException("Unknown type of action " + instant.getAction());
    }
    if (data != null) {
      writeToFile(localPath, data);
      copyCount = copyCount + 1;
    }
  }
  return copyCount;
}
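The CLEAN_ACTION branch can also be read in isolation: the instant details are deserialized from the timeline and rendered as pretty-printed JSON. A minimal sketch using the same classes as the method above; the helper name cleanInstantToJson is hypothetical.

// Sketch: export a single clean instant's metadata as JSON bytes.
private static byte[] cleanInstantToJson(HoodieActiveTimeline timeline, HoodieInstant cleanInstant) throws IOException {
  HoodieCleanMetadata metadata =
      TimelineMetadataUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(cleanInstant).get());
  return HoodieAvroUtils.avroToJson(metadata, true);
}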