Example usage of org.apache.hudi.avro.model.HoodieRollbackMetadata in the Apache Hudi project:
class HoodieTableMetadataUtil, method processRollbackMetadata.
/**
 * Extracts information about the deleted and appended files from the {@code HoodieRollbackMetadata}.
 * <p>
 * During a rollback files may be deleted (COW, MOR) or rollback blocks be appended (MOR only) to files. This
 * function will extract this change file for each partition.
 *
 * @param metadataTableTimeline Current timeline of the Metadata Table
 * @param rollbackMetadata {@code HoodieRollbackMetadata}
 * @param partitionToDeletedFiles The {@code Map} to fill with files deleted per partition.
 * @param partitionToAppendedFiles The {@code Map} to fill with files appended per partition and their sizes.
 * @param lastSyncTs Timestamp of the last instant synced to the Metadata Table, if any; used to decide
 *                   whether a rollback still needs to be applied.
 */
private static void processRollbackMetadata(HoodieActiveTimeline metadataTableTimeline, HoodieRollbackMetadata rollbackMetadata, Map<String, List<String>> partitionToDeletedFiles, Map<String, Map<String, Long>> partitionToAppendedFiles, Option<String> lastSyncTs) {
  rollbackMetadata.getPartitionMetadata().values().forEach(pm -> {
    final String instantToRollback = rollbackMetadata.getCommitsRollback().get(0);
    // Has this rollback produced new files?
    boolean hasRollbackLogFiles = pm.getRollbackLogFiles() != null && !pm.getRollbackLogFiles().isEmpty();
    boolean hasNonZeroRollbackLogFiles = hasRollbackLogFiles && pm.getRollbackLogFiles().values().stream().mapToLong(Long::longValue).sum() > 0;
    // If instant-to-rollback has not been synced to metadata table yet then there is no need to update metadata
    // This can happen in two cases:
    //  Case 1: Metadata Table timeline is behind the instant-to-rollback.
    boolean shouldSkip = lastSyncTs.isPresent() && HoodieTimeline.compareTimestamps(instantToRollback, HoodieTimeline.GREATER_THAN, lastSyncTs.get());
    if (!hasNonZeroRollbackLogFiles && shouldSkip) {
      LOG.info(String.format("Skipping syncing of rollbackMetadata at %s, given metadata table is already synced upto to %s", instantToRollback, lastSyncTs.get()));
      return;
    }
    //  Case 2: The instant-to-rollback was never committed to Metadata Table. This can happen if the instant-to-rollback
    //  was a failed commit (never completed) as only completed instants are synced to Metadata Table.
    //  But the required Metadata Table instants should not have been archived
    HoodieInstant syncedInstant = new HoodieInstant(false, HoodieTimeline.DELTA_COMMIT_ACTION, instantToRollback);
    if (metadataTableTimeline.getCommitsTimeline().isBeforeTimelineStarts(syncedInstant.getTimestamp())) {
      throw new HoodieMetadataException(String.format("The instant %s required to sync rollback of %s has been archived", syncedInstant, instantToRollback));
    }
    shouldSkip = !metadataTableTimeline.containsInstant(syncedInstant);
    if (!hasNonZeroRollbackLogFiles && shouldSkip) {
      LOG.info(String.format("Skipping syncing of rollbackMetadata at %s, since this instant was never committed to Metadata Table", instantToRollback));
      return;
    }
    final String partition = pm.getPartitionPath();
    if ((!pm.getSuccessDeleteFiles().isEmpty() || !pm.getFailedDeleteFiles().isEmpty()) && !shouldSkip) {
      // Extract deleted file name from the absolute paths saved in getSuccessDeleteFiles()
      List<String> deletedFiles = pm.getSuccessDeleteFiles().stream().map(p -> new Path(p).getName()).collect(Collectors.toList());
      if (!pm.getFailedDeleteFiles().isEmpty()) {
        deletedFiles.addAll(pm.getFailedDeleteFiles().stream().map(p -> new Path(p).getName()).collect(Collectors.toList()));
      }
      partitionToDeletedFiles.computeIfAbsent(partition, k -> new ArrayList<>()).addAll(deletedFiles);
    }
    if (hasRollbackLogFiles) {
      // Keep the larger size on conflict, as the rollback file could have been updated after
      // written log files were computed.
      BiFunction<Long, Long, Long> fileMergeFn = Math::max;
      // Extract appended file name from the absolute paths saved in getRollbackLogFiles()
      Map<String, Long> appendedFiles = partitionToAppendedFiles.computeIfAbsent(partition, k -> new HashMap<>());
      pm.getRollbackLogFiles().forEach((path, size) -> appendedFiles.merge(new Path(path).getName(), size, fileMergeFn));
    }
  });
}
Example usage of org.apache.hudi.avro.model.HoodieRollbackMetadata in the Apache Hudi project:
class ExportCommand, method copyNonArchivedInstants.
/**
 * Exports the metadata of each non-archived instant to a file under {@code localFolder}.
 * <p>
 * Avro-serialized actions (clean, rollback, savepoint) are converted to JSON before writing;
 * commit/delta-commit/compaction details are already JSON and are written as-is.
 *
 * @param instants the active-timeline instants to export
 * @param limit maximum #instants requested by the caller; returned unchanged when there is
 *              nothing to export (NOTE(review): limit is not otherwise enforced here — confirm
 *              whether truncation is expected to happen upstream)
 * @param localFolder local destination directory
 * @return the number of instants actually copied
 * @throws Exception if deserialization or file writing fails
 */
private int copyNonArchivedInstants(List<HoodieInstant> instants, int limit, String localFolder) throws Exception {
  int copyCount = 0;
  if (instants.isEmpty()) {
    return limit;
  }
  final HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
  final HoodieActiveTimeline timeline = metaClient.getActiveTimeline();
  for (HoodieInstant instant : instants) {
    String localPath = localFolder + Path.SEPARATOR + instant.getFileName();
    byte[] data = null;
    switch (instant.getAction()) {
      case HoodieTimeline.CLEAN_ACTION: {
        HoodieCleanMetadata metadata = TimelineMetadataUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(instant).get());
        data = HoodieAvroUtils.avroToJson(metadata, true);
        break;
      }
      case HoodieTimeline.DELTA_COMMIT_ACTION:
      case HoodieTimeline.COMMIT_ACTION:
      case HoodieTimeline.COMPACTION_ACTION: {
        // Already in json format
        data = timeline.getInstantDetails(instant).get();
        break;
      }
      case HoodieTimeline.ROLLBACK_ACTION: {
        HoodieRollbackMetadata metadata = TimelineMetadataUtils.deserializeHoodieRollbackMetadata(timeline.getInstantDetails(instant).get());
        data = HoodieAvroUtils.avroToJson(metadata, true);
        break;
      }
      case HoodieTimeline.SAVEPOINT_ACTION: {
        HoodieSavepointMetadata metadata = TimelineMetadataUtils.deserializeHoodieSavepointMetadata(timeline.getInstantDetails(instant).get());
        data = HoodieAvroUtils.avroToJson(metadata, true);
        break;
      }
      default: {
        throw new HoodieException("Unknown type of action " + instant.getAction());
      }
    }
    if (data != null) {
      writeToFile(localPath, data);
      // BUG FIX: copyCount was never incremented, so the method always returned 0
      // for a non-empty instant list.
      copyCount++;
    }
  }
  return copyCount;
}
Example usage of org.apache.hudi.avro.model.HoodieRollbackMetadata in the Apache Hudi project:
class RollbacksCommand, method showRollbacks.
/**
 * CLI command that lists all completed rollback instants as a printed table.
 * One table row is emitted per (rollback instant, rolled-back commit) pair.
 */
@CliCommand(value = "show rollbacks", help = "List all rollback instants")
public String showRollbacks(@CliOption(key = { "limit" }, help = "Limit #rows to be displayed", unspecifiedDefaultValue = "10") Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) {
  final HoodieActiveTimeline activeTimeline = new RollbackTimeline(HoodieCLI.getTableMetaClient());
  final HoodieTimeline completedRollbacks = activeTimeline.getRollbackTimeline().filterCompletedInstants();
  final List<Comparable[]> rows = new ArrayList<>();
  completedRollbacks.getInstants().forEach(instant -> {
    try {
      final HoodieRollbackMetadata rollbackMetadata =
          TimelineMetadataUtils.deserializeAvroMetadata(activeTimeline.getInstantDetails(instant).get(), HoodieRollbackMetadata.class);
      final int totalPartitions =
          rollbackMetadata.getPartitionMetadata() != null ? rollbackMetadata.getPartitionMetadata().size() : 0;
      // A single rollback may cover several commits; emit one row per rolled-back commit.
      rollbackMetadata.getCommitsRollback().forEach(rolledBackCommit -> rows.add(new Comparable[] {
          rollbackMetadata.getStartRollbackTime(),
          rolledBackCommit,
          rollbackMetadata.getTotalFilesDeleted(),
          rollbackMetadata.getTimeTakenInMillis(),
          totalPartitions
      }));
    } catch (IOException e) {
      e.printStackTrace();
    }
  });
  final TableHeader header = new TableHeader()
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_INSTANT)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_ROLLBACK_INSTANT)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_DELETED)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TIME_TOKEN_MILLIS)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_PARTITIONS);
  return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
Aggregations