
Example 21 with HoodieRollbackMetadata

Use of org.apache.hudi.avro.model.HoodieRollbackMetadata in project hudi by apache.

In the class HoodieTableMetadataUtil, the method processRollbackMetadata:

/**
 * Extracts information about the deleted and appended files from the {@code HoodieRollbackMetadata}.
 * <p>
 * During a rollback, files may be deleted (COW, MOR) or rollback blocks may be appended to files (MOR only).
 * This function extracts these file changes for each partition.
 *
 * @param metadataTableTimeline    Current timeline of the Metadata Table
 * @param rollbackMetadata         {@code HoodieRollbackMetadata}
 * @param partitionToDeletedFiles  The {@code Map} to fill with files deleted per partition.
 * @param partitionToAppendedFiles The {@code Map} to fill with files appended per partition and their sizes.
 * @param lastSyncTs               Timestamp up to which the Metadata Table has already been synced, if known.
 */
private static void processRollbackMetadata(HoodieActiveTimeline metadataTableTimeline,
                                            HoodieRollbackMetadata rollbackMetadata,
                                            Map<String, List<String>> partitionToDeletedFiles,
                                            Map<String, Map<String, Long>> partitionToAppendedFiles,
                                            Option<String> lastSyncTs) {
    rollbackMetadata.getPartitionMetadata().values().forEach(pm -> {
        final String instantToRollback = rollbackMetadata.getCommitsRollback().get(0);
        // Has this rollback produced new files?
        boolean hasRollbackLogFiles = pm.getRollbackLogFiles() != null && !pm.getRollbackLogFiles().isEmpty();
        boolean hasNonZeroRollbackLogFiles = hasRollbackLogFiles && pm.getRollbackLogFiles().values().stream().mapToLong(Long::longValue).sum() > 0;
        // If the instant-to-rollback has not been synced to the Metadata Table yet, there is no need to update
        // the metadata. This can happen in two cases:
        // Case 1: The Metadata Table timeline is behind the instant-to-rollback.
        boolean shouldSkip = lastSyncTs.isPresent() && HoodieTimeline.compareTimestamps(instantToRollback, HoodieTimeline.GREATER_THAN, lastSyncTs.get());
        if (!hasNonZeroRollbackLogFiles && shouldSkip) {
            LOG.info(String.format("Skipping syncing of rollbackMetadata at %s, given metadata table is already synced upto to %s", instantToRollback, lastSyncTs.get()));
            return;
        }
        // Case 2: The instant-to-rollback was never committed to the Metadata Table. This can happen if the
        // instant-to-rollback was a failed commit (never completed), as only completed instants are synced to the
        // Metadata Table. However, the Metadata Table instants required for the sync must not have been archived.
        HoodieInstant syncedInstant = new HoodieInstant(false, HoodieTimeline.DELTA_COMMIT_ACTION, instantToRollback);
        if (metadataTableTimeline.getCommitsTimeline().isBeforeTimelineStarts(syncedInstant.getTimestamp())) {
            throw new HoodieMetadataException(String.format("The instant %s required to sync rollback of %s has been archived", syncedInstant, instantToRollback));
        }
        shouldSkip = !metadataTableTimeline.containsInstant(syncedInstant);
        if (!hasNonZeroRollbackLogFiles && shouldSkip) {
            LOG.info(String.format("Skipping syncing of rollbackMetadata at %s, since this instant was never committed to Metadata Table", instantToRollback));
            return;
        }
        final String partition = pm.getPartitionPath();
        if ((!pm.getSuccessDeleteFiles().isEmpty() || !pm.getFailedDeleteFiles().isEmpty()) && !shouldSkip) {
            if (!partitionToDeletedFiles.containsKey(partition)) {
                partitionToDeletedFiles.put(partition, new ArrayList<>());
            }
            // Extract deleted file names from the absolute paths saved in getSuccessDeleteFiles()
            List<String> deletedFiles = pm.getSuccessDeleteFiles().stream().map(p -> new Path(p).getName()).collect(Collectors.toList());
            if (!pm.getFailedDeleteFiles().isEmpty()) {
                deletedFiles.addAll(pm.getFailedDeleteFiles().stream().map(p -> new Path(p).getName()).collect(Collectors.toList()));
            }
            partitionToDeletedFiles.get(partition).addAll(deletedFiles);
        }
        BiFunction<Long, Long, Long> fileMergeFn = (oldSize, newSizeCopy) -> {
            // Keep the larger size, as the rollback log file could have been updated after the written
            // log file sizes were computed.
            return oldSize > newSizeCopy ? oldSize : newSizeCopy;
        };
        if (hasRollbackLogFiles) {
            if (!partitionToAppendedFiles.containsKey(partition)) {
                partitionToAppendedFiles.put(partition, new HashMap<>());
            }
            // Extract appended file names from the absolute paths saved in getRollbackLogFiles()
            pm.getRollbackLogFiles().forEach((path, size) -> {
                partitionToAppendedFiles.get(partition).merge(new Path(path).getName(), size, fileMergeFn);
            });
        }
    });
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieMetadataException(org.apache.hudi.exception.HoodieMetadataException) HoodieColumnRangeMetadata(org.apache.hudi.common.model.HoodieColumnRangeMetadata) Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) BiFunction(java.util.function.BiFunction) HoodieException(org.apache.hudi.exception.HoodieException) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) ByteBuffer(java.nio.ByteBuffer) MAX(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.MAX) Logger(org.apache.log4j.Logger) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) HoodieFileReader(org.apache.hudi.io.storage.HoodieFileReader) Schema(org.apache.avro.Schema) Collectors(java.util.stream.Collectors) TOTAL_SIZE(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.TOTAL_SIZE) Objects(java.util.Objects) HoodieFileFormat(org.apache.hudi.common.model.HoodieFileFormat) VALUE_COUNT(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.VALUE_COUNT) List(java.util.List) Stream(java.util.stream.Stream) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieMetadataColumnStats(org.apache.hudi.avro.model.HoodieMetadataColumnStats) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) TOTAL_UNCOMPRESSED_SIZE(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.TOTAL_UNCOMPRESSED_SIZE) EMPTY_PARTITION_NAME(org.apache.hudi.metadata.HoodieTableMetadata.EMPTY_PARTITION_NAME) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) ArrayList(java.util.ArrayList) StringUtils(org.apache.hudi.common.util.StringUtils) HoodieFileReaderFactory(org.apache.hudi.io.storage.HoodieFileReaderFactory) NULL_COUNT(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.NULL_COUNT) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) LinkedList(java.util.LinkedList) Nonnull(javax.annotation.Nonnull) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) IndexedRecord(org.apache.avro.generic.IndexedRecord) BloomFilter(org.apache.hudi.common.bloom.BloomFilter) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieAvroUtils.getNestedFieldValAsString(org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldValAsString) GenericRecord(org.apache.avro.generic.GenericRecord) MIN(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.MIN) HoodieData(org.apache.hudi.common.data.HoodieData) TableSchemaResolver(org.apache.hudi.common.table.TableSchemaResolver) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) HoodieDefaultTimeline(org.apache.hudi.common.table.timeline.HoodieDefaultTimeline) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) ParquetUtils(org.apache.hudi.common.util.ParquetUtils) NON_PARTITIONED_NAME(org.apache.hudi.metadata.HoodieTableMetadata.NON_PARTITIONED_NAME) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) COLUMN_RANGE_MERGE_FUNCTION(org.apache.hudi.common.model.HoodieColumnRangeMetadata.COLUMN_RANGE_MERGE_FUNCTION) HoodieDeltaWriteStat(org.apache.hudi.common.model.HoodieDeltaWriteStat) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) Comparator(java.util.Comparator) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
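
The size reconciliation above boils down to Map.merge with a keep-the-larger-value merge function. A minimal, self-contained sketch of that pattern (the file names and sizes below are made up for illustration):

import java.util.HashMap;
import java.util.Map;
import java.util.function.BiFunction;

public class RollbackSizeMergeDemo {
    public static void main(String[] args) {
        // Keep the larger of the two recorded sizes, mirroring fileMergeFn above:
        // a rollback log file could have grown after its size was first recorded.
        BiFunction<Long, Long, Long> fileMergeFn = Math::max;

        Map<String, Long> appendedFiles = new HashMap<>();
        appendedFiles.put(".fg1_20220101.log.1", 100L); // hypothetical log file name

        // A later rollback reports a larger size for the same file: merge keeps 250.
        appendedFiles.merge(".fg1_20220101.log.1", 250L, fileMergeFn);
        // A file not seen before is simply inserted.
        appendedFiles.merge(".fg2_20220101.log.1", 80L, fileMergeFn);

        // Prints both entries; HashMap iteration order is unspecified.
        appendedFiles.forEach((name, size) -> System.out.println(name + " -> " + size));
    }
}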

Example 22 with HoodieRollbackMetadata

Use of org.apache.hudi.avro.model.HoodieRollbackMetadata in project hudi by apache.

In the class ExportCommand, the method copyNonArchivedInstants:

private int copyNonArchivedInstants(List<HoodieInstant> instants, int limit, String localFolder) throws Exception {
    int copyCount = 0;
    if (instants.isEmpty()) {
        return limit;
    }
    final Logger LOG = LogManager.getLogger(ExportCommand.class);
    final HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
    final HoodieActiveTimeline timeline = metaClient.getActiveTimeline();
    for (HoodieInstant instant : instants) {
        String localPath = localFolder + Path.SEPARATOR + instant.getFileName();
        byte[] data = null;
        switch(instant.getAction()) {
            case HoodieTimeline.CLEAN_ACTION:
                {
                    HoodieCleanMetadata metadata = TimelineMetadataUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(instant).get());
                    data = HoodieAvroUtils.avroToJson(metadata, true);
                    break;
                }
            case HoodieTimeline.DELTA_COMMIT_ACTION:
            case HoodieTimeline.COMMIT_ACTION:
            case HoodieTimeline.COMPACTION_ACTION:
                {
                    // Already in json format
                    data = timeline.getInstantDetails(instant).get();
                    break;
                }
            case HoodieTimeline.ROLLBACK_ACTION:
                {
                    HoodieRollbackMetadata metadata = TimelineMetadataUtils.deserializeHoodieRollbackMetadata(timeline.getInstantDetails(instant).get());
                    data = HoodieAvroUtils.avroToJson(metadata, true);
                    break;
                }
            case HoodieTimeline.SAVEPOINT_ACTION:
                {
                    HoodieSavepointMetadata metadata = TimelineMetadataUtils.deserializeHoodieSavepointMetadata(timeline.getInstantDetails(instant).get());
                    data = HoodieAvroUtils.avroToJson(metadata, true);
                    break;
                }
            default:
                {
                    throw new HoodieException("Unknown type of action " + instant.getAction());
                }
        }
        if (data != null) {
            writeToFile(localPath, data);
            // Count the instant as exported once its metadata has been written out;
            // otherwise the method always returns 0 despite copying files.
            copyCount = copyCount + 1;
        }
    }
    return copyCount;
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) HoodieException(org.apache.hudi.exception.HoodieException) Logger(org.apache.log4j.Logger) HoodieSavepointMetadata(org.apache.hudi.avro.model.HoodieSavepointMetadata)
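
The ROLLBACK_ACTION branch above pairs TimelineMetadataUtils.deserializeHoodieRollbackMetadata with HoodieAvroUtils.avroToJson. A minimal sketch of that step in isolation, assuming a timeline and a completed rollback instant are already in hand (the wrapper class and method name are made up for illustration):

import java.io.IOException;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;

public class RollbackJsonExporter {
    // Decodes one rollback instant's Avro metadata and renders it as JSON bytes,
    // the same deserialize-then-serialize step used in copyNonArchivedInstants above.
    public static byte[] rollbackInstantAsJson(HoodieActiveTimeline timeline, HoodieInstant instant)
            throws IOException {
        HoodieRollbackMetadata metadata = TimelineMetadataUtils
            .deserializeHoodieRollbackMetadata(timeline.getInstantDetails(instant).get());
        // The boolean flag mirrors the usage in the example above (pretty-printed output).
        return HoodieAvroUtils.avroToJson(metadata, true);
    }
}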

Example 23 with HoodieRollbackMetadata

Use of org.apache.hudi.avro.model.HoodieRollbackMetadata in project hudi by apache.

In the class RollbacksCommand, the method showRollbacks:

@CliCommand(value = "show rollbacks", help = "List all rollback instants")
public String showRollbacks(
        @CliOption(key = { "limit" }, help = "Limit #rows to be displayed", unspecifiedDefaultValue = "10") Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
        @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) {
    HoodieActiveTimeline activeTimeline = new RollbackTimeline(HoodieCLI.getTableMetaClient());
    HoodieTimeline rollback = activeTimeline.getRollbackTimeline().filterCompletedInstants();
    final List<Comparable[]> rows = new ArrayList<>();
    rollback.getInstants().forEach(instant -> {
        try {
            HoodieRollbackMetadata metadata = TimelineMetadataUtils.deserializeAvroMetadata(activeTimeline.getInstantDetails(instant).get(), HoodieRollbackMetadata.class);
            metadata.getCommitsRollback().forEach(c -> {
                Comparable[] row = new Comparable[5];
                row[0] = metadata.getStartRollbackTime();
                row[1] = c;
                row[2] = metadata.getTotalFilesDeleted();
                row[3] = metadata.getTimeTakenInMillis();
                row[4] = metadata.getPartitionMetadata() != null ? metadata.getPartitionMetadata().size() : 0;
                rows.add(row);
            });
        } catch (IOException e) {
            e.printStackTrace();
        }
    });
    TableHeader header = new TableHeader()
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_INSTANT)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_ROLLBACK_INSTANT)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_DELETED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TIME_TOKEN_MILLIS)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_PARTITIONS);
    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
Also used : HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) TableHeader(org.apache.hudi.cli.TableHeader) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) ArrayList(java.util.ArrayList) IOException(java.io.IOException) CliCommand(org.springframework.shell.core.annotation.CliCommand)
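
Note that deserializeAvroMetadata is generic over the metadata class, so the same read path serves the other timeline metadata types listed under Aggregations below. A hedged sketch of a small wrapper, assuming the type parameter is bounded by Avro's SpecificRecordBase as the usage here suggests (the class and method names are made up for illustration):

import java.io.IOException;
import org.apache.avro.specific.SpecificRecordBase;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;

public class TimelineMetadataReader {
    // Reads a completed instant's details and decodes them as the given Avro
    // metadata class, e.g. HoodieRollbackMetadata or HoodieCleanMetadata.
    public static <T extends SpecificRecordBase> T readInstantMetadata(
            HoodieActiveTimeline timeline, HoodieInstant instant, Class<T> clazz) throws IOException {
        return TimelineMetadataUtils.deserializeAvroMetadata(timeline.getInstantDetails(instant).get(), clazz);
    }
}

With this in place, the deserialization inside the forEach above reduces to readInstantMetadata(activeTimeline, instant, HoodieRollbackMetadata.class).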

Aggregations

HoodieRollbackMetadata (org.apache.hudi.avro.model.HoodieRollbackMetadata): 23 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 15 usages
ArrayList (java.util.ArrayList): 14 usages
List (java.util.List): 14 usages
IOException (java.io.IOException): 12 usages
HashMap (java.util.HashMap): 12 usages
HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline): 12 usages
Map (java.util.Map): 10 usages
HoodieRestoreMetadata (org.apache.hudi.avro.model.HoodieRestoreMetadata): 8 usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 8 usages
Collections (java.util.Collections): 7 usages
Collectors (java.util.stream.Collectors): 7 usages
Option (org.apache.hudi.common.util.Option): 7 usages
ValidationUtils (org.apache.hudi.common.util.ValidationUtils): 7 usages
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 7 usages
Logger (org.apache.log4j.Logger): 7 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 6 usages
LogManager (org.apache.log4j.LogManager): 6 usages
HoodieCleanMetadata (org.apache.hudi.avro.model.HoodieCleanMetadata): 5 usages
HoodieInstantInfo (org.apache.hudi.avro.model.HoodieInstantInfo): 5 usages