Example 71 with HoodieCommitMetadata

use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.

the class KafkaConnectUtils method getCommitMetadataForLatestInstant.

/**
 * Get the metadata from the latest commit file.
 *
 * @param metaClient The {@link HoodieTableMetaClient} used to access the table metadata.
 * @return An {@link Option} of {@link HoodieCommitMetadata} containing the metadata from the latest commit file.
 */
public static Option<HoodieCommitMetadata> getCommitMetadataForLatestInstant(HoodieTableMetaClient metaClient) {
    HoodieTimeline timeline = metaClient.getActiveTimeline()
        .getCommitsTimeline()
        .filterCompletedInstants()
        .filter(instant ->
            (metaClient.getTableType() == HoodieTableType.COPY_ON_WRITE
                && instant.getAction().equals(HoodieActiveTimeline.COMMIT_ACTION))
            || (metaClient.getTableType() == HoodieTableType.MERGE_ON_READ
                && instant.getAction().equals(HoodieActiveTimeline.DELTA_COMMIT_ACTION)));
    Option<HoodieInstant> latestInstant = timeline.lastInstant();
    if (latestInstant.isPresent()) {
        try {
            byte[] data = timeline.getInstantDetails(latestInstant.get()).get();
            return Option.of(HoodieCommitMetadata.fromBytes(data, HoodieCommitMetadata.class));
        } catch (Exception e) {
            throw new HoodieException("Failed to read commit metadata from the latest instant", e);
        }
    } else {
        return Option.empty();
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException)
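
A minimal usage sketch of the helper above, assuming a table already exists at the given base path; the path and the printed field are illustrative, not from the original source.

HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
    .setConf(new Configuration())
    .setBasePath("/tmp/hoodie/sample-table") // hypothetical table location
    .build();
Option<HoodieCommitMetadata> latestMetadata = KafkaConnectUtils.getCommitMetadataForLatestInstant(metaClient);
if (latestMetadata.isPresent()) {
    // Extra metadata recorded at commit time, e.g. the writer schema.
    System.out.println(latestMetadata.get().getExtraMetadata());
}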

Example 72 with HoodieCommitMetadata

use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.

the class RollbackUtils method generateRollbackRequestsUsingFileListingMOR.

/**
 * Generate all rollback requests needed to roll back the given instant, without actually performing the rollback, for the MOR table type.
 *
 * @param instantToRollback Instant to Rollback
 * @param table instance of {@link HoodieTable} to use.
 * @param context instance of {@link HoodieEngineContext} to use.
 * @return list of rollback requests
 */
public static List<ListingBasedRollbackRequest> generateRollbackRequestsUsingFileListingMOR(HoodieInstant instantToRollback, HoodieTable table, HoodieEngineContext context) throws IOException {
    String commit = instantToRollback.getTimestamp();
    HoodieWriteConfig config = table.getConfig();
    List<String> partitions = FSUtils.getAllPartitionPaths(context, table.getMetaClient().getBasePath(), false, false);
    if (partitions.isEmpty()) {
        return new ArrayList<>();
    }
    int sparkPartitions = Math.max(Math.min(partitions.size(), config.getRollbackParallelism()), 1);
    context.setJobStatus(RollbackUtils.class.getSimpleName(), "Generate all rollback requests");
    return context.flatMap(partitions, partitionPath -> {
        HoodieActiveTimeline activeTimeline = table.getMetaClient().reloadActiveTimeline();
        List<ListingBasedRollbackRequest> partitionRollbackRequests = new ArrayList<>();
        switch(instantToRollback.getAction()) {
            case HoodieTimeline.COMMIT_ACTION:
            case HoodieTimeline.REPLACE_COMMIT_ACTION:
                LOG.info("Rolling back commit action.");
                partitionRollbackRequests.add(ListingBasedRollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath));
                break;
            case HoodieTimeline.COMPACTION_ACTION:
                // If delta commits exist after this compaction instant, updates have already been written
                // to log files that use this instant as their base commit, so the rollback must leave
                // those log files intact; otherwise both base and log files can be deleted.
                boolean higherDeltaCommits = !activeTimeline.getDeltaCommitTimeline().filterCompletedInstants().findInstantsAfter(commit, 1).empty();
                if (higherDeltaCommits) {
                    // There are delta commits after this compaction instant, so the compaction may have
                    // been scheduled but not yet finished while updates were already written to log files
                    // with this instant as their base commit. In this scenario we should delete only the
                    // newly created base files and keep the log files, since they hold the later updates.
                    LOG.info("Rolling back compaction. There are higher delta commits. So only deleting data files");
                    partitionRollbackRequests.add(ListingBasedRollbackRequest.createRollbackRequestWithDeleteDataFilesOnlyAction(partitionPath));
                } else {
                    // No deltacommits present after this compaction commit (inflight or requested). In this case, we
                    // can also delete any log files that were created with this compaction commit as base
                    // commit.
                    LOG.info("Rolling back compaction plan. There are NO higher delta commits. So deleting both data and" + " log files");
                    partitionRollbackRequests.add(ListingBasedRollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath));
                }
                break;
            case HoodieTimeline.DELTA_COMMIT_ACTION:
                // --------------------------------------------------------------------------------------------------
                // (A) The following cases are possible if index.canIndexLogFiles and/or index.isGlobal
                // --------------------------------------------------------------------------------------------------
                // (A.1) Failed first commit - Inserts were written to log files and HoodieWriteStat has no entries. In
                // this scenario we would want to delete these log files.
                // (A.2) Failed recurring commit - Inserts/Updates written to log files. In this scenario,
                // HoodieWriteStat will have the baseCommitTime for the first log file written, add rollback blocks.
                // (A.3) Rollback triggered for first commit - Inserts were written to the log files but the commit is
                // being reverted. In this scenario, HoodieWriteStat will have `null` for the prevCommitTime
                // attribute, and hence we will end up deleting these log files. This is done so there are no orphan log files
                // lying around.
                // (A.4) Rollback triggered for recurring commits - Inserts/Updates are being rolled back, the actions
                // taken in this scenario is a combination of (A.2) and (A.3)
                // ---------------------------------------------------------------------------------------------------
                // (B) The following cases are possible if !index.canIndexLogFiles and/or !index.isGlobal
                // ---------------------------------------------------------------------------------------------------
                // (B.1) Failed first commit - Inserts were written to base files and HoodieWriteStat has no entries.
                // In this scenario, we delete all the base files written for the failed commit.
                // (B.2) Failed recurring commits - Inserts were written to base files and updates to log files. In
                // this scenario, perform (A.1) and for updates written to log files, write rollback blocks.
                // (B.3) Rollback triggered for first commit - Same as (B.1)
                // (B.4) Rollback triggered for recurring commits - Same as (B.2) plus we need to delete the log files
                // as well if the base file gets deleted.
                HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
                    table.getMetaClient().getCommitTimeline().getInstantDetails(instantToRollback).get(),
                    HoodieCommitMetadata.class);
                // In case all data was inserts and the commit failed, delete the files belonging to that commit.
                // We do not know fileIds for inserts (first inserts are either log files or base files), so
                // delete all files for the corresponding failed commit, if present (same as COW)
                partitionRollbackRequests.add(ListingBasedRollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath));
                // Append rollback blocks for updates and inserts, per cases (A.2) and (B.2)
                if (commitMetadata.getPartitionToWriteStats().containsKey(partitionPath)) {
                    partitionRollbackRequests.addAll(generateAppendRollbackBlocksAction(partitionPath, instantToRollback, commitMetadata, table));
                }
                break;
            default:
                break;
        }
        return partitionRollbackRequests.stream();
    }, Math.min(partitions.size(), sparkPartitions)).stream().filter(Objects::nonNull).collect(Collectors.toList());
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieCommandBlock(org.apache.hudi.common.table.log.block.HoodieCommandBlock) FileStatus(org.apache.hadoop.fs.FileStatus) Function(java.util.function.Function) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) HoodieRollbackPlan(org.apache.hudi.avro.model.HoodieRollbackPlan) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) ValidationUtils.checkArgument(org.apache.hudi.common.util.ValidationUtils.checkArgument) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) LogManager(org.apache.log4j.LogManager) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) FSUtils(org.apache.hudi.common.fs.FSUtils)
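
A hedged sketch of invoking the generator above, assuming a HoodieTable and a HoodieEngineContext are already available from the surrounding write client; both variables are placeholders, not part of the original source, and the call throws IOException.

// `table` and `context` are assumed to be provided by the caller.
HoodieInstant instantToRollback = table.getActiveTimeline()
    .getCommitsTimeline()
    .filterCompletedInstants()
    .lastInstant()
    .get(); // assumes at least one completed commit exists
List<ListingBasedRollbackRequest> requests =
    RollbackUtils.generateRollbackRequestsUsingFileListingMOR(instantToRollback, table, context);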

Example 73 with HoodieCommitMetadata

use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.

the class RollbackUtils method generateAppendRollbackBlocksAction.

private static List<ListingBasedRollbackRequest> generateAppendRollbackBlocksAction(String partitionPath, HoodieInstant rollbackInstant, HoodieCommitMetadata commitMetadata, HoodieTable table) {
    checkArgument(rollbackInstant.getAction().equals(HoodieTimeline.DELTA_COMMIT_ACTION));
    // wStat.getPrevCommit() might not give the right commit time in the following scenario: if a
    // compaction was scheduled, the new commit time associated with the requested compaction is used
    // to write the new log files, so the commit time for a log file is the compaction-requested time.
    // But the (global) index might store the baseCommit of the base file and not the requested time,
    // hence always obtain the baseCommit by listing the file slice.
    // With multi-writers, rollbacks could be lazy, so we need to use getLatestFileSlicesBeforeOrOn()
    // instead of getLatestFileSlices()
    Map<String, FileSlice> latestFileSlices = table.getSliceView().getLatestFileSlicesBeforeOrOn(partitionPath, rollbackInstant.getTimestamp(), true).collect(Collectors.toMap(FileSlice::getFileId, Function.identity()));
    return commitMetadata.getPartitionToWriteStats().get(partitionPath).stream().filter(writeStat -> {
        // Filter out stats without a prevCommit since they are all inserts; check for null before
        // calling equals() to avoid a potential NullPointerException
        boolean validForRollback = (writeStat != null) && (writeStat.getPrevCommit() != null) && (!writeStat.getPrevCommit().equals(HoodieWriteStat.NULL_COMMIT)) && latestFileSlices.containsKey(writeStat.getFileId());
        if (!validForRollback) {
            return false;
        }
        FileSlice latestFileSlice = latestFileSlices.get(writeStat.getFileId());
        // For sanity, a log file's base-instant time can never be greater than the commit on which we are rolling back
        checkArgument(HoodieTimeline.compareTimestamps(latestFileSlice.getBaseInstantTime(), HoodieTimeline.LESSER_THAN_OR_EQUALS, rollbackInstant.getTimestamp()), "Log-file base-instant cannot be greater than the instant being rolled back");
        // File slices whose base instant equals the rollback instant were created by the commit being
        // rolled back and are handled in a different branch of the flow.
        return HoodieTimeline.compareTimestamps(latestFileSlice.getBaseInstantTime(), HoodieTimeline.LESSER_THAN, rollbackInstant.getTimestamp());
    }).map(writeStat -> {
        FileSlice latestFileSlice = latestFileSlices.get(writeStat.getFileId());
        return ListingBasedRollbackRequest.createRollbackRequestWithAppendRollbackBlockAction(partitionPath, writeStat.getFileId(), latestFileSlice.getBaseInstantTime(), writeStat);
    }).collect(Collectors.toList());
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieCommandBlock(org.apache.hudi.common.table.log.block.HoodieCommandBlock) FileStatus(org.apache.hadoop.fs.FileStatus) Function(java.util.function.Function) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) HoodieRollbackPlan(org.apache.hudi.avro.model.HoodieRollbackPlan) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) ValidationUtils.checkArgument(org.apache.hudi.common.util.ValidationUtils.checkArgument) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) LogManager(org.apache.log4j.LogManager) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) FSUtils(org.apache.hudi.common.fs.FSUtils)
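
The distinction between LESSER_THAN and LESSER_THAN_OR_EQUALS above is the crux of the filter: a slice whose base instant equals the rollback instant was created by the commit being rolled back. A small sketch of the comparator semantics, with made-up instant times:

// Hudi instant times are timestamp strings compared lexicographically.
boolean strictlyBefore = HoodieTimeline.compareTimestamps(
    "20220101120000", HoodieTimeline.LESSER_THAN, "20220101130000");            // true
boolean beforeOrEqual = HoodieTimeline.compareTimestamps(
    "20220101130000", HoodieTimeline.LESSER_THAN_OR_EQUALS, "20220101130000");  // true
boolean equalNotStrict = HoodieTimeline.compareTimestamps(
    "20220101130000", HoodieTimeline.LESSER_THAN, "20220101130000");            // false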

Example 74 with HoodieCommitMetadata

use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.

the class HoodieTestCommitGenerator method generateCommitMetadata.

public static HoodieCommitMetadata generateCommitMetadata(Map<String, List<Pair<String, String>>> partitionPathToFileIdAndNameMap, Map<String, String> extraMetadata) {
    HoodieCommitMetadata metadata = new HoodieCommitMetadata();
    for (Map.Entry<String, String> entry : extraMetadata.entrySet()) {
        metadata.addMetadata(entry.getKey(), entry.getValue());
    }
    partitionPathToFileIdAndNameMap.forEach((partitionPath, fileInfoList) -> fileInfoList.forEach(fileInfo -> {
        HoodieWriteStat writeStat = new HoodieWriteStat();
        writeStat.setPartitionPath(partitionPath);
        writeStat.setPath(new Path(partitionPath, fileInfo.getValue()).toString());
        writeStat.setFileId(fileInfo.getKey());
        // Below are dummy values
        writeStat.setTotalWriteBytes(10000);
        writeStat.setPrevCommit("000");
        writeStat.setNumWrites(10);
        writeStat.setNumUpdateWrites(15);
        writeStat.setTotalLogBlocks(2);
        writeStat.setTotalLogRecords(100);
        metadata.addWriteStat(partitionPath, writeStat);
    }));
    return metadata;
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) FileSystem(org.apache.hadoop.fs.FileSystem) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) IOException(java.io.IOException) HashMap(java.util.HashMap) UUID(java.util.UUID) StandardCharsets(java.nio.charset.StandardCharsets) ArrayList(java.util.ArrayList) HoodieFileFormat(org.apache.hudi.common.model.HoodieFileFormat) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Logger(org.apache.log4j.Logger) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) DEFAULT_WRITE_TOKEN(org.apache.hudi.common.table.log.HoodieLogFormat.DEFAULT_WRITE_TOKEN) Path(org.apache.hadoop.fs.Path) LogManager(org.apache.log4j.LogManager) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) Pair(org.apache.hudi.common.util.collection.Pair)
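
A short usage sketch of the generator above; the partition path, file id, file name, and extra-metadata entry are all made-up illustrative values:

Map<String, List<Pair<String, String>>> partitionToFiles = new HashMap<>();
// Each pair is (fileId, fileName); both values here are hypothetical.
partitionToFiles.put("2022/01/01",
    Collections.singletonList(Pair.of("file-id-1", "file-id-1_1-0-1_001.parquet")));
HoodieCommitMetadata metadata = HoodieTestCommitGenerator.generateCommitMetadata(
    partitionToFiles, Collections.singletonMap("test-key", "test-value"));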

Example 75 with HoodieCommitMetadata

use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.

the class HoodieTestCommitGenerator method createCommitAndDataFiles.

public static void createCommitAndDataFiles(String basePath, String instantTime, Map<String, List<Pair<String, String>>> partitionPathToFileIdAndNameMap) throws IOException {
    String commitFilename = HoodieTimeline.makeCommitFileName(instantTime);
    HoodieCommitMetadata commitMetadata = generateCommitMetadata(partitionPathToFileIdAndNameMap, Collections.emptyMap());
    String content = commitMetadata.toJsonString();
    createCommitFileWithMetadata(basePath, new Configuration(), commitFilename, content);
    for (String partitionPath : partitionPathToFileIdAndNameMap.keySet()) {
        partitionPathToFileIdAndNameMap.get(partitionPath).forEach(fileInfo -> {
            String filename = fileInfo.getValue();
            try {
                createDataFile(basePath, new Configuration(), partitionPath, filename);
            } catch (IOException e) {
                LOG.error(String.format("Failed to create data file: %s/%s/%s", basePath, partitionPath, filename), e);
            }
        });
    }
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Configuration(org.apache.hadoop.conf.Configuration) IOException(java.io.IOException)
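
A sketch of bootstrapping a test table with a single commit using the helper above; the base path, instant time, and file name are hypothetical, and the call throws IOException:

String basePath = "/tmp/hoodie/test-table"; // hypothetical temporary directory
Map<String, List<Pair<String, String>>> partitionToFiles = Collections.singletonMap(
    "2022/01/01",
    Collections.singletonList(Pair.of("file-id-1", "file-id-1_1-0-1_001.parquet")));
HoodieTestCommitGenerator.createCommitAndDataFiles(basePath, "001", partitionToFiles);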

Aggregations

HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata) 139
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant) 64
ArrayList (java.util.ArrayList) 54
HashMap (java.util.HashMap) 49
List (java.util.List) 48
HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat) 44
IOException (java.io.IOException) 42
Test (org.junit.jupiter.api.Test) 41
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline) 40
Map (java.util.Map) 38
Path (org.apache.hadoop.fs.Path) 36
HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline) 34
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest) 34
File (java.io.File) 26
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient) 26
Option (org.apache.hudi.common.util.Option) 25
Schema (org.apache.avro.Schema) 22
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig) 21
Collectors (java.util.stream.Collectors) 20
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile) 20