use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
the class KafkaConnectUtils method getCommitMetadataForLatestInstant.
/**
* Get the Metadata from the latest commit file.
*
* @param metaClient The {@link HoodieTableMetaClient} to get access to the meta data.
* @return An Optional {@link HoodieCommitMetadata} containing the meta data from the latest commit file.
*/
public static Option<HoodieCommitMetadata> getCommitMetadataForLatestInstant(HoodieTableMetaClient metaClient) {
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().filter(instant -> (metaClient.getTableType() == HoodieTableType.COPY_ON_WRITE && instant.getAction().equals(HoodieActiveTimeline.COMMIT_ACTION)) || (metaClient.getTableType() == HoodieTableType.MERGE_ON_READ && instant.getAction().equals(HoodieActiveTimeline.DELTA_COMMIT_ACTION)));
Option<HoodieInstant> latestInstant = timeline.lastInstant();
if (latestInstant.isPresent()) {
try {
byte[] data = timeline.getInstantDetails(latestInstant.get()).get();
return Option.of(HoodieCommitMetadata.fromBytes(data, HoodieCommitMetadata.class));
} catch (Exception e) {
throw new HoodieException("Failed to read schema from commit metadata", e);
}
} else {
return Option.empty();
}
}
use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
the class RollbackUtils method generateRollbackRequestsUsingFileListingMOR.
/**
* Generate all rollback requests that we need to perform for rolling back this action without actually performing rolling back for MOR table type.
*
* @param instantToRollback Instant to Rollback
* @param table instance of {@link HoodieTable} to use.
* @param context instance of {@link HoodieEngineContext} to use.
* @return list of rollback requests
*/
public static List<ListingBasedRollbackRequest> generateRollbackRequestsUsingFileListingMOR(HoodieInstant instantToRollback, HoodieTable table, HoodieEngineContext context) throws IOException {
String commit = instantToRollback.getTimestamp();
HoodieWriteConfig config = table.getConfig();
List<String> partitions = FSUtils.getAllPartitionPaths(context, table.getMetaClient().getBasePath(), false, false);
if (partitions.isEmpty()) {
return new ArrayList<>();
}
int sparkPartitions = Math.max(Math.min(partitions.size(), config.getRollbackParallelism()), 1);
context.setJobStatus(RollbackUtils.class.getSimpleName(), "Generate all rollback requests");
return context.flatMap(partitions, partitionPath -> {
HoodieActiveTimeline activeTimeline = table.getMetaClient().reloadActiveTimeline();
List<ListingBasedRollbackRequest> partitionRollbackRequests = new ArrayList<>();
switch(instantToRollback.getAction()) {
case HoodieTimeline.COMMIT_ACTION:
case HoodieTimeline.REPLACE_COMMIT_ACTION:
LOG.info("Rolling back commit action.");
partitionRollbackRequests.add(ListingBasedRollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath));
break;
case HoodieTimeline.COMPACTION_ACTION:
// If there is no delta commit present after the current commit (if compaction), no action, else we
// need to make sure that a compaction commit rollback also deletes any log files written as part of the
// succeeding deltacommit.
boolean higherDeltaCommits = !activeTimeline.getDeltaCommitTimeline().filterCompletedInstants().findInstantsAfter(commit, 1).empty();
if (higherDeltaCommits) {
// Rollback of a compaction action with no higher deltacommit means that the compaction is scheduled
// and has not yet finished. In this scenario we should delete only the newly created base files
// and not corresponding base commit log files created with this as baseCommit since updates would
// have been written to the log files.
LOG.info("Rolling back compaction. There are higher delta commits. So only deleting data files");
partitionRollbackRequests.add(ListingBasedRollbackRequest.createRollbackRequestWithDeleteDataFilesOnlyAction(partitionPath));
} else {
// No deltacommits present after this compaction commit (inflight or requested). In this case, we
// can also delete any log files that were created with this compaction commit as base
// commit.
LOG.info("Rolling back compaction plan. There are NO higher delta commits. So deleting both data and" + " log files");
partitionRollbackRequests.add(ListingBasedRollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath));
}
break;
case HoodieTimeline.DELTA_COMMIT_ACTION:
// --------------------------------------------------------------------------------------------------
// (A) The following cases are possible if index.canIndexLogFiles and/or index.isGlobal
// --------------------------------------------------------------------------------------------------
// (A.1) Failed first commit - Inserts were written to log files and HoodieWriteStat has no entries. In
// this scenario we would want to delete these log files.
// (A.2) Failed recurring commit - Inserts/Updates written to log files. In this scenario,
// HoodieWriteStat will have the baseCommitTime for the first log file written, add rollback blocks.
// (A.3) Rollback triggered for first commit - Inserts were written to the log files but the commit is
// being reverted. In this scenario, HoodieWriteStat will be `null` for the attribute prevCommitTime and
// and hence will end up deleting these log files. This is done so there are no orphan log files
// lying around.
// (A.4) Rollback triggered for recurring commits - Inserts/Updates are being rolled back, the actions
// taken in this scenario is a combination of (A.2) and (A.3)
// ---------------------------------------------------------------------------------------------------
// (B) The following cases are possible if !index.canIndexLogFiles and/or !index.isGlobal
// ---------------------------------------------------------------------------------------------------
// (B.1) Failed first commit - Inserts were written to base files and HoodieWriteStat has no entries.
// In this scenario, we delete all the base files written for the failed commit.
// (B.2) Failed recurring commits - Inserts were written to base files and updates to log files. In
// this scenario, perform (A.1) and for updates written to log files, write rollback blocks.
// (B.3) Rollback triggered for first commit - Same as (B.1)
// (B.4) Rollback triggered for recurring commits - Same as (B.2) plus we need to delete the log files
// as well if the base base file gets deleted.
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(table.getMetaClient().getCommitTimeline().getInstantDetails(instantToRollback).get(), HoodieCommitMetadata.class);
// In case all data was inserts and the commit failed, delete the file belonging to that commit
// We do not know fileIds for inserts (first inserts are either log files or base files),
// delete all files for the corresponding failed commit, if present (same as COW)
partitionRollbackRequests.add(ListingBasedRollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath));
// append rollback blocks for updates and inserts as A.2 and B.2
if (commitMetadata.getPartitionToWriteStats().containsKey(partitionPath)) {
partitionRollbackRequests.addAll(generateAppendRollbackBlocksAction(partitionPath, instantToRollback, commitMetadata, table));
}
break;
default:
break;
}
return partitionRollbackRequests.stream();
}, Math.min(partitions.size(), sparkPartitions)).stream().filter(Objects::nonNull).collect(Collectors.toList());
}
use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
the class RollbackUtils method generateAppendRollbackBlocksAction.
private static List<ListingBasedRollbackRequest> generateAppendRollbackBlocksAction(String partitionPath, HoodieInstant rollbackInstant, HoodieCommitMetadata commitMetadata, HoodieTable table) {
checkArgument(rollbackInstant.getAction().equals(HoodieTimeline.DELTA_COMMIT_ACTION));
// wStat.getPrevCommit() might not give the right commit time in the following
// scenario : If a compaction was scheduled, the new commitTime associated with the requested compaction will be
// used to write the new log files. In this case, the commit time for the log file is the compaction requested time.
// But the index (global) might store the baseCommit of the base and not the requested, hence get the
// baseCommit always by listing the file slice
// With multi writers, rollbacks could be lazy. and so we need to use getLatestFileSlicesBeforeOrOn() instead of getLatestFileSlices()
Map<String, FileSlice> latestFileSlices = table.getSliceView().getLatestFileSlicesBeforeOrOn(partitionPath, rollbackInstant.getTimestamp(), true).collect(Collectors.toMap(FileSlice::getFileId, Function.identity()));
return commitMetadata.getPartitionToWriteStats().get(partitionPath).stream().filter(writeStat -> {
// Filter out stats without prevCommit since they are all inserts
boolean validForRollback = (writeStat != null) && (!writeStat.getPrevCommit().equals(HoodieWriteStat.NULL_COMMIT)) && (writeStat.getPrevCommit() != null) && latestFileSlices.containsKey(writeStat.getFileId());
if (!validForRollback) {
return false;
}
FileSlice latestFileSlice = latestFileSlices.get(writeStat.getFileId());
// For sanity, log-file base-instant time can never be less than base-commit on which we are rolling back
checkArgument(HoodieTimeline.compareTimestamps(latestFileSlice.getBaseInstantTime(), HoodieTimeline.LESSER_THAN_OR_EQUALS, rollbackInstant.getTimestamp()), "Log-file base-instant could not be less than the instant being rolled back");
// in a different branch of the flow.
return HoodieTimeline.compareTimestamps(latestFileSlice.getBaseInstantTime(), HoodieTimeline.LESSER_THAN, rollbackInstant.getTimestamp());
}).map(writeStat -> {
FileSlice latestFileSlice = latestFileSlices.get(writeStat.getFileId());
return ListingBasedRollbackRequest.createRollbackRequestWithAppendRollbackBlockAction(partitionPath, writeStat.getFileId(), latestFileSlice.getBaseInstantTime(), writeStat);
}).collect(Collectors.toList());
}
use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
the class HoodieTestCommitGenerator method generateCommitMetadata.
public static HoodieCommitMetadata generateCommitMetadata(Map<String, List<Pair<String, String>>> partitionPathToFileIdAndNameMap, Map<String, String> extraMetadata) {
HoodieCommitMetadata metadata = new HoodieCommitMetadata();
for (Map.Entry<String, String> entry : extraMetadata.entrySet()) {
metadata.addMetadata(entry.getKey(), entry.getValue());
}
partitionPathToFileIdAndNameMap.forEach((partitionPath, fileInfoList) -> fileInfoList.forEach(fileInfo -> {
HoodieWriteStat writeStat = new HoodieWriteStat();
writeStat.setPartitionPath(partitionPath);
writeStat.setPath(new Path(partitionPath, fileInfo.getValue()).toString());
writeStat.setFileId(fileInfo.getKey());
// Below are dummy values
writeStat.setTotalWriteBytes(10000);
writeStat.setPrevCommit("000");
writeStat.setNumWrites(10);
writeStat.setNumUpdateWrites(15);
writeStat.setTotalLogBlocks(2);
writeStat.setTotalLogRecords(100);
metadata.addWriteStat(partitionPath, writeStat);
}));
return metadata;
}
use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
the class HoodieTestCommitGenerator method createCommitAndDataFiles.
public static void createCommitAndDataFiles(String basePath, String instantTime, Map<String, List<Pair<String, String>>> partitionPathToFileIdAndNameMap) throws IOException {
String commitFilename = HoodieTimeline.makeCommitFileName(instantTime);
HoodieCommitMetadata commitMetadata = generateCommitMetadata(partitionPathToFileIdAndNameMap, Collections.emptyMap());
String content = commitMetadata.toJsonString();
createCommitFileWithMetadata(basePath, new Configuration(), commitFilename, content);
for (String partitionPath : partitionPathToFileIdAndNameMap.keySet()) {
partitionPathToFileIdAndNameMap.get(partitionPath).forEach(fileInfo -> {
String filename = fileInfo.getValue();
try {
createDataFile(basePath, new Configuration(), partitionPath, filename);
} catch (IOException e) {
LOG.error(String.format("Failed to create data file: %s/%s/%s", basePath, partitionPath, filename));
}
});
}
}
Aggregations