Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
From the class CompactionAdminClient, method validateCompactionOperation.
/**
 * Check if a compaction operation is valid.
 *
 * @param metaClient Hoodie Table Meta Client
 * @param compactionInstant Compaction Instant
 * @param operation Compaction Operation
 * @param fsViewOpt File System View
 * @return Result of validating the operation
 */
private ValidationOpResult validateCompactionOperation(HoodieTableMetaClient metaClient, String compactionInstant,
    CompactionOperation operation, Option<HoodieTableFileSystemView> fsViewOpt) throws IOException {
  HoodieTableFileSystemView fileSystemView = fsViewOpt.isPresent() ? fsViewOpt.get()
      : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
  Option<HoodieInstant> lastInstant = metaClient.getCommitsAndCompactionTimeline().lastInstant();
  try {
    if (lastInstant.isPresent()) {
      // Locate the latest un-compacted file slice for this operation's file-id
      Option<FileSlice> fileSliceOptional = Option.fromJavaOptional(
          fileSystemView.getLatestUnCompactedFileSlices(operation.getPartitionPath())
              .filter(fs -> fs.getFileId().equals(operation.getFileId())).findFirst());
      if (fileSliceOptional.isPresent()) {
        FileSlice fs = fileSliceOptional.get();
        Option<HoodieBaseFile> df = fs.getBaseFile();
        if (operation.getDataFileName().isPresent()) {
          // The base file named in the operation must exist and match the slice's base file
          String expPath = metaClient.getFs().getFileStatus(
              new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), operation.getPartitionPath()),
                  new Path(operation.getDataFileName().get()))).getPath().toString();
          ValidationUtils.checkArgument(df.isPresent(),
              "Data File must be present. File Slice was : " + fs + ", operation :" + operation);
          ValidationUtils.checkArgument(df.get().getPath().equals(expPath),
              "Base Path in operation is specified as " + expPath + " but got path " + df.get().getPath());
        }
        Set<HoodieLogFile> logFilesInFileSlice = fs.getLogFiles().collect(Collectors.toSet());
        Set<HoodieLogFile> logFilesInCompactionOp = operation.getDeltaFileNames().stream().map(dp -> {
          try {
            FileStatus[] fileStatuses = metaClient.getFs().listStatus(
                new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), operation.getPartitionPath()), new Path(dp)));
            ValidationUtils.checkArgument(fileStatuses.length == 1, "Expect only 1 file-status");
            return new HoodieLogFile(fileStatuses[0]);
          } catch (FileNotFoundException fe) {
            throw new CompactionValidationException(fe.getMessage());
          } catch (IOException ioe) {
            throw new HoodieIOException(ioe.getMessage(), ioe);
          }
        }).collect(Collectors.toSet());
        // Every log file referenced by the compaction operation must still be part of the file slice
        Set<HoodieLogFile> missing = logFilesInCompactionOp.stream()
            .filter(lf -> !logFilesInFileSlice.contains(lf)).collect(Collectors.toSet());
        ValidationUtils.checkArgument(missing.isEmpty(),
            "Not all log files specified in the compaction operation are present. Missing :" + missing
                + ", Exp :" + logFilesInCompactionOp + ", Got :" + logFilesInFileSlice);
        // Any extra log file in the slice must have been written after the compaction request instant
        Set<HoodieLogFile> diff = logFilesInFileSlice.stream()
            .filter(lf -> !logFilesInCompactionOp.contains(lf)).collect(Collectors.toSet());
        ValidationUtils.checkArgument(diff.stream().allMatch(lf -> lf.getBaseCommitTime().equals(compactionInstant)),
            "There are some log-files which are neither specified in the compaction plan "
                + "nor present after the compaction request instant. Some of these :" + diff);
      } else {
        throw new CompactionValidationException(
            "Unable to find file-slice for file-id (" + operation.getFileId() + "). Compaction operation is invalid.");
      }
    } else {
      throw new CompactionValidationException(
          "Unable to find any committed instant. Compaction Operation may be pointing to stale file-slices");
    }
  } catch (CompactionValidationException | IllegalArgumentException e) {
    return new ValidationOpResult(operation, false, Option.of(e));
  }
  return new ValidationOpResult(operation, true, Option.empty());
}
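For context, a minimal sketch of driving this check through the class's public entry point. The validateCompactionPlan method and its (metaClient, compactionInstant, parallelism) signature are assumptions based on how the Hudi CLI invokes CompactionAdminClient, and the isSuccess accessor on ValidationOpResult is likewise assumed.

// Hypothetical caller; the names flagged above are assumptions, not confirmed API.
CompactionAdminClient client = new CompactionAdminClient(context, basePath);
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
    .setConf(hadoopConf).setBasePath(basePath).build();
List<ValidationOpResult> results = client.validateCompactionPlan(metaClient, "102", 3);
for (ValidationOpResult r : results) {
  // Each result wraps the operation, a success flag and, on failure, the exception
  if (!r.isSuccess()) {
    System.out.println("Invalid compaction operation: " + r);
  }
}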
Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
From the class CompactionAdminClient, method unscheduleCompactionFileId.
/**
 * Remove a fileId from pending compaction. Removes the associated compaction operation and renames delta-files
 * that were generated for that file-id after the compaction operation was scheduled.
 *
 * This operation MUST be executed with compactions and writers turned OFF.
 *
 * @param fgId FileGroupId to be unscheduled
 * @param skipValidation Skip validation
 * @param dryRun Dry Run Mode
 * @return Rename operation results
 */
public List<RenameOpResult> unscheduleCompactionFileId(HoodieFileGroupId fgId, boolean skipValidation, boolean dryRun)
    throws Exception {
  HoodieTableMetaClient metaClient = createMetaClient(false);
  List<Pair<HoodieLogFile, HoodieLogFile>> renameActions =
      getRenamingActionsForUnschedulingCompactionForFileId(metaClient, fgId, Option.empty(), skipValidation);
  List<RenameOpResult> res = runRenamingOps(metaClient, renameActions, 1, dryRun);
  if (!dryRun && !res.isEmpty() && res.get(0).isExecuted() && res.get(0).isSuccess()) {
    // Ready to remove this file-Id from compaction request
    Pair<String, HoodieCompactionOperation> compactionOperationWithInstant =
        CompactionUtils.getAllPendingCompactionOperations(metaClient).get(fgId);
    HoodieCompactionPlan plan = CompactionUtils.getCompactionPlan(metaClient, compactionOperationWithInstant.getKey());
    List<HoodieCompactionOperation> newOps = plan.getOperations().stream()
        .filter(op -> (!op.getFileId().equals(fgId.getFileId())) && (!op.getPartitionPath().equals(fgId.getPartitionPath())))
        .collect(Collectors.toList());
    HoodieCompactionPlan newPlan =
        HoodieCompactionPlan.newBuilder().setOperations(newOps).setExtraMetadata(plan.getExtraMetadata()).build();
    HoodieInstant inflight =
        new HoodieInstant(State.INFLIGHT, COMPACTION_ACTION, compactionOperationWithInstant.getLeft());
    Path inflightPath = new Path(metaClient.getMetaPath(), inflight.getFileName());
    if (metaClient.getFs().exists(inflightPath)) {
      // revert if in inflight state
      metaClient.getActiveTimeline().revertCompactionInflightToRequested(inflight);
    }
    // Overwrite compaction plan with updated info
    metaClient.getActiveTimeline().saveToCompactionRequested(
        new HoodieInstant(State.REQUESTED, COMPACTION_ACTION, compactionOperationWithInstant.getLeft()),
        TimelineMetadataUtils.serializeCompactionPlan(newPlan), true);
  }
  return res;
}
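A hedged usage sketch of this method: dry-run first to inspect the planned renames, then execute once compactions and writers are stopped. The CompactionAdminClient constructor arguments here are assumptions.

// Hypothetical usage; the constructor signature is an assumption.
CompactionAdminClient admin = new CompactionAdminClient(context, basePath);
HoodieFileGroupId fgId = new HoodieFileGroupId("2021/01/01", "file-id-1"); // (partitionPath, fileId)
// Dry run: computes and reports the rename actions without touching any files
List<RenameOpResult> planned = admin.unscheduleCompactionFileId(fgId, false, true);
planned.forEach(System.out::println);
// Real run: renames log files and rewrites the compaction plan without this file group
List<RenameOpResult> executed = admin.unscheduleCompactionFileId(fgId, false, false);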
Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
From the class CompactionAdminClient, method getRenamingActionsToAlignWithCompactionOperation.
/**
 * Get renaming actions to ensure the log-files of merged file-slices are aligned with the compaction operation.
 * This method is used to recover from failures during unschedule-compaction operations.
 *
 * @param metaClient Hoodie Table Meta Client
 * @param compactionInstant Compaction Instant
 * @param op Compaction Operation
 * @param fsViewOpt File System View
 * @return Rename actions as (old log file, new log file) pairs
 */
protected static List<Pair<HoodieLogFile, HoodieLogFile>> getRenamingActionsToAlignWithCompactionOperation(
    HoodieTableMetaClient metaClient, String compactionInstant, CompactionOperation op,
    Option<HoodieTableFileSystemView> fsViewOpt) {
  HoodieTableFileSystemView fileSystemView = fsViewOpt.isPresent() ? fsViewOpt.get()
      : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
  HoodieInstant lastInstant = metaClient.getCommitsAndCompactionTimeline().lastInstant().get();
  FileSlice merged =
      fileSystemView.getLatestMergedFileSlicesBeforeOrOn(op.getPartitionPath(), lastInstant.getTimestamp())
          .filter(fs -> fs.getFileId().equals(op.getFileId())).findFirst().get();
  // Highest log version referenced by the compaction operation
  final int maxVersion = op.getDeltaFileNames().stream().map(lf -> FSUtils.getFileVersionFromLog(new Path(lf)))
      .reduce((x, y) -> x > y ? x : y).orElse(0);
  // Log files written after the compaction was scheduled carry versions above maxVersion
  List<HoodieLogFile> logFilesToBeMoved =
      merged.getLogFiles().filter(lf -> lf.getLogVersion() > maxVersion).collect(Collectors.toList());
  return logFilesToBeMoved.stream().map(lf -> {
    ValidationUtils.checkArgument(lf.getLogVersion() - maxVersion > 0, "Expect new log version to be sane");
    // Re-key the log file onto the compaction instant, restarting versions at 1
    HoodieLogFile newLogFile = new HoodieLogFile(new Path(lf.getPath().getParent(),
        FSUtils.makeLogFileName(lf.getFileId(), "." + FSUtils.getFileExtensionFromLog(lf.getPath()),
            compactionInstant, lf.getLogVersion() - maxVersion, HoodieLogFormat.UNKNOWN_WRITE_TOKEN)));
    return Pair.of(lf, newLogFile);
  }).collect(Collectors.toList());
}
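A worked example of the renumbering above, with hypothetical values: if the compaction plan names log versions 1 and 2 (so maxVersion = 2) and versions 3 and 4 were written after scheduling, those two stragglers are renamed to versions 3 - 2 = 1 and 4 - 2 = 2, re-based onto compactionInstant.

// Illustration of the version arithmetic only; the values are made up.
int maxVersion = 2; // highest log version referenced by the compaction operation
int[] writtenAfterScheduling = {3, 4};
for (int v : writtenAfterScheduling) {
  // Versions restart from 1 under the compaction instant's base time
  System.out.println("log v" + v + " -> log v" + (v - maxVersion) + " @ compactionInstant");
}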
Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
From the class ITTestSavepointsCommand, method testRollbackToSavepoint.
/**
* Test case of command 'savepoint rollback'.
*/
@Test
public void testRollbackToSavepoint() throws IOException {
  // generate four commits
  for (int i = 100; i < 104; i++) {
    String instantTime = String.valueOf(i);
    HoodieTestDataGenerator.createCommitFile(tablePath, instantTime, jsc.hadoopConfiguration());
  }
  // generate one savepoint
  String savepoint = "102";
  HoodieTestDataGenerator.createSavepointFile(tablePath, savepoint, jsc.hadoopConfiguration());
  CommandResult cr = getShell().executeCommand(
      String.format("savepoint rollback --savepoint %s --sparkMaster %s", savepoint, "local"));
  assertAll("Command run failed",
      () -> assertTrue(cr.isSuccess()),
      () -> assertEquals(String.format("Savepoint \"%s\" rolled back", savepoint), cr.getResult().toString()));
  // there is 1 restore instant
  HoodieActiveTimeline timeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
  assertEquals(1, timeline.getRestoreTimeline().countInstants());
  // instant 103 should have been rolled back
  assertFalse(timeline.getCommitTimeline().containsInstant(
      new HoodieInstant(HoodieInstant.State.COMPLETED, "commit", "103")));
}
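The CLI command exercised here restores the table to the savepoint; as a point of comparison, a hedged sketch of the programmatic route, assuming the write client exposes restoreToSavepoint as in recent Hudi releases.

// Hypothetical programmatic equivalent of `savepoint rollback`; the method name
// and constructor are assumptions about the public write-client API.
try (SparkRDDWriteClient client = new SparkRDDWriteClient(context, writeConfig)) {
  client.restoreToSavepoint("102"); // roll the table back to savepoint instant 102
}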
Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
From the class ITTestSavepointsCommand, method testDeleteSavepoint.
/**
* Test case of command 'savepoint delete'.
*/
@Test
public void testDeleteSavepoint() throws IOException {
  // generate four commits
  for (int i = 100; i < 104; i++) {
    String instantTime = String.valueOf(i);
    HoodieTestDataGenerator.createCommitFile(tablePath, instantTime, jsc.hadoopConfiguration());
  }
  // generate two savepoints
  String savepoint1 = "100";
  String savepoint2 = "102";
  HoodieTestDataGenerator.createSavepointFile(tablePath, savepoint1, jsc.hadoopConfiguration());
  HoodieTestDataGenerator.createSavepointFile(tablePath, savepoint2, jsc.hadoopConfiguration());
  HoodieActiveTimeline timeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
  assertEquals(2, timeline.getSavePointTimeline().countInstants(), "There should be 2 instants.");
  CommandResult cr = getShell().executeCommand(
      String.format("savepoint delete --commit %s --sparkMaster %s", savepoint1, "local"));
  assertAll("Command run failed",
      () -> assertTrue(cr.isSuccess()),
      () -> assertEquals(String.format("Savepoint \"%s\" deleted.", savepoint1), cr.getResult().toString()));
  // reload timeline
  timeline = timeline.reload();
  assertEquals(1, timeline.getSavePointTimeline().countInstants(), "There should be 1 instant.");
  // after deletion, savepoint 100 should no longer exist on the timeline
  assertFalse(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, savepoint1)));
}
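Note that the two tests reference instants through different HoodieInstant constructors: testRollbackToSavepoint passes an explicit State, while this test passes a boolean inflight flag. Assuming both overloads exist in this Hudi version (as the tests above suggest), the two forms below identify the same completed savepoint instant.

// Boolean overload: `false` means the instant is not inflight, i.e. completed
HoodieInstant byFlag = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, "100");
// State overload: the lifecycle state is spelled out explicitly
HoodieInstant byState = new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.SAVEPOINT_ACTION, "100");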