use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.
the class CompactionAdminClient method getRenamingActionsForUnschedulingCompactionOperation.
/**
* Generate renaming actions for unscheduling a compaction operation NOTE: Can only be used safely when no writer
* (ingestion/compaction) is running.
*
* @param metaClient Hoodie Table MetaClient
* @param compactionInstant Compaction Instant
* @param operation Compaction Operation
* @param fsViewOpt Cached File System View
* @param skipValidation Skip Validation
* @return list of pairs of log-files (old, new) and for each pair, rename must be done to successfully unschedule
* compaction.
*/
public List<Pair<HoodieLogFile, HoodieLogFile>> getRenamingActionsForUnschedulingCompactionOperation(HoodieTableMetaClient metaClient, String compactionInstant, CompactionOperation operation, Option<HoodieTableFileSystemView> fsViewOpt, boolean skipValidation) throws IOException {
List<Pair<HoodieLogFile, HoodieLogFile>> result = new ArrayList<>();
HoodieTableFileSystemView fileSystemView = fsViewOpt.isPresent() ? fsViewOpt.get() : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
if (!skipValidation) {
validateCompactionOperation(metaClient, compactionInstant, operation, Option.of(fileSystemView));
}
HoodieInstant lastInstant = metaClient.getCommitsAndCompactionTimeline().lastInstant().get();
FileSlice merged = fileSystemView.getLatestMergedFileSlicesBeforeOrOn(operation.getPartitionPath(), lastInstant.getTimestamp()).filter(fs -> fs.getFileId().equals(operation.getFileId())).findFirst().get();
List<HoodieLogFile> logFilesToRepair = merged.getLogFiles().filter(lf -> lf.getBaseCommitTime().equals(compactionInstant)).sorted(HoodieLogFile.getLogFileComparator()).collect(Collectors.toList());
FileSlice fileSliceForCompaction = fileSystemView.getLatestFileSlicesBeforeOrOn(operation.getPartitionPath(), operation.getBaseInstantTime(), true).filter(fs -> fs.getFileId().equals(operation.getFileId())).findFirst().get();
int maxUsedVersion = fileSliceForCompaction.getLogFiles().findFirst().map(HoodieLogFile::getLogVersion).orElse(HoodieLogFile.LOGFILE_BASE_VERSION - 1);
String logExtn = fileSliceForCompaction.getLogFiles().findFirst().map(lf -> "." + lf.getFileExtension()).orElse(HoodieLogFile.DELTA_EXTENSION);
String parentPath = fileSliceForCompaction.getBaseFile().map(df -> new Path(df.getPath()).getParent().toString()).orElse(fileSliceForCompaction.getLogFiles().findFirst().map(lf -> lf.getPath().getParent().toString()).get());
for (HoodieLogFile toRepair : logFilesToRepair) {
int version = maxUsedVersion + 1;
HoodieLogFile newLf = new HoodieLogFile(new Path(parentPath, FSUtils.makeLogFileName(operation.getFileId(), logExtn, operation.getBaseInstantTime(), version, HoodieLogFormat.UNKNOWN_WRITE_TOKEN)));
result.add(Pair.of(toRepair, newLf));
maxUsedVersion = version;
}
return result;
}
use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.
the class CompactionAdminClient method getRenamingActionsForUnschedulingCompactionPlan.
/**
* Generate renaming actions for unscheduling a pending compaction plan. NOTE: Can only be used safely when no writer
* (ingestion/compaction) is running.
*
* @param metaClient Hoodie Table MetaClient
* @param compactionInstant Compaction Instant to be unscheduled
* @param fsViewOpt Cached File System View
* @param skipValidation Skip Validation
* @return list of pairs of log-files (old, new) and for each pair, rename must be done to successfully unschedule
* compaction.
*/
public List<Pair<HoodieLogFile, HoodieLogFile>> getRenamingActionsForUnschedulingCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant, int parallelism, Option<HoodieTableFileSystemView> fsViewOpt, boolean skipValidation) throws IOException {
HoodieTableFileSystemView fsView = fsViewOpt.isPresent() ? fsViewOpt.get() : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
HoodieCompactionPlan plan = getCompactionPlan(metaClient, compactionInstant);
if (plan.getOperations() != null) {
LOG.info("Number of Compaction Operations :" + plan.getOperations().size() + " for instant :" + compactionInstant);
List<CompactionOperation> ops = plan.getOperations().stream().map(CompactionOperation::convertFromAvroRecordInstance).collect(Collectors.toList());
context.setJobStatus(this.getClass().getSimpleName(), "Generate compaction unscheduling operations");
return context.flatMap(ops, op -> {
try {
return getRenamingActionsForUnschedulingCompactionOperation(metaClient, compactionInstant, op, Option.of(fsView), skipValidation).stream();
} catch (IOException ioe) {
throw new HoodieIOException(ioe.getMessage(), ioe);
} catch (CompactionValidationException ve) {
throw new HoodieException(ve);
}
}, parallelism);
}
LOG.warn("No operations for compaction instant : " + compactionInstant);
return new ArrayList<>();
}
use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.
the class TestCompactionAdminClient method validateUnScheduleFileId.
/**
* Validate Unschedule operations.
*/
private void validateUnScheduleFileId(CompactionAdminClient client, String ingestionInstant, String compactionInstant, CompactionOperation op, int expNumRenames) throws Exception {
ensureValidCompactionPlan(compactionInstant);
// Check suggested rename operations
List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles = client.getRenamingActionsForUnschedulingCompactionOperation(metaClient, compactionInstant, op, Option.empty(), false);
metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).setLoadActiveTimelineOnLoad(true).build();
// Log files belonging to file-slices created because of compaction request must be renamed
Set<HoodieLogFile> gotLogFilesToBeRenamed = renameFiles.stream().map(Pair::getLeft).collect(Collectors.toSet());
final HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
Set<HoodieLogFile> expLogFilesToBeRenamed = fsView.getLatestFileSlices(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0]).filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).filter(fs -> fs.getFileId().equals(op.getFileId())).flatMap(FileSlice::getLogFiles).collect(Collectors.toSet());
assertEquals(expLogFilesToBeRenamed, gotLogFilesToBeRenamed, "Log files belonging to file-slices created because of compaction request must be renamed");
validateRenameFiles(renameFiles, ingestionInstant, compactionInstant, fsView);
Map<String, Long> fileIdToCountsBeforeRenaming = fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant).filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant)).filter(fs -> fs.getFileId().equals(op.getFileId())).map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count())).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
// Call the main unschedule API
client.unscheduleCompactionFileId(op.getFileGroupId(), false, false);
metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).setLoadActiveTimelineOnLoad(true).build();
final HoodieTableFileSystemView newFsView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
// Expect all file-slice whose base-commit is same as compaction commit to contain no new Log files
newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true).filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).filter(fs -> fs.getFileId().equals(op.getFileId())).forEach(fs -> {
assertFalse(fs.getBaseFile().isPresent(), "No Data file must be present");
assertEquals(0, fs.getLogFiles().count(), "No Log Files");
});
// Ensure same number of log-files before and after renaming per fileId
Map<String, Long> fileIdToCountsAfterRenaming = newFsView.getAllFileGroups(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0]).flatMap(HoodieFileGroup::getAllFileSlices).filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant)).filter(fs -> fs.getFileId().equals(op.getFileId())).map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count())).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
assertEquals(fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming, "Each File Id has same number of log-files");
assertEquals(1, fileIdToCountsAfterRenaming.size(), "Not Empty");
assertEquals(expNumRenames, renameFiles.size(), "Expected number of renames");
}
use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.
the class TestCompactionAdminClient method testUnscheduleCompactionFileId.
@Test
public void testUnscheduleCompactionFileId() throws Exception {
int numEntriesPerInstant = 10;
CompactionTestUtils.setupAndValidateCompactionOperations(metaClient, false, numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant);
Map<String, CompactionOperation> instantsWithOp = Stream.of("001", "003", "005", "007").map(instant -> {
try {
return Pair.of(instant, CompactionUtils.getCompactionPlan(metaClient, instant));
} catch (IOException ioe) {
throw new HoodieException(ioe);
}
}).map(instantWithPlan -> instantWithPlan.getRight().getOperations().stream().map(op -> Pair.of(instantWithPlan.getLeft(), CompactionOperation.convertFromAvroRecordInstance(op))).findFirst().get()).collect(Collectors.toMap(Pair::getLeft, Pair::getRight));
// THere are delta-commits after compaction instant
validateUnScheduleFileId(client, "000", "001", instantsWithOp.get("001"), 2);
// THere are delta-commits after compaction instant
validateUnScheduleFileId(client, "002", "003", instantsWithOp.get("003"), 2);
// THere are no delta-commits after compaction instant
validateUnScheduleFileId(client, "004", "005", instantsWithOp.get("005"), 0);
// THere are no delta-commits after compaction instant
validateUnScheduleFileId(client, "006", "007", instantsWithOp.get("007"), 0);
}
use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.
the class TestHoodieTableFileSystemView method testPendingCompactionWithDuplicateFileIdsAcrossPartitions.
@Test
public void testPendingCompactionWithDuplicateFileIdsAcrossPartitions() throws Exception {
// Put some files in the partition
String partitionPath1 = "2016/05/01";
String partitionPath2 = "2016/05/02";
String partitionPath3 = "2016/05/03";
String fullPartitionPath1 = basePath + "/" + partitionPath1 + "/";
new File(fullPartitionPath1).mkdirs();
String fullPartitionPath2 = basePath + "/" + partitionPath2 + "/";
new File(fullPartitionPath2).mkdirs();
String fullPartitionPath3 = basePath + "/" + partitionPath3 + "/";
new File(fullPartitionPath3).mkdirs();
String instantTime1 = "1";
String deltaInstantTime1 = "2";
String deltaInstantTime2 = "3";
String fileId = UUID.randomUUID().toString();
String dataFileName = FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId);
new File(fullPartitionPath1 + dataFileName).createNewFile();
String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0, TEST_WRITE_TOKEN);
new File(fullPartitionPath1 + fileName1).createNewFile();
new File(fullPartitionPath2 + FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId)).createNewFile();
new File(fullPartitionPath2 + fileName1).createNewFile();
new File(fullPartitionPath3 + FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId)).createNewFile();
new File(fullPartitionPath3 + fileName1).createNewFile();
HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime1);
HoodieInstant deltaInstant2 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime1);
HoodieInstant deltaInstant3 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime2);
saveAsComplete(commitTimeline, instant1, Option.empty());
saveAsComplete(commitTimeline, deltaInstant2, Option.empty());
saveAsComplete(commitTimeline, deltaInstant3, Option.empty());
// Now we list all partitions
FileStatus[] statuses = metaClient.getFs().listStatus(new Path[] { new Path(fullPartitionPath1), new Path(fullPartitionPath2), new Path(fullPartitionPath3) });
assertEquals(6, statuses.length);
refreshFsView();
Arrays.asList(partitionPath1, partitionPath2, partitionPath3).forEach(p -> fsView.getAllFileGroups(p).count());
List<HoodieFileGroup> groups = Stream.of(partitionPath1, partitionPath2, partitionPath3).flatMap(p -> fsView.getAllFileGroups(p)).collect(Collectors.toList());
assertEquals(3, groups.size(), "Expected number of file-groups");
assertEquals(3, groups.stream().map(HoodieFileGroup::getPartitionPath).collect(Collectors.toSet()).size(), "Partitions must be different for file-groups");
Set<String> fileIds = groups.stream().map(HoodieFileGroup::getFileGroupId).map(HoodieFileGroupId::getFileId).collect(Collectors.toSet());
assertEquals(1, fileIds.size(), "File Id must be same");
assertTrue(fileIds.contains(fileId), "Expected FileId");
// Setup Pending compaction for all of these fileIds.
List<Pair<String, FileSlice>> partitionFileSlicesPairs = new ArrayList<>();
List<FileSlice> fileSlices = rtView.getLatestFileSlices(partitionPath1).collect(Collectors.toList());
partitionFileSlicesPairs.add(Pair.of(partitionPath1, fileSlices.get(0)));
fileSlices = rtView.getLatestFileSlices(partitionPath2).collect(Collectors.toList());
partitionFileSlicesPairs.add(Pair.of(partitionPath2, fileSlices.get(0)));
fileSlices = rtView.getLatestFileSlices(partitionPath3).collect(Collectors.toList());
partitionFileSlicesPairs.add(Pair.of(partitionPath3, fileSlices.get(0)));
String compactionRequestedTime = "2";
String compactDataFileName = FSUtils.makeDataFileName(compactionRequestedTime, TEST_WRITE_TOKEN, fileId);
HoodieCompactionPlan compactionPlan = CompactionUtils.buildFromFileSlices(partitionFileSlicesPairs, Option.empty(), Option.empty());
// Create a Data-file for some of the partitions but this should be skipped by view
new File(basePath + "/" + partitionPath1 + "/" + compactDataFileName).createNewFile();
new File(basePath + "/" + partitionPath2 + "/" + compactDataFileName).createNewFile();
HoodieInstant compactionInstant = new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionRequestedTime);
HoodieInstant requested = HoodieTimeline.getCompactionRequestedInstant(compactionInstant.getTimestamp());
metaClient.getActiveTimeline().saveToCompactionRequested(requested, TimelineMetadataUtils.serializeCompactionPlan(compactionPlan));
metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(requested);
// Fake delta-ingestion after compaction-requested
String deltaInstantTime4 = "4";
String deltaInstantTime5 = "6";
String fileName3 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 0, TEST_WRITE_TOKEN);
String fileName4 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 1, TEST_WRITE_TOKEN);
new File(basePath + "/" + partitionPath1 + "/" + fileName3).createNewFile();
new File(basePath + "/" + partitionPath1 + "/" + fileName4).createNewFile();
new File(basePath + "/" + partitionPath2 + "/" + fileName3).createNewFile();
new File(basePath + "/" + partitionPath2 + "/" + fileName4).createNewFile();
new File(basePath + "/" + partitionPath3 + "/" + fileName3).createNewFile();
new File(basePath + "/" + partitionPath3 + "/" + fileName4).createNewFile();
HoodieInstant deltaInstant4 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime4);
HoodieInstant deltaInstant5 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime5);
saveAsComplete(commitTimeline, deltaInstant4, Option.empty());
saveAsComplete(commitTimeline, deltaInstant5, Option.empty());
refreshFsView();
// Test Data Files
List<HoodieBaseFile> dataFiles = roView.getAllBaseFiles(partitionPath1).collect(Collectors.toList());
assertEquals(1, dataFiles.size(), "One data-file is expected as there is only one file-group");
assertEquals("1", dataFiles.get(0).getCommitTime(), "Expect only valid commit");
dataFiles = roView.getAllBaseFiles(partitionPath2).collect(Collectors.toList());
assertEquals(1, dataFiles.size(), "One data-file is expected as there is only one file-group");
assertEquals("1", dataFiles.get(0).getCommitTime(), "Expect only valid commit");
// Merge API Tests
Arrays.asList(partitionPath1, partitionPath2, partitionPath3).forEach(partitionPath -> {
List<FileSlice> fileSliceList = rtView.getLatestMergedFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5).collect(Collectors.toList());
assertEquals(1, fileSliceList.size(), "Expect file-slice to be merged");
FileSlice fileSlice = fileSliceList.get(0);
assertEquals(fileId, fileSlice.getFileId());
assertEquals(dataFileName, fileSlice.getBaseFile().get().getFileName(), "Data file must be present");
assertEquals(instantTime1, fileSlice.getBaseInstantTime(), "Base Instant of penultimate file-slice must be base instant");
List<HoodieLogFile> logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
assertEquals(3, logFiles.size(), "Log files must include those after compaction request");
assertEquals(fileName4, logFiles.get(0).getFileName(), "Log File Order check");
assertEquals(fileName3, logFiles.get(1).getFileName(), "Log File Order check");
assertEquals(fileName1, logFiles.get(2).getFileName(), "Log File Order check");
fileSliceList = rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, true).collect(Collectors.toList());
assertEquals(1, fileSliceList.size(), "Expect only one file-id");
fileSlice = fileSliceList.get(0);
assertEquals(fileId, fileSlice.getFileId());
assertFalse(fileSlice.getBaseFile().isPresent(), "No data-file expected in latest file-slice");
assertEquals(compactionRequestedTime, fileSlice.getBaseInstantTime(), "Compaction requested instant must be base instant");
logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
assertEquals(2, logFiles.size(), "Log files must include only those after compaction request");
assertEquals(fileName4, logFiles.get(0).getFileName(), "Log File Order check");
assertEquals(fileName3, logFiles.get(1).getFileName(), "Log File Order check");
// Check getLatestFileSlicesBeforeOrOn excluding fileIds in pending compaction
fileSliceList = rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, false).collect(Collectors.toList());
assertEquals(0, fileSliceList.size(), "Expect empty list as file-id is in pending compaction");
});
assertEquals(3, fsView.getPendingCompactionOperations().count());
Set<String> partitionsInCompaction = fsView.getPendingCompactionOperations().map(Pair::getValue).map(CompactionOperation::getPartitionPath).collect(Collectors.toSet());
assertEquals(3, partitionsInCompaction.size());
assertTrue(partitionsInCompaction.contains(partitionPath1));
assertTrue(partitionsInCompaction.contains(partitionPath2));
assertTrue(partitionsInCompaction.contains(partitionPath3));
Set<String> fileIdsInCompaction = fsView.getPendingCompactionOperations().map(Pair::getValue).map(CompactionOperation::getFileId).collect(Collectors.toSet());
assertEquals(1, fileIdsInCompaction.size());
assertTrue(fileIdsInCompaction.contains(fileId));
}
Aggregations