Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.
From class TestMergeOnReadRollbackActionExecutor, method testMergeOnReadRollbackActionExecutor:
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testMergeOnReadRollbackActionExecutor(boolean isUsingMarkers) throws IOException {
  // 1. prepare data and assert data result
  List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
  List<FileSlice> secondPartitionCommit2FileSlices = new ArrayList<>();
  HoodieWriteConfig cfg = getConfigBuilder().withRollbackUsingMarkers(isUsingMarkers).withAutoCommit(false).build();
  twoUpsertCommitDataWithTwoPartitions(firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices, cfg, !isUsingMarkers);
  List<HoodieLogFile> firstPartitionCommit2LogFiles = new ArrayList<>();
  List<HoodieLogFile> secondPartitionCommit2LogFiles = new ArrayList<>();
  firstPartitionCommit2FileSlices.get(0).getLogFiles().collect(Collectors.toList())
      .forEach(logFile -> firstPartitionCommit2LogFiles.add(logFile));
  assertEquals(1, firstPartitionCommit2LogFiles.size());
  secondPartitionCommit2FileSlices.get(0).getLogFiles().collect(Collectors.toList())
      .forEach(logFile -> secondPartitionCommit2LogFiles.add(logFile));
  assertEquals(1, secondPartitionCommit2LogFiles.size());
  HoodieTable table = this.getHoodieTable(metaClient, cfg);

  // 2. rollback
  HoodieInstant rollBackInstant = new HoodieInstant(isUsingMarkers, HoodieTimeline.DELTA_COMMIT_ACTION, "002");
  BaseRollbackPlanActionExecutor mergeOnReadRollbackPlanActionExecutor = new BaseRollbackPlanActionExecutor(
      context, cfg, table, "003", rollBackInstant, false, cfg.shouldRollbackUsingMarkers());
  mergeOnReadRollbackPlanActionExecutor.execute().get();
  MergeOnReadRollbackActionExecutor mergeOnReadRollbackActionExecutor = new MergeOnReadRollbackActionExecutor(
      context, cfg, table, "003", rollBackInstant, true, false);

  // 3. assert the rollback stat
  Map<String, HoodieRollbackPartitionMetadata> rollbackMetadata =
      mergeOnReadRollbackActionExecutor.execute().getPartitionMetadata();
  assertEquals(2, rollbackMetadata.size());
  for (Map.Entry<String, HoodieRollbackPartitionMetadata> entry : rollbackMetadata.entrySet()) {
    HoodieRollbackPartitionMetadata meta = entry.getValue();
    assertTrue(meta.getFailedDeleteFiles() == null || meta.getFailedDeleteFiles().size() == 0);
    assertTrue(meta.getSuccessDeleteFiles() == null || meta.getSuccessDeleteFiles().size() == 0);
  }

  // 4. assert the file groups after rollback, and compare them to the rollback stats
  // assert the first partition data and log file size
  List<HoodieFileGroup> firstPartitionRollBack1FileGroups =
      table.getFileSystemView().getAllFileGroups(DEFAULT_FIRST_PARTITION_PATH).collect(Collectors.toList());
  assertEquals(1, firstPartitionRollBack1FileGroups.size());
  List<FileSlice> firstPartitionRollBack1FileSlices =
      firstPartitionRollBack1FileGroups.get(0).getAllFileSlices().collect(Collectors.toList());
  assertEquals(1, firstPartitionRollBack1FileSlices.size());
  FileSlice firstPartitionRollBack1FileSlice = firstPartitionRollBack1FileSlices.get(0);
  List<HoodieLogFile> firstPartitionRollBackLogFiles = firstPartitionRollBack1FileSlice.getLogFiles().collect(Collectors.toList());
  assertEquals(2, firstPartitionRollBackLogFiles.size());
  firstPartitionRollBackLogFiles.removeAll(firstPartitionCommit2LogFiles);
  assertEquals(1, firstPartitionRollBackLogFiles.size());

  // assert the second partition data and log file size
  List<HoodieFileGroup> secondPartitionRollBack1FileGroups =
      table.getFileSystemView().getAllFileGroups(DEFAULT_SECOND_PARTITION_PATH).collect(Collectors.toList());
  assertEquals(1, secondPartitionRollBack1FileGroups.size());
  List<FileSlice> secondPartitionRollBack1FileSlices =
      secondPartitionRollBack1FileGroups.get(0).getAllFileSlices().collect(Collectors.toList());
  assertEquals(1, secondPartitionRollBack1FileSlices.size());
  FileSlice secondPartitionRollBack1FileSlice = secondPartitionRollBack1FileSlices.get(0);
  List<HoodieLogFile> secondPartitionRollBackLogFiles = secondPartitionRollBack1FileSlice.getLogFiles().collect(Collectors.toList());
  assertEquals(2, secondPartitionRollBackLogFiles.size());
  secondPartitionRollBackLogFiles.removeAll(secondPartitionCommit2LogFiles);
  assertEquals(1, secondPartitionRollBackLogFiles.size());

  assertFalse(WriteMarkersFactory.get(cfg.getMarkersType(), table, "002").doesMarkerDirExist());
}
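The test above manipulates the HoodieLogFile objects surfaced by the file system view. For orientation, here is a minimal, self-contained sketch (not part of the test; the path and file id are hypothetical) of the accessors involved, assuming Hudi's usual ".<fileId>_<baseCommitTime>.log.<version>_<writeToken>" log-file naming:

import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodieLogFile;

public class HoodieLogFileMetadataSketch {
  public static void main(String[] args) {
    // Hypothetical log file path; the partition and file id are made up for illustration.
    Path logPath = new Path("/tmp/hudi/2016/03/15/.fileid-0001_001.log.1_1-0-1");
    HoodieLogFile logFile = new HoodieLogFile(logPath);
    System.out.println(logFile.getFileId());          // fileid-0001
    System.out.println(logFile.getBaseCommitTime());  // 001
    System.out.println(logFile.getLogVersion());      // 1
    System.out.println(logFile.getLogWriteToken());   // 1-0-1
  }
}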
Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.
From class TestCompactionAdminClient, method validateUnScheduleFileId:
/**
 * Validate Unschedule operations.
 */
private void validateUnScheduleFileId(CompactionAdminClient client, String ingestionInstant, String compactionInstant,
    CompactionOperation op, int expNumRenames) throws Exception {
  ensureValidCompactionPlan(compactionInstant);
  // Check suggested rename operations
  List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles =
      client.getRenamingActionsForUnschedulingCompactionOperation(metaClient, compactionInstant, op, Option.empty(), false);
  metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath)
      .setLoadActiveTimelineOnLoad(true).build();
  // Log files belonging to file-slices created because of compaction request must be renamed
  Set<HoodieLogFile> gotLogFilesToBeRenamed = renameFiles.stream().map(Pair::getLeft).collect(Collectors.toSet());
  final HoodieTableFileSystemView fsView =
      new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
  Set<HoodieLogFile> expLogFilesToBeRenamed = fsView.getLatestFileSlices(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
      .filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
      .filter(fs -> fs.getFileId().equals(op.getFileId()))
      .flatMap(FileSlice::getLogFiles)
      .collect(Collectors.toSet());
  assertEquals(expLogFilesToBeRenamed, gotLogFilesToBeRenamed,
      "Log files belonging to file-slices created because of compaction request must be renamed");
  validateRenameFiles(renameFiles, ingestionInstant, compactionInstant, fsView);
  Map<String, Long> fileIdToCountsBeforeRenaming =
      fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant)
          .filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant))
          .filter(fs -> fs.getFileId().equals(op.getFileId()))
          .map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
          .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
  // Call the main unschedule API
  client.unscheduleCompactionFileId(op.getFileGroupId(), false, false);
  metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath)
      .setLoadActiveTimelineOnLoad(true).build();
  final HoodieTableFileSystemView newFsView =
      new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
  // Expect all file-slices whose base-commit is the compaction commit to contain no new log files
  newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true)
      .filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
      .filter(fs -> fs.getFileId().equals(op.getFileId()))
      .forEach(fs -> {
        assertFalse(fs.getBaseFile().isPresent(), "No Data file must be present");
        assertEquals(0, fs.getLogFiles().count(), "No Log Files");
      });
  // Ensure same number of log-files before and after renaming per fileId
  Map<String, Long> fileIdToCountsAfterRenaming =
      newFsView.getAllFileGroups(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
          .flatMap(HoodieFileGroup::getAllFileSlices)
          .filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant))
          .filter(fs -> fs.getFileId().equals(op.getFileId()))
          .map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
          .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
  assertEquals(fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming, "Each File Id has same number of log-files");
  assertEquals(1, fileIdToCountsAfterRenaming.size(), "Not Empty");
  assertEquals(expNumRenames, renameFiles.size(), "Expected number of renames");
}
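For orientation, a small illustrative sketch (not from the test) of the rename pairs checked above. Based on how the test consumes them, the left element of each pair is a log file currently anchored to the compaction instant and the right element is its proposed new name on the earlier ingestion file slice; treat that reading as an assumption.

import java.util.List;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.util.collection.Pair;

public class RenamePairSketch {
  // Prints each suggested rename as "source -> target" with the parsed base commit and log version.
  static void describe(List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles) {
    for (Pair<HoodieLogFile, HoodieLogFile> rename : renameFiles) {
      HoodieLogFile from = rename.getLeft();
      HoodieLogFile to = rename.getRight();
      System.out.printf("%s (base %s, v%d) -> %s (base %s, v%d)%n",
          from.getFileName(), from.getBaseCommitTime(), from.getLogVersion(),
          to.getFileName(), to.getBaseCommitTime(), to.getLogVersion());
    }
  }
}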
Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.
From class SparkUpsertDeltaCommitPartitioner, method getSmallFiles:
@Override
protected List<SmallFile> getSmallFiles(String partitionPath) {
  // Init here since this class (and member variables) might not have been initialized
  HoodieTimeline commitTimeline = table.getCompletedCommitsTimeline();
  if (commitTimeline.empty()) {
    return Collections.emptyList();
  }
  HoodieInstant latestCommitTime = commitTimeline.lastInstant().get();
  // Find out all eligible small file slices, looking for
  // smallest file in the partition to append to
  List<FileSlice> smallFileSlicesCandidates = getSmallFileCandidates(partitionPath, latestCommitTime);
  List<SmallFile> smallFileLocations = new ArrayList<>();
  // Create SmallFiles from the eligible file slices
  for (FileSlice smallFileSlice : smallFileSlicesCandidates) {
    SmallFile sf = new SmallFile();
    if (smallFileSlice.getBaseFile().isPresent()) {
      // TODO : Move logic of file name, file id, base commit time handling inside file slice
      String filename = smallFileSlice.getBaseFile().get().getFileName();
      sf.location = new HoodieRecordLocation(FSUtils.getCommitTime(filename), FSUtils.getFileId(filename));
      sf.sizeBytes = getTotalFileSize(smallFileSlice);
      smallFileLocations.add(sf);
    } else {
      HoodieLogFile logFile = smallFileSlice.getLogFiles().findFirst().get();
      sf.location = new HoodieRecordLocation(FSUtils.getBaseCommitTimeFromLogPath(logFile.getPath()),
          FSUtils.getFileIdFromLogPath(logFile.getPath()));
      sf.sizeBytes = getTotalFileSize(smallFileSlice);
      smallFileLocations.add(sf);
    }
  }
  return smallFileLocations;
}
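The branch above distinguishes base files from log files purely by name: base files carry the commit time and file id in their own file name, while log-only slices derive both from the log path. A minimal sketch of those FSUtils helpers follows; the file names are hypothetical and assume the standard "<fileId>_<writeToken>_<instantTime>.parquet" and ".<fileId>_<baseCommitTime>.log.<version>_<writeToken>" conventions:

import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.fs.FSUtils;

public class SmallFileNamingSketch {
  public static void main(String[] args) {
    // Hypothetical base file name: <fileId>_<writeToken>_<instantTime>.parquet
    String baseFileName = "fileid-0001_1-0-1_001.parquet";
    System.out.println(FSUtils.getFileId(baseFileName));      // fileid-0001
    System.out.println(FSUtils.getCommitTime(baseFileName));  // 001

    // Hypothetical log file path: .<fileId>_<baseCommitTime>.log.<version>_<writeToken>
    Path logPath = new Path("/tmp/hudi/p1/.fileid-0001_001.log.1_1-0-1");
    System.out.println(FSUtils.getFileIdFromLogPath(logPath));          // fileid-0001
    System.out.println(FSUtils.getBaseCommitTimeFromLogPath(logPath));  // 001
  }
}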
Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.
From class TestUpgradeDowngrade, method assertMarkerFilesForUpgrade:
private void assertMarkerFilesForUpgrade(HoodieTable table, HoodieInstant commitInstant,
    List<FileSlice> firstPartitionCommit2FileSlices, List<FileSlice> secondPartitionCommit2FileSlices) throws IOException {
  // Verify recreated marker files are as expected
  WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
  assertTrue(writeMarkers.doesMarkerDirExist());
  Set<String> files = writeMarkers.allMarkerFilePaths();
  assertEquals(2, files.size());
  List<String> actualFiles = new ArrayList<>();
  for (String file : files) {
    String fileName = WriteMarkers.stripMarkerSuffix(file);
    actualFiles.add(fileName);
  }
  List<FileSlice> expectedFileSlices = new ArrayList<>();
  expectedFileSlices.addAll(firstPartitionCommit2FileSlices);
  expectedFileSlices.addAll(secondPartitionCommit2FileSlices);
  List<String> expectedPaths = new ArrayList<>();
  List<Pair<String, String>> expectedLogFilePaths = new ArrayList<>();
  for (FileSlice fileSlice : expectedFileSlices) {
    String partitionPath = fileSlice.getPartitionPath();
    if (table.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) {
      for (HoodieLogFile logFile : fileSlice.getLogFiles().collect(Collectors.toList())) {
        // The log file name can't be matched as is, since the write token can't be asserted.
        // Hence asserting the partition path, file id and base commit time.
        String logBaseCommitTime = logFile.getBaseCommitTime();
        expectedLogFilePaths.add(Pair.of(partitionPath + "/" + logFile.getFileId(), logBaseCommitTime));
      }
    }
    if (fileSlice.getBaseInstantTime().equals(commitInstant.getTimestamp())) {
      String path = fileSlice.getBaseFile().get().getPath();
      // for base files, the path can be asserted as is.
      expectedPaths.add(path.substring(path.indexOf(partitionPath)));
    }
  }
  // Trim log file paths only
  List<String> trimmedActualFiles = new ArrayList<>();
  for (String actualFile : actualFiles) {
    if (table.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) {
      trimmedActualFiles.add(actualFile.substring(0, actualFile.lastIndexOf('.')));
    } else {
      trimmedActualFiles.add(actualFile);
    }
  }
  // assert for base files.
  for (String expected : expectedPaths) {
    if (trimmedActualFiles.contains(expected)) {
      trimmedActualFiles.remove(expected);
    }
  }
  if (expectedLogFilePaths.size() > 0) {
    // assert for log files
    List<Pair<String, String>> actualLogFiles = new ArrayList<>();
    for (String actual : trimmedActualFiles) {
      actualLogFiles.add(Pair.of(actual.substring(0, actual.indexOf('_')), actual.substring(actual.lastIndexOf('_') + 1)));
    }
    assertEquals(expectedLogFilePaths.size(), actualLogFiles.size());
    for (Pair<String, String> entry : expectedLogFilePaths) {
      assertTrue(actualLogFiles.contains(entry));
    }
  } else {
    assertTrue(trimmedActualFiles.size() == 0);
  }
}
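For context, a tiny sketch (the marker name below is hypothetical) of the WriteMarkers.stripMarkerSuffix call used above: marker files append a ".marker.<IOType>" suffix to the data file name, and stripping it recovers the relative data file path the test compares against.

import org.apache.hudi.table.marker.WriteMarkers;

public class MarkerNameSketch {
  public static void main(String[] args) {
    // Hypothetical marker path for a merged base file.
    String marker = "2016/03/15/fileid-0001_1-0-1_002.parquet.marker.MERGE";
    System.out.println(WriteMarkers.stripMarkerSuffix(marker));
    // 2016/03/15/fileid-0001_1-0-1_002.parquet
  }
}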
Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.
From class TestHoodieLogFormat, method testConcurrentAppend:
private void testConcurrentAppend(boolean logFileExists, boolean newLogFileFormat) throws Exception {
  HoodieLogFormat.WriterBuilder builder1 = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100").withFs(fs);
  HoodieLogFormat.WriterBuilder builder2 = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100").withFs(fs);
  if (newLogFileFormat && logFileExists) {
    // Assume there is an existing log-file with write token
    builder1 = builder1.withLogVersion(1).withRolloverLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
    builder2 = builder2.withLogVersion(1).withRolloverLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
  } else if (newLogFileFormat) {
    // First log file of the file-slice
    builder1 = builder1.withLogVersion(HoodieLogFile.LOGFILE_BASE_VERSION).withRolloverLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
    builder2 = builder2.withLogVersion(HoodieLogFile.LOGFILE_BASE_VERSION).withRolloverLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
  } else {
    builder1 = builder1.withLogVersion(1).withRolloverLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
  }
  Writer writer = builder1.build();
  List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
  Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header);
  writer.appendBlock(dataBlock);
  Writer writer2 = builder2.build();
  writer2.appendBlock(dataBlock);
  HoodieLogFile logFile1 = writer.getLogFile();
  HoodieLogFile logFile2 = writer2.getLogFile();
  writer.close();
  writer2.close();
  assertNotNull(logFile1.getLogWriteToken());
  assertEquals(logFile1.getLogVersion(), logFile2.getLogVersion() - 1, "Log Files must have different versions");
}
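Finally, since both writers above end up on the same file id with consecutive log versions, here is a minimal sketch (not part of the test; the file system, log file and schema are whatever the caller supplies) of reading the appended blocks back with the companion HoodieLogFormat reader API:

import org.apache.avro.Schema;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.HoodieLogFormat.Reader;
import org.apache.hudi.common.table.log.block.HoodieLogBlock;

public class LogReadBackSketch {
  // Counts the blocks appended to a single log file, e.g. writer.getLogFile() from the test above.
  static long countBlocks(FileSystem fs, HoodieLogFile logFile, Schema readerSchema) throws Exception {
    long blocks = 0;
    try (Reader reader = HoodieLogFormat.newReader(fs, logFile, readerSchema)) {
      while (reader.hasNext()) {
        HoodieLogBlock block = reader.next(); // a data, delete or command block
        blocks++;
      }
    }
    return blocks;
  }
}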