Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by Apache: class MarkerBasedRollbackStrategy, method getRollbackRequestForAppend.
protected HoodieRollbackRequest getRollbackRequestForAppend(String markerFilePath) throws IOException {
  Path baseFilePathForAppend = new Path(basePath, markerFilePath);
  String fileId = FSUtils.getFileIdFromFilePath(baseFilePathForAppend);
  String baseCommitTime = FSUtils.getCommitTime(baseFilePathForAppend.getName());
  String relativePartitionPath = FSUtils.getRelativePartitionPath(new Path(basePath), baseFilePathForAppend.getParent());
  Path partitionPath = FSUtils.getPartitionPath(config.getBasePath(), relativePartitionPath);
  // NOTE: Since we're rolling back an incomplete Delta Commit, it could only have appended its
  //       block to the latest log-file
  // TODO(HUDI-1517) use provided marker-file's path instead
  Option<HoodieLogFile> latestLogFileOption = FSUtils.getLatestLogFile(table.getMetaClient().getFs(), partitionPath, fileId,
      HoodieFileFormat.HOODIE_LOG.getFileExtension(), baseCommitTime);
  Map<String, Long> logFilesWithBlocsToRollback = new HashMap<>();
  if (latestLogFileOption.isPresent()) {
    HoodieLogFile latestLogFile = latestLogFileOption.get();
    // NOTE: Markers don't carry information about the cumulative size of the blocks that have been appended,
    //       therefore we simply stub this value.
    logFilesWithBlocsToRollback = Collections.singletonMap(latestLogFile.getFileStatus().getPath().toString(), -1L);
  }
  return new HoodieRollbackRequest(relativePartitionPath, fileId, baseCommitTime, Collections.emptyList(),
      logFilesWithBlocsToRollback);
}
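The lookup via FSUtils.getLatestLogFile above works because the file id and base commit time are encoded in the log-file name itself. Below is a minimal standalone sketch of that naming convention, assuming the conventional ".<fileId>_<baseCommitTime>.log.<version>_<writeToken>" layout; the path and regex here are illustrative only, not Hudi's actual parser.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class LogFileNameParseSketch {
  // Illustrative pattern for ".<fileId>_<baseCommitTime>.log.<version>_<writeToken>".
  private static final Pattern LOG_FILE_PATTERN =
      Pattern.compile("^\\.(.+)_(\\d+)\\.log\\.(\\d+)_(.+)$");

  public static void main(String[] args) {
    String fileName = ".fileid-0001_20220101000000.log.1_1-0-1"; // made-up example name
    Matcher m = LOG_FILE_PATTERN.matcher(fileName);
    if (m.matches()) {
      System.out.println("fileId         = " + m.group(1)); // fileid-0001
      System.out.println("baseCommitTime = " + m.group(2)); // 20220101000000
      System.out.println("logVersion     = " + m.group(3)); // 1
      System.out.println("writeToken     = " + m.group(4)); // 1-0-1
    }
  }
}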
Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by Apache: class TestHoodieBackedMetadata, method verifyMetadataRawRecords.
/**
 * Verify the metadata table's on-disk raw records. When populate meta fields is enabled,
 * these records should have the additional meta fields in the payload. When key deduplication
 * is enabled, the key in the on-disk payload should be an empty string.
 *
 * @param table            - Hoodie table
 * @param logFiles         - Metadata table log files to be verified
 * @param enableMetaFields - Enable meta fields for records
 * @throws IOException
 */
private void verifyMetadataRawRecords(HoodieTable table, List<HoodieLogFile> logFiles, boolean enableMetaFields) throws IOException {
  for (HoodieLogFile logFile : logFiles) {
    FileStatus[] fsStatus = fs.listStatus(logFile.getPath());
    MessageType writerSchemaMsg = TableSchemaResolver.readSchemaFromLogFile(fs, logFile.getPath());
    if (writerSchemaMsg == null) {
      // not a data block
      continue;
    }
    Schema writerSchema = new AvroSchemaConverter().convert(writerSchemaMsg);
    HoodieLogFormat.Reader logFileReader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
    while (logFileReader.hasNext()) {
      HoodieLogBlock logBlock = logFileReader.next();
      if (logBlock instanceof HoodieDataBlock) {
        try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordItr()) {
          recordItr.forEachRemaining(indexRecord -> {
            final GenericRecord record = (GenericRecord) indexRecord;
            if (enableMetaFields) {
              // Metadata table records should have meta fields!
              assertNotNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
              assertNotNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
            } else {
              // Metadata table records should not have meta fields!
              assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
              assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
            }
            final String key = String.valueOf(record.get(HoodieMetadataPayload.KEY_FIELD_NAME));
            assertFalse(key.isEmpty());
            if (enableMetaFields) {
              assertTrue(key.equals(String.valueOf(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD))));
            }
          });
        }
      }
    }
  }
}
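The meta-field assertions above can be reproduced in isolation on a plain Avro record. Below is a minimal standalone sketch using a hypothetical three-field schema; the literal column names ("_hoodie_record_key", "_hoodie_commit_time", "key") merely stand in for HoodieRecord.RECORD_KEY_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, and HoodieMetadataPayload.KEY_FIELD_NAME.

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class MetaFieldCheckSketch {
  public static void main(String[] args) {
    // Hypothetical schema: two meta columns plus the metadata payload key field.
    Schema schema = SchemaBuilder.record("MetadataRecord").fields()
        .optionalString("_hoodie_record_key")   // stands in for HoodieRecord.RECORD_KEY_METADATA_FIELD
        .optionalString("_hoodie_commit_time")  // stands in for HoodieRecord.COMMIT_TIME_METADATA_FIELD
        .requiredString("key")                  // stands in for HoodieMetadataPayload.KEY_FIELD_NAME
        .endRecord();

    // Record as it might look with populate-meta-fields enabled.
    GenericRecord withMetaFields = new GenericData.Record(schema);
    withMetaFields.put("_hoodie_record_key", "files");
    withMetaFields.put("_hoodie_commit_time", "20220101000000");
    withMetaFields.put("key", "files");

    // Record as it might look with populate-meta-fields disabled: meta columns stay null.
    GenericRecord withoutMetaFields = new GenericData.Record(schema);
    withoutMetaFields.put("key", "files");

    // Mirrors the test's branches: meta fields present only when enabled, payload key never empty.
    System.out.println(withMetaFields.get("_hoodie_record_key") != null);      // true
    System.out.println(withoutMetaFields.get("_hoodie_record_key") == null);   // true
    System.out.println(!String.valueOf(withMetaFields.get("key")).isEmpty());  // true
    System.out.println(withMetaFields.get("key").equals(withMetaFields.get("_hoodie_record_key"))); // true
  }
}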
Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by Apache: class TestHoodieBackedTableMetadata, method verifyMetadataRawRecords.
/**
 * Verify the metadata table's on-disk raw records. When populate meta fields is enabled,
 * these records should have the additional meta fields in the payload. When key deduplication
 * is enabled, the key in the on-disk payload should be an empty string.
 *
 * @param table    - Hoodie table
 * @param logFiles - Metadata table log files to be verified
 * @throws IOException
 */
private void verifyMetadataRawRecords(HoodieTable table, List<HoodieLogFile> logFiles) throws IOException {
  for (HoodieLogFile logFile : logFiles) {
    FileStatus[] fsStatus = fs.listStatus(logFile.getPath());
    MessageType writerSchemaMsg = TableSchemaResolver.readSchemaFromLogFile(fs, logFile.getPath());
    if (writerSchemaMsg == null) {
      // not a data block
      continue;
    }
    Schema writerSchema = new AvroSchemaConverter().convert(writerSchemaMsg);
    HoodieLogFormat.Reader logFileReader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
    while (logFileReader.hasNext()) {
      HoodieLogBlock logBlock = logFileReader.next();
      if (logBlock instanceof HoodieDataBlock) {
        try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordItr()) {
          recordItr.forEachRemaining(indexRecord -> {
            final GenericRecord record = (GenericRecord) indexRecord;
            assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
            assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
            final String key = String.valueOf(record.get(HoodieMetadataPayload.KEY_FIELD_NAME));
            assertFalse(key.isEmpty());
          });
        }
      }
    }
  }
}
Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by Apache: class TestCompactionAdminClient, method validateUnSchedulePlan.
/**
 * Validate unschedule-compaction-plan operations.
 */
private List<Pair<HoodieLogFile, HoodieLogFile>> validateUnSchedulePlan(CompactionAdminClient client, String ingestionInstant,
    String compactionInstant, int numEntriesPerInstant, int expNumRenames, boolean skipUnSchedule) throws Exception {
  ensureValidCompactionPlan(compactionInstant);
  // Check suggested rename operations
  List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles =
      client.getRenamingActionsForUnschedulingCompactionPlan(metaClient, compactionInstant, 1, Option.empty(), false);
  metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath)
      .setLoadActiveTimelineOnLoad(true).build();
  // Log files belonging to file-slices created because of the compaction request must be renamed
  Set<HoodieLogFile> gotLogFilesToBeRenamed = renameFiles.stream().map(Pair::getLeft).collect(Collectors.toSet());
  final HoodieTableFileSystemView fsView =
      new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
  Set<HoodieLogFile> expLogFilesToBeRenamed = fsView.getLatestFileSlices(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
      .filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
      .flatMap(FileSlice::getLogFiles)
      .collect(Collectors.toSet());
  assertEquals(expLogFilesToBeRenamed, gotLogFilesToBeRenamed,
      "Log files belonging to file-slices created because of compaction request must be renamed");
  if (skipUnSchedule) {
    // Do the renaming only but do not touch the compaction plan - needed for repair tests
    renameFiles.forEach(lfPair -> {
      try {
        renameLogFile(metaClient, lfPair.getLeft(), lfPair.getRight());
      } catch (IOException e) {
        throw new HoodieIOException(e.getMessage(), e);
      }
    });
  } else {
    validateRenameFiles(renameFiles, ingestionInstant, compactionInstant, fsView);
  }
  Map<String, Long> fileIdToCountsBeforeRenaming =
      fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant)
          .filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant))
          .map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
          .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
  // Call the main unschedule API
  client.unscheduleCompactionPlan(compactionInstant, false, 1, false);
  metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath)
      .setLoadActiveTimelineOnLoad(true).build();
  final HoodieTableFileSystemView newFsView =
      new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
  // Expect every file-slice whose base-commit equals the compaction commit to contain no base file and no new log files
  newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true)
      .filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
      .forEach(fs -> {
        assertFalse(fs.getBaseFile().isPresent(), "No Data file must be present");
        assertEquals(0, fs.getLogFiles().count(), "No Log Files");
      });
  // Ensure the same number of log-files before and after renaming, per fileId
  Map<String, Long> fileIdToCountsAfterRenaming =
      newFsView.getAllFileGroups(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
          .flatMap(HoodieFileGroup::getAllFileSlices)
          .filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant))
          .map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
          .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
  assertEquals(fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming, "Each File Id has same number of log-files");
  assertEquals(numEntriesPerInstant, fileIdToCountsAfterRenaming.size(), "Not Empty");
  assertEquals(expNumRenames, renameFiles.size(), "Expected number of renames");
  return renameFiles;
}
Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by Apache: class TestCompactionAdminClient, method validateRenameFiles.
private void validateRenameFiles(List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles, String ingestionInstant,
    String compactionInstant, HoodieTableFileSystemView fsView) {
  // Ensure new names of log-files are on expected lines
  Set<HoodieLogFile> uniqNewLogFiles = new HashSet<>();
  Set<HoodieLogFile> uniqOldLogFiles = new HashSet<>();
  renameFiles.forEach(lfPair -> {
    assertFalse(uniqOldLogFiles.contains(lfPair.getKey()), "Old Log File Names do not collide");
    assertFalse(uniqNewLogFiles.contains(lfPair.getValue()), "New Log File Names do not collide");
    uniqOldLogFiles.add(lfPair.getKey());
    uniqNewLogFiles.add(lfPair.getValue());
  });
  renameFiles.forEach(lfPair -> {
    HoodieLogFile oldLogFile = lfPair.getLeft();
    HoodieLogFile newLogFile = lfPair.getValue();
    assertEquals(ingestionInstant, newLogFile.getBaseCommitTime(), "Base Commit time is expected");
    assertEquals(compactionInstant, oldLogFile.getBaseCommitTime(), "Base Commit time is expected");
    assertEquals(oldLogFile.getFileId(), newLogFile.getFileId(), "File Id is expected");
    HoodieLogFile lastLogFileBeforeCompaction =
        fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], ingestionInstant)
            .filter(fs -> fs.getFileId().equals(oldLogFile.getFileId()))
            .map(fs -> fs.getLogFiles().findFirst().get())
            .findFirst().get();
    assertEquals(lastLogFileBeforeCompaction.getLogVersion() + oldLogFile.getLogVersion(), newLogFile.getLogVersion(),
        "Log Version expected");
    assertTrue(newLogFile.getLogVersion() > lastLogFileBeforeCompaction.getLogVersion(), "Log version does not collide");
  });
}
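The assertions above rely on the file id, base commit time, and log version that HoodieLogFile parses out of a log-file name. Below is a minimal standalone sketch of those accessors, using a made-up path that is assumed to follow the conventional ".<fileId>_<baseCommitTime>.log.<version>_<writeToken>" layout.

import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodieLogFile;

public class LogFileAccessorSketch {
  public static void main(String[] args) {
    // Hypothetical log-file path; the directory and file names are illustrative only.
    Path logPath = new Path("/tmp/hudi_table/2016/03/15/.fileid-0001_20220101000000.log.2_1-0-1");
    HoodieLogFile logFile = new HoodieLogFile(logPath);

    // The same accessors validateRenameFiles asserts on.
    System.out.println(logFile.getFileId());         // expected: fileid-0001
    System.out.println(logFile.getBaseCommitTime()); // expected: 20220101000000
    System.out.println(logFile.getLogVersion());     // expected: 2
  }
}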