Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by Apache.
The class LogFileDTO, method toHoodieLogFile:
public static HoodieLogFile toHoodieLogFile(LogFileDTO dto) {
  FileStatus status = FileStatusDTO.toFileStatus(dto.fileStatus);
  HoodieLogFile logFile = (status == null) ? new HoodieLogFile(dto.pathStr) : new HoodieLogFile(status);
  logFile.setFileLen(dto.fileLen);
  return logFile;
}
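For orientation, here is a minimal sketch of how a HoodieLogFile rebuilt this way might be inspected. The path-string constructor and setFileLen mirror the conversion above; the getters (getFileId, getLogVersion, getFileSize) and the example path are assumptions for illustration and may differ across Hudi versions.

import org.apache.hudi.common.model.HoodieLogFile;

public class LogFileDtoSketch {
  public static void main(String[] args) {
    // Hypothetical MOR log file path: .<fileId>_<baseCommit>.log.<version>_<writeToken>
    HoodieLogFile logFile = new HoodieLogFile("/tmp/hudi/2020/01/01/.abc123_20220101000000.log.1_0-1-0");
    logFile.setFileLen(1024L); // same call the DTO conversion uses to carry the length

    System.out.println(logFile.getFileId());     // assumed accessor: file id parsed from the name
    System.out.println(logFile.getLogVersion()); // assumed accessor: log version parsed from the name
    System.out.println(logFile.getFileSize());   // assumed accessor: the length set above
  }
}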
Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by Apache.
The class HoodieAppendHandle, method init:
private void init(HoodieRecord record) {
  if (doInit) {
    // extract some information from the first record
    SliceView rtView = hoodieTable.getSliceView();
    Option<FileSlice> fileSlice = rtView.getLatestFileSlice(partitionPath, fileId);
    // Set the base commit time as the current instantTime for new inserts into log files
    String baseInstantTime;
    String baseFile = "";
    List<String> logFiles = new ArrayList<>();
    if (fileSlice.isPresent()) {
      baseInstantTime = fileSlice.get().getBaseInstantTime();
      baseFile = fileSlice.get().getBaseFile().map(BaseFile::getFileName).orElse("");
      logFiles = fileSlice.get().getLogFiles().map(HoodieLogFile::getFileName).collect(Collectors.toList());
    } else {
      baseInstantTime = instantTime;
      // This means there is no base data file, start appending to a new log file
      fileSlice = Option.of(new FileSlice(partitionPath, baseInstantTime, this.fileId));
      LOG.info("New AppendHandle for partition :" + partitionPath);
    }
    // Prepare the first write status
    writeStatus.setStat(new HoodieDeltaWriteStat());
    writeStatus.setFileId(fileId);
    writeStatus.setPartitionPath(partitionPath);
    averageRecordSize = sizeEstimator.sizeEstimate(record);
    HoodieDeltaWriteStat deltaWriteStat = (HoodieDeltaWriteStat) writeStatus.getStat();
    deltaWriteStat.setPrevCommit(baseInstantTime);
    deltaWriteStat.setPartitionPath(partitionPath);
    deltaWriteStat.setFileId(fileId);
    deltaWriteStat.setBaseFile(baseFile);
    deltaWriteStat.setLogFiles(logFiles);
    try {
      // Save hoodie partition meta in the partition path
      HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, baseInstantTime,
          new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
      partitionMetadata.trySave(getPartitionId());
      // Since the actual log file written to can be different based on when rollover happens, we use the
      // base file to denote some log appends happened on a slice. writeToken will still fence concurrent
      // writers.
      // https://issues.apache.org/jira/browse/HUDI-1517
      createMarkerFile(partitionPath,
          FSUtils.makeDataFileName(baseInstantTime, writeToken, fileId, hoodieTable.getBaseFileExtension()));
      this.writer = createLogWriter(fileSlice, baseInstantTime);
    } catch (Exception e) {
      LOG.error("Error in update task at commit " + instantTime, e);
      writeStatus.setGlobalError(e);
      throw new HoodieUpsertException("Failed to initialize HoodieAppendHandle for FileId: " + fileId
          + " on commit " + instantTime + " on HDFS path " + hoodieTable.getMetaClient().getBasePath()
          + "/" + partitionPath, e);
    }
    doInit = false;
  }
}
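Note that the marker created above uses the base-file naming scheme even though the handle appends to a log file (see the HUDI-1517 comment). A minimal standalone sketch of the two naming helpers referenced here; the concrete file id, instant, write token, and base-file extension are purely illustrative assumptions, and the FSUtils package location matches recent Hudi versions but may differ in older ones.

import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieLogFile;

public class AppendHandleNamingSketch {
  public static void main(String[] args) {
    String fileId = "abc123";                   // hypothetical file group id
    String baseInstantTime = "20220101000000";  // hypothetical base instant
    String writeToken = "0-1-0";                // hypothetical write token

    // Base-file style name used for the marker (mirrors the createMarkerFile call above).
    String markerName = FSUtils.makeDataFileName(baseInstantTime, writeToken, fileId, ".parquet");

    // Log-file style name that the actual append targets.
    String logName = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, baseInstantTime, 1, writeToken);

    System.out.println(markerName + " vs " + logName);
  }
}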
Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by Apache.
The class HoodieFlinkWriteableTestTable, method appendRecordsToLogFile:
private Pair<String, HoodieLogFile> appendRecordsToLogFile(List<HoodieRecord> groupedRecords) throws Exception {
  // All records in the group share the same partition path and file location.
  String partitionPath = groupedRecords.get(0).getPartitionPath();
  HoodieRecordLocation location = groupedRecords.get(0).getCurrentLocation();
  try (HoodieLogFormat.Writer logWriter = HoodieLogFormat.newWriterBuilder()
      .onParentPath(new Path(basePath, partitionPath))
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
      .withFileId(location.getFileId())
      .overBaseCommit(location.getInstantTime())
      .withFs(fs)
      .build()) {
    Map<HeaderMetadataType, String> header = new java.util.HashMap<>();
    header.put(HeaderMetadataType.INSTANT_TIME, location.getInstantTime());
    header.put(HeaderMetadataType.SCHEMA, schema.toString());
    // Convert each payload into an Avro IndexedRecord with the Hudi key fields attached,
    // then append the whole group as a single Avro data block.
    logWriter.appendBlock(new HoodieAvroDataBlock(groupedRecords.stream().map(r -> {
      try {
        GenericRecord val = (GenericRecord) ((HoodieRecordPayload) r.getData()).getInsertValue(schema).get();
        HoodieAvroUtils.addHoodieKeyToRecord(val, r.getRecordKey(), r.getPartitionPath(), "");
        return (IndexedRecord) val;
      } catch (IOException e) {
        LOG.warn("Failed to convert record " + r.toString(), e);
        return null;
      }
    }).collect(Collectors.toList()), header, HoodieRecord.RECORD_KEY_METADATA_FIELD));
    return Pair.of(partitionPath, logWriter.getLogFile());
  }
}
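Because this helper only reads the partition path and location from the first record, callers are expected to group records by partition and file id beforehand. A hedged sketch of that grouping step; the class and method names are purely illustrative.

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hudi.common.model.HoodieRecord;

public class RecordGroupingSketch {
  // Hypothetical helper: one group per (partitionPath, fileId), so each group
  // can be handed to a method like appendRecordsToLogFile as a unit.
  static Map<String, List<HoodieRecord>> groupForAppend(List<HoodieRecord> records) {
    return records.stream()
        .collect(Collectors.groupingBy(
            r -> r.getPartitionPath() + "/" + r.getCurrentLocation().getFileId()));
  }
}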
Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by Apache.
The class TestHoodieTableFileSystemView, method testViewForFileSlicesWithNoBaseFile:
protected void testViewForFileSlicesWithNoBaseFile(int expNumTotalFileSlices, int expNumTotalDataFiles, String partitionPath) throws Exception {
  Paths.get(basePath, partitionPath).toFile().mkdirs();
  String fileId = UUID.randomUUID().toString();
  String instantTime1 = "1";
  String deltaInstantTime1 = "2";
  String deltaInstantTime2 = "3";
  String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0, TEST_WRITE_TOKEN);
  String fileName2 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 1, TEST_WRITE_TOKEN);
  Paths.get(basePath, partitionPath, fileName1).toFile().createNewFile();
  Paths.get(basePath, partitionPath, fileName2).toFile().createNewFile();
  HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
  HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime1);
  HoodieInstant deltaInstant2 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime1);
  HoodieInstant deltaInstant3 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime2);
  saveAsComplete(commitTimeline, instant1, Option.empty());
  saveAsComplete(commitTimeline, deltaInstant2, Option.empty());
  saveAsComplete(commitTimeline, deltaInstant3, Option.empty());
  refreshFsView();

  List<HoodieBaseFile> dataFiles = roView.getLatestBaseFiles().collect(Collectors.toList());
  assertTrue(dataFiles.isEmpty(), "No data file expected");
  List<FileSlice> fileSliceList = rtView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
  assertEquals(1, fileSliceList.size());
  FileSlice fileSlice = fileSliceList.get(0);
  assertEquals(fileId, fileSlice.getFileId(), "File-Id must be set correctly");
  assertFalse(fileSlice.getBaseFile().isPresent(), "Base data file must not be present");
  assertEquals(instantTime1, fileSlice.getBaseInstantTime(), "Base Instant for file-group set correctly");
  List<HoodieLogFile> logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
  assertEquals(2, logFiles.size(), "Correct number of log-files shows up in file-slice");
  assertEquals(fileName2, logFiles.get(0).getFileName(), "Log File Order check");
  assertEquals(fileName1, logFiles.get(1).getFileName(), "Log File Order check");

  // Check Merged File Slices API
  fileSliceList = rtView.getLatestMergedFileSlicesBeforeOrOn(partitionPath, deltaInstantTime2).collect(Collectors.toList());
  assertEquals(1, fileSliceList.size());
  fileSlice = fileSliceList.get(0);
  assertEquals(fileId, fileSlice.getFileId(), "File-Id must be set correctly");
  assertFalse(fileSlice.getBaseFile().isPresent(), "Base data file must not be present");
  assertEquals(instantTime1, fileSlice.getBaseInstantTime(), "Base Instant for file-group set correctly");
  logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
  assertEquals(2, logFiles.size(), "Correct number of log-files shows up in file-slice");
  assertEquals(fileName2, logFiles.get(0).getFileName(), "Log File Order check");
  assertEquals(fileName1, logFiles.get(1).getFileName(), "Log File Order check");

  // Check UnCompacted File Slices API
  fileSliceList = rtView.getLatestUnCompactedFileSlices(partitionPath).collect(Collectors.toList());
  assertEquals(1, fileSliceList.size());
  fileSlice = fileSliceList.get(0);
  assertEquals(fileId, fileSlice.getFileId(), "File-Id must be set correctly");
  assertFalse(fileSlice.getBaseFile().isPresent(), "Base data file must not be present");
  assertEquals(instantTime1, fileSlice.getBaseInstantTime(), "Base Instant for file-group set correctly");
  logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
  assertEquals(2, logFiles.size(), "Correct number of log-files shows up in file-slice");
  assertEquals(fileName2, logFiles.get(0).getFileName(), "Log File Order check");
  assertEquals(fileName1, logFiles.get(1).getFileName(), "Log File Order check");

  assertEquals(expNumTotalFileSlices, rtView.getAllFileSlices(partitionPath).count(),
      "Total number of file-slices in view matches expected");
  assertEquals(expNumTotalDataFiles, roView.getAllBaseFiles(partitionPath).count(),
      "Total number of data-files in view matches expected");
  assertEquals(1, fsView.getAllFileGroups(partitionPath).count(),
      "Total number of file-groups in view matches expected");
}
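The ordering assertions above (fileName2, log version 1, before fileName1, log version 0) hold because the file-slice view returns log files newest-first. A small sketch reproducing that ordering outside the view; HoodieLogFile.getReverseLogFileComparator() is assumed to be the comparator the view relies on, and the file names are illustrative, so both may differ across Hudi versions.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hudi.common.model.HoodieLogFile;

public class LogFileOrderingSketch {
  public static void main(String[] args) {
    // Hypothetical log file names for the same file group and base instant "1".
    List<HoodieLogFile> logs = new ArrayList<>(Arrays.asList(
        new HoodieLogFile(".abc123_1.log.1_1-0-1"),
        new HoodieLogFile(".abc123_1.log.2_1-0-1")));
    logs.sort(HoodieLogFile.getReverseLogFileComparator()); // assumed accessor
    // logs.get(0) is now the higher-version (newer) log file, matching the test's expectation.
    System.out.println(logs.get(0).getFileName());
  }
}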
Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by Apache.
The class TestHoodieRealtimeFileSplit, method setUp:
@BeforeEach
public void setUp(@TempDir java.nio.file.Path tempDir) throws Exception {
  basePath = tempDir.toAbsolutePath().toString();
  deltaLogFiles = Collections.singletonList(new HoodieLogFile(new Path(basePath + "/1.log"), 0L));
  deltaLogPaths = Collections.singletonList(basePath + "/1.log");
  fileSplitName = basePath + "/test.file";
  baseFileSplit = new FileSplit(new Path(fileSplitName), 0, 100, new String[] {});
  maxCommitTime = "10001";
  split = new HoodieRealtimeFileSplit(baseFileSplit, basePath, deltaLogFiles, maxCommitTime, false, Option.empty());
}
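As a follow-on, a hedged sketch of assertions that could sit next to this setUp and exercise the fields it populates; the accessors used (getBasePath, getDeltaLogPaths, getMaxCommitTime on the split, getFileSize on HoodieLogFile) are assumptions about the API and may differ by Hudi version.

@Test
public void verifySetUpState() {
  // Hypothetical checks over the state initialized in setUp().
  assertEquals(basePath, split.getBasePath());            // assumed accessor
  assertEquals(deltaLogPaths, split.getDeltaLogPaths());  // assumed accessor
  assertEquals(maxCommitTime, split.getMaxCommitTime());  // assumed accessor
  assertEquals(0L, deltaLogFiles.get(0).getFileSize());   // assumed accessor on HoodieLogFile
}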