use of org.apache.hudi.common.model.HoodieBaseFile in project hudi by apache.
the class BaseFileDTO method toHoodieBaseFile.
public static HoodieBaseFile toHoodieBaseFile(BaseFileDTO dto) {
  if (null == dto) {
    return null;
  }
  HoodieBaseFile baseFile;
  if (null != dto.fileStatus) {
    baseFile = new HoodieBaseFile(FileStatusDTO.toFileStatus(dto.fileStatus));
  } else {
    baseFile = new HoodieBaseFile(dto.fullPath);
    baseFile.setFileLen(dto.fileLen);
  }
  baseFile.setBootstrapBaseFile(toBaseFile(dto.bootstrapBaseFile));
  return baseFile;
}
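A short sketch of the converter's contract. The file path and length below are illustrative, not from a real table, and fromHoodieBaseFile is assumed to be the companion serializer on the same DTO class; the null-in/null-out behavior and the fileStatus-versus-fullPath branching follow directly from the method above.

// Illustrative only: the path is made up for this sketch.
BaseFileDTO dto = BaseFileDTO.fromHoodieBaseFile(   // assumed companion factory
    new HoodieBaseFile("/tmp/hudi/2021/01/01/abc123_1-0-1_001.parquet"));
HoodieBaseFile restored = BaseFileDTO.toHoodieBaseFile(dto);
// With no FileStatus attached, the file is rebuilt from fullPath and fileLen.
// Null input is passed through untouched:
assert BaseFileDTO.toHoodieBaseFile(null) == null;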
use of org.apache.hudi.common.model.HoodieBaseFile in project hudi by apache.
the class WriteProfile method smallFilesProfile.
/**
* Returns a list of small files in the given partition path from the latest filesystem view.
*/
protected List<SmallFile> smallFilesProfile(String partitionPath) {
  // collect small files only for the given partition path
  List<SmallFile> smallFileLocations = new ArrayList<>();
  HoodieTimeline commitTimeline = metaClient.getCommitsTimeline().filterCompletedInstants();
  if (!commitTimeline.empty()) {
    // only look for small files once at least one commit exists
    HoodieInstant latestCommitTime = commitTimeline.lastInstant().get();
    List<HoodieBaseFile> allFiles = fsView
        .getLatestBaseFilesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp())
        .collect(Collectors.toList());
    for (HoodieBaseFile file : allFiles) {
      // skip corrupted (zero-length) files and files at or above the small-file limit
      if (file.getFileSize() < config.getParquetSmallFileLimit() && file.getFileSize() > 0) {
        String filename = file.getFileName();
        SmallFile sf = new SmallFile();
        sf.location = new HoodieRecordLocation(FSUtils.getCommitTime(filename), FSUtils.getFileId(filename));
        sf.sizeBytes = file.getFileSize();
        smallFileLocations.add(sf);
      }
    }
  }
  return smallFileLocations;
}
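The qualifying test is the core of the profile: a base file counts as small only when it is non-empty and under the configured Parquet small-file limit. A standalone sketch of that check, with the limit value assumed to be the documented default:

// Sketch of the small-file predicate in isolation.
// 104857600 (100 MB) is the documented default of
// hoodie.parquet.small.file.limit; real code reads it from the write config.
long smallFileLimit = 104857600L;
long fileSize = 12L * 1024 * 1024; // hypothetical 12 MB base file
boolean qualifies = fileSize > 0 && fileSize < smallFileLimit;
// fileSize == 0 is treated as a corrupted file, not a small one.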
use of org.apache.hudi.common.model.HoodieBaseFile in project hudi by apache.
the class HiveTestUtil method generateLogData.
private static HoodieLogFile generateLogData(Path parquetFilePath, boolean isLogSchemaSimple)
    throws IOException, InterruptedException, URISyntaxException {
  Schema schema = getTestDataSchema(isLogSchemaSimple);
  HoodieBaseFile dataFile = new HoodieBaseFile(fileSystem.getFileStatus(parquetFilePath));
  // Write a log file for this parquet file
  Writer logWriter = HoodieLogFormat.newWriterBuilder()
      .onParentPath(parquetFilePath.getParent())
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
      .withFileId(dataFile.getFileId())
      .overBaseCommit(dataFile.getCommitTime())
      .withFs(fileSystem)
      .build();
  List<IndexedRecord> records = isLogSchemaSimple
      ? SchemaTestUtil.generateTestRecords(0, 100)
      : SchemaTestUtil.generateEvolvedTestRecords(100, 100);
  Map<HeaderMetadataType, String> header = new HashMap<>(2);
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, dataFile.getCommitTime());
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
  HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD);
  logWriter.appendBlock(dataBlock);
  logWriter.close();
  return logWriter.getLogFile();
}
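What attaches the generated log file to the parquet file is the shared file id and base commit time fed into the writer builder. A sketch of where those values come from, using a hypothetical base file path (Hudi base file names encode fileId_writeToken_instantTime):

// Hypothetical path; the name encodes fileId "abc123", write token "1-0-1",
// and commit time "20210101000000".
HoodieBaseFile dataFile = new HoodieBaseFile("/tmp/hudi/2021/01/01/abc123_1-0-1_20210101000000.parquet");
String fileId = dataFile.getFileId();         // "abc123"
String baseCommit = dataFile.getCommitTime(); // "20210101000000"
// Log files written with this fileId and baseCommit land in the same
// file slice as the parquet file when the filesystem view is built.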
use of org.apache.hudi.common.model.HoodieBaseFile in project hudi by apache.
the class HoodieKeyLocationFetchHandle method locations.
public Stream<Pair<HoodieKey, HoodieRecordLocation>> locations() {
  HoodieBaseFile baseFile = partitionPathBaseFilePair.getRight();
  BaseFileUtils baseFileUtils = BaseFileUtils.getInstance(baseFile.getPath());
  List<HoodieKey> hoodieKeyList;
  if (keyGeneratorOpt.isPresent()) {
    hoodieKeyList = baseFileUtils.fetchHoodieKeys(hoodieTable.getHadoopConf(), new Path(baseFile.getPath()), keyGeneratorOpt);
  } else {
    hoodieKeyList = baseFileUtils.fetchHoodieKeys(hoodieTable.getHadoopConf(), new Path(baseFile.getPath()));
  }
  return hoodieKeyList.stream()
      .map(key -> Pair.of(key, new HoodieRecordLocation(baseFile.getCommitTime(), baseFile.getFileId())));
}
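Because a record's location is fully determined by the base file that holds it, every key in the returned stream carries the same (instant time, file id) pair. A hedged sketch of collecting the stream into an index; fetchHandle stands in for an already-constructed HoodieKeyLocationFetchHandle:

// 'fetchHandle' is hypothetical; construction of the handle is omitted.
Map<HoodieKey, HoodieRecordLocation> keyToLocation = fetchHandle.locations()
    .collect(Collectors.toMap(Pair::getLeft, Pair::getRight));
// Every value shares one commit time and file id; only the keys differ,
// which is why the location is built once per file rather than per record.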
use of org.apache.hudi.common.model.HoodieBaseFile in project hudi by apache.
the class TestHoodieTableFileSystemView method testViewForFileSlicesWithNoBaseFile.
protected void testViewForFileSlicesWithNoBaseFile(int expNumTotalFileSlices, int expNumTotalDataFiles, String partitionPath) throws Exception {
  Paths.get(basePath, partitionPath).toFile().mkdirs();
  String fileId = UUID.randomUUID().toString();
  String instantTime1 = "1";
  String deltaInstantTime1 = "2";
  String deltaInstantTime2 = "3";
  String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0, TEST_WRITE_TOKEN);
  String fileName2 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 1, TEST_WRITE_TOKEN);
  Paths.get(basePath, partitionPath, fileName1).toFile().createNewFile();
  Paths.get(basePath, partitionPath, fileName2).toFile().createNewFile();
  HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
  HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime1);
  HoodieInstant deltaInstant2 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime1);
  HoodieInstant deltaInstant3 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime2);
  saveAsComplete(commitTimeline, instant1, Option.empty());
  saveAsComplete(commitTimeline, deltaInstant2, Option.empty());
  saveAsComplete(commitTimeline, deltaInstant3, Option.empty());
  refreshFsView();

  List<HoodieBaseFile> dataFiles = roView.getLatestBaseFiles().collect(Collectors.toList());
  assertTrue(dataFiles.isEmpty(), "No data file expected");
  List<FileSlice> fileSliceList = rtView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
  assertEquals(1, fileSliceList.size());
  FileSlice fileSlice = fileSliceList.get(0);
  assertEquals(fileId, fileSlice.getFileId(), "File-Id must be set correctly");
  assertFalse(fileSlice.getBaseFile().isPresent(), "Data file for base instant must not be present");
  assertEquals(instantTime1, fileSlice.getBaseInstantTime(), "Base Instant for file-group must be set correctly");
  List<HoodieLogFile> logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
  assertEquals(2, logFiles.size(), "Correct number of log-files shows up in file-slice");
  assertEquals(fileName2, logFiles.get(0).getFileName(), "Log File Order check");
  assertEquals(fileName1, logFiles.get(1).getFileName(), "Log File Order check");

  // Check Merged File Slices API
  fileSliceList = rtView.getLatestMergedFileSlicesBeforeOrOn(partitionPath, deltaInstantTime2).collect(Collectors.toList());
  assertEquals(1, fileSliceList.size());
  fileSlice = fileSliceList.get(0);
  assertEquals(fileId, fileSlice.getFileId(), "File-Id must be set correctly");
  assertFalse(fileSlice.getBaseFile().isPresent(), "Data file for base instant must not be present");
  assertEquals(instantTime1, fileSlice.getBaseInstantTime(), "Base Instant for file-group must be set correctly");
  logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
  assertEquals(2, logFiles.size(), "Correct number of log-files shows up in file-slice");
  assertEquals(fileName2, logFiles.get(0).getFileName(), "Log File Order check");
  assertEquals(fileName1, logFiles.get(1).getFileName(), "Log File Order check");

  // Check UnCompacted File Slices API
  fileSliceList = rtView.getLatestUnCompactedFileSlices(partitionPath).collect(Collectors.toList());
  assertEquals(1, fileSliceList.size());
  fileSlice = fileSliceList.get(0);
  assertEquals(fileId, fileSlice.getFileId(), "File-Id must be set correctly");
  assertFalse(fileSlice.getBaseFile().isPresent(), "Data file for base instant must not be present");
  assertEquals(instantTime1, fileSlice.getBaseInstantTime(), "Base Instant for file-group must be set correctly");
  logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
  assertEquals(2, logFiles.size(), "Correct number of log-files shows up in file-slice");
  assertEquals(fileName2, logFiles.get(0).getFileName(), "Log File Order check");
  assertEquals(fileName1, logFiles.get(1).getFileName(), "Log File Order check");

  assertEquals(expNumTotalFileSlices, rtView.getAllFileSlices(partitionPath).count(), "Total number of file-slices in view matches expected");
  assertEquals(expNumTotalDataFiles, roView.getAllBaseFiles(partitionPath).count(), "Total number of data-files in view matches expected");
  assertEquals(1, fsView.getAllFileGroups(partitionPath).count(), "Total number of file-groups in view matches expected");
}
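The repeated "Log File Order check" assertions pin down an ordering contract: getLogFiles() yields newer log versions first, so fileName2 (version 1) precedes fileName1 (version 0). A sketch of the naming call that encodes the version, with illustrative argument values mirroring the calls in the test above:

// Illustrative arguments: fileId "abc123", base instant "1", write token "1-0-1".
String newerLog = FSUtils.makeLogFileName("abc123", HoodieLogFile.DELTA_EXTENSION, "1", 1, "1-0-1");
String olderLog = FSUtils.makeLogFileName("abc123", HoodieLogFile.DELTA_EXTENSION, "1", 0, "1-0-1");
// The version component (1 vs 0) is what the view sorts on, newest first,
// so a latest or merged file slice always lists newerLog before olderLog.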