Use of org.apache.hudi.common.model.HoodiePartitionMetadata in project hudi by apache.
From class HoodieClientTestBase, method assertPartitionMetadata:
/**
 * Ensure presence of partition meta-data at known depth.
 *
 * @param partitionPaths Partition paths to check
 * @param fs File System
 * @throws IOException in case of error
 */
public void assertPartitionMetadata(String[] partitionPaths, FileSystem fs) throws IOException {
  for (String partitionPath : partitionPaths) {
    assertTrue(HoodiePartitionMetadata.hasPartitionMetadata(fs, new Path(basePath, partitionPath)));
    HoodiePartitionMetadata pmeta = new HoodiePartitionMetadata(fs, new Path(basePath, partitionPath));
    pmeta.readFromFS();
    assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_DEPTH, pmeta.getPartitionDepth());
  }
}
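For context, the write/read round trip this assertion exercises looks roughly like the sketch below. This is a minimal, self-contained sketch, not code from the Hudi repo: the local paths, instant time, and task partition id are made up for illustration, and it uses only the HoodiePartitionMetadata calls that appear on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodiePartitionMetadata;

public class PartitionMetadataRoundTrip {
  public static void main(String[] args) throws Exception {
    // Hypothetical local table and partition, for illustration only.
    Path basePath = new Path("/tmp/hoodie_table");
    Path partitionPath = new Path(basePath, "2021/01/01");
    FileSystem fs = partitionPath.getFileSystem(new Configuration());
    fs.mkdirs(partitionPath);

    // Write side: persist the partition metadata file into the partition folder.
    new HoodiePartitionMetadata(fs, "20210101000000", basePath, partitionPath).trySave(0);

    // Read side: the same calls assertPartitionMetadata makes above. The depth
    // should be the number of levels between base path and partition (3 here).
    if (HoodiePartitionMetadata.hasPartitionMetadata(fs, partitionPath)) {
      HoodiePartitionMetadata pmeta = new HoodiePartitionMetadata(fs, partitionPath);
      pmeta.readFromFS();
      System.out.println("Partition depth: " + pmeta.getPartitionDepth());
    }
  }
}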
Use of org.apache.hudi.common.model.HoodiePartitionMetadata in project hudi by apache.
From class HoodieAppendHandle, method init:
private void init(HoodieRecord record) {
  if (doInit) {
    // extract some information from the first record
    SliceView rtView = hoodieTable.getSliceView();
    Option<FileSlice> fileSlice = rtView.getLatestFileSlice(partitionPath, fileId);
    // Set the base commit time as the current instantTime for new inserts into log files
    String baseInstantTime;
    String baseFile = "";
    List<String> logFiles = new ArrayList<>();
    if (fileSlice.isPresent()) {
      baseInstantTime = fileSlice.get().getBaseInstantTime();
      baseFile = fileSlice.get().getBaseFile().map(BaseFile::getFileName).orElse("");
      logFiles = fileSlice.get().getLogFiles().map(HoodieLogFile::getFileName).collect(Collectors.toList());
    } else {
      baseInstantTime = instantTime;
      // This means there is no base data file, start appending to a new log file
      fileSlice = Option.of(new FileSlice(partitionPath, baseInstantTime, this.fileId));
      LOG.info("New AppendHandle for partition :" + partitionPath);
    }
    // Prepare the first write status
    writeStatus.setStat(new HoodieDeltaWriteStat());
    writeStatus.setFileId(fileId);
    writeStatus.setPartitionPath(partitionPath);
    averageRecordSize = sizeEstimator.sizeEstimate(record);
    HoodieDeltaWriteStat deltaWriteStat = (HoodieDeltaWriteStat) writeStatus.getStat();
    deltaWriteStat.setPrevCommit(baseInstantTime);
    deltaWriteStat.setPartitionPath(partitionPath);
    deltaWriteStat.setFileId(fileId);
    deltaWriteStat.setBaseFile(baseFile);
    deltaWriteStat.setLogFiles(logFiles);
    try {
      // Save hoodie partition meta in the partition path
      HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, baseInstantTime,
          new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
      partitionMetadata.trySave(getPartitionId());
      // Since the actual log file written to can be different based on when rollover happens, we use the
      // base file to denote some log appends happened on a slice. writeToken will still fence concurrent
      // writers.
      // https://issues.apache.org/jira/browse/HUDI-1517
      createMarkerFile(partitionPath, FSUtils.makeDataFileName(baseInstantTime, writeToken, fileId, hoodieTable.getBaseFileExtension()));
      this.writer = createLogWriter(fileSlice, baseInstantTime);
    } catch (Exception e) {
      LOG.error("Error in update task at commit " + instantTime, e);
      writeStatus.setGlobalError(e);
      throw new HoodieUpsertException("Failed to initialize HoodieAppendHandle for FileId: " + fileId
          + " on commit " + instantTime + " on HDFS path " + hoodieTable.getMetaClient().getBasePath()
          + "/" + partitionPath, e);
    }
    doInit = false;
  }
}
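The slice bookkeeping at the top of init() boils down to two small extractions from the FileSlice. A compilable sketch, assuming a FileSlice obtained from the table's SliceView as above; the class and method names here are illustrative, not from the repo:

import java.util.List;
import java.util.stream.Collectors;
import org.apache.hudi.common.model.BaseFile;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieLogFile;

public class FileSliceSummary {
  // Base file name of the slice, or "" when the slice is log-only.
  static String baseFileName(FileSlice slice) {
    return slice.getBaseFile().map(BaseFile::getFileName).orElse("");
  }

  // Names of all log files already present in the slice.
  static List<String> logFileNames(FileSlice slice) {
    return slice.getLogFiles().map(HoodieLogFile::getFileName).collect(Collectors.toList());
  }
}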
Use of org.apache.hudi.common.model.HoodiePartitionMetadata in project hudi by apache.
From class HoodieMergeHandle, method init:
/**
 * Extract old file path, initialize StorageWriter and WriteStatus.
 */
private void init(String fileId, String partitionPath, HoodieBaseFile baseFileToMerge) {
  LOG.info("partitionPath:" + partitionPath + ", fileId to be merged:" + fileId);
  this.baseFileToMerge = baseFileToMerge;
  this.writtenRecordKeys = new HashSet<>();
  writeStatus.setStat(new HoodieWriteStat());
  try {
    String latestValidFilePath = baseFileToMerge.getFileName();
    writeStatus.getStat().setPrevCommit(FSUtils.getCommitTime(latestValidFilePath));
    HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, instantTime,
        new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
    partitionMetadata.trySave(getPartitionId());
    String newFileName = FSUtils.makeDataFileName(instantTime, writeToken, fileId, hoodieTable.getBaseFileExtension());
    makeOldAndNewFilePaths(partitionPath, latestValidFilePath, newFileName);
    LOG.info(String.format("Merging new data into oldPath %s, as newPath %s", oldFilePath.toString(), newFilePath.toString()));
    // file name is same for all records, in this bunch
    writeStatus.setFileId(fileId);
    writeStatus.setPartitionPath(partitionPath);
    writeStatus.getStat().setPartitionPath(partitionPath);
    writeStatus.getStat().setFileId(fileId);
    setWriteStatusPath();
    // Create Marker file
    createMarkerFile(partitionPath, newFileName);
    // Create the writer for writing the new version file
    fileWriter = createNewFileWriter(instantTime, newFilePath, hoodieTable, config, writeSchemaWithMetaFields, taskContextSupplier);
  } catch (IOException io) {
    LOG.error("Error in update task at commit " + instantTime, io);
    writeStatus.setGlobalError(io);
    throw new HoodieUpsertException("Failed to initialize HoodieUpdateHandle for FileId: " + fileId
        + " on commit " + instantTime + " on path " + hoodieTable.getMetaClient().getBasePath(), io);
  }
}
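Both handles derive the new data file name via FSUtils.makeDataFileName and recover a file's commit time via FSUtils.getCommitTime. A minimal sketch of that naming round trip; the instant time, write token, and file id are made up, and the printed name layout is only indicative:

import org.apache.hudi.common.fs.FSUtils;

public class DataFileNameSketch {
  public static void main(String[] args) {
    // Hypothetical values for illustration only.
    String instantTime = "20210101000000";
    String writeToken = "1-0-1";
    String fileId = "abc-123";
    String fileName = FSUtils.makeDataFileName(instantTime, writeToken, fileId, ".parquet");
    System.out.println(fileName);                         // e.g. abc-123_1-0-1_20210101000000.parquet
    System.out.println(FSUtils.getCommitTime(fileName));  // 20210101000000
  }
}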
Use of org.apache.hudi.common.model.HoodiePartitionMetadata in project hudi by apache.
From class TestTablePathUtils, method setup:
@BeforeAll
static void setup() throws IOException {
  URI tablePathURI = Paths.get(tempDir.getAbsolutePath(), "test_table").toUri();
  tablePath = new Path(tablePathURI);
  fs = tablePath.getFileSystem(new Configuration());

  // Create bootstrap index folder
  assertTrue(new File(Paths.get(tablePathURI.getPath(), HoodieTableMetaClient.BOOTSTRAP_INDEX_ROOT_FOLDER_PATH).toUri()).mkdirs());

  // Create partition folders
  URI partitionPathURI1 = Paths.get(tablePathURI.getPath(), "key1=abc/key2=def").toUri();
  partitionPath1 = new Path(partitionPathURI1);
  URI partitionPathURI2 = Paths.get(tablePathURI.getPath(), "key1=xyz/key2=def").toUri();
  partitionPath2 = new Path(partitionPathURI2);
  assertTrue(new File(partitionPathURI1).mkdirs());
  assertTrue(new File(partitionPathURI2).mkdirs());
  HoodiePartitionMetadata partitionMetadata1 = new HoodiePartitionMetadata(fs, Instant.now().toString(), tablePath, partitionPath1);
  partitionMetadata1.trySave(1);
  HoodiePartitionMetadata partitionMetadata2 = new HoodiePartitionMetadata(fs, Instant.now().toString(), tablePath, partitionPath2);
  partitionMetadata2.trySave(2);

  // Create files
  URI filePathURI1 = Paths.get(partitionPathURI1.getPath(), "data1" + BASE_FILE_EXTENSION).toUri();
  filePath1 = new Path(filePathURI1);
  URI filePathURI2 = Paths.get(partitionPathURI2.getPath(), "data2" + BASE_FILE_EXTENSION).toUri();
  filePath2 = new Path(filePathURI2);
  assertTrue(new File(filePathURI1).createNewFile());
  assertTrue(new File(filePathURI2).createNewFile());
}
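What this fixture enables: TablePathUtils, the class under test, should be able to walk up from either file path to the table path by reading the partition metadata written above. A hedged sketch of the expected lookup inside a test method of this class; the getTablePath signature is assumed, not taken from this page:

// Inside a test method, reusing the static fields set up above (assumed API).
Option<Path> inferred = TablePathUtils.getTablePath(fs, filePath1);
assertEquals(tablePath, inferred.get());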
Use of org.apache.hudi.common.model.HoodiePartitionMetadata in project hudi by apache.
From class HoodieInputFormatUtils, method getTableMetaClientForBasePathUnchecked:
/**
 * Extract HoodieTableMetaClient from a partition path (not base path).
 */
public static HoodieTableMetaClient getTableMetaClientForBasePathUnchecked(Configuration conf, Path partitionPath) throws IOException {
  FileSystem fs = partitionPath.getFileSystem(conf);
  int levels = HoodieHiveUtils.DEFAULT_LEVELS_TO_BASEPATH;
  if (HoodiePartitionMetadata.hasPartitionMetadata(fs, partitionPath)) {
    HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, partitionPath);
    metadata.readFromFS();
    levels = metadata.getPartitionDepth();
  }
  Path baseDir = HoodieHiveUtils.getNthParent(partitionPath, levels);
  LOG.info("Reading hoodie metadata from path " + baseDir.toString());
  return HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(baseDir.toString()).build();
}
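A hedged usage sketch of the method above, assuming a Hudi table (with its .hoodie directory) already exists at the hypothetical location; without partition metadata the lookup falls back to HoodieHiveUtils.DEFAULT_LEVELS_TO_BASEPATH parent levels:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils;

public class MetaClientFromPartitionPath {
  public static void main(String[] args) throws Exception {
    // Hypothetical partition path three levels below the table base path.
    Path partitionPath = new Path("file:///tmp/hudi_table/2021/01/01");
    HoodieTableMetaClient metaClient =
        HoodieInputFormatUtils.getTableMetaClientForBasePathUnchecked(new Configuration(), partitionPath);
    System.out.println(metaClient.getBasePath()); // file:///tmp/hudi_table
  }
}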