Use of org.apache.hudi.table.action.commit.BucketInfo in project hudi by apache.
The addInsert method of the BucketAssigner class.
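For orientation, BucketInfo pairs a bucket type with a target file id and a partition path. The sketch below is inferred from the constructor calls and accessors used in addInsert, not copied from the Hudi source, so treat the exact field layout as an assumption. The addInsert method itself follows the sketch.

// Hedged sketch of the BucketInfo shape, inferred from its use in addInsert below.
public class BucketInfo {
  private final BucketType bucketType;  // INSERT for a new file group, UPDATE when packed into an existing one
  private final String fileIdPrefix;    // id of the target file group
  private final String partitionPath;

  public BucketInfo(BucketType bucketType, String fileIdPrefix, String partitionPath) {
    this.bucketType = bucketType;
    this.fileIdPrefix = fileIdPrefix;
    this.partitionPath = partitionPath;
  }

  public BucketType getBucketType() { return bucketType; }
  public String getFileIdPrefix() { return fileIdPrefix; }
  public String getPartitionPath() { return partitionPath; }
}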
public BucketInfo addInsert(String partitionPath) {
  // for new inserts, compute buckets depending on how many records we have for each partition
  SmallFileAssign smallFileAssign = getSmallFileAssign(partitionPath);

  // first try packing this into one of the small files
  if (smallFileAssign != null && smallFileAssign.assign()) {
    return new BucketInfo(BucketType.UPDATE, smallFileAssign.getFileId(), partitionPath);
  }

  // if we have anything more, create new insert buckets, like normal
  if (newFileAssignStates.containsKey(partitionPath)) {
    NewFileAssignState newFileAssignState = newFileAssignStates.get(partitionPath);
    if (newFileAssignState.canAssign()) {
      newFileAssignState.assign();
      final String key = StreamerUtil.generateBucketKey(partitionPath, newFileAssignState.fileId);
      if (bucketInfoMap.containsKey(key)) {
        // a bucket for this file id was already registered (e.g. as an UPDATE
        // bucket through calling #addUpdate), so reuse it instead of creating a new one
        return bucketInfoMap.get(key);
      }
      return new BucketInfo(BucketType.UPDATE, newFileAssignState.fileId, partitionPath);
    }
  }

  // otherwise open a brand new file group for this partition
  BucketInfo bucketInfo = new BucketInfo(BucketType.INSERT, createFileIdOfThisTask(), partitionPath);
  final String key = StreamerUtil.generateBucketKey(partitionPath, bucketInfo.getFileIdPrefix());
  bucketInfoMap.put(key, bucketInfo);
  NewFileAssignState newFileAssignState = new NewFileAssignState(bucketInfo.getFileIdPrefix(), writeProfile.getRecordsPerBucket());
  newFileAssignState.assign();
  newFileAssignStates.put(partitionPath, newFileAssignState);
  return bucketInfo;
}
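A minimal usage sketch of how a caller might route an incoming insert record through addInsert; the assignBucket helper is a hypothetical name for illustration and is not the actual Flink write path in Hudi:

// Hypothetical caller: pick a bucket for an incoming insert record.
BucketInfo assignBucket(BucketAssigner bucketAssigner, HoodieRecord<?> record) {
  BucketInfo bucketInfo = bucketAssigner.addInsert(record.getPartitionPath());
  if (bucketInfo.getBucketType() == BucketType.UPDATE) {
    // the insert was packed into an existing small file group and is written as an update to it
  } else {
    // a new file group was created for this partition
  }
  return bucketInfo;
}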