Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
From class BaseJavaCommitActionExecutor, method execute:
@Override
public HoodieWriteMetadata<List<WriteStatus>> execute(List<HoodieRecord<T>> inputRecords) {
  HoodieWriteMetadata<List<WriteStatus>> result = new HoodieWriteMetadata<>();

  // Build a workload profile (record counts per partition) if the index needs one.
  WorkloadProfile workloadProfile = null;
  if (isWorkloadProfileNeeded()) {
    workloadProfile = new WorkloadProfile(buildProfile(inputRecords), table.getIndex().canIndexLogFiles());
    LOG.info("Input workload profile :" + workloadProfile);
  }

  final Partitioner partitioner = getPartitioner(workloadProfile);

  // Persist the workload profile into the inflight instant; tolerate the failure
  // as long as the inflight marker file already exists on the filesystem.
  try {
    saveWorkloadProfileMetadataToInflight(workloadProfile, instantTime);
  } catch (Exception e) {
    HoodieTableMetaClient metaClient = table.getMetaClient();
    HoodieInstant inflightInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, metaClient.getCommitActionType(), instantTime);
    try {
      if (!metaClient.getFs().exists(new Path(metaClient.getMetaPath(), inflightInstant.getFileName()))) {
        throw new HoodieCommitException("Failed to commit " + instantTime + " unable to save inflight metadata ", e);
      }
    } catch (IOException ex) {
      LOG.error("Check file exists failed");
      throw new HoodieCommitException("Failed to commit " + instantTime + " unable to save inflight metadata ", ex);
    }
  }

  // Bucket the records, then write each bucket and collect the write statuses.
  Map<Integer, List<HoodieRecord<T>>> partitionedRecords = partition(inputRecords, partitioner);
  List<WriteStatus> writeStatuses = new LinkedList<>();
  partitionedRecords.forEach((partition, records) -> {
    if (WriteOperationType.isChangingRecords(operationType)) {
      handleUpsertPartition(instantTime, partition, records.iterator(), partitioner).forEachRemaining(writeStatuses::addAll);
    } else {
      handleInsertPartition(instantTime, partition, records.iterator(), partitioner).forEachRemaining(writeStatuses::addAll);
    }
  });

  updateIndex(writeStatuses, result);
  updateIndexAndCommitIfNeeded(writeStatuses, result);
  return result;
}
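The interesting HoodieInstant usage above is the recovery path: the exception is swallowed only when the inflight marker file is already present under the table's metadata directory. A minimal sketch of that check in isolation, assuming a HoodieTableMetaClient named metaClient and a commit time string instantTime are in scope, as in the method:

  // Sketch: does the inflight marker for this commit exist under the metadata path?
  // `metaClient` and `instantTime` are assumed to be in scope, as above.
  HoodieInstant inflightInstant =
      new HoodieInstant(HoodieInstant.State.INFLIGHT, metaClient.getCommitActionType(), instantTime);
  Path inflightPath = new Path(metaClient.getMetaPath(), inflightInstant.getFileName());
  boolean inflightMarkerExists = metaClient.getFs().exists(inflightPath); // may throw IOException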
Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
From class JavaUpsertPartitioner, method averageBytesPerRecord:
/**
* Obtains the average record size based on records written during previous commits. Used for estimating how many
* records pack into one file.
*/
protected static long averageBytesPerRecord(HoodieTimeline commitTimeline, HoodieWriteConfig hoodieWriteConfig) {
  long avgSize = hoodieWriteConfig.getCopyOnWriteRecordSizeEstimate();
  long fileSizeThreshold = (long) (hoodieWriteConfig.getRecordSizeEstimationThreshold() * hoodieWriteConfig.getParquetSmallFileLimit());
  try {
    if (!commitTimeline.empty()) {
      // Go over the reverse ordered commits to get a more recent estimate of average record size.
      Iterator<HoodieInstant> instants = commitTimeline.getReverseOrderedInstants().iterator();
      while (instants.hasNext()) {
        HoodieInstant instant = instants.next();
        HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(commitTimeline.getInstantDetails(instant).get(), HoodieCommitMetadata.class);
        long totalBytesWritten = commitMetadata.fetchTotalBytesWritten();
        long totalRecordsWritten = commitMetadata.fetchTotalRecordsWritten();
        if (totalBytesWritten > fileSizeThreshold && totalRecordsWritten > 0) {
          avgSize = (long) Math.ceil((1.0 * totalBytesWritten) / totalRecordsWritten);
          break;
        }
      }
    }
  } catch (Throwable t) {
    // Make this fail-safe: fall back to the configured estimate.
    LOG.error("Error trying to compute average bytes/record ", t);
  }
  return avgSize;
}
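The estimate returned here exists to answer one question: how many records pack into one base file. A hedged sketch of that downstream arithmetic follows, assuming the HoodieWriteConfig getters shown above plus getParquetMaxFileSize(); this is an illustration, not the partitioner's verbatim sizing logic:

  // Sketch (assumption, not the partitioner's exact code): derive a per-file
  // record budget from the average record size and the target base file size.
  long averageRecordSize = averageBytesPerRecord(commitTimeline, hoodieWriteConfig);
  long recordsPerFile = Math.max(1L, hoodieWriteConfig.getParquetMaxFileSize() / averageRecordSize);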
Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
From class TestHoodieTableFileSystemView, method testReplaceFileIdIsExcludedInView:
@Test
public void testReplaceFileIdIsExcludedInView() throws IOException {
  String partitionPath1 = "2020/06/27";
  String partitionPath2 = "2020/07/14";
  new File(basePath + "/" + partitionPath1).mkdirs();
  new File(basePath + "/" + partitionPath2).mkdirs();

  // Create 2 fileIds in partition1 - fileId1 is replaced later on.
  String fileId1 = UUID.randomUUID().toString();
  String fileId2 = UUID.randomUUID().toString();
  // Create 2 fileIds in partition2 - fileId3 and fileId4 are replaced later on.
  String fileId3 = UUID.randomUUID().toString();
  String fileId4 = UUID.randomUUID().toString();
  assertFalse(roView.getLatestBaseFiles(partitionPath1)
      .anyMatch(dfile -> dfile.getFileId().equals(fileId1) || dfile.getFileId().equals(fileId2)),
      "No commit, should not find any data file");
  assertFalse(roView.getLatestBaseFiles(partitionPath2)
      .anyMatch(dfile -> dfile.getFileId().equals(fileId3) || dfile.getFileId().equals(fileId4)),
      "No commit, should not find any data file");

  // Only one commit.
  String commitTime1 = "1";
  String fileName1 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1);
  String fileName2 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2);
  String fileName3 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId3);
  String fileName4 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId4);
  new File(basePath + "/" + partitionPath1 + "/" + fileName1).createNewFile();
  new File(basePath + "/" + partitionPath1 + "/" + fileName2).createNewFile();
  new File(basePath + "/" + partitionPath2 + "/" + fileName3).createNewFile();
  new File(basePath + "/" + partitionPath2 + "/" + fileName4).createNewFile();

  // Mark fileId1 (partition1) and fileId3, fileId4 (partition2) as replaced.
  Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
  List<String> replacedFileIdsP1 = new ArrayList<>();
  replacedFileIdsP1.add(fileId1);
  partitionToReplaceFileIds.put(partitionPath1, replacedFileIdsP1);
  List<String> replacedFileIdsP2 = new ArrayList<>();
  replacedFileIdsP2.add(fileId3);
  replacedFileIdsP2.add(fileId4);
  partitionToReplaceFileIds.put(partitionPath2, replacedFileIdsP2);
  HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(Collections.emptyList(), partitionToReplaceFileIds,
      Option.empty(), WriteOperationType.INSERT_OVERWRITE, "", HoodieTimeline.REPLACE_COMMIT_ACTION);

  HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
  HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, commitTime1);
  saveAsComplete(commitTimeline, instant1, Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
  refreshFsView();

  // Replaced file groups are excluded from the latest-base-files view.
  assertEquals(0, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId1)).count());
  assertEquals(fileName2, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId2)).findFirst().get().getFileName());
  assertEquals(0, roView.getLatestBaseFiles(partitionPath2).filter(dfile -> dfile.getFileId().equals(fileId3)).count());
  assertEquals(0, roView.getLatestBaseFiles(partitionPath2).filter(dfile -> dfile.getFileId().equals(fileId4)).count());

  // Ensure getReplacedFileGroupsBeforeOrOn works across all instants.
  List<HoodieFileGroup> replacedOnInstant1 = fsView.getReplacedFileGroupsBeforeOrOn("0", partitionPath1).collect(Collectors.toList());
  assertEquals(0, replacedOnInstant1.size());
  List<HoodieFileGroup> allReplaced = fsView.getReplacedFileGroupsBeforeOrOn("2", partitionPath1).collect(Collectors.toList());
  allReplaced.addAll(fsView.getReplacedFileGroupsBeforeOrOn("2", partitionPath2).collect(Collectors.toList()));
  assertEquals(3, allReplaced.size());
  Set<String> allReplacedFileIds = allReplaced.stream().map(fg -> fg.getFileGroupId().getFileId()).collect(Collectors.toSet());
  Set<String> expectedReplacedFileIds = Stream.of(fileId1, fileId3, fileId4).collect(Collectors.toSet());
  assertEquals(expectedReplacedFileIds, allReplacedFileIds);
}
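One boundary the test skips over: fileId1 is replaced exactly at instant "1". A small hedged sketch of the boundary assertions (not part of the original test; assumes "beforeOrOn" is inclusive and that these one-character timestamps compare in timeline order):

  // Sketch (assumption): the replacement committed at "1" is invisible at "0"
  // but already reported when querying at "1" itself, since the bound is inclusive.
  assertEquals(0, fsView.getReplacedFileGroupsBeforeOrOn("0", partitionPath1).count());
  assertEquals(1, fsView.getReplacedFileGroupsBeforeOrOn("1", partitionPath1).count());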
Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
From class TestHoodieTableFileSystemView, method saveAsComplete:
private static void saveAsComplete(HoodieActiveTimeline timeline, HoodieInstant inflight, Option<byte[]> data) {
  if (inflight.getAction().equals(HoodieTimeline.COMPACTION_ACTION)) {
    timeline.transitionCompactionInflightToComplete(inflight, data);
  } else {
    // Non-compaction actions walk the full lifecycle: REQUESTED -> INFLIGHT -> COMPLETED.
    HoodieInstant requested = new HoodieInstant(State.REQUESTED, inflight.getAction(), inflight.getTimestamp());
    timeline.createNewInstant(requested);
    timeline.transitionRequestedToInflight(requested, Option.empty());
    timeline.saveAsComplete(inflight, data);
  }
}
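For non-compaction actions, the helper drives a commit through the timeline's state machine. The same lifecycle written out by hand, a minimal sketch assuming an active timeline and the constants used above:

  // Sketch: REQUESTED -> INFLIGHT -> COMPLETED for a plain commit at time "001".
  HoodieInstant requested = new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "001");
  HoodieInstant inflight = new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "001");
  timeline.createNewInstant(requested);
  timeline.transitionRequestedToInflight(requested, Option.empty());
  timeline.saveAsComplete(inflight, Option.empty()); // completed commit is now visible to views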
Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
From class TestHoodieTableFileSystemView, method testGetLatestDataFilesForFileId:
@Test
public void testGetLatestDataFilesForFileId() throws IOException {
  String partitionPath = "2016/05/01";
  new File(basePath + "/" + partitionPath).mkdirs();
  String fileId = UUID.randomUUID().toString();
  assertFalse(roView.getLatestBaseFiles(partitionPath).anyMatch(dfile -> dfile.getFileId().equals(fileId)),
      "No commit, should not find any data file");

  // Only one commit, but it is not yet complete ("safe"), so the file stays invisible.
  String commitTime1 = "1";
  String fileName1 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId);
  new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
  refreshFsView();
  assertFalse(roView.getLatestBaseFiles(partitionPath).anyMatch(dfile -> dfile.getFileId().equals(fileId)),
      "No commit, should not find any data file");

  // Make this commit safe (complete it on the timeline).
  HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
  HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime1);
  saveAsComplete(commitTimeline, instant1, Option.empty());
  refreshFsView();
  assertEquals(fileName1, roView.getLatestBaseFiles(partitionPath).filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().get().getFileName());

  // Do another commit, but leave it unsafe; the older file remains the latest.
  String commitTime2 = "2";
  String fileName2 = FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId);
  new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
  refreshFsView();
  assertEquals(fileName1, roView.getLatestBaseFiles(partitionPath).filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().get().getFileName());

  // Make it safe; the newer file now becomes the latest base file.
  HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime2);
  saveAsComplete(commitTimeline, instant2, Option.empty());
  refreshFsView();
  assertEquals(fileName2, roView.getLatestBaseFiles(partitionPath).filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().get().getFileName());
}
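Both tests build their instants with the boolean HoodieInstant constructor, where true marks the instant as inflight. A tiny sketch of the equivalence with the State-based constructor used in saveAsComplete above:

  // Sketch: the boolean constructor flags the instant as inflight;
  // the same instant expressed with the explicit State enum.
  HoodieInstant viaBoolean = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1");
  HoodieInstant viaState = new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "1");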