Use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.
The class TestHoodieRealtimeRecordReader, method createReplaceCommitFile.
private void createReplaceCommitFile(java.nio.file.Path basePath, String commitNumber, String partitionPath, String filePath, String fileId, Map<String, List<String>> partitionToReplaceFileIds) throws IOException {
  List<HoodieWriteStat> writeStats = new ArrayList<>();
  HoodieWriteStat writeStat = createHoodieWriteStat(basePath, commitNumber, partitionPath, filePath, fileId);
  writeStats.add(writeStat);
  HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
  replaceMetadata.setPartitionToReplaceFileIds(partitionToReplaceFileIds);
  writeStats.forEach(stat -> replaceMetadata.addWriteStat(partitionPath, stat));
  File file = basePath.resolve(".hoodie").resolve(commitNumber + ".replacecommit").toFile();
  file.createNewFile();
  FileOutputStream fileOutputStream = new FileOutputStream(file);
  fileOutputStream.write(replaceMetadata.toJsonString().getBytes(StandardCharsets.UTF_8));
  fileOutputStream.flush();
  fileOutputStream.close();
}
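For context, a minimal sketch of how a test might call this helper; the commit time, partition, and file names below are hypothetical placeholders rather than values taken from the Hudi test suite.
  // Hypothetical invocation: record that commit "003" replaced file group "fileId1" in partition "2016/05/01".
  Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
  partitionToReplaceFileIds.put("2016/05/01", Collections.singletonList("fileId1"));
  createReplaceCommitFile(basePath, "003", "2016/05/01", "2016/05/01/fileId1_1-0-1_003.parquet", "fileId1", partitionToReplaceFileIds);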
Use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.
The class HoodieTestTable, method doCluster.
public HoodieReplaceCommitMetadata doCluster(String commitTime, Map<String, List<String>> partitionToReplaceFileIds, List<String> partitions, int filesPerPartition) throws Exception {
  HoodieTestTableState testTableState = getTestTableStateWithPartitionFileInfo(CLUSTER, metaClient.getTableType(), commitTime, partitions, filesPerPartition);
  this.currentInstantTime = commitTime;
  Map<String, List<Pair<String, Integer>>> partitionToReplaceFileIdsWithLength = new HashMap<>();
  for (Map.Entry<String, List<String>> entry : partitionToReplaceFileIds.entrySet()) {
    String partition = entry.getKey();
    partitionToReplaceFileIdsWithLength.put(entry.getKey(), new ArrayList<>());
    for (String fileId : entry.getValue()) {
      int length = 100 + RANDOM.nextInt(500);
      partitionToReplaceFileIdsWithLength.get(partition).add(Pair.of(fileId, length));
    }
  }
  List<HoodieWriteStat> writeStats = generateHoodieWriteStatForPartition(testTableState.getPartitionToBaseFileInfoMap(commitTime), commitTime, false);
  for (String partition : testTableState.getPartitionToBaseFileInfoMap(commitTime).keySet()) {
    this.withBaseFilesInPartition(partition, testTableState.getPartitionToBaseFileInfoMap(commitTime).get(partition));
  }
  HoodieReplaceCommitMetadata replaceMetadata = (HoodieReplaceCommitMetadata) buildMetadata(writeStats, partitionToReplaceFileIds, Option.empty(), CLUSTER, EMPTY_STRING, REPLACE_COMMIT_ACTION);
  addReplaceCommit(commitTime, Option.empty(), Option.empty(), replaceMetadata);
  return replaceMetadata;
}
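A sketch of how a test might drive this method; the instant time, partitions, and file IDs are illustrative, and HoodieTestTable.of(metaClient) is assumed here as the factory for the test table.
  // Illustrative call: clustering commit "002" replaces file group "file-1" in partition "p1"
  // and writes two new base files into each of the partitions "p1" and "p2".
  HoodieTestTable testTable = HoodieTestTable.of(metaClient);
  Map<String, List<String>> toReplace = Collections.singletonMap("p1", Collections.singletonList("file-1"));
  HoodieReplaceCommitMetadata replaceMetadata = testTable.doCluster("002", toReplace, Arrays.asList("p1", "p2"), 2);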
Use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.
The class TestCommitUtils, method testReplaceMetadataCreation.
@Test
public void testReplaceMetadataCreation() {
  List<HoodieWriteStat> writeStats = new ArrayList<>();
  writeStats.add(createWriteStat("p1", "f1"));
  writeStats.add(createWriteStat("p2", "f2"));
  Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
  List<String> replacedFileIds = new ArrayList<>();
  replacedFileIds.add("f0");
  partitionToReplaceFileIds.put("p1", replacedFileIds);
  HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(writeStats, partitionToReplaceFileIds, Option.empty(), WriteOperationType.INSERT, TRIP_SCHEMA, HoodieTimeline.REPLACE_COMMIT_ACTION);
  assertTrue(commitMetadata instanceof HoodieReplaceCommitMetadata);
  HoodieReplaceCommitMetadata replaceCommitMetadata = (HoodieReplaceCommitMetadata) commitMetadata;
  assertEquals(1, replaceCommitMetadata.getPartitionToReplaceFileIds().size());
  assertEquals("f0", replaceCommitMetadata.getPartitionToReplaceFileIds().get("p1").get(0));
  assertEquals(2, commitMetadata.getPartitionToWriteStats().size());
  assertEquals("f1", commitMetadata.getPartitionToWriteStats().get("p1").get(0).getFileId());
  assertEquals("f2", commitMetadata.getPartitionToWriteStats().get("p2").get(0).getFileId());
  assertEquals(WriteOperationType.INSERT, commitMetadata.getOperationType());
  assertEquals(TRIP_SCHEMA, commitMetadata.getMetadata(HoodieCommitMetadata.SCHEMA_KEY));
}
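The assertion on the metadata type relies on CommitUtils.buildMetadata returning the replace-specific subclass whenever the commit action is REPLACE_COMMIT_ACTION. A simplified sketch of that dispatch follows; it is not the actual Hudi implementation, and the local names (commitActionType, operationType, schema, writeStats) simply mirror the arguments the test passes in.
  // Simplified sketch of the action-based dispatch the test depends on (not the real CommitUtils code).
  HoodieCommitMetadata metadata;
  if (HoodieTimeline.REPLACE_COMMIT_ACTION.equals(commitActionType)) {
    HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
    replaceMetadata.setPartitionToReplaceFileIds(partitionToReplaceFileIds);
    metadata = replaceMetadata;
  } else {
    metadata = new HoodieCommitMetadata();
  }
  for (HoodieWriteStat stat : writeStats) {
    metadata.addWriteStat(stat.getPartitionPath(), stat);
  }
  metadata.setOperationType(operationType);
  metadata.addMetadata(HoodieCommitMetadata.SCHEMA_KEY, schema);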
Use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.
The class TestHoodieDeltaStreamer, method testCleanerDeleteReplacedDataWithArchive.
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testCleanerDeleteReplacedDataWithArchive(Boolean asyncClean) throws Exception {
  String tableBasePath = dfsBasePath + "/cleanerDeleteReplacedDataWithArchive" + asyncClean;
  int totalRecords = 3000;
  // Step 1 : Prepare and insert data without archival and cleaner.
  // Make sure that there are 6 commits, including 2 completed replacecommits.
  HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT);
  cfg.continuousMode = true;
  cfg.tableType = HoodieTableType.COPY_ON_WRITE.name();
  cfg.configs.addAll(getAsyncServicesConfigs(totalRecords, "false", "true", "2", "", ""));
  cfg.configs.add(String.format("%s=%s", HoodieCompactionConfig.PARQUET_SMALL_FILE_LIMIT.key(), "0"));
  cfg.configs.add(String.format("%s=%s", HoodieMetadataConfig.COMPACT_NUM_DELTA_COMMITS.key(), "1"));
  HoodieDeltaStreamer ds = new HoodieDeltaStreamer(cfg, jsc);
  deltaStreamerTestRunner(ds, cfg, (r) -> {
    TestHelpers.assertAtLeastNReplaceCommits(2, tableBasePath, dfs);
    return true;
  });
  TestHelpers.assertAtLeastNCommits(6, tableBasePath, dfs);
  TestHelpers.assertAtLeastNReplaceCommits(2, tableBasePath, dfs);
  // Step 2 : Get the first replacecommit and extract the corresponding replaced file IDs.
  HoodieTableMetaClient meta = HoodieTableMetaClient.builder().setConf(dfs.getConf()).setBasePath(tableBasePath).build();
  HoodieTimeline replacedTimeline = meta.reloadActiveTimeline().getCompletedReplaceTimeline();
  Option<HoodieInstant> firstReplaceHoodieInstant = replacedTimeline.nthFromLastInstant(1);
  assertTrue(firstReplaceHoodieInstant.isPresent());
  Option<byte[]> firstReplaceHoodieInstantDetails = replacedTimeline.getInstantDetails(firstReplaceHoodieInstant.get());
  HoodieReplaceCommitMetadata firstReplaceMetadata = HoodieReplaceCommitMetadata.fromBytes(firstReplaceHoodieInstantDetails.get(), HoodieReplaceCommitMetadata.class);
  Map<String, List<String>> partitionToReplaceFileIds = firstReplaceMetadata.getPartitionToReplaceFileIds();
  String partitionName = null;
  List<String> replacedFileIDs = null;
  for (Map.Entry<String, List<String>> entry : partitionToReplaceFileIds.entrySet()) {
    partitionName = entry.getKey();
    replacedFileIDs = entry.getValue();
  }
  assertNotNull(partitionName);
  assertNotNull(replacedFileIDs);
  // Step 3 : Based on the replacedFileIDs, resolve the corresponding full file paths.
  ArrayList<String> replacedFilePaths = new ArrayList<>();
  Path partitionPath = new Path(meta.getBasePath(), partitionName);
  RemoteIterator<LocatedFileStatus> hoodieFiles = meta.getFs().listFiles(partitionPath, true);
  while (hoodieFiles.hasNext()) {
    LocatedFileStatus f = hoodieFiles.next();
    String file = f.getPath().toUri().toString();
    for (String replacedFileID : replacedFileIDs) {
      if (file.contains(replacedFileID)) {
        replacedFilePaths.add(file);
      }
    }
  }
  assertFalse(replacedFilePaths.isEmpty());
  // Step 4 : Insert 1 record and trigger sync/async cleaner and archive.
  List<String> configs = getAsyncServicesConfigs(1, "true", "true", "2", "", "");
  configs.add(String.format("%s=%s", HoodieCompactionConfig.CLEANER_POLICY.key(), "KEEP_LATEST_COMMITS"));
  configs.add(String.format("%s=%s", HoodieCompactionConfig.CLEANER_COMMITS_RETAINED.key(), "1"));
  configs.add(String.format("%s=%s", HoodieCompactionConfig.MIN_COMMITS_TO_KEEP.key(), "2"));
  configs.add(String.format("%s=%s", HoodieCompactionConfig.MAX_COMMITS_TO_KEEP.key(), "3"));
  configs.add(String.format("%s=%s", HoodieCompactionConfig.ASYNC_CLEAN.key(), asyncClean));
  configs.add(String.format("%s=%s", HoodieMetadataConfig.COMPACT_NUM_DELTA_COMMITS.key(), "1"));
  if (asyncClean) {
    configs.add(String.format("%s=%s", HoodieWriteConfig.WRITE_CONCURRENCY_MODE.key(), WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.name()));
    configs.add(String.format("%s=%s", HoodieCompactionConfig.FAILED_WRITES_CLEANER_POLICY.key(), HoodieFailedWritesCleaningPolicy.LAZY.name()));
    configs.add(String.format("%s=%s", HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), InProcessLockProvider.class.getName()));
  }
  cfg.configs = configs;
  cfg.continuousMode = false;
  ds = new HoodieDeltaStreamer(cfg, jsc);
  ds.sync();
  // Step 5 : Make sure that firstReplaceHoodieInstant is archived.
  long count = meta.reloadActiveTimeline().getCompletedReplaceTimeline().getInstants().filter(instant -> firstReplaceHoodieInstant.get().equals(instant)).count();
  assertEquals(0, count);
  // Step 6 : All the replaced files in firstReplaceHoodieInstant should be deleted by the sync/async cleaner.
  for (String replacedFilePath : replacedFilePaths) {
    assertFalse(meta.getFs().exists(new Path(replacedFilePath)));
  }
}
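A complementary check, not part of the original test, could confirm that the first replacecommit now shows up on the archived timeline; the sketch below assumes meta.getArchivedTimeline() loads the archived instants and compares timestamps rather than full instant objects.
  // Hypothetical follow-up assertion: the archived timeline should now contain the first replacecommit.
  boolean archived = meta.getArchivedTimeline().getInstants()
      .anyMatch(instant -> instant.getTimestamp().equals(firstReplaceHoodieInstant.get().getTimestamp()));
  assertTrue(archived);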
Use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.
The class MetadataConversionUtils, method createMetaWrapper.
public static HoodieArchivedMetaEntry createMetaWrapper(HoodieInstant hoodieInstant, HoodieTableMetaClient metaClient) throws IOException {
  HoodieArchivedMetaEntry archivedMetaWrapper = new HoodieArchivedMetaEntry();
  archivedMetaWrapper.setCommitTime(hoodieInstant.getTimestamp());
  archivedMetaWrapper.setActionState(hoodieInstant.getState().name());
  switch (hoodieInstant.getAction()) {
    case HoodieTimeline.CLEAN_ACTION: {
      if (hoodieInstant.isCompleted()) {
        archivedMetaWrapper.setHoodieCleanMetadata(CleanerUtils.getCleanerMetadata(metaClient, hoodieInstant));
      } else {
        archivedMetaWrapper.setHoodieCleanerPlan(CleanerUtils.getCleanerPlan(metaClient, hoodieInstant));
      }
      archivedMetaWrapper.setActionType(ActionType.clean.name());
      break;
    }
    case HoodieTimeline.COMMIT_ACTION: {
      HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieCommitMetadata.class);
      archivedMetaWrapper.setHoodieCommitMetadata(convertCommitMetadata(commitMetadata));
      archivedMetaWrapper.setActionType(ActionType.commit.name());
      break;
    }
    case HoodieTimeline.DELTA_COMMIT_ACTION: {
      HoodieCommitMetadata deltaCommitMetadata = HoodieCommitMetadata.fromBytes(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieCommitMetadata.class);
      archivedMetaWrapper.setHoodieCommitMetadata(convertCommitMetadata(deltaCommitMetadata));
      archivedMetaWrapper.setActionType(ActionType.deltacommit.name());
      break;
    }
    case HoodieTimeline.REPLACE_COMMIT_ACTION: {
      if (hoodieInstant.isCompleted()) {
        HoodieReplaceCommitMetadata replaceCommitMetadata = HoodieReplaceCommitMetadata.fromBytes(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieReplaceCommitMetadata.class);
        archivedMetaWrapper.setHoodieReplaceCommitMetadata(ReplaceArchivalHelper.convertReplaceCommitMetadata(replaceCommitMetadata));
      } else if (hoodieInstant.isInflight()) {
        // Inflight replacecommit files have the same metadata body as HoodieCommitMetadata,
        // so we can reuse it without creating a separate inflight extension.
        // Note that inflight replacecommit files may be empty while clustering is in progress.
        Option<HoodieCommitMetadata> inflightCommitMetadata = getInflightReplaceMetadata(metaClient, hoodieInstant);
        if (inflightCommitMetadata.isPresent()) {
          archivedMetaWrapper.setHoodieInflightReplaceMetadata(convertCommitMetadata(inflightCommitMetadata.get()));
        }
      } else {
        // The requested HoodieRequestedReplaceMetadata may be empty, e.g. for insert_overwrite_table or
        // insert_overwrite without clustering. However, we should revisit the requested commit file standardization.
        Option<HoodieRequestedReplaceMetadata> requestedReplaceMetadata = getRequestedReplaceMetadata(metaClient, hoodieInstant);
        if (requestedReplaceMetadata.isPresent()) {
          archivedMetaWrapper.setHoodieRequestedReplaceMetadata(requestedReplaceMetadata.get());
        }
      }
      archivedMetaWrapper.setActionType(ActionType.replacecommit.name());
      break;
    }
    case HoodieTimeline.ROLLBACK_ACTION: {
      if (hoodieInstant.isCompleted()) {
        archivedMetaWrapper.setHoodieRollbackMetadata(TimelineMetadataUtils.deserializeAvroMetadata(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieRollbackMetadata.class));
      }
      archivedMetaWrapper.setActionType(ActionType.rollback.name());
      break;
    }
    case HoodieTimeline.SAVEPOINT_ACTION: {
      archivedMetaWrapper.setHoodieSavePointMetadata(TimelineMetadataUtils.deserializeAvroMetadata(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieSavepointMetadata.class));
      archivedMetaWrapper.setActionType(ActionType.savepoint.name());
      break;
    }
    case HoodieTimeline.COMPACTION_ACTION: {
      HoodieCompactionPlan plan = CompactionUtils.getCompactionPlan(metaClient, hoodieInstant.getTimestamp());
      archivedMetaWrapper.setHoodieCompactionPlan(plan);
      archivedMetaWrapper.setActionType(ActionType.compaction.name());
      break;
    }
    default: {
      throw new UnsupportedOperationException("Action not fully supported yet");
    }
  }
  return archivedMetaWrapper;
}
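For orientation, a minimal sketch of how an archival pass might drive this converter; the loop below is illustrative and is not the actual Hudi archiving code.
  // Illustrative driver: convert every instant on the active timeline into its archived representation.
  List<HoodieArchivedMetaEntry> archivedEntries = new ArrayList<>();
  for (HoodieInstant instant : metaClient.getActiveTimeline().getInstants().collect(Collectors.toList())) {
    archivedEntries.add(MetadataConversionUtils.createMetaWrapper(instant, metaClient));
  }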