use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.
the class AbstractTableFileSystemView method resetFileGroupsReplaced.
/**
* Get replaced instant for each file group by looking at all commit instants.
*/
private void resetFileGroupsReplaced(HoodieTimeline timeline) {
  HoodieTimer hoodieTimer = new HoodieTimer();
  hoodieTimer.startTimer();
  // for each REPLACE instant, get map of (partitionPath -> deleteFileGroup)
  HoodieTimeline replacedTimeline = timeline.getCompletedReplaceTimeline();
  Stream<Map.Entry<HoodieFileGroupId, HoodieInstant>> resultStream = replacedTimeline.getInstants().flatMap(instant -> {
    try {
      HoodieReplaceCommitMetadata replaceMetadata = HoodieReplaceCommitMetadata.fromBytes(
          metaClient.getActiveTimeline().getInstantDetails(instant).get(), HoodieReplaceCommitMetadata.class);
      // get the replace instant mapping for each (partition, fileId)
      return replaceMetadata.getPartitionToReplaceFileIds().entrySet().stream()
          .flatMap(entry -> entry.getValue().stream()
              .map(e -> new AbstractMap.SimpleEntry<>(new HoodieFileGroupId(entry.getKey(), e), instant)));
    } catch (HoodieIOException ex) {
      if (ex.getIOException() instanceof FileNotFoundException) {
        // The replace instant may have been deleted by the archiver, in which case
        // getInstantDetails throws a FileNotFoundException; catch it here and continue.
        LOG.warn(ex.getMessage());
        return Stream.empty();
      } else {
        throw ex;
      }
    } catch (IOException e) {
      throw new HoodieIOException("error reading commit metadata for " + instant);
    }
  });
  Map<HoodieFileGroupId, HoodieInstant> replacedFileGroups = resultStream.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
  resetReplacedFileGroups(replacedFileGroups);
  LOG.info("Took " + hoodieTimer.endTimer() + " ms to read " + replacedTimeline.countInstants()
      + " instants, " + replacedFileGroups.size() + " replaced file groups");
}
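For quick reference, here is a minimal sketch of reading the same metadata for a single instant rather than for the whole replaced timeline. It assumes a HoodieTableMetaClient named metaClient is in scope and lets the IOException from fromBytes propagate; every call used here appears in the snippet above or on HoodieTimeline.
// Minimal sketch: read the replace metadata of the latest completed replace
// instant and print the file IDs it logically deleted, per partition.
static void printLatestReplacedFileIds(HoodieTableMetaClient metaClient) throws IOException {
  HoodieTimeline replacedTimeline = metaClient.getActiveTimeline().getCompletedReplaceTimeline();
  Option<HoodieInstant> latestReplace = replacedTimeline.lastInstant();
  if (latestReplace.isPresent()) {
    HoodieReplaceCommitMetadata metadata = HoodieReplaceCommitMetadata.fromBytes(
        metaClient.getActiveTimeline().getInstantDetails(latestReplace.get()).get(),
        HoodieReplaceCommitMetadata.class);
    metadata.getPartitionToReplaceFileIds().forEach(
        (partition, fileIds) -> System.out.println(partition + " -> " + fileIds));
  }
}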
use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.
the class TestCleaner method testCleanWithReplaceCommits.
@Test
public void testCleanWithReplaceCommits() throws Exception {
  HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
      .withMetadataConfig(HoodieMetadataConfig.newBuilder()
          .withMaxNumDeltaCommitsBeforeCompaction(1)
          .withAssumeDatePartitioning(true).build())
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
          .retainCommits(2).build())
      .build();
  HoodieTableMetadataWriter metadataWriter = SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context);
  HoodieTestTable testTable = HoodieMetadataTestTable.of(metaClient, metadataWriter);
  String p0 = "2020/01/01";
  String p1 = "2020/01/02";
  // make 1 commit, with 1 file per partition
  String file1P0C0 = UUID.randomUUID().toString();
  String file1P1C0 = UUID.randomUUID().toString();
  testTable.addInflightCommit("00000000000001")
      .withBaseFilesInPartition(p0, file1P0C0)
      .withBaseFilesInPartition(p1, file1P1C0);
  HoodieCommitMetadata commitMetadata = generateCommitMetadata("00000000000001",
      Collections.unmodifiableMap(new HashMap<String, List<String>>() {
        {
          put(p0, CollectionUtils.createImmutableList(file1P0C0));
          put(p1, CollectionUtils.createImmutableList(file1P1C0));
        }
      }));
  metadataWriter.update(commitMetadata, "00000000000001", false);
  metaClient.getActiveTimeline().saveAsComplete(
      new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000001"),
      Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
  metaClient = HoodieTableMetaClient.reload(metaClient);
  List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config);
  assertEquals(0, hoodieCleanStatsOne.size(), "Must not scan any partitions and clean any files");
  assertTrue(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
  assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
  // make next replacecommit, with 1 clustering operation. Logically delete p0. No change to p1.
  // Note that clustering generates empty inflight commit files.
  Map<String, String> partitionAndFileId002 = testTable.forReplaceCommit("00000000000002").getFileIdsWithBaseFilesInPartitions(p0);
  String file2P0C1 = partitionAndFileId002.get(p0);
  Pair<HoodieRequestedReplaceMetadata, HoodieReplaceCommitMetadata> replaceMetadata =
      generateReplaceCommitMetadata("00000000000002", p0, file1P0C0, file2P0C1);
  testTable.addReplaceCommit("00000000000002", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue());
  // run cleaner
  List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config);
  assertEquals(0, hoodieCleanStatsTwo.size(), "Must not scan any partitions and clean any files");
  assertTrue(testTable.baseFileExists(p0, "00000000000002", file2P0C1));
  assertTrue(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
  assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
  // make next replacecommit, with 1 clustering operation. Replace data in p1. No change to p0.
  // Note that clustering generates empty inflight commit files.
  Map<String, String> partitionAndFileId003 = testTable.forReplaceCommit("00000000000003").getFileIdsWithBaseFilesInPartitions(p1);
  String file3P1C2 = partitionAndFileId003.get(p1);
  replaceMetadata = generateReplaceCommitMetadata("00000000000003", p1, file1P1C0, file3P1C2);
  testTable.addReplaceCommit("00000000000003", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue());
  // run cleaner
  List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config);
  assertEquals(0, hoodieCleanStatsThree.size(), "Must not scan any partitions and clean any files");
  assertTrue(testTable.baseFileExists(p0, "00000000000002", file2P0C1));
  assertTrue(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
  assertTrue(testTable.baseFileExists(p1, "00000000000003", file3P1C2));
  assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
  // make next replacecommit, with 1 clustering operation. Replace data in p0 again.
  // Note that clustering generates empty inflight commit files.
  Map<String, String> partitionAndFileId004 = testTable.forReplaceCommit("00000000000004").getFileIdsWithBaseFilesInPartitions(p0);
  String file4P0C3 = partitionAndFileId004.get(p0);
  replaceMetadata = generateReplaceCommitMetadata("00000000000004", p0, file2P0C1, file4P0C3);
  testTable.addReplaceCommit("00000000000004", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue());
  // run cleaner
  List<HoodieCleanStat> hoodieCleanStatsFour = runCleaner(config, 5);
  assertTrue(testTable.baseFileExists(p0, "00000000000004", file4P0C3));
  assertTrue(testTable.baseFileExists(p0, "00000000000002", file2P0C1));
  assertTrue(testTable.baseFileExists(p1, "00000000000003", file3P1C2));
  assertFalse(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
  // file1P1C0 still exists because it wasn't replaced until commit 3 and it's the only version available
  assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
  // make next replacecommit, with 1 clustering operation. Replace the remaining data in p1.
  // Note that clustering generates empty inflight commit files.
  Map<String, String> partitionAndFileId006 = testTable.forReplaceCommit("00000000000006").getFileIdsWithBaseFilesInPartitions(p1);
  String file4P1C4 = partitionAndFileId006.get(p1);
  replaceMetadata = generateReplaceCommitMetadata("00000000000006", p1, file3P1C2, file4P1C4);
  testTable.addReplaceCommit("00000000000006", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue());
  List<HoodieCleanStat> hoodieCleanStatsFive = runCleaner(config, 7);
  assertTrue(testTable.baseFileExists(p0, "00000000000004", file4P0C3));
  assertTrue(testTable.baseFileExists(p0, "00000000000002", file2P0C1));
  assertTrue(testTable.baseFileExists(p1, "00000000000003", file3P1C2));
  assertFalse(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
  assertFalse(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
}
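The generateReplaceCommitMetadata helper used above is not part of this excerpt. As a rough, hypothetical sketch of its completed-metadata half (the HoodieRequestedReplaceMetadata side is omitted), built only from setters shown elsewhere on this page:
// Hypothetical sketch, not the test's actual helper: build the completed
// HoodieReplaceCommitMetadata for swapping one file group in one partition.
private static HoodieReplaceCommitMetadata replaceCommitMetadataFor(String partition, String replacedFileId, String newFileId) {
  HoodieReplaceCommitMetadata metadata = new HoodieReplaceCommitMetadata();
  metadata.setOperationType(WriteOperationType.CLUSTER);
  // mark the old file group as logically deleted in this partition
  Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
  partitionToReplaceFileIds.put(partition, Collections.singletonList(replacedFileId));
  metadata.setPartitionToReplaceFileIds(partitionToReplaceFileIds);
  // record the newly written base file that replaces it
  HoodieWriteStat writeStat = new HoodieWriteStat();
  writeStat.setPartitionPath(partition);
  writeStat.setFileId(newFileId);
  metadata.addWriteStat(partition, writeStat);
  return metadata;
}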
use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.
the class TestTimelineUtils method getReplaceCommitMetadata.
private byte[] getReplaceCommitMetadata(String basePath, String commitTs, String replacePartition, int replaceCount,
    String newFilePartition, int newFileCount, Map<String, String> extraMetadata, WriteOperationType operationType) throws IOException {
  HoodieReplaceCommitMetadata commit = new HoodieReplaceCommitMetadata();
  commit.setOperationType(operationType);
  // add a write stat for each new file in the target partition
  for (int i = 1; i <= newFileCount; i++) {
    HoodieWriteStat stat = new HoodieWriteStat();
    stat.setFileId(i + "");
    stat.setPartitionPath(Paths.get(basePath, newFilePartition).toString());
    stat.setPath(commitTs + "." + i + metaClient.getTableConfig().getBaseFileFormat().getFileExtension());
    commit.addWriteStat(newFilePartition, stat);
  }
  // record generated file IDs as replaced in the replaced partition
  Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
  if (replaceCount > 0) {
    partitionToReplaceFileIds.put(replacePartition, new ArrayList<>());
  }
  for (int i = 1; i <= replaceCount; i++) {
    partitionToReplaceFileIds.get(replacePartition).add(FSUtils.createNewFileIdPfx());
  }
  commit.setPartitionToReplaceFileIds(partitionToReplaceFileIds);
  for (Map.Entry<String, String> extraEntries : extraMetadata.entrySet()) {
    commit.addMetadata(extraEntries.getKey(), extraEntries.getValue());
  }
  return commit.toJsonString().getBytes(StandardCharsets.UTF_8);
}
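The bytes returned here round-trip through HoodieReplaceCommitMetadata.fromBytes, which is how the file-system view snippet above consumes them; a short check with illustrative argument values:
// Round-trip sketch (argument values are illustrative): parse the JSON bytes
// back into metadata and verify the replaced file IDs survive serialization.
byte[] bytes = getReplaceCommitMetadata(basePath, "001", "p1", 2, "p2", 3,
    Collections.emptyMap(), WriteOperationType.INSERT_OVERWRITE);
HoodieReplaceCommitMetadata parsed = HoodieReplaceCommitMetadata.fromBytes(bytes, HoodieReplaceCommitMetadata.class);
assertEquals(2, parsed.getPartitionToReplaceFileIds().get("p1").size());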
use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.
the class HoodieTestReplaceCommitMetadataGenerator method generateReplaceCommitMetadata.
private static HoodieReplaceCommitMetadata generateReplaceCommitMetadata(HashMap<String, List<String>> partitionToFilePaths,
    Option<Integer> writes, Option<Integer> updates) {
  HoodieReplaceCommitMetadata metadata = new HoodieReplaceCommitMetadata();
  // add a default write stat for every file path in every partition
  partitionToFilePaths.forEach((key, value) -> value.forEach(f -> {
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setPartitionPath(key);
    writeStat.setPath(DEFAULT_PATH);
    writeStat.setFileId(DEFAULT_FILEID);
    writeStat.setTotalWriteBytes(DEFAULT_TOTAL_WRITE_BYTES);
    writeStat.setPrevCommit(DEFAULT_PRE_COMMIT);
    writeStat.setNumWrites(writes.orElse(DEFAULT_NUM_WRITES));
    writeStat.setNumUpdateWrites(updates.orElse(DEFAULT_NUM_UPDATE_WRITES));
    writeStat.setTotalLogBlocks(DEFAULT_TOTAL_LOG_BLOCKS);
    writeStat.setTotalLogRecords(DEFAULT_TOTAL_LOG_RECORDS);
    metadata.addWriteStat(key, writeStat);
  }));
  metadata.setPartitionToReplaceFileIds(new HashMap<String, List<String>>() {
    {
      // TODO fix
      put(DEFAULT_FIRST_PARTITION_PATH, createImmutableList(baseFileName(DEFAULT_FIRST_PARTITION_PATH, "1")));
    }
  });
  return metadata;
}
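To be visible to readers like resetFileGroupsReplaced above, metadata generated this way would then be serialized and saved as a completed replacecommit on the timeline, mirroring the saveAsComplete call in the TestCleaner snippet; a hedged sketch with a placeholder instant time and input map:
// Illustrative only: attach generated replace metadata to the active timeline
// as a completed replacecommit. Instant time and input map are placeholders.
HoodieReplaceCommitMetadata metadata = generateReplaceCommitMetadata(partitionToFilePaths, Option.empty(), Option.empty());
metaClient.getActiveTimeline().saveAsComplete(
    new HoodieInstant(State.INFLIGHT, HoodieTimeline.REPLACE_COMMIT_ACTION, "00000000000001"),
    Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));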