Usage example of org.apache.hudi.common.table.timeline.HoodieInstant in the Apache Hudi project.
From class TestHoodieTimelineArchiver, method getAllArchivedCommitInstants.
/**
 * Builds the list of archived instants expected for the given commit times, all with the
 * supplied action and in COMPLETED state.
 *
 * @param commitTimes instant times to look up in the archived timeline
 * @param action      the timeline action (e.g. commit, clean) of every instant
 * @return all archived instants matching the given commit times and action
 */
private List<HoodieInstant> getAllArchivedCommitInstants(List<String> commitTimes, String action) {
  List<HoodieInstant> archivedInstants = new ArrayList<>();
  for (String commitTime : commitTimes) {
    archivedInstants.addAll(getArchivedInstants(new HoodieInstant(State.COMPLETED, action, commitTime)));
  }
  return archivedInstants;
}
Usage example of org.apache.hudi.common.table.timeline.HoodieInstant in the Apache Hudi project.
From class TestHoodieTimelineArchiver, method testArchiveTableWithCleanCommits.
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testArchiveTableWithCleanCommits(boolean enableMetadata) throws Exception {
HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(enableMetadata, 2, 4, 2);
// min archival commits is 2 and max archival commits is 4
// (either clean commits has to be > 4 or commits has to be greater than 4)
// and so, after 5th commit, 3 commits will be archived.
// 1,2,3,4,5,6 : after archival -> 1,5,6 (because, 2,3,4,5 and 6 are clean commits and are eligible for archival)
// after 7th and 8th commit no-op wrt archival.
Map<String, Integer> cleanStats = new HashMap<>();
cleanStats.put("p1", 1);
cleanStats.put("p2", 2);
for (int i = 1; i < 9; i++) {
if (i == 1) {
testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 10);
} else if (i < 7) {
testTable.doClean("0000000" + i, cleanStats);
} else {
testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
}
// trigger archival
Pair<List<HoodieInstant>, List<HoodieInstant>> commitsList = archiveAndGetCommitsList(writeConfig);
List<HoodieInstant> originalCommits = commitsList.getKey();
List<HoodieInstant> commitsAfterArchival = commitsList.getValue();
if (i < 6) {
assertEquals(originalCommits, commitsAfterArchival);
} else if (i == 6) {
if (!enableMetadata) {
// 1,2,3,4,5,6 : after archival -> 1,5,6 (bcoz, 2,3,4,5 and 6 are clean commits and are eligible for archival)
List<HoodieInstant> expectedActiveInstants = new ArrayList<>();
expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000001")));
expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000005", "00000006"), HoodieTimeline.CLEAN_ACTION));
verifyArchival(getAllArchivedCommitInstants(Arrays.asList("00000002", "00000003", "00000004"), HoodieTimeline.CLEAN_ACTION), expectedActiveInstants, commitsAfterArchival);
} else {
// with metadata enabled, archival in data table is fenced based on compaction in metadata table. Clean commits in data table will not trigger compaction in
// metadata table.
List<HoodieInstant> expectedActiveInstants = new ArrayList<>();
expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000001")));
expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000002", "00000003", "00000004", "00000005", "00000006"), HoodieTimeline.CLEAN_ACTION));
verifyArchival(getAllArchivedCommitInstants(Collections.emptyList(), HoodieTimeline.CLEAN_ACTION), expectedActiveInstants, commitsAfterArchival);
}
} else {
if (!enableMetadata) {
assertEquals(originalCommits, commitsAfterArchival);
} else {
if (i == 7) {
// when i == 7 compaction in metadata table will be triggered and hence archival in datatable will kick in.
// 1,2,3,4,5,6 : after archival -> 1,5,6 (bcoz, 2,3,4,5 and 6 are clean commits and are eligible for archival)
List<HoodieInstant> expectedActiveInstants = new ArrayList<>();
expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000001", "00000007")));
expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000005", "00000006"), HoodieTimeline.CLEAN_ACTION));
verifyArchival(getAllArchivedCommitInstants(Arrays.asList("00000002", "00000003", "00000004"), HoodieTimeline.CLEAN_ACTION), expectedActiveInstants, commitsAfterArchival);
} else {
assertEquals(originalCommits, commitsAfterArchival);
}
}
}
}
}
Usage example of org.apache.hudi.common.table.timeline.HoodieInstant in the Apache Hudi project.
From class TestHoodieTimelineArchiver, method testArchiveRollbacksAndCleanTestTable.
@Test
public void testArchiveRollbacksAndCleanTestTable() throws Exception {
int minArchiveCommits = 2;
int maxArchiveCommits = 9;
HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, minArchiveCommits, maxArchiveCommits, 2);
// trigger 1 commit to add lot of files so that future cleans can clean them up
testTable.doWriteOperation("00000001", WriteOperationType.UPSERT, Arrays.asList("p1", "p2"), Arrays.asList("p1", "p2"), 20);
Map<String, Integer> partitionToFileDeleteCount = new HashMap<>();
partitionToFileDeleteCount.put("p1", 1);
partitionToFileDeleteCount.put("p2", 1);
// we are triggering 10 clean commits. (1 is commit, 2 -> 11 is clean)
for (int i = 2; i <= (maxArchiveCommits + 2); i++) {
testTable.doClean((i > 9 ? ("000000") : ("0000000")) + i, partitionToFileDeleteCount);
}
// we are triggering 7 commits and 7 rollbacks for the same
for (int i = 12; i <= (2 * maxArchiveCommits); i += 2) {
testTable.doWriteOperation("000000" + i, WriteOperationType.UPSERT, Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
testTable.doRollback("000000" + i, "000000" + (i + 1));
}
// trigger archival
Pair<List<HoodieInstant>, List<HoodieInstant>> commitsList = archiveAndGetCommitsList(writeConfig);
List<HoodieInstant> originalCommits = commitsList.getKey();
List<HoodieInstant> commitsAfterArchival = commitsList.getValue();
// out of 10 clean commits, 8 will be archived. 2 to 9. 10 and 11 will be active.
// wrt regular commits, there aren't 9 commits yet and so all of them will be active.
List<HoodieInstant> expectedActiveInstants = new ArrayList<>();
expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000010", "00000011"), HoodieTimeline.CLEAN_ACTION));
expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000001", "00000012", "00000014", "00000016", "00000018")));
expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000013", "00000015", "00000017", "00000019"), HoodieTimeline.ROLLBACK_ACTION));
verifyArchival(getAllArchivedCommitInstants(Arrays.asList("00000002", "00000003", "00000004", "00000005", "00000006", "00000007", "00000008", "00000009"), HoodieTimeline.CLEAN_ACTION), expectedActiveInstants, commitsAfterArchival);
}
Usage example of org.apache.hudi.common.table.timeline.HoodieInstant in the Apache Hudi project.
From class TestHoodieTimelineArchiver, method testPendingClusteringWillBlockArchival.
@ParameterizedTest
@ValueSource(booleans = { true, false })
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testPendingClusteringWillBlockArchival(boolean enableMetadata) throws Exception {
  // A pending replacecommit (clustering) at instant 00000000 must fence off archival of
  // every instant that comes after it, no matter how many commits accumulate.
  HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(enableMetadata, 2, 5, 2);
  HoodieTestDataGenerator.createPendingReplaceFile(basePath, "00000000", wrapperFs.getConf());
  for (int commitIdx = 1; commitIdx <= 7; commitIdx++) {
    testTable.doWriteOperation("0000000" + commitIdx, WriteOperationType.UPSERT, Arrays.asList("p1", "p2"), Arrays.asList("p1", "p2"), 2);
    // archival: must be a no-op, i.e. the timeline before and after are identical
    Pair<List<HoodieInstant>, List<HoodieInstant>> archivalResult = archiveAndGetCommitsList(writeConfig);
    assertEquals(archivalResult.getKey(), archivalResult.getValue());
  }
  HoodieTimeline completedCommits = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
  assertEquals(7, completedCommits.countInstants(), "Since we have a pending clustering instant at 00000000, we should never archive any commit after 00000000");
}
Usage example of org.apache.hudi.common.table.timeline.HoodieInstant in the Apache Hudi project.
From class TestHoodieTimelineArchiver, method testArchiveTableWithMetadataTableCompaction.
@Test
@Test
public void testArchiveTableWithMetadataTableCompaction() throws Exception {
  // Metadata-table compaction runs every 7 delta commits; archival of the data table is
  // fenced until compaction catches up, so archival only advances right after a compaction.
  HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 4, 7);
  // first 5 commits: no metadata-table compaction yet, so archival stays a no-op
  for (int i = 1; i < 6; i++) {
    testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
    // trigger archival
    Pair<List<HoodieInstant>, List<HoodieInstant>> commitsList = archiveAndGetCommitsList(writeConfig);
    List<HoodieInstant> originalCommits = commitsList.getKey();
    List<HoodieInstant> commitsAfterArchival = commitsList.getValue();
    assertEquals(originalCommits, commitsAfterArchival);
  }
  // two more commits will trigger compaction in metadata table and will let archival move forward.
  testTable.doWriteOperation("00000006", WriteOperationType.UPSERT, Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
  testTable.doWriteOperation("00000007", WriteOperationType.UPSERT, Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
  // trigger archival
  Pair<List<HoodieInstant>, List<HoodieInstant>> commitsList = archiveAndGetCommitsList(writeConfig);
  List<HoodieInstant> originalCommits = commitsList.getKey();
  List<HoodieInstant> commitsAfterArchival = commitsList.getValue();
  // before archival: 1,2,3,4,5,6,7; after archival: 6,7
  // NOTE: assertEquals(expected, actual) — the expected constant goes first.
  assertEquals(5, originalCommits.size() - commitsAfterArchival.size());
  verifyArchival(getAllArchivedCommitInstants(Arrays.asList("00000001", "00000002", "00000003", "00000004", "00000005")), getActiveCommitInstants(Arrays.asList("00000006", "00000007")), commitsAfterArchival);
  // 2 more commits; 6 and 7 are candidates but archival cannot move past 6 until
  // compaction kicks in again in the metadata table.
  testTable.doWriteOperation("00000008", WriteOperationType.UPSERT, Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
  testTable.doWriteOperation("00000009", WriteOperationType.UPSERT, Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
  // trigger archival
  commitsList = archiveAndGetCommitsList(writeConfig);
  originalCommits = commitsList.getKey();
  commitsAfterArchival = commitsList.getValue();
  assertEquals(originalCommits, commitsAfterArchival);
  // ideally, this would archive commits 6, 7, 8 but since compaction in metadata is until 6, only 6 will get archived
  testTable.doWriteOperation("00000010", WriteOperationType.UPSERT, Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
  commitsList = archiveAndGetCommitsList(writeConfig);
  originalCommits = commitsList.getKey();
  commitsAfterArchival = commitsList.getValue();
  assertEquals(1, originalCommits.size() - commitsAfterArchival.size());
  verifyArchival(getAllArchivedCommitInstants(Arrays.asList("00000001", "00000002", "00000003", "00000004", "00000005", "00000006")), getActiveCommitInstants(Arrays.asList("00000007", "00000008", "00000009", "00000010")), commitsAfterArchival);
  // commits 11..13: 2nd compaction will take place at the 12th commit, but archival stays
  // a no-op until one more commit lands after it
  for (int i = 11; i < 14; i++) {
    testTable.doWriteOperation("000000" + i, WriteOperationType.UPSERT, Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
    // trigger archival
    commitsList = archiveAndGetCommitsList(writeConfig);
    originalCommits = commitsList.getKey();
    commitsAfterArchival = commitsList.getValue();
    assertEquals(originalCommits, commitsAfterArchival);
  }
  // one more commit will trigger compaction in metadata table and will let archival move forward.
  testTable.doWriteOperation("00000014", WriteOperationType.UPSERT, Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
  // trigger archival
  commitsList = archiveAndGetCommitsList(writeConfig);
  originalCommits = commitsList.getKey();
  commitsAfterArchival = commitsList.getValue();
  // before archival: 7,8,9,10,11,12,13,14; after archival: 13,14
  assertEquals(6, originalCommits.size() - commitsAfterArchival.size());
  verifyArchival(getAllArchivedCommitInstants(Arrays.asList("00000001", "00000002", "00000003", "00000004", "00000005", "00000006", "00000007", "00000008", "00000009", "00000010", "00000011", "00000012")), getActiveCommitInstants(Arrays.asList("00000013", "00000014")), commitsAfterArchival);
}
Aggregations