Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.
The class TestHoodieTimelineArchiver, method testLoadArchiveTimelineWithDamagedPlanFile.
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testLoadArchiveTimelineWithDamagedPlanFile(boolean enableArchiveMerge) throws Exception {
  HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 3, 2, enableArchiveMerge, 3, 209715200);
  // do ingestion and trigger archive actions here.
  for (int i = 1; i < 8; i++) {
    testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
    archiveAndGetCommitsList(writeConfig);
  }
  Path plan = new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME);
  String s = "Dummy Content";
  // corrupt the current merge plan file.
  FileIOUtils.createFileInPath(metaClient.getFs(), plan, Option.of(s.getBytes()));
  // check that a damaged plan file does not block archived timeline loading.
  HoodieActiveTimeline rawActiveTimeline = new HoodieActiveTimeline(metaClient, false);
  HoodieArchivedTimeline archivedTimeLine = metaClient.getArchivedTimeline().reload();
  assertEquals(7 * 3, rawActiveTimeline.countInstants() + archivedTimeLine.countInstants());
  // if there are damaged archive files in addition to the damaged plan, Hudi should throw an exception while loading the archived timeline.
  Path damagedFile = new Path(metaClient.getArchivePath(), ".commits_.archive.300_1-0-1");
  FileIOUtils.createFileInPath(metaClient.getFs(), damagedFile, Option.of(s.getBytes()));
  assertThrows(HoodieException.class, () -> metaClient.getArchivedTimeline().reload());
}
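For readability, here is the same helper call with each positional argument labeled. The labels are inferred from the full builder shown in the next snippet below; the 7-argument overload itself is not shown on this page, so treat the mapping as an assumption.

  HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(
      true,               // enableMetadata
      2,                  // minArchivalCommits: archive down to 2 retained instants
      3,                  // maxArchivalCommits: archival kicks in once more than 3 instants are eligible
      2,                  // maxDeltaCommits before compaction
      enableArchiveMerge, // whether small archive files are merged
      3,                  // archiveFilesBatch: number of archive files merged per batch
      209715200);         // small-file limit for archive merging, 200 * 1024 * 1024 bytes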
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.
The class TestHoodieTimelineArchiver, method initTestTableAndGetWriteConfig.
private HoodieWriteConfig initTestTableAndGetWriteConfig(boolean enableMetadata, int minArchivalCommits, int maxArchivalCommits, int maxDeltaCommits,
                                                         int maxDeltaCommitsMetadataTable, HoodieTableType tableType, boolean enableArchiveMerge,
                                                         int archiveFilesBatch, long size) throws Exception {
  init(tableType);
  HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(minArchivalCommits, maxArchivalCommits)
          .withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommits).withArchiveMergeEnable(enableArchiveMerge)
          .withArchiveMergeFilesBatchSize(archiveFilesBatch).withArchiveMergeSmallFileLimit(size).build())
      .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder().withRemoteServerPort(timelineServicePort).build())
      .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadata).withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommitsMetadataTable).build())
      .forTable("test-trip-table").build();
  initWriteConfigAndMetatableWriter(writeConfig, enableMetadata);
  return writeConfig;
}
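The tests on this page also call 4-argument and 7-argument variants of this helper, which the page does not include. A plausible sketch of those overloads, assuming they simply delegate to the full version above; the default values chosen here are assumptions for illustration, not taken from the page.

  // Hypothetical delegating overloads (defaults are assumptions, not from the Hudi source shown here).
  private HoodieWriteConfig initTestTableAndGetWriteConfig(boolean enableMetadata, int minArchivalCommits, int maxArchivalCommits, int maxDeltaCommits) throws Exception {
    // assume archive merging disabled and a COPY_ON_WRITE table by default
    return initTestTableAndGetWriteConfig(enableMetadata, minArchivalCommits, maxArchivalCommits, maxDeltaCommits,
        2, HoodieTableType.COPY_ON_WRITE, false, 10, 209715200);
  }

  private HoodieWriteConfig initTestTableAndGetWriteConfig(boolean enableMetadata, int minArchivalCommits, int maxArchivalCommits, int maxDeltaCommits,
                                                           boolean enableArchiveMerge, int archiveFilesBatch, long size) throws Exception {
    return initTestTableAndGetWriteConfig(enableMetadata, minArchivalCommits, maxArchivalCommits, maxDeltaCommits,
        2, HoodieTableType.COPY_ON_WRITE, enableArchiveMerge, archiveFilesBatch, size);
  }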
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.
The class TestHoodieTimelineArchiver, method testArchiveTableWithCleanCommits.
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testArchiveTableWithCleanCommits(boolean enableMetadata) throws Exception {
  HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(enableMetadata, 2, 4, 2);
  // min archival commits is 2 and max archival commits is 4
  // (either the clean commits or the regular commits have to exceed 4)
  // and so, after the 5th clean (the 6th instant overall), 3 clean commits will be archived.
  // 1,2,3,4,5,6 : after archival -> 1,5,6 (because 2,3,4,5 and 6 are clean commits and are eligible for archival)
  // the 7th and 8th commits are no-ops w.r.t. archival.
  Map<String, Integer> cleanStats = new HashMap<>();
  cleanStats.put("p1", 1);
  cleanStats.put("p2", 2);
  for (int i = 1; i < 9; i++) {
    if (i == 1) {
      testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 10);
    } else if (i < 7) {
      testTable.doClean("0000000" + i, cleanStats);
    } else {
      testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
    }
    // trigger archival
    Pair<List<HoodieInstant>, List<HoodieInstant>> commitsList = archiveAndGetCommitsList(writeConfig);
    List<HoodieInstant> originalCommits = commitsList.getKey();
    List<HoodieInstant> commitsAfterArchival = commitsList.getValue();
    if (i < 6) {
      assertEquals(originalCommits, commitsAfterArchival);
    } else if (i == 6) {
      if (!enableMetadata) {
        // 1,2,3,4,5,6 : after archival -> 1,5,6 (because 2,3,4,5 and 6 are clean commits and are eligible for archival)
        List<HoodieInstant> expectedActiveInstants = new ArrayList<>();
        expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000001")));
        expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000005", "00000006"), HoodieTimeline.CLEAN_ACTION));
        verifyArchival(getAllArchivedCommitInstants(Arrays.asList("00000002", "00000003", "00000004"), HoodieTimeline.CLEAN_ACTION), expectedActiveInstants, commitsAfterArchival);
      } else {
        // with metadata enabled, archival in the data table is fenced based on compaction in the metadata table.
        // Clean commits in the data table will not trigger compaction in the metadata table.
        List<HoodieInstant> expectedActiveInstants = new ArrayList<>();
        expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000001")));
        expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000002", "00000003", "00000004", "00000005", "00000006"), HoodieTimeline.CLEAN_ACTION));
        verifyArchival(getAllArchivedCommitInstants(Collections.emptyList(), HoodieTimeline.CLEAN_ACTION), expectedActiveInstants, commitsAfterArchival);
      }
    } else {
      if (!enableMetadata) {
        assertEquals(originalCommits, commitsAfterArchival);
      } else {
        if (i == 7) {
          // when i == 7, compaction in the metadata table is triggered and hence archival in the data table kicks in.
          // 1,2,3,4,5,6 : after archival -> 1,5,6 (because 2,3,4,5 and 6 are clean commits and are eligible for archival)
          List<HoodieInstant> expectedActiveInstants = new ArrayList<>();
          expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000001", "00000007")));
          expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000005", "00000006"), HoodieTimeline.CLEAN_ACTION));
          verifyArchival(getAllArchivedCommitInstants(Arrays.asList("00000002", "00000003", "00000004"), HoodieTimeline.CLEAN_ACTION), expectedActiveInstants, commitsAfterArchival);
        } else {
          assertEquals(originalCommits, commitsAfterArchival);
        }
      }
    }
  }
}
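A minimal sketch of the retention arithmetic described in the comments above, using plain collections rather than Hudi APIs: with minArchivalCommits = 2 and maxArchivalCommits = 4, archival triggers once more than 4 eligible instants accumulate and trims them back to the 2 most recent.

  int minInstantsToKeep = 2;
  int maxInstantsBeforeArchival = 4;
  List<String> cleanInstants = Arrays.asList("00000002", "00000003", "00000004", "00000005", "00000006");
  List<String> archived = cleanInstants.size() > maxInstantsBeforeArchival
      ? cleanInstants.subList(0, cleanInstants.size() - minInstantsToKeep)
      : Collections.<String>emptyList();
  // archived -> [00000002, 00000003, 00000004]; retained -> [00000005, 00000006], plus the regular commit 00000001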
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.
The class TestHoodieTimelineArchiver, method testArchiveRollbacksAndCleanTestTable.
@Test
public void testArchiveRollbacksAndCleanTestTable() throws Exception {
  int minArchiveCommits = 2;
  int maxArchiveCommits = 9;
  HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, minArchiveCommits, maxArchiveCommits, 2);
  // trigger 1 commit to add a lot of files so that future cleans can clean them up
  testTable.doWriteOperation("00000001", WriteOperationType.UPSERT, Arrays.asList("p1", "p2"), Arrays.asList("p1", "p2"), 20);
  Map<String, Integer> partitionToFileDeleteCount = new HashMap<>();
  partitionToFileDeleteCount.put("p1", 1);
  partitionToFileDeleteCount.put("p2", 1);
  // we are triggering 10 clean commits (1 is a commit, 2 -> 11 are cleans)
  for (int i = 2; i <= (maxArchiveCommits + 2); i++) {
    testTable.doClean((i > 9 ? ("000000") : ("0000000")) + i, partitionToFileDeleteCount);
  }
  // we are triggering 4 commits and 4 matching rollbacks (12,14,16,18 are commits; 13,15,17,19 are rollbacks)
  for (int i = 12; i <= (2 * maxArchiveCommits); i += 2) {
    testTable.doWriteOperation("000000" + i, WriteOperationType.UPSERT, Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
    testTable.doRollback("000000" + i, "000000" + (i + 1));
  }
  // trigger archival
  Pair<List<HoodieInstant>, List<HoodieInstant>> commitsList = archiveAndGetCommitsList(writeConfig);
  List<HoodieInstant> originalCommits = commitsList.getKey();
  List<HoodieInstant> commitsAfterArchival = commitsList.getValue();
  // out of 10 clean commits, 8 will be archived (2 to 9); 10 and 11 will stay active.
  // w.r.t. regular commits, there aren't 9 commits yet and so all of them will stay active.
  List<HoodieInstant> expectedActiveInstants = new ArrayList<>();
  expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000010", "00000011"), HoodieTimeline.CLEAN_ACTION));
  expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000001", "00000012", "00000014", "00000016", "00000018")));
  expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000013", "00000015", "00000017", "00000019"), HoodieTimeline.ROLLBACK_ACTION));
  verifyArchival(getAllArchivedCommitInstants(Arrays.asList("00000002", "00000003", "00000004", "00000005", "00000006", "00000007", "00000008", "00000009"), HoodieTimeline.CLEAN_ACTION), expectedActiveInstants, commitsAfterArchival);
}
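A side note on the instant times built above: the ternary ((i > 9 ? "000000" : "0000000") + i) simply zero-pads the counter to 8 characters. An equivalent, arguably clearer, formulation:

  String instantTime = String.format("%08d", 11); // -> "00000011"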
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.
The class TestHoodieTimelineArchiver, method testPendingClusteringWillBlockArchival.
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testPendingClusteringWillBlockArchival(boolean enableMetadata) throws Exception {
  HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(enableMetadata, 2, 5, 2);
  HoodieTestDataGenerator.createPendingReplaceFile(basePath, "00000000", wrapperFs.getConf());
  for (int i = 1; i < 8; i++) {
    testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT, Arrays.asList("p1", "p2"), Arrays.asList("p1", "p2"), 2);
    // archival
    Pair<List<HoodieInstant>, List<HoodieInstant>> commitsList = archiveAndGetCommitsList(writeConfig);
    List<HoodieInstant> originalCommits = commitsList.getKey();
    List<HoodieInstant> commitsAfterArchival = commitsList.getValue();
    assertEquals(originalCommits, commitsAfterArchival);
  }
  HoodieTimeline timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
  assertEquals(7, timeline.countInstants(), "Since we have a pending clustering instant at 00000000, we should never archive any commit after 00000000");
}
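As this test illustrates, a single pending replacecommit (clustering) instant fences archival for everything after it. A minimal sketch of how one might inspect the timeline for such pending clustering instants; it assumes the filterPendingReplaceTimeline() method available on the timeline in recent Hudi versions, which may differ in other releases.

  // check whether any pending clustering/replace instant exists on the active timeline (assumed API)
  Option<HoodieInstant> pendingReplace = metaClient.getActiveTimeline().filterPendingReplaceTimeline().firstInstant();
  if (pendingReplace.isPresent()) {
    // archival will not move past this instant
    System.out.println("Pending replacecommit at " + pendingReplace.get().getTimestamp());
  }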