use of org.apache.hudi.table.HoodieTable in project hudi by apache.
the class TestHoodieTimelineArchiver method testArchiveCommitsWithCompactionCommitInMetadataTableTimeline.
/**
 * Checks data-table archival when the metadata table timeline carries a compaction commit.
 *
 * <p>Fifteen commit files ("100" through "114") are written and a compaction commit at "105" is
 * simulated in the metadata table timeline. After running the archiver, the active timeline is
 * expected to have lost six commits ("100" till "105") and to still contain every later commit.
 */
@Test
public void testArchiveCommitsWithCompactionCommitInMetadataTableTimeline() throws Exception {
  // NOTE(review): the meaning of each argument is defined by the helper, which is not part of this snippet.
  HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 4, 20);

  final int firstInstant = 100;
  final int totalCommits = 15;
  // "100" till "105" should be archived in this case
  final int expectedArchivedCount = 6;
  final int endInstantExclusive = firstInstant + totalCommits;

  int instant = firstInstant;
  while (instant < endInstantExclusive) {
    HoodieTestDataGenerator.createCommitFile(basePath, String.valueOf(instant), wrapperFs.getConf());
    instant++;
  }

  // Simulate a compaction commit in metadata table timeline
  // so the archival in data table can happen
  createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, "105");

  HoodieTable table = HoodieSparkTable.create(writeConfig, context);
  HoodieTimelineArchiver archiveLog = new HoodieTimelineArchiver(writeConfig, table);

  // Sanity check: every commit written above is visible as completed before archival.
  HoodieTimeline completedBefore = metaClient.getActiveTimeline()
      .getCommitsTimeline()
      .filterCompletedInstants();
  assertEquals(totalCommits, completedBefore.countInstants(),
      String.format("Loaded %d commits and the count should match", totalCommits));

  assertTrue(archiveLog.archiveIfRequired(context));

  // Reload so the assertions below observe the timeline as left by the archiver.
  HoodieTimeline completedAfter = metaClient.getActiveTimeline()
      .reload()
      .getCommitsTimeline()
      .filterCompletedInstants();
  assertEquals(totalCommits - expectedArchivedCount, completedAfter.countInstants(),
      "Since we have a compaction commit of 105 in metadata table timeline, we should never archive any commit after that");

  for (int retained = firstInstant + expectedArchivedCount; retained < endInstantExclusive; retained++) {
    HoodieInstant expectedInstant =
        new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, String.valueOf(retained));
    assertTrue(completedAfter.containsInstant(expectedInstant),
        String.format("Commit %d should not be archived", retained));
  }
}
use of org.apache.hudi.table.HoodieTable in project hudi by apache.
the class TestHoodieTimelineArchiver method testArchiveInflightClean.
/**
 * Checks which clean instants remain on the active timeline after archival, with the metadata
 * table both enabled and disabled.
 *
 * <p>Five clean instants ("10" through "14") are created; the last one is created with a
 * different flag value than the others (presumably marking it inflight, as the test name
 * suggests; the helper is not visible here). After archival, the active timeline is expected
 * to hold exactly the instants "12", "13" and "14", in that order.
 *
 * @param enableMetadataTable whether the write config enables the metadata table; when true, a
 *                            compaction commit at "14" is simulated in the metadata table timeline
 */
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testArchiveInflightClean(boolean enableMetadataTable) throws Exception {
  init();
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .forTable("test-trip-table")
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
      .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder().withRemoteServerPort(timelineServicePort).build())
      .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build())
      .build();
  metaClient = HoodieTableMetaClient.reload(metaClient);

  createCleanMetadata("10", false);
  createCleanMetadata("11", false);
  HoodieInstant notArchivedInstant1 = createCleanMetadata("12", false);
  HoodieInstant notArchivedInstant2 = createCleanMetadata("13", false);
  HoodieInstant notArchivedInstant3 = createCleanMetadata("14", true);

  if (enableMetadataTable) {
    // Simulate a compaction commit in metadata table timeline
    // so the archival in data table can happen
    createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, "14");
  }

  HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
  archiver.archiveIfRequired(context);

  List<HoodieInstant> expectedInstants =
      Arrays.asList(notArchivedInstant1, notArchivedInstant2, notArchivedInstant3);
  List<HoodieInstant> notArchivedInstants =
      metaClient.getActiveTimeline().reload().getInstants().collect(Collectors.toList());

  assertEquals(3, notArchivedInstants.size(), "Not archived instants should be 3");
  // JUnit 5 expects (expected, actual, message); keeping that order makes a failure report
  // label the two lists correctly.
  assertEquals(expectedInstants, notArchivedInstants,
      "Active timeline should contain exactly the clean instants 12, 13 and 14, in order");
}
use of org.apache.hudi.table.HoodieTable in project hudi by apache.
the class TestHoodieTimelineArchiver method archiveAndGetCommitsList.
/**
 * Runs the timeline archiver once and captures the completed commits around it.
 *
 * @param writeConfig write config used to create both the table and the archiver
 * @return a pair whose left side is the list of completed instants from the all-commits timeline
 *         before archival and whose right side is the same listing taken after archival
 * @throws IOException declared by the original signature; propagated from the calls made here
 */
private Pair<List<HoodieInstant>, List<HoodieInstant>> archiveAndGetCommitsList(HoodieWriteConfig writeConfig) throws IOException {
  metaClient.reloadActiveTimeline();

  // Snapshot of completed commits before the archiver runs.
  List<HoodieInstant> commitsBeforeArchival = metaClient.getActiveTimeline()
      .reload()
      .getAllCommitsTimeline()
      .filterCompletedInstants()
      .getInstants()
      .collect(Collectors.toList());

  HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient);
  new HoodieTimelineArchiver(writeConfig, table).archiveIfRequired(context);

  // Same listing again, reloaded so it reflects whatever the archiver changed.
  List<HoodieInstant> commitsAfterArchival = metaClient.getActiveTimeline()
      .reload()
      .getAllCommitsTimeline()
      .filterCompletedInstants()
      .getInstants()
      .collect(Collectors.toList());

  return Pair.of(commitsBeforeArchival, commitsAfterArchival);
}
use of org.apache.hudi.table.HoodieTable in project hudi by apache.
the class TestHoodieTimelineArchiver method testArchiveCommitTimeline.
/**
 * Checks that archival moves the oldest commits to the archived timeline and removes the marker
 * directory of an archived instant, with the metadata table both enabled and disabled.
 *
 * <p>Commit files "1" through "5" are created, plus a marker directory for instant "2". After
 * archival, the completed instants of the archived timeline are expected to be exactly "1", "2"
 * and "3", and the marker directory for "2" is expected to no longer exist.
 *
 * @param enableMetadataTable whether the write config enables the metadata table; when true, a
 *                            compaction commit at "5" is simulated in the metadata table timeline
 */
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testArchiveCommitTimeline(boolean enableMetadataTable) throws Exception {
  init();
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .forTable("test-trip-table")
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
      .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder().withRemoteServerPort(timelineServicePort).build())
      .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build())
      .build();
  metaClient = HoodieTableMetaClient.reload(metaClient);

  HoodieTestDataGenerator.createCommitFile(basePath, "1", wrapperFs.getConf());
  HoodieTestDataGenerator.createCommitFile(basePath, "2", wrapperFs.getConf());
  // Marker directory for instant "2"; asserted to be gone once archival has run.
  Path markerDirForSecondCommit = new Path(metaClient.getMarkerFolderPath("2"));
  wrapperFs.mkdirs(markerDirForSecondCommit);
  HoodieTestDataGenerator.createCommitFile(basePath, "3", wrapperFs.getConf());
  // add 2 more instants to pass filter criteria set in compaction config above
  HoodieTestDataGenerator.createCommitFile(basePath, "4", wrapperFs.getConf());
  HoodieTestDataGenerator.createCommitFile(basePath, "5", wrapperFs.getConf());

  if (enableMetadataTable) {
    // Simulate a compaction commit in metadata table timeline
    // so the archival in data table can happen
    createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, "5");
  }

  HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
  assertTrue(archiver.archiveIfRequired(context));

  // The three oldest commits are the ones expected on the archived timeline.
  HashSet<HoodieInstant> expectedArchived = new HashSet<>(Arrays.asList(
      new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "1"),
      new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "2"),
      new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "3")));
  HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline();
  assertEquals(expectedArchived,
      archivedTimeline.filterCompletedInstants().getInstants().collect(Collectors.toSet()));

  assertFalse(wrapperFs.exists(markerDirForSecondCommit));
}
use of org.apache.hudi.table.HoodieTable in project hudi by apache.
the class TestHoodieTimelineArchiver method testArchiveEmptyTable.
/**
 * Checks that running the archiver against a table on which this test has created no instants
 * still reports success.
 */
@Test
public void testArchiveEmptyTable() throws Exception {
  init();
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .forTable("test-trip-table")
      .build();
  metaClient = HoodieTableMetaClient.reload(metaClient);

  HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);

  // No commit files are written by this test; the archiver is still expected to return true.
  assertTrue(archiver.archiveIfRequired(context));
}
Aggregations