Use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache.
From the class TestHoodieTimelineArchiver, method testLoadArchiveTimelineWithUncompletedMergeArchiveFile.
@ParameterizedTest
@ValueSource(booleans = {true, false})
public void testLoadArchiveTimelineWithUncompletedMergeArchiveFile(boolean enableArchiveMerge) throws Exception {
  HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 3, 2, enableArchiveMerge, 3, 209715200);
  for (int i = 1; i < 8; i++) {
    testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT,
        i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
    archiveAndGetCommitsList(writeConfig);
  }
  HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table);
  FileStatus[] fsStatuses = metaClient.getFs().globStatus(
      new Path(metaClient.getArchivePath() + "/.commits_.archive*"));
  List<String> candidateFiles = Arrays.stream(fsStatuses)
      .map(fs -> fs.getPath().toString()).collect(Collectors.toList());
  archiver.reOpenWriter();
  archiver.buildArchiveMergePlan(candidateFiles,
      new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME),
      ".commits_.archive.3_1-0-1");
  archiver.mergeArchiveFiles(Arrays.stream(fsStatuses).collect(Collectors.toList()));
  HoodieLogFormat.Writer writer = archiver.reOpenWriter();
  String s = "Dummy Content";
  // Corrupt the freshly merged archive file.
  FileIOUtils.createFileInPath(metaClient.getFs(), writer.getLogFile().getPath(), Option.of(s.getBytes()));
  // If only the merged archive file is damaged, the exception raised while reading it
  // must be swallowed so the remaining archive files can still be loaded.
  HoodieActiveTimeline rawActiveTimeline1 = new HoodieActiveTimeline(metaClient, false);
  HoodieArchivedTimeline archivedTimeLine1 = metaClient.getArchivedTimeline();
  assertEquals(7 * 3, archivedTimeLine1.countInstants() + rawActiveTimeline1.countInstants());
  // If, besides the damaged merged archive file, an ordinary damaged archive file also
  // exists, Hudi must throw an exception while loading the archived timeline, because
  // parsing that damaged archive file fails.
  Path damagedFile = new Path(metaClient.getArchivePath(), ".commits_.archive.300_1-0-1");
  FileIOUtils.createFileInPath(metaClient.getFs(), damagedFile, Option.of(s.getBytes()));
  assertThrows(HoodieException.class, () -> metaClient.getArchivedTimeline().reload());
}
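For context, here is a minimal sketch, not part of the original test, of how archived instants can be read back after a successful archive pass. It reuses the metaClient from the snippet above and assumes loadCompletedInstantDetailsInMemory() is available on HoodieArchivedTimeline in this Hudi version:

  HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline().reload();
  // Eagerly load the details of completed instants (assumed API for this version).
  archivedTimeline.loadCompletedInstantDetailsInMemory();
  archivedTimeline.getInstants().forEach(instant ->
      System.out.println(instant.getTimestamp() + " -> " + instant.getAction()));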
Use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache.
From the class TestHoodieTimelineArchiver, method testArchiveCommitSavepointNoHole.
@ParameterizedTest
@ValueSource(booleans = {true, false})
public void testArchiveCommitSavepointNoHole(boolean enableMetadataTable) throws Exception {
  init();
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .forTable("test-trip-table")
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build())
      .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder().withRemoteServerPort(timelineServicePort).build())
      .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build())
      .build();
  HoodieTestDataGenerator.createCommitFile(basePath, "100", wrapperFs.getConf());
  HoodieTestDataGenerator.createCommitFile(basePath, "101", wrapperFs.getConf());
  HoodieTestDataGenerator.createSavepointFile(basePath, "101", wrapperFs.getConf());
  HoodieTestDataGenerator.createCommitFile(basePath, "102", wrapperFs.getConf());
  HoodieTestDataGenerator.createCommitFile(basePath, "103", wrapperFs.getConf());
  HoodieTestDataGenerator.createCommitFile(basePath, "104", wrapperFs.getConf());
  HoodieTestDataGenerator.createCommitFile(basePath, "105", wrapperFs.getConf());
  HoodieTable table = HoodieSparkTable.create(cfg, context);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
  if (enableMetadataTable) {
    // Simulate a compaction commit in the metadata table timeline
    // so that archival in the data table can happen.
    createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, "105");
  }
  HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
  assertEquals(6, timeline.countInstants(), "Loaded 6 commits and the count should match");
  assertTrue(archiver.archiveIfRequired(context));
  timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
  assertEquals(5, timeline.countInstants(),
      "With a savepoint at 101, no commit at or after 101 should ever be archived (only 100 is archived)");
  assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "101")),
      "Instants at or after the savepoint must stay on the active timeline");
  assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102")),
      "Instants at or after the savepoint must stay on the active timeline");
  assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")),
      "Instants at or after the savepoint must stay on the active timeline");
}
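A hedged follow-up check, not in the original test, that would confirm commit "100" actually moved into the archived timeline; containsInstant is the same HoodieTimeline call used in the assertions above, and reload() on HoodieArchivedTimeline appears in the first snippet on this page:

  HoodieArchivedTimeline archived = metaClient.getArchivedTimeline().reload();
  // "100" is the only instant the savepoint at "101" allows the archiver to move.
  assertTrue(archived.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "100")));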
Use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache.
From the class TestHoodieTimelineArchiver, method testArchiveCommitsWithCompactionCommitInMetadataTableTimeline.
@Test
public void testArchiveCommitsWithCompactionCommitInMetadataTableTimeline() throws Exception {
  HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 4, 20);
  int startInstantTime = 100;
  int numCommits = 15;
  // "100" through "105" should be archived in this case.
  int numExpectedArchived = 6;
  for (int i = startInstantTime; i < startInstantTime + numCommits; i++) {
    HoodieTestDataGenerator.createCommitFile(basePath, Integer.toString(i), wrapperFs.getConf());
  }
  // Simulate a compaction commit in the metadata table timeline
  // so that archival in the data table can happen.
  createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, "105");
  HoodieTable table = HoodieSparkTable.create(writeConfig, context);
  HoodieTimelineArchiver archiveLog = new HoodieTimelineArchiver(writeConfig, table);
  HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
  assertEquals(numCommits, timeline.countInstants(), String.format("Loaded %d commits and the count should match", numCommits));
  assertTrue(archiveLog.archiveIfRequired(context));
  timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
  assertEquals(numCommits - numExpectedArchived, timeline.countInstants(),
      "Since there is a compaction commit at 105 in the metadata table timeline, no commit after it should ever be archived");
  for (int i = startInstantTime + numExpectedArchived; i < startInstantTime + numCommits; i++) {
    assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, Integer.toString(i))),
        String.format("Commit %d should not be archived", i));
  }
}
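For reference, a minimal sketch of the archival knobs this test exercises. initTestTableAndGetWriteConfig is a test-local helper, so mapping its "4, 20" arguments onto archiveCommitsWith(min, max) is an assumption; the builder calls themselves are the same ones used in the other snippets on this page:

  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
      .forTable("test-trip-table")
      // Keep between 4 and 20 completed instants on the active timeline; older
      // instants become candidates for archival (assumed meaning of "4, 20").
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .retainCommits(1)
          .archiveCommitsWith(4, 20)
          .build())
      .build();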
Use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache.
From the class TestHoodieTimelineArchiver, method testArchiveInflightClean.
@ParameterizedTest
@ValueSource(booleans = {true, false})
public void testArchiveInflightClean(boolean enableMetadataTable) throws Exception {
  init();
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .forTable("test-trip-table")
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
      .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder().withRemoteServerPort(timelineServicePort).build())
      .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build())
      .build();
  metaClient = HoodieTableMetaClient.reload(metaClient);
  createCleanMetadata("10", false);
  createCleanMetadata("11", false);
  HoodieInstant notArchivedInstant1 = createCleanMetadata("12", false);
  HoodieInstant notArchivedInstant2 = createCleanMetadata("13", false);
  HoodieInstant notArchivedInstant3 = createCleanMetadata("14", true);
  if (enableMetadataTable) {
    // Simulate a compaction commit in the metadata table timeline
    // so that archival in the data table can happen.
    createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, "14");
  }
  HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
  archiver.archiveIfRequired(context);
  List<HoodieInstant> notArchivedInstants = metaClient.getActiveTimeline().reload().getInstants().collect(Collectors.toList());
  assertEquals(3, notArchivedInstants.size(), "There should be exactly 3 instants left unarchived");
  assertEquals(notArchivedInstants, Arrays.asList(notArchivedInstant1, notArchivedInstant2, notArchivedInstant3),
      "The remaining active-timeline instants should be the cleans at 12 and 13 plus the inflight clean at 14");
}
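A small hedged sketch, not in the original test, of inspecting which clean instants survived archival; getCleanerTimeline() and isCompleted() are assumed to be available on the active timeline and HoodieInstant in this Hudi version:

  HoodieTimeline cleanTimeline = metaClient.getActiveTimeline().reload().getCleanerTimeline();
  cleanTimeline.getInstants().forEach(instant ->
      // "12" and "13" should show as completed, the clean at "14" as still inflight.
      System.out.println(instant.getTimestamp() + " completed=" + instant.isCompleted()));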
Use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache.
From the class TestHoodieTimelineArchiver, method archiveAndGetCommitsList.
private Pair<List<HoodieInstant>, List<HoodieInstant>> archiveAndGetCommitsList(HoodieWriteConfig writeConfig) throws IOException {
  metaClient.reloadActiveTimeline();
  HoodieTimeline timeline = metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants();
  List<HoodieInstant> originalCommits = timeline.getInstants().collect(Collectors.toList());
  HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table);
  archiver.archiveIfRequired(context);
  timeline = metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants();
  List<HoodieInstant> commitsAfterArchival = timeline.getInstants().collect(Collectors.toList());
  return Pair.of(originalCommits, commitsAfterArchival);
}
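Typical consumption of this helper looks like the following sketch; the containment assertion is illustrative rather than taken from any specific test, and Pair is assumed to be the commons-style pair with getLeft()/getRight():

  Pair<List<HoodieInstant>, List<HoodieInstant>> commits = archiveAndGetCommitsList(writeConfig);
  List<HoodieInstant> originalCommits = commits.getLeft();
  List<HoodieInstant> commitsAfterArchival = commits.getRight();
  // Archival can only remove completed instants from the active timeline, never add new ones.
  assertTrue(originalCommits.containsAll(commitsAfterArchival));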