Use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache: class TestHoodieTimelineArchiver, method testArchiveCommitTimeline.
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testArchiveCommitTimeline(boolean enableMetadataTable) throws Exception {
  init();
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .forTable("test-trip-table")
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
      .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder().withRemoteServerPort(timelineServicePort).build())
      .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build())
      .build();
  metaClient = HoodieTableMetaClient.reload(metaClient);
  HoodieTestDataGenerator.createCommitFile(basePath, "1", wrapperFs.getConf());
  HoodieInstant instant1 = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "1");
  HoodieTestDataGenerator.createCommitFile(basePath, "2", wrapperFs.getConf());
  Path markerPath = new Path(metaClient.getMarkerFolderPath("2"));
  wrapperFs.mkdirs(markerPath);
  HoodieInstant instant2 = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "2");
  HoodieTestDataGenerator.createCommitFile(basePath, "3", wrapperFs.getConf());
  HoodieInstant instant3 = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "3");
  // add 2 more instants to pass filter criteria set in compaction config above
  HoodieTestDataGenerator.createCommitFile(basePath, "4", wrapperFs.getConf());
  HoodieTestDataGenerator.createCommitFile(basePath, "5", wrapperFs.getConf());
  if (enableMetadataTable) {
    // Simulate a compaction commit in metadata table timeline
    // so the archival in data table can happen
    createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, "5");
  }
  HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
  boolean result = archiver.archiveIfRequired(context);
  assertTrue(result);
  HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline();
  List<HoodieInstant> archivedInstants = Arrays.asList(instant1, instant2, instant3);
  assertEquals(new HashSet<>(archivedInstants),
      archivedTimeline.filterCompletedInstants().getInstants().collect(Collectors.toSet()));
  assertFalse(wrapperFs.exists(markerPath));
}
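The test above boils down to a small, reusable pattern: build a write config with archival thresholds, create the table object, and let the archiver decide whether anything needs to move off the active timeline. Below is a minimal sketch of that pattern outside a test, using only calls that appear in the code above; it assumes an existing engine context (context), a HoodieTableMetaClient (metaClient), and a table basePath, and the retention numbers are illustrative.

// Minimal sketch (assumptions: `context`, `metaClient`, and `basePath` already exist).
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    // Illustrative thresholds: archive once the active timeline grows past the
    // configured maximum, keeping the configured minimum number of commits active.
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .retainCommits(1)
        .archiveCommitsWith(2, 3)
        .build())
    .forTable("my-table")
    .build();

HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
// Archives eligible instants only if the active timeline exceeds the thresholds;
// the test above also verifies that marker directories of archived instants are removed.
boolean archived = archiver.archiveIfRequired(context);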
Use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache: class TestHoodieTimelineArchiver, method testArchiveEmptyTable.
@Test
public void testArchiveEmptyTable() throws Exception {
  init();
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .forTable("test-trip-table")
      .build();
  metaClient = HoodieTableMetaClient.reload(metaClient);
  HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
  boolean result = archiver.archiveIfRequired(context);
  assertTrue(result);
}
Use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache: class TestHoodieTimelineArchiver, method testMergeSmallArchiveFilesRecoverFromBuildPlanFailed.
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testMergeSmallArchiveFilesRecoverFromBuildPlanFailed(boolean enableArchiveMerge) throws Exception {
  HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 3, 2, enableArchiveMerge, 3, 209715200);
  // do ingestion and trigger archive actions here.
  for (int i = 1; i < 8; i++) {
    testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT,
        i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
    archiveAndGetCommitsList(writeConfig);
  }
  // build a merge-small-archives plan with dummy content; this plan cannot be deserialized.
  HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table);
  FileStatus[] fsStatuses = metaClient.getFs().globStatus(new Path(metaClient.getArchivePath() + "/.commits_.archive*"));
  List<String> candidateFiles = Arrays.stream(fsStatuses).map(fs -> fs.getPath().toString()).collect(Collectors.toList());
  archiver.reOpenWriter();
  Path plan = new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME);
  archiver.buildArchiveMergePlan(candidateFiles, plan, ".commits_.archive.3_1-0-1");
  String s = "Dummy Content";
  // corrupt the current merge plan file.
  FileIOUtils.createFileInPath(metaClient.getFs(), plan, Option.of(s.getBytes()));
  // check that a damaged plan file does not block loading of the archived timeline.
  HoodieActiveTimeline rawActiveTimeline = new HoodieActiveTimeline(metaClient, false);
  HoodieArchivedTimeline archivedTimeLine = metaClient.getArchivedTimeline().reload();
  assertEquals(7 * 3, rawActiveTimeline.countInstants() + archivedTimeLine.countInstants());
  // trigger several more archive runs while the damaged merge plan is left in place.
  for (int i = 1; i < 10; i++) {
    testTable.doWriteOperation("1000000" + i, WriteOperationType.UPSERT,
        i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
    archiveAndGetCommitsList(writeConfig);
  }
  // loading the archived and active timelines should still succeed
  HoodieActiveTimeline rawActiveTimeline1 = new HoodieActiveTimeline(metaClient, false);
  HoodieArchivedTimeline archivedTimeLine1 = metaClient.getArchivedTimeline().reload();
  // check the instant count
  assertEquals(16 * 3, archivedTimeLine1.countInstants() + rawActiveTimeline1.countInstants());
  // if there are damaged archive files in addition to the damaged plan, Hudi should throw an exception while loading the archived timeline.
  Path damagedFile = new Path(metaClient.getArchivePath(), ".commits_.archive.300_1-0-1");
  FileIOUtils.createFileInPath(metaClient.getFs(), damagedFile, Option.of(s.getBytes()));
  assertThrows(HoodieException.class, () -> metaClient.getArchivedTimeline().reload());
}
Use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache: class TestCommitsCommand, method testShowArchivedCommits.
/**
 * Test case of the 'commits showarchived' command.
 */
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testShowArchivedCommits(boolean enableMetadataTable) throws Exception {
  // Generate archive
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(tablePath1)
      .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
      .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder().withRemoteServerPort(timelineServicePort).build())
      .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build())
      .forTable("test-trip-table")
      .build();
  // generate data and metadata
  Map<String, Integer[]> data = new LinkedHashMap<>();
  data.put("104", new Integer[] { 20, 10 });
  data.put("103", new Integer[] { 15, 15 });
  data.put("102", new Integer[] { 25, 45 });
  data.put("101", new Integer[] { 35, 15 });
  for (Map.Entry<String, Integer[]> entry : data.entrySet()) {
    String key = entry.getKey();
    Integer[] value = entry.getValue();
    HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath1, key, hadoopConf(),
        Option.of(value[0]), Option.of(value[1]));
  }
  if (enableMetadataTable) {
    // Simulate a compaction commit in metadata table timeline
    // so the archival in data table can happen
    createCompactionCommitInMetadataTable(hadoopConf(), metaClient.getFs(), tablePath1, "104");
  }
  // archive
  metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient());
  HoodieSparkTable table = HoodieSparkTable.create(cfg, context(), metaClient);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
  archiver.archiveIfRequired(context());
  CommandResult cr = shell().executeCommand(String.format("commits showarchived --startTs %s --endTs %s", "100", "104"));
  assertTrue(cr.isSuccess());
  // instants 101 and 102 were archived; two instants should remain on the active timeline
  assertEquals(2, metaClient.reloadActiveTimeline().getCommitsTimeline().countInstants(), "There should 2 instants not be archived!");
  // instants 101 and 102 were archived, so drop 103 and 104 from the expected data
  data.remove("103");
  data.remove("104");
  String expected = generateExpectData(1, data);
  expected = removeNonWordAndStripSpace(expected);
  String got = removeNonWordAndStripSpace(cr.getResult().toString());
  assertEquals(expected, got);
}
Use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache: class TestCommitsCommand, method testShowArchivedCommitsWithMultiCommitsFile.
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testShowArchivedCommitsWithMultiCommitsFile(boolean enableMetadataTable) throws Exception {
  // Generate archive
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(tablePath1)
      .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
      .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder().withRemoteServerPort(timelineServicePort).build())
      .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build())
      .forTable("test-trip-table")
      .build();
  // generate data and metadata
  Map<String, Integer[]> data = new LinkedHashMap<>();
  for (int i = 194; i >= 154; i--) {
    data.put(String.valueOf(i), new Integer[] { i, i });
  }
  if (enableMetadataTable) {
    // Simulate a compaction commit in metadata table timeline
    // so the archival in data table can happen
    createCompactionCommitInMetadataTable(hadoopConf(), metaClient.getFs(), tablePath1, "194");
  }
  for (Map.Entry<String, Integer[]> entry : data.entrySet()) {
    String key = entry.getKey();
    Integer[] value = entry.getValue();
    HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath1, key, hadoopConf(),
        Option.of(value[0]), Option.of(value[1]));
    // archive inside the loop so that multiple archive files are created
    metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient());
    HoodieSparkTable table = HoodieSparkTable.create(cfg, context(), metaClient);
    HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
    archiver.archiveIfRequired(context());
  }
  CommandResult cr = shell().executeCommand(String.format("commits showarchived --startTs %s --endTs %s", "160", "174"));
  assertTrue(cr.isSuccess());
  assertEquals(3, metaClient.reloadActiveTimeline().getCommitsTimeline().countInstants(), "There should 3 instants not be archived!");
  Map<String, Integer[]> data2 = new LinkedHashMap<>();
  for (int i = 174; i >= 161; i--) {
    data2.put(String.valueOf(i), new Integer[] { i, i });
  }
  String expected = generateExpectData(1, data2);
  expected = removeNonWordAndStripSpace(expected);
  String got = removeNonWordAndStripSpace(cr.getResult().toString());
  assertEquals(expected, got);
}
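For completeness, reading back what the archiver wrote uses the same calls that appear throughout the tests above. A minimal sketch, assuming the same metaClient is available:

// Reload the archived timeline after an archive run.
HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline().reload();
// Completed instants that have been moved off the active timeline.
Set<HoodieInstant> archivedCompleted =
    archivedTimeline.filterCompletedInstants().getInstants().collect(Collectors.toSet());
// The active timeline shrinks correspondingly; its size can be checked the same way the tests do.
int remainingActive = metaClient.reloadActiveTimeline().getCommitsTimeline().countInstants();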