Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache.
The class TestCommitsCommand, method testShowArchivedCommits.
/**
 * Test case for the 'commits showarchived' command.
 */
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testShowArchivedCommits(boolean enableMetadataTable) throws Exception {
  // Generate archive: keep between 2 and 3 instants on the active timeline
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(tablePath1)
      .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .retainCommits(1).archiveCommitsWith(2, 3).build())
      .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
          .withRemoteServerPort(timelineServicePort).build())
      .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build())
      .forTable("test-trip-table")
      .build();
  // Generate data and commit metadata
  Map<String, Integer[]> data = new LinkedHashMap<>();
  data.put("104", new Integer[] { 20, 10 });
  data.put("103", new Integer[] { 15, 15 });
  data.put("102", new Integer[] { 25, 45 });
  data.put("101", new Integer[] { 35, 15 });
  for (Map.Entry<String, Integer[]> entry : data.entrySet()) {
    String key = entry.getKey();
    Integer[] value = entry.getValue();
    HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(
        tablePath1, key, hadoopConf(), Option.of(value[0]), Option.of(value[1]));
  }
  if (enableMetadataTable) {
    // Simulate a compaction commit in the metadata table timeline
    // so that archival in the data table can happen
    createCompactionCommitInMetadataTable(hadoopConf(), metaClient.getFs(), tablePath1, "104");
  }
  // Archive
  metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient());
  HoodieSparkTable table = HoodieSparkTable.create(cfg, context(), metaClient);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
  archiver.archiveIfRequired(context());
  CommandResult cr = shell().executeCommand(
      String.format("commits showarchived --startTs %s --endTs %s", "100", "104"));
  assertTrue(cr.isSuccess());
  // Instants 101 and 102 were archived; 103 and 104 remain on the active timeline
  assertEquals(2, metaClient.reloadActiveTimeline().getCommitsTimeline().countInstants(),
      "There should be 2 instants left unarchived!");
  // The expected output covers only the archived instants, so drop 103 and 104
  data.remove("103");
  data.remove("104");
  String expected = generateExpectData(1, data);
  expected = removeNonWordAndStripSpace(expected);
  String got = removeNonWordAndStripSpace(cr.getResult().toString());
  assertEquals(expected, got);
}
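The archival trigger in this test distills to a few lines. A minimal sketch of that pattern, reusing tablePath1, context(), and metaClient exactly as set up in the test; archiveCommitsWith(2, 3) is what bounds the active timeline so that archiveIfRequired has work to do:

  // Minimal sketch of the archival pattern above; tablePath1, context(), and
  // metaClient are assumed to be configured as in the test.
  HoodieWriteConfig archivalCfg = HoodieWriteConfig.newBuilder()
      .withPath(tablePath1)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .retainCommits(1)            // retention used by the cleaner
          .archiveCommitsWith(2, 3)    // keep between 2 (min) and 3 (max) active instants
          .build())
      .forTable("test-trip-table")
      .build();
  HoodieSparkTable table = HoodieSparkTable.create(archivalCfg, context(), metaClient);
  // No-op when the active timeline is already within the configured bounds
  new HoodieTimelineArchiver(archivalCfg, table).archiveIfRequired(context());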
Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache.
The class TestCommitsCommand, method testShowArchivedCommitsWithMultiCommitsFile.
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testShowArchivedCommitsWithMultiCommitsFile(boolean enableMetadataTable) throws Exception {
  // Generate archive
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(tablePath1)
      .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .retainCommits(1).archiveCommitsWith(2, 3).build())
      .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
          .withRemoteServerPort(timelineServicePort).build())
      .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build())
      .forTable("test-trip-table")
      .build();
  // Generate data and commit metadata for instants 154..194
  Map<String, Integer[]> data = new LinkedHashMap<>();
  for (int i = 194; i >= 154; i--) {
    data.put(String.valueOf(i), new Integer[] { i, i });
  }
  if (enableMetadataTable) {
    // Simulate a compaction commit in the metadata table timeline
    // so that archival in the data table can happen
    createCompactionCommitInMetadataTable(hadoopConf(), metaClient.getFs(), tablePath1, "194");
  }
  for (Map.Entry<String, Integer[]> entry : data.entrySet()) {
    String key = entry.getKey();
    Integer[] value = entry.getValue();
    HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(
        tablePath1, key, hadoopConf(), Option.of(value[0]), Option.of(value[1]));
    // Archive after every commit so that multiple archive files are created
    metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient());
    HoodieSparkTable table = HoodieSparkTable.create(cfg, context(), metaClient);
    HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
    archiver.archiveIfRequired(context());
  }
  CommandResult cr = shell().executeCommand(
      String.format("commits showarchived --startTs %s --endTs %s", "160", "174"));
  assertTrue(cr.isSuccess());
  assertEquals(3, metaClient.reloadActiveTimeline().getCommitsTimeline().countInstants(),
      "There should be 3 instants left unarchived!");
  // Only the archived instants within the queried window (161..174) are expected in the output
  Map<String, Integer[]> data2 = new LinkedHashMap<>();
  for (int i = 174; i >= 161; i--) {
    data2.put(String.valueOf(i), new Integer[] { i, i });
  }
  String expected = generateExpectData(1, data2);
  expected = removeNonWordAndStripSpace(expected);
  String got = removeNonWordAndStripSpace(cr.getResult().toString());
  assertEquals(expected, got);
}
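To see what those archive files actually contain, the archived timeline can be read back through the meta client. A hedged sketch, assuming HoodieTableMetaClient.getArchivedTimeline() and the HoodieInstant accessors behave as in this Hudi version (neither appears in the snippets above):

  // Sketch: list the archived instants; metaClient is assumed set up as in the tests.
  // getArchivedTimeline() reads commit metadata back from the archive files.
  HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline();
  archivedTimeline.getInstants()
      .forEach(instant -> System.out.println(instant.getTimestamp() + " " + instant.getAction()));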
Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache.
The class TestCompactionCommand, method generateArchive.
private void generateArchive() throws IOException {
  // Generate archive
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(tablePath)
      .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .retainCommits(1).archiveCommitsWith(2, 3).build())
      .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
          .withRemoteServerPort(timelineServicePort).build())
      .forTable("test-trip-table")
      .build();
  // Archive
  HoodieTableMetaClient metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient());
  HoodieSparkTable table = HoodieSparkTable.create(cfg, context(), metaClient);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
  archiver.archiveIfRequired(context());
}
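A test using this helper can then assert that archival actually trimmed the active timeline. A sketch mirroring the countInstants() assertions from the TestCommitsCommand tests above:

  // Sketch: after generateArchive(), at most 3 commit instants should remain active,
  // per archiveCommitsWith(2, 3) in the config above.
  generateArchive();
  HoodieTableMetaClient reloaded = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient());
  assertTrue(reloaded.getActiveTimeline().getCommitsTimeline().countInstants() <= 3,
      "Active timeline should hold at most 3 commit instants after archival");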
Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache.
The class SparkMain, method deleteMarker.
protected static int deleteMarker(JavaSparkContext jsc, String instantTime, String basePath) {
  try {
    SparkRDDWriteClient client = createHoodieClient(jsc, basePath);
    HoodieWriteConfig config = client.getConfig();
    HoodieEngineContext context = client.getEngineContext();
    HoodieSparkTable table = HoodieSparkTable.create(config, context, true);
    WriteMarkersFactory.get(config.getMarkersType(), table, instantTime)
        .quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
    return 0;
  } catch (Exception e) {
    LOG.warn(String.format("Failed: Could not clean marker instantTime: \"%s\".", instantTime), e);
    return -1;
  }
}
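A minimal, hypothetical invocation of this helper; the instant time and base path below are made-up example values (not from the source), and jsc is assumed to be an existing JavaSparkContext visible from SparkMain's package:

  // Hypothetical usage sketch: 0 means the marker directory for the instant was
  // quietly deleted, -1 means the cleanup failed.
  int exitCode = SparkMain.deleteMarker(jsc, "20220101000000", "file:///tmp/hudi/test-trip-table");
  if (exitCode != 0) {
    System.err.println("Marker cleanup failed; see the WARN log for the cause.");
  }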