Use of org.apache.hudi.client.SparkRDDWriteClient in project hudi by apache.
The class TestHoodieSparkMergeOnReadTableRollback, method testMORTableRestore.
@ParameterizedTest
@ValueSource(booleans = { true, false })
void testMORTableRestore(boolean restoreAfterCompaction) throws Exception {
  HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(false).withMarkersType(MarkerType.DIRECT.name());
  HoodieWriteConfig cfg = cfgBuilder.build();

  Properties properties = new Properties();
  properties.setProperty(HoodieTableConfig.BASE_FILE_FORMAT.key(), HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().toString());
  HoodieTableMetaClient metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, properties);

  try (final SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
    HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
    List<HoodieRecord> records = insertAndGetRecords("001", client, dataGen, 200);
    List<HoodieRecord> updates1 = updateAndGetRecords("002", client, dataGen, records);
    List<HoodieRecord> updates2 = updateAndGetRecords("003", client, dataGen, records);
    List<HoodieRecord> updates3 = updateAndGetRecords("004", client, dataGen, records);
    validateRecords(cfg, metaClient, updates3);

    if (!restoreAfterCompaction) {
      // Restore to 002 and validate records.
      client.restoreToInstant("002");
      validateRecords(cfg, metaClient, updates1);
    } else {
      // Trigger compaction, then a couple of upserts, followed by a restore.
      metaClient = HoodieTableMetaClient.reload(metaClient);
      String compactionInstantTime = "005";
      client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
      HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = client.compact(compactionInstantTime);
      client.commitCompaction(compactionInstantTime, compactionMetadata.getCommitMetadata().get(), Option.empty());
      validateRecords(cfg, metaClient, updates3);

      List<HoodieRecord> updates4 = updateAndGetRecords("006", client, dataGen, records);
      List<HoodieRecord> updates5 = updateAndGetRecords("007", client, dataGen, records);
      validateRecords(cfg, metaClient, updates5);

      // Restore to 003 and validate records.
      client.restoreToInstant("003");
      validateRecords(cfg, metaClient, updates2);
    }
  }
}
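The insertAndGetRecords and updateAndGetRecords helpers are not shown on this page. As a rough sketch under assumptions (the helper name, signature, and single-partition parallelism are illustrative, not the project's exact code), the insert helper typically follows the same write-then-assert pattern as the updateRecords method below:

// Hypothetical sketch of an insert helper; not the exact Hudi test helper.
private List<HoodieRecord> insertAndGetRecords(String commitTime, SparkRDDWriteClient client,
                                               HoodieTestDataGenerator dataGen, int count) {
  client.startCommitWithTime(commitTime);
  // Generate synthetic records for this commit and write them with the client.
  List<HoodieRecord> records = dataGen.generateInserts(commitTime, count);
  JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
  List<WriteStatus> statuses = client.insert(writeRecords, commitTime).collect();
  assertNoWriteErrors(statuses);
  return records;
}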
Use of org.apache.hudi.client.SparkRDDWriteClient in project hudi by apache.
The class TestHoodieSparkMergeOnReadTableRollback, method updateRecords.
private List<HoodieRecord> updateRecords(SparkRDDWriteClient client, HoodieTestDataGenerator dataGen, String commitTime,
                                         List<HoodieRecord> records, HoodieTableMetaClient metaClient, HoodieWriteConfig cfg,
                                         boolean assertLogFiles) throws IOException {
  client.startCommitWithTime(commitTime);
  // Generate updates against the given records and write them as an upsert.
  records = dataGen.generateUpdates(commitTime, records);
  JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
  List<WriteStatus> statuses = client.upsert(writeRecords, commitTime).collect();
  assertNoWriteErrors(statuses);

  if (assertLogFiles) {
    HoodieTable table = HoodieSparkTable.create(cfg, context(), metaClient);
    table.getHoodieView().sync();
    TableFileSystemView.SliceView tableRTFileSystemView = table.getSliceView();
    long numLogFiles = 0;
    for (String partitionPath : dataGen.getPartitionPaths()) {
      List<FileSlice> allSlices = tableRTFileSystemView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
      // Each partition should have exactly one latest file slice with a base file,
      // and at least one slice carrying log files from the upsert.
      assertEquals(1, allSlices.stream().filter(fileSlice -> fileSlice.getBaseFile().isPresent()).count());
      assertTrue(allSlices.stream().anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
      numLogFiles += allSlices.stream().filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
    }
    assertTrue(numLogFiles > 0);
  }
  return records;
}
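A minimal usage sketch for this helper, assuming client, dataGen, records, metaClient, and cfg are already set up as in the snippets above (the instant times are illustrative):

// Two delta commits; assert log files exist only after the second one.
List<HoodieRecord> afterFirstUpdate = updateRecords(client, dataGen, "002", records, metaClient, cfg, false);
List<HoodieRecord> afterSecondUpdate = updateRecords(client, dataGen, "003", afterFirstUpdate, metaClient, cfg, true);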
Use of org.apache.hudi.client.SparkRDDWriteClient in project hudi by apache.
The class TestMarkerBasedRollbackStrategy, method testCopyOnWriteRollback.
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
@MethodSource("configParams")
public void testCopyOnWriteRollback(boolean useFileListingMetadata) throws Exception {
  HoodieWriteConfig writeConfig = getConfigBuilder()
      .withRollbackUsingMarkers(true)
      .withAutoCommit(false)
      .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(useFileListingMetadata).build())
      .withPath(basePath)
      .build();
  HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
  try (SparkRDDWriteClient writeClient = new SparkRDDWriteClient(engineContext, writeConfig)) {
    // rollback 2nd commit and ensure stats reflect the info.
    List<HoodieRollbackStat> stats = testRun(useFileListingMetadata, writeConfig, writeClient);
    assertEquals(3, stats.size());
    for (HoodieRollbackStat stat : stats) {
      assertEquals(1, stat.getSuccessDeleteFiles().size());
      assertEquals(0, stat.getFailedDeleteFiles().size());
      assertEquals(0, stat.getCommandBlocksCount().size());
    }
  }
}
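The testRun helper is not shown here. As a hedged sketch of how marker-based rollback can be exercised through the public write-client API (this is not the project's testRun implementation; it assumes writeClient, writeConfig, jsc, and a HoodieTestDataGenerator dataGen as in the snippet above, and illustrative instant times):

// Hypothetical sketch: commit one insert, leave a second upsert inflight, then roll it back.
String commit1 = "001";
writeClient.startCommitWithTime(commit1);
List<HoodieRecord> inserts = dataGen.generateInserts(commit1, 100);
// Auto-commit is disabled, so the first write is committed explicitly.
writeClient.commit(commit1, writeClient.insert(jsc.parallelize(inserts, 1), commit1));

String commit2 = "002";
writeClient.startCommitWithTime(commit2);
writeClient.upsert(jsc.parallelize(dataGen.generateUpdates(commit2, inserts), 1), commit2);
// commit2 is left inflight, so its marker files still exist; with
// withRollbackUsingMarkers(true) the rollback uses those markers rather than listing files.
writeClient.rollback(commit2);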
Use of org.apache.hudi.client.SparkRDDWriteClient in project hudi by apache.
The class CompactionTestBase, method executeCompactionWithReplacedFiles.
protected void executeCompactionWithReplacedFiles(String compactionInstantTime, SparkRDDWriteClient client, HoodieTable table,
                                                  HoodieWriteConfig cfg, String[] partitions, Set<HoodieFileGroupId> replacedFileIds) throws IOException {
  client.compact(compactionInstantTime);
  List<FileSlice> fileSliceList = getCurrentLatestFileSlices(table);
  assertTrue(fileSliceList.stream().findAny().isPresent(), "Ensure latest file-slices are not empty");
  assertFalse(fileSliceList.stream().anyMatch(fs -> replacedFileIds.contains(fs.getFileGroupId())),
      "Compacted files should not show up in latest slices");

  // verify that there is a commit
  table = getHoodieTable(HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).setLoadActiveTimelineOnLoad(true).build(), cfg);
  HoodieTimeline timeline = table.getMetaClient().getCommitTimeline().filterCompletedInstants();
  // verify compaction commit is visible in timeline
  assertTrue(timeline.filterCompletedInstants().getInstants()
      .filter(instant -> compactionInstantTime.equals(instant.getTimestamp())).findFirst().isPresent());

  for (String partition : partitions) {
    table.getSliceView().getLatestFileSlicesBeforeOrOn(partition, compactionInstantTime, true).forEach(fs -> {
      // verify that all log files are merged
      assertEquals(0, fs.getLogFiles().count());
      assertTrue(fs.getBaseFile().isPresent());
    });
  }
}
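A compaction instant has to be scheduled before this helper runs it; a hypothetical call sequence, assuming client, table, cfg, and dataGen as in the other snippets, and a replacedFileIds set produced by an earlier replace/clustering action that is not shown here:

// Hypothetical usage sketch; the setup that produces replacedFileIds is omitted.
String compactionInstantTime = "005";
client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
executeCompactionWithReplacedFiles(compactionInstantTime, client, table, cfg, dataGen.getPartitionPaths(), replacedFileIds);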
Use of org.apache.hudi.client.SparkRDDWriteClient in project hudi by apache.
The class TestAsyncCompaction, method testRollbackInflightIngestionWithPendingCompaction.
@Test
public void testRollbackInflightIngestionWithPendingCompaction() throws Exception {
  // Roll back an inflight ingestion while there is a pending compaction.
  HoodieWriteConfig cfg = getConfig(false);
  String firstInstantTime = "001";
  String secondInstantTime = "004";
  String compactionInstantTime = "005";
  String inflightInstantTime = "006";
  String nextInflightInstantTime = "007";
  int numRecs = 2000;

  try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
    HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
    List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
    records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true, new ArrayList<>());

    // Schedule compaction but do not run it.
    scheduleCompaction(compactionInstantTime, client, cfg);
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    createNextDeltaCommit(inflightInstantTime, records, client, metaClient, cfg, true);

    metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    HoodieInstant pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
    assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(), "Pending compaction instant has expected instant time");
    HoodieInstant inflightInstant = metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
    assertEquals(inflightInstantTime, inflightInstant.getTimestamp(), "Inflight instant has expected instant time");

    // Starting the next commit should roll back the previous inflight delta commit.
    client.startCommitWithTime(nextInflightInstantTime);

    // Validate: only the new inflight instant remains, and the pending compaction is untouched.
    metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    inflightInstant = metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
    assertEquals(inflightInstant.getTimestamp(), nextInflightInstantTime, "Inflight instant has expected instant time");
    assertEquals(1, metaClient.getActiveTimeline().filterPendingExcludingCompaction().getInstants().count(), "Expect only one inflight instant");
    // Expect the pending compaction to still be present.
    pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
    assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(), "Pending compaction instant has expected instant time");
  }
}
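If a test went on to complete the still-pending compaction after the rollback, the calls would mirror the compact-and-commit sequence from the first snippet on this page; a short sketch under the same assumptions (client and compactionInstantTime as above):

// Hypothetical continuation: execute the pending compaction and commit its result.
HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = client.compact(compactionInstantTime);
client.commitCompaction(compactionInstantTime, compactionMetadata.getCommitMetadata().get(), Option.empty());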