Use of org.apache.hudi.table.HoodieTable in project hudi by apache.
From the class TestHoodieSparkMergeOnReadTableRollback, method validateRecords.
private void validateRecords(HoodieWriteConfig cfg, HoodieTableMetaClient metaClient, List<HoodieRecord> expectedRecords) throws IOException {
  HoodieTable hoodieTable = HoodieSparkTable.create(cfg, context(), metaClient);
  FileStatus[] allFiles = listAllBaseFilesInPath(hoodieTable);
  HoodieTableFileSystemView tableView = getHoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
  List<String> inputPaths = tableView.getLatestBaseFiles().map(hf -> new Path(hf.getPath()).getParent().toString()).collect(Collectors.toList());
  List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), inputPaths, basePath());
  assertRecords(expectedRecords, recordsRead);
}
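The assertRecords helper called at the end is not shown here. A minimal sketch of what such a comparison could look like, assuming the read rows expose the record key through Hudi's _hoodie_record_key metadata field and that comparing key sets is sufficient for the test; the actual helper in the Hudi test base may compare more fields:

// Hypothetical sketch; not the actual Hudi test helper.
private void assertRecords(List<HoodieRecord> expected, List<GenericRecord> actual) {
  // Compare by record key: every expected key should appear in the rows read back.
  Set<String> expectedKeys = expected.stream()
      .map(HoodieRecord::getRecordKey)
      .collect(Collectors.toSet());
  Set<String> actualKeys = actual.stream()
      .map(r -> r.get("_hoodie_record_key").toString())
      .collect(Collectors.toSet());
  assertEquals(expectedKeys, actualKeys);
}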
Use of org.apache.hudi.table.HoodieTable in project hudi by apache.
From the class TestHoodieSparkMergeOnReadTableRollback, method testCOWToMORConvertedTableRollback.
@ParameterizedTest
@ValueSource(booleans = {true, false})
void testCOWToMORConvertedTableRollback(boolean rollbackUsingMarkers) throws Exception {
  // Set TableType to COW
  HoodieTableMetaClient metaClient = getHoodieMetaClient(HoodieTableType.COPY_ON_WRITE);
  HoodieWriteConfig cfg = getConfig(false, rollbackUsingMarkers);
  try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
    HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();

    /*
     * Write 1 (only inserts)
     */
    String newCommitTime = "001";
    client.startCommitWithTime(newCommitTime);
    List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 200);
    JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
    List<WriteStatus> statuses = client.upsert(writeRecords, newCommitTime).collect();
    // verify there are no errors
    assertNoWriteErrors(statuses);
    client.commit(newCommitTime, jsc().parallelize(statuses));

    metaClient = HoodieTableMetaClient.reload(metaClient);
    Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
    assertTrue(commit.isPresent());
    assertEquals("001", commit.get().getTimestamp(), "commit should be 001");

    /*
     * Write 2 (updates)
     */
    newCommitTime = "002";
    client.startCommitWithTime(newCommitTime);
    records = dataGen.generateUpdates(newCommitTime, records);
    statuses = client.upsert(jsc().parallelize(records, 1), newCommitTime).collect();
    // verify there are no errors
    assertNoWriteErrors(statuses);

    // Set TableType to MOR
    metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ);

    // rollback a COW commit when TableType is MOR
    client.rollback(newCommitTime);

    metaClient = HoodieTableMetaClient.reload(metaClient);
    HoodieTable hoodieTable = HoodieSparkTable.create(cfg, context(), metaClient);
    FileStatus[] allFiles = listAllBaseFilesInPath(hoodieTable);
    HoodieTableFileSystemView tableView = getHoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);

    final String absentCommit = newCommitTime;
    assertAll(tableView.getLatestBaseFiles().map(file -> () -> assertNotEquals(absentCommit, file.getCommitTime())));
  }
}
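The getConfig(false, rollbackUsingMarkers) helper belongs to the test harness and is not shown above. A plausible sketch of the knob it toggles, assuming the standard HoodieWriteConfig builder; basePath() and the extra options the real helper sets are assumptions:

// Hypothetical sketch of the test's config helper; the real helper sets many more options.
private HoodieWriteConfig getConfig(boolean autoCommit, boolean rollbackUsingMarkers) {
  return HoodieWriteConfig.newBuilder()
      .withPath(basePath())                             // table base path provided by the test harness
      .withAutoCommit(autoCommit)                       // the test commits explicitly via client.commit(...)
      .withRollbackUsingMarkers(rollbackUsingMarkers)   // marker-based vs. listing-based rollback
      .build();
}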
Use of org.apache.hudi.table.HoodieTable in project hudi by apache.
From the class TestHoodieSparkMergeOnReadTableRollback, method updateRecords.
private List<HoodieRecord> updateRecords(SparkRDDWriteClient client, HoodieTestDataGenerator dataGen, String commitTime, List<HoodieRecord> records,
                                         HoodieTableMetaClient metaClient, HoodieWriteConfig cfg, boolean assertLogFiles) throws IOException {
  client.startCommitWithTime(commitTime);
  records = dataGen.generateUpdates(commitTime, records);
  JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
  List<WriteStatus> statuses = client.upsert(writeRecords, commitTime).collect();
  assertNoWriteErrors(statuses);
  if (assertLogFiles) {
    HoodieTable table = HoodieSparkTable.create(cfg, context(), metaClient);
    table.getHoodieView().sync();
    TableFileSystemView.SliceView tableRTFileSystemView = table.getSliceView();
    long numLogFiles = 0;
    for (String partitionPath : dataGen.getPartitionPaths()) {
      List<FileSlice> allSlices = tableRTFileSystemView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
      assertEquals(1, allSlices.stream().filter(fileSlice -> fileSlice.getBaseFile().isPresent()).count());
      assertTrue(allSlices.stream().anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
      numLogFiles += allSlices.stream().filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
    }
    assertTrue(numLogFiles > 0);
  }
  return records;
}
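A typical call site for this helper in a rollback test might look like the following, using the validateRecords helper from the first example; the instant times and record count are illustrative only:

// Illustrative usage only; the surrounding test drives the actual values.
List<HoodieRecord> records = dataGen.generateInserts("001", 200);
// ... write and commit the initial inserts as in the test above ...
records = updateRecords(client, dataGen, "002", records, metaClient, cfg, true);
validateRecords(cfg, HoodieTableMetaClient.reload(metaClient), records);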
Use of org.apache.hudi.table.HoodieTable in project hudi by apache.
From the class TestMarkerBasedRollbackStrategy, method testCopyOnWriteRollbackWithTestTable.
@Test
public void testCopyOnWriteRollbackWithTestTable() throws Exception {
  // given: wrote some base files and corresponding markers
  HoodieTestTable testTable = HoodieTestTable.of(metaClient);
  String f0 = testTable.addRequestedCommit("000").getFileIdsWithBaseFilesInPartitions("partA").get("partA");
  String f1 = testTable.addCommit("001").withBaseFilesInPartition("partA", f0).getFileIdsWithBaseFilesInPartitions("partB").get("partB");
  String f2 = "f2";
  testTable.forCommit("001")
      .withMarkerFile("partA", f0, IOType.MERGE)
      .withMarkerFile("partB", f1, IOType.CREATE)
      .withMarkerFile("partA", f2, IOType.CREATE);

  // when
  HoodieTable hoodieTable = HoodieSparkTable.create(getConfig(), context, metaClient);
  List<HoodieRollbackRequest> rollbackRequests = new MarkerBasedRollbackStrategy(hoodieTable, context, getConfig(), "002")
      .getRollbackRequests(new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "001"));
  List<HoodieRollbackStat> stats = new BaseRollbackHelper(hoodieTable.getMetaClient(), getConfig())
      .performRollback(context, new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "001"), rollbackRequests);

  // then: ensure files are deleted correctly, non-existent files reported as failed deletes
  assertEquals(2, stats.size());
  FileStatus[] partAFiles = testTable.listAllFilesInPartition("partA");
  FileStatus[] partBFiles = testTable.listAllFilesInPartition("partB");
  assertEquals(0, partBFiles.length);
  assertEquals(1, partAFiles.length);
  assertEquals(2, stats.stream().mapToInt(r -> r.getSuccessDeleteFiles().size()).sum());
  assertEquals(1, stats.stream().mapToInt(r -> r.getFailedDeleteFiles().size()).sum());
}
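Outside of a unit test, this marker-based path is normally exercised through the write client rather than by instantiating the strategy and rollback helper directly. A minimal sketch, assuming a config built with withRollbackUsingMarkers(true) and an existing HoodieEngineContext named context and base path named basePath; both names are assumptions here:

// Illustrative only: the client resolves the rollback strategy from the write config.
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .withRollbackUsingMarkers(true)   // selects the marker-based rollback strategy internally
    .build();
try (SparkRDDWriteClient client = new SparkRDDWriteClient(context, cfg)) {
  client.rollback("001");             // rolls back the instant "001"
}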
Use of org.apache.hudi.table.HoodieTable in project hudi by apache.
From the class CompactionTestBase, method executeCompactionWithReplacedFiles.
protected void executeCompactionWithReplacedFiles(String compactionInstantTime, SparkRDDWriteClient client, HoodieTable table,
                                                  HoodieWriteConfig cfg, String[] partitions, Set<HoodieFileGroupId> replacedFileIds) throws IOException {
  client.compact(compactionInstantTime);
  List<FileSlice> fileSliceList = getCurrentLatestFileSlices(table);
  assertTrue(fileSliceList.stream().findAny().isPresent(), "Ensure latest file-slices are not empty");
  assertFalse(fileSliceList.stream().anyMatch(fs -> replacedFileIds.contains(fs.getFileGroupId())),
      "Compacted files should not show up in latest slices");

  // verify that there is a commit
  table = getHoodieTable(HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).setLoadActiveTimelineOnLoad(true).build(), cfg);
  HoodieTimeline timeline = table.getMetaClient().getCommitTimeline().filterCompletedInstants();
  // verify compaction commit is visible in timeline
  assertTrue(timeline.filterCompletedInstants().getInstants().filter(instant -> compactionInstantTime.equals(instant.getTimestamp())).findFirst().isPresent());

  for (String partition : partitions) {
    table.getSliceView().getLatestFileSlicesBeforeOrOn(partition, compactionInstantTime, true).forEach(fs -> {
      // verify that all log files are merged
      assertEquals(0, fs.getLogFiles().count());
      assertTrue(fs.getBaseFile().isPresent());
    });
  }
}
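This helper only executes the compaction; the compaction instant is expected to have been scheduled beforehand. A caller might look roughly like the sketch below; the instant time, partition list, and replaced-file set are illustrative assumptions:

// Illustrative usage; assumes a compaction can be scheduled at "005" on a MOR table.
String compactionInstantTime = "005";
client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
executeCompactionWithReplacedFiles(compactionInstantTime, client, table, cfg,
    dataGen.getPartitionPaths(), replacedFileIds);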