Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
The class TestHoodieTimelineArchiver, method archiveAndGetCommitsList.
private Pair<List<HoodieInstant>, List<HoodieInstant>> archiveAndGetCommitsList(HoodieWriteConfig writeConfig) throws IOException {
  metaClient.reloadActiveTimeline();
  // Snapshot the completed commits before archiving.
  HoodieTimeline timeline = metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants();
  List<HoodieInstant> originalCommits = timeline.getInstants().collect(Collectors.toList());
  HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table);
  archiver.archiveIfRequired(context);
  // Reload and snapshot again to capture the post-archival timeline.
  timeline = metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants();
  List<HoodieInstant> commitsAfterArchival = timeline.getInstants().collect(Collectors.toList());
  return Pair.of(originalCommits, commitsAfterArchival);
}
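A hedged usage sketch (hypothetical; not from the Hudi test suite) of how a test might consume this helper, assuming the same harness and a writeConfig built elsewhere. Pair is assumed to expose getLeft()/getRight() as in org.apache.hudi.common.util.collection.Pair:
// Hypothetical follow-up assertions on the helper's result.
Pair<List<HoodieInstant>, List<HoodieInstant>> commits = archiveAndGetCommitsList(writeConfig);
List<HoodieInstant> originalCommits = commits.getLeft();
List<HoodieInstant> commitsAfterArchival = commits.getRight();
// Archival only removes instants from the active timeline, never adds them.
assertTrue(originalCommits.containsAll(commitsAfterArchival), "archival should not introduce new instants");
assertTrue(commitsAfterArchival.size() <= originalCommits.size());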
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
The class TestHoodieTimelineArchiver, method verifyInflightInstants.
private void verifyInflightInstants(HoodieTableMetaClient metaClient, int expectedTotalInstants) {
  HoodieTimeline timeline = metaClient.getActiveTimeline().reload()
      .getTimelineOfActions(Collections.singleton(HoodieTimeline.CLEAN_ACTION))
      .filterInflights();
  assertEquals(expectedTotalInstants, timeline.countInstants(), "Loaded inflight clean actions and the count should match");
}
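The same reload-filter-count pattern generalizes to other actions and states; a minimal sketch (the helper name verifyCompletedCommits is hypothetical, the timeline calls mirror the ones above):
// Hypothetical variant: count completed commit actions instead of inflight cleans.
private void verifyCompletedCommits(HoodieTableMetaClient metaClient, int expectedTotalInstants) {
  HoodieTimeline timeline = metaClient.getActiveTimeline().reload()
      .getTimelineOfActions(Collections.singleton(HoodieTimeline.COMMIT_ACTION))
      .filterCompletedInstants();
  assertEquals(expectedTotalInstants, timeline.countInstants(), "Loaded completed commit actions and the count should match");
}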
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
The class TestHoodieSparkCopyOnWriteTableArchiveWithReplace, method testDeletePartitionAndArchive.
@ParameterizedTest
@ValueSource(booleans = { false, true })
public void testDeletePartitionAndArchive(boolean metadataEnabled) throws IOException {
  HoodieTableMetaClient metaClient = getHoodieMetaClient(HoodieTableType.COPY_ON_WRITE);
  HoodieWriteConfig writeConfig = getConfigBuilder(true)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 3).retainCommits(1).build())
      .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(metadataEnabled).build())
      .build();
  try (SparkRDDWriteClient client = getHoodieWriteClient(writeConfig);
       HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(DEFAULT_PARTITION_PATHS)) {
    // 1st write batch; 3 commits for 3 partitions
    String instantTime1 = HoodieActiveTimeline.createNewInstantTime(1000);
    client.startCommitWithTime(instantTime1);
    client.insert(jsc().parallelize(dataGen.generateInsertsForPartition(instantTime1, 10, DEFAULT_FIRST_PARTITION_PATH), 1), instantTime1);
    String instantTime2 = HoodieActiveTimeline.createNewInstantTime(2000);
    client.startCommitWithTime(instantTime2);
    client.insert(jsc().parallelize(dataGen.generateInsertsForPartition(instantTime2, 10, DEFAULT_SECOND_PARTITION_PATH), 1), instantTime2);
    String instantTime3 = HoodieActiveTimeline.createNewInstantTime(3000);
    client.startCommitWithTime(instantTime3);
    client.insert(jsc().parallelize(dataGen.generateInsertsForPartition(instantTime3, 1, DEFAULT_THIRD_PARTITION_PATH), 1), instantTime3);
    final HoodieTimeline timeline1 = metaClient.getCommitsTimeline().filterCompletedInstants();
    assertEquals(21, countRecordsOptionallySince(jsc(), basePath(), sqlContext(), timeline1, Option.empty()));
    // delete the 1st and the 2nd partition; 1 replace commit
    final String instantTime4 = HoodieActiveTimeline.createNewInstantTime(4000);
    client.startCommitWithTime(instantTime4, HoodieActiveTimeline.REPLACE_COMMIT_ACTION);
    client.deletePartitions(Arrays.asList(DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH), instantTime4);
    // 2nd write batch; 4 commits for the 3rd partition; the 3rd commit triggers archiving of the replace commit
    for (int i = 5; i < 9; i++) {
      String instantTime = HoodieActiveTimeline.createNewInstantTime(i * 1000);
      client.startCommitWithTime(instantTime);
      client.insert(jsc().parallelize(dataGen.generateInsertsForPartition(instantTime, 1, DEFAULT_THIRD_PARTITION_PATH), 1), instantTime);
    }
    // verify archived timeline
    metaClient = HoodieTableMetaClient.reload(metaClient);
    final HoodieTimeline archivedTimeline = metaClient.getArchivedTimeline();
    assertTrue(archivedTimeline.containsInstant(instantTime1));
    assertTrue(archivedTimeline.containsInstant(instantTime2));
    assertTrue(archivedTimeline.containsInstant(instantTime3));
    assertTrue(archivedTimeline.containsInstant(instantTime4), "should contain the replace commit.");
    // verify records: 1 insert at instantTime3 plus 4 loop inserts = 5 records in the 3rd partition
    final HoodieTimeline timeline2 = metaClient.getCommitTimeline().filterCompletedInstants();
    assertEquals(5, countRecordsOptionallySince(jsc(), basePath(), sqlContext(), timeline2, Option.empty()), "should only have the 5 records from the 3rd partition.");
  }
}
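A complementary check one might add after the archival verification above (hypothetical; not part of the original test): instants that reached the archived timeline should no longer appear in the active one, so the inverse assertions also hold under the same setup.
// Hypothetical inverse assertions, reusing metaClient and the instant times from the test above.
HoodieTimeline activeTimeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
assertFalse(activeTimeline.containsInstant(instantTime1), "archived instants should leave the active timeline");
assertFalse(activeTimeline.containsInstant(instantTime4));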
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
The class SparkUpsertDeltaCommitPartitioner, method getSmallFiles.
@Override
protected List<SmallFile> getSmallFiles(String partitionPath) {
  // Init here since this class (and member variables) might not have been initialized
  HoodieTimeline commitTimeline = table.getCompletedCommitsTimeline();
  if (commitTimeline.empty()) {
    return Collections.emptyList();
  }
  HoodieInstant latestCommitTime = commitTimeline.lastInstant().get();
  // Find all eligible small file slices, looking for the smallest file in the partition to append to
  List<FileSlice> smallFileSlicesCandidates = getSmallFileCandidates(partitionPath, latestCommitTime);
  List<SmallFile> smallFileLocations = new ArrayList<>();
  // Create SmallFiles from the eligible file slices
  for (FileSlice smallFileSlice : smallFileSlicesCandidates) {
    SmallFile sf = new SmallFile();
    if (smallFileSlice.getBaseFile().isPresent()) {
      // TODO: Move logic of file name, file id, base commit time handling inside file slice
      String filename = smallFileSlice.getBaseFile().get().getFileName();
      sf.location = new HoodieRecordLocation(FSUtils.getCommitTime(filename), FSUtils.getFileId(filename));
      sf.sizeBytes = getTotalFileSize(smallFileSlice);
      smallFileLocations.add(sf);
    } else {
      // A file slice without a base file must have at least one log file; key the
      // SmallFile off the first log file's base commit time and file id.
      HoodieLogFile logFile = smallFileSlice.getLogFiles().findFirst().get();
      sf.location = new HoodieRecordLocation(FSUtils.getBaseCommitTimeFromLogPath(logFile.getPath()), FSUtils.getFileIdFromLogPath(logFile.getPath()));
      sf.sizeBytes = getTotalFileSize(smallFileSlice);
      smallFileLocations.add(sf);
    }
  }
  return smallFileLocations;
}
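A hedged sketch of how a caller might consume the result (hypothetical consumer code; the partition path is illustrative, and sizeBytes is the public field assigned above):
// Sum the bytes reported for one partition's small-file candidates.
long totalSmallBytes = getSmallFiles("2022/01/01").stream()
    .mapToLong(smallFile -> smallFile.sizeBytes)
    .sum();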
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
The class UpsertPartitioner, method getSmallFiles.
/**
* Returns a list of small files in the given partition path.
*/
protected List<SmallFile> getSmallFiles(String partitionPath) {
  // smallFiles only for partitionPath
  List<SmallFile> smallFileLocations = new ArrayList<>();
  HoodieTimeline commitTimeline = table.getMetaClient().getCommitsTimeline().filterCompletedInstants();
  if (!commitTimeline.empty()) {
    // if we have some commits
    HoodieInstant latestCommitTime = commitTimeline.lastInstant().get();
    List<HoodieBaseFile> allFiles = table.getBaseFileOnlyView()
        .getLatestBaseFilesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp())
        .collect(Collectors.toList());
    for (HoodieBaseFile file : allFiles) {
      // Only base files under the configured size limit qualify as small files.
      if (file.getFileSize() < config.getParquetSmallFileLimit()) {
        String filename = file.getFileName();
        SmallFile sf = new SmallFile();
        sf.location = new HoodieRecordLocation(FSUtils.getCommitTime(filename), FSUtils.getFileId(filename));
        sf.sizeBytes = file.getFileSize();
        smallFileLocations.add(sf);
      }
    }
  }
  return smallFileLocations;
}
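The cutoff read by config.getParquetSmallFileLimit() comes from the compaction config; a hedged sketch of setting it when building a write config (the 100 MB value and basePath are illustrative, and compactionSmallFileSize is assumed to be the matching builder method):
// Sketch: configure the small-file threshold used by the partitioner above.
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .compactionSmallFileSize(100 * 1024 * 1024L) // files below this size become append candidates
        .build())
    .build();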