Use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.
The class TestHoodieTableFileSystemView, method testReplaceWithTimeTravel.
@Test
public void testReplaceWithTimeTravel() throws IOException {
  String partitionPath1 = "2020/06/27";
  new File(basePath + "/" + partitionPath1).mkdirs();

  // Create two file groups in partition1 - fileId1 is replaced later on.
  String fileId1 = UUID.randomUUID().toString();
  String fileId2 = UUID.randomUUID().toString();
  assertFalse(roView.getLatestBaseFiles(partitionPath1)
      .anyMatch(dfile -> dfile.getFileId().equals(fileId1) || dfile.getFileId().equals(fileId2)),
      "No commit, should not find any data file");

  // Only one commit
  String commitTime1 = "1";
  String fileName1 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1);
  String fileName2 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2);
  new File(basePath + "/" + partitionPath1 + "/" + fileName1).createNewFile();
  new File(basePath + "/" + partitionPath1 + "/" + fileName2).createNewFile();
  HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
  HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime1);
  saveAsComplete(commitTimeline, instant1, Option.empty());
  refreshFsView();
  assertEquals(1, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId1)).count());
  assertEquals(1, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId2)).count());

  // Create commit2 - fileId1 is replaced; new file groups fileId3 and fileId4 are created.
  // Note the new base files are deliberately named with commitTime1, so they remain visible
  // to the time-travel view built below that excludes commit2.
  String fileId3 = UUID.randomUUID().toString();
  String fileId4 = UUID.randomUUID().toString();
  String fileName3 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId3);
  String fileName4 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId4);
  new File(basePath + "/" + partitionPath1 + "/" + fileName3).createNewFile();
  new File(basePath + "/" + partitionPath1 + "/" + fileName4).createNewFile();
  String commitTime2 = "2";
  Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
  List<String> replacedFileIds = new ArrayList<>();
  replacedFileIds.add(fileId1);
  partitionToReplaceFileIds.put(partitionPath1, replacedFileIds);
  HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(Collections.emptyList(), partitionToReplaceFileIds,
      Option.empty(), WriteOperationType.INSERT_OVERWRITE, "", HoodieTimeline.REPLACE_COMMIT_ACTION);
  commitTimeline = metaClient.getActiveTimeline();
  HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, commitTime2);
  saveAsComplete(commitTimeline, instant2, Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));

  // Make sure the view doesn't include fileId1 anymore.
  refreshFsView();
  assertEquals(0, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId1)).count());
  assertEquals(1, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId2)).count());
  assertEquals(1, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId3)).count());
  assertEquals(1, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId4)).count());

  // Exclude commit2 (time travel) and make sure fileId1 shows up in the view again.
  SyncableFileSystemView filteredView = getFileSystemView(metaClient.getActiveTimeline().findInstantsBefore("2"), false);
  assertEquals(1, filteredView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId1)).count());
  assertEquals(1, filteredView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId2)).count());
  assertEquals(1, filteredView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId3)).count());
  assertEquals(1, filteredView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId4)).count());

  // Ensure the replaced-file-group lookups work across all instants.
  List<HoodieFileGroup> replacedOnInstant1 = fsView.getReplacedFileGroupsBeforeOrOn("1", partitionPath1).collect(Collectors.toList());
  assertEquals(0, replacedOnInstant1.size());
  List<HoodieFileGroup> allReplaced = fsView.getReplacedFileGroupsBeforeOrOn("2", partitionPath1).collect(Collectors.toList());
  assertEquals(1, allReplaced.size());
  assertEquals(fileId1, allReplaced.get(0).getFileGroupId().getFileId());
  allReplaced = fsView.getReplacedFileGroupsBefore("2", partitionPath1).collect(Collectors.toList());
  assertEquals(0, allReplaced.size());
  allReplaced = fsView.getAllReplacedFileGroups(partitionPath1).collect(Collectors.toList());
  assertEquals(1, allReplaced.size());
  assertEquals(fileId1, allReplaced.get(0).getFileGroupId().getFileId());
}
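The saveAsComplete helper used throughout this test is not shown in the snippet. A plausible sketch, assuming it simply drives the instant through the standard HoodieActiveTimeline state transitions (createNewInstant, transitionRequestedToInflight, saveAsComplete); the actual helper in the test class may differ:

private static void saveAsComplete(HoodieActiveTimeline timeline, HoodieInstant inflight, Option<byte[]> data) {
  // Assumed flow: register the instant as requested, move it to inflight, then complete it with the given payload.
  HoodieInstant requested = new HoodieInstant(HoodieInstant.State.REQUESTED, inflight.getAction(), inflight.getTimestamp());
  timeline.createNewInstant(requested);
  timeline.transitionRequestedToInflight(requested, Option.empty());
  timeline.saveAsComplete(inflight, data);
}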
Use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.
The class TestHoodieTableFileSystemView, method testCloseHoodieTableFileSystemView.
@Test
public void testCloseHoodieTableFileSystemView() throws Exception {
  String instantTime1 = "1";
  String instantTime2 = "2";
  String clusteringInstantTime3 = "3";
  String clusteringInstantTime4 = "4";

  // Prepare metadata
  HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
  Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
  List<String> replacedFileIds = new ArrayList<>();
  replacedFileIds.add("fake_file_id");
  partitionToReplaceFileIds.put("fake_partition_path", replacedFileIds);

  // Prepare instants
  HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime1);
  HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime2);
  HoodieInstant clusteringInstant3 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, clusteringInstantTime3);
  HoodieInstant clusteringInstant4 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, clusteringInstantTime4);
  HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(Collections.emptyList(), partitionToReplaceFileIds,
      Option.empty(), WriteOperationType.CLUSTER, "", HoodieTimeline.REPLACE_COMMIT_ACTION);
  saveAsComplete(commitTimeline, instant1, Option.empty());
  saveAsComplete(commitTimeline, instant2, Option.empty());
  saveAsComplete(commitTimeline, clusteringInstant3, Option.empty());
  saveAsComplete(commitTimeline, clusteringInstant4, Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
  refreshFsView();

  // Simulate archiving having deleted the replace commit's meta files (completed, requested, and inflight).
  boolean deleteReplaceCommit = new File(this.basePath + "/.hoodie/" + clusteringInstantTime3 + ".replacecommit").delete();
  boolean deleteReplaceCommitRequested = new File(this.basePath + "/.hoodie/" + clusteringInstantTime3 + ".replacecommit.requested").delete();
  boolean deleteReplaceCommitInflight = new File(this.basePath + "/.hoodie/" + clusteringInstantTime3 + ".replacecommit.inflight").delete();

  // Confirm all three meta files were deleted, then verify close() still succeeds.
  assertTrue(deleteReplaceCommit && deleteReplaceCommitInflight && deleteReplaceCommitRequested);
  assertDoesNotThrow(() -> fsView.close());
}
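The three direct deletes above rely on Hudi's timeline layout, where each instant is tracked under .hoodie/ by meta files named for its time, action, and state. For illustration, a small hypothetical helper (deleteReplaceCommitMetaFiles is not part of the test) that performs the same cleanup for any replacecommit instant:

// Hypothetical helper: removes the completed, requested, and inflight meta files
// of a replacecommit instant, simulating what archival might leave behind.
private static boolean deleteReplaceCommitMetaFiles(String basePath, String instantTime) {
  boolean deleted = true;
  for (String suffix : new String[] {".replacecommit", ".replacecommit.requested", ".replacecommit.inflight"}) {
    deleted &= new File(basePath + "/.hoodie/" + instantTime + suffix).delete();
  }
  return deleted;
}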
Use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.
The class HoodieTableMetadataUtil, method processRollbackMetadata.
/**
 * Extracts information about the deleted and appended files from the {@code HoodieRollbackMetadata}.
 * <p>
 * During a rollback, files may be deleted (COW, MOR) or rollback blocks may be appended to files (MOR only). This
 * function extracts these file changes for each partition.
 *
 * @param metadataTableTimeline Current timeline of the Metadata Table
 * @param rollbackMetadata {@code HoodieRollbackMetadata}
 * @param partitionToDeletedFiles The {@code Map} to fill with files deleted per partition.
 * @param partitionToAppendedFiles The {@code Map} to fill with files appended per partition and their sizes.
 * @param lastSyncTs Timestamp of the latest instant already synced to the Metadata Table, if known.
 */
private static void processRollbackMetadata(HoodieActiveTimeline metadataTableTimeline, HoodieRollbackMetadata rollbackMetadata,
    Map<String, List<String>> partitionToDeletedFiles, Map<String, Map<String, Long>> partitionToAppendedFiles,
    Option<String> lastSyncTs) {
  rollbackMetadata.getPartitionMetadata().values().forEach(pm -> {
    final String instantToRollback = rollbackMetadata.getCommitsRollback().get(0);
    // Has this rollback produced new files?
    boolean hasRollbackLogFiles = pm.getRollbackLogFiles() != null && !pm.getRollbackLogFiles().isEmpty();
    boolean hasNonZeroRollbackLogFiles = hasRollbackLogFiles
        && pm.getRollbackLogFiles().values().stream().mapToLong(Long::longValue).sum() > 0;

    // If the instant-to-rollback has not been synced to the Metadata Table yet, there is no need to update metadata.
    // This can happen in two cases:
    // Case 1: The Metadata Table timeline is behind the instant-to-rollback.
    boolean shouldSkip = lastSyncTs.isPresent()
        && HoodieTimeline.compareTimestamps(instantToRollback, HoodieTimeline.GREATER_THAN, lastSyncTs.get());
    if (!hasNonZeroRollbackLogFiles && shouldSkip) {
      LOG.info(String.format("Skipping syncing of rollbackMetadata at %s, given metadata table is already synced up to %s",
          instantToRollback, lastSyncTs.get()));
      return;
    }

    // Case 2: The instant-to-rollback was never committed to the Metadata Table. This can happen if the instant-to-rollback
    // was a failed commit (never completed), as only completed instants are synced to the Metadata Table.
    // Even then, the required Metadata Table instants should not have been archived.
    HoodieInstant syncedInstant = new HoodieInstant(false, HoodieTimeline.DELTA_COMMIT_ACTION, instantToRollback);
    if (metadataTableTimeline.getCommitsTimeline().isBeforeTimelineStarts(syncedInstant.getTimestamp())) {
      throw new HoodieMetadataException(String.format("The instant %s required to sync rollback of %s has been archived",
          syncedInstant, instantToRollback));
    }
    shouldSkip = !metadataTableTimeline.containsInstant(syncedInstant);
    if (!hasNonZeroRollbackLogFiles && shouldSkip) {
      LOG.info(String.format("Skipping syncing of rollbackMetadata at %s, since this instant was never committed to Metadata Table",
          instantToRollback));
      return;
    }

    final String partition = pm.getPartitionPath();
    if ((!pm.getSuccessDeleteFiles().isEmpty() || !pm.getFailedDeleteFiles().isEmpty()) && !shouldSkip) {
      if (!partitionToDeletedFiles.containsKey(partition)) {
        partitionToDeletedFiles.put(partition, new ArrayList<>());
      }
      // Extract deleted file names from the absolute paths saved in getSuccessDeleteFiles()
      List<String> deletedFiles = pm.getSuccessDeleteFiles().stream().map(p -> new Path(p).getName())
          .collect(Collectors.toList());
      if (!pm.getFailedDeleteFiles().isEmpty()) {
        deletedFiles.addAll(pm.getFailedDeleteFiles().stream().map(p -> new Path(p).getName())
            .collect(Collectors.toList()));
      }
      partitionToDeletedFiles.get(partition).addAll(deletedFiles);
    }

    BiFunction<Long, Long, Long> fileMergeFn = (oldSize, newSizeCopy) -> {
      // If the same log file shows up with two sizes, keep the larger one, as the rollback file
      // could have been updated after the written log files were computed.
      return oldSize > newSizeCopy ? oldSize : newSizeCopy;
    };
    if (hasRollbackLogFiles) {
      if (!partitionToAppendedFiles.containsKey(partition)) {
        partitionToAppendedFiles.put(partition, new HashMap<>());
      }
      // Extract appended file names from the absolute paths saved in getRollbackLogFiles()
      pm.getRollbackLogFiles().forEach((path, size) -> {
        partitionToAppendedFiles.get(partition).merge(new Path(path).getName(), size, fileMergeFn);
      });
    }
  });
}
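The fileMergeFn passed to Map.merge above resolves duplicate log-file entries by keeping the larger size. A minimal, self-contained demo of these merge semantics (the file names and sizes below are made up):

import java.util.HashMap;
import java.util.Map;
import java.util.function.BiFunction;

public class MergeSemanticsDemo {
  public static void main(String[] args) {
    // Same resolution policy as fileMergeFn: keep the larger of the two sizes.
    BiFunction<Long, Long, Long> fileMergeFn = (oldSize, newSize) -> Math.max(oldSize, newSize);
    Map<String, Long> appendedFiles = new HashMap<>();
    appendedFiles.merge("log.1", 100L, fileMergeFn); // key absent: inserts 100
    appendedFiles.merge("log.1", 250L, fileMergeFn); // key present: keeps max(100, 250) = 250
    appendedFiles.merge("log.1", 180L, fileMergeFn); // key present: keeps max(250, 180) = 250
    System.out.println(appendedFiles);               // prints {log.1=250}
  }
}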
Use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.
The class HoodieBackedTableMetadata, method getValidInstantTimestamps.
private Set<String> getValidInstantTimestamps() {
  // Only those log files which have a corresponding completed instant on the dataset should be read.
  // This is because the metadata table is updated before the dataset instants are committed.
  HoodieActiveTimeline datasetTimeline = dataMetaClient.getActiveTimeline();
  Set<String> validInstantTimestamps = datasetTimeline.filterCompletedInstants().getInstants()
      .map(HoodieInstant::getTimestamp).collect(Collectors.toSet());

  // For any rollbacks and restores, we cannot neglect the instants that they are rolling back.
  // The rollback instant should be more recent than the start of the timeline for it to have rolled back any
  // instant for which we have a log block.
  final String earliestInstantTime = validInstantTimestamps.isEmpty()
      ? SOLO_COMMIT_TIMESTAMP : Collections.min(validInstantTimestamps);
  datasetTimeline.getRollbackAndRestoreTimeline().filterCompletedInstants().getInstants()
      .filter(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.GREATER_THAN, earliestInstantTime))
      .forEach(instant -> validInstantTimestamps.addAll(getRollbackedCommits(instant, datasetTimeline)));

  // SOLO_COMMIT_TIMESTAMP is used during bootstrap, so it is a valid timestamp.
  validInstantTimestamps.add(SOLO_COMMIT_TIMESTAMP);
  return validInstantTimestamps;
}
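getRollbackedCommits is not shown in this snippet. Assuming rollback instants carry serialized HoodieRollbackMetadata in their instant details, a sketch of the rollback branch might look like the following (restore handling is omitted, and the real method may differ):

// Sketch only: assumes TimelineMetadataUtils.deserializeHoodieRollbackMetadata is available
// and that the instant's details contain the Avro-serialized rollback metadata.
private List<String> getRollbackedCommits(HoodieInstant instant, HoodieActiveTimeline timeline) {
  try {
    if (instant.getAction().equals(HoodieTimeline.ROLLBACK_ACTION)) {
      HoodieRollbackMetadata rollbackMetadata =
          TimelineMetadataUtils.deserializeHoodieRollbackMetadata(timeline.getInstantDetails(instant).get());
      return rollbackMetadata.getCommitsRollback();
    }
    return Collections.emptyList();
  } catch (IOException e) {
    throw new HoodieMetadataException("Error retrieving rollback commits for instant " + instant, e);
  }
}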
Use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.
The class TestHoodieTableMetaClient, method checkSerDe.
@Test
public void checkSerDe() {
  // Check that after this object is serialized and deserialized, we are still able to read from the file system.
  HoodieTableMetaClient deserializedMetaClient = HoodieTestUtils.serializeDeserialize(metaClient, HoodieTableMetaClient.class);
  assertNotNull(deserializedMetaClient);
  HoodieActiveTimeline commitTimeline = deserializedMetaClient.getActiveTimeline();
  HoodieInstant instant = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1");
  commitTimeline.createNewInstant(instant);
  commitTimeline.saveAsComplete(instant, Option.of("test-detail".getBytes()));
  commitTimeline = commitTimeline.reload();
  HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant);
  assertEquals(completedInstant, commitTimeline.getInstants().findFirst().get(), "Commit should be 1 and completed");
  assertArrayEquals("test-detail".getBytes(), commitTimeline.getInstantDetails(completedInstant).get(),
      "Commit value should be \"test-detail\"");
}
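HoodieTestUtils.serializeDeserialize is a test helper whose body is not shown. A minimal sketch of such a round trip using plain Java serialization (the SerDeTestSupport class is hypothetical, and the actual helper may be implemented differently, e.g. with another serialization library):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

final class SerDeTestSupport {
  // Hypothetical stand-in for HoodieTestUtils.serializeDeserialize: write the object out
  // through Java serialization, read it back, and return the typed copy.
  static <T extends Serializable> T serializeDeserialize(T object, Class<T> clazz) throws IOException, ClassNotFoundException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (ObjectOutputStream out = new ObjectOutputStream(bytes)) {
      out.writeObject(object);
    }
    try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
      return clazz.cast(in.readObject());
    }
  }
}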