Use of org.apache.hudi.common.model.HoodieFileGroup in the Apache Hudi project.
The class IncrementalTimelineSyncFileSystemView, method removeFileSlicesForPartition.
private void removeFileSlicesForPartition(HoodieTimeline timeline, HoodieInstant instant, String partition,
    List<String> paths) {
  if (isPartitionAvailableInStore(partition)) {
    LOG.info("Removing file slices for partition (" + partition + ") for instant (" + instant + ")");
    FileStatus[] statuses = paths.stream().map(p -> {
      FileStatus status = new FileStatus();
      status.setPath(new Path(p));
      return status;
    }).toArray(FileStatus[]::new);
    List<HoodieFileGroup> fileGroups =
        buildFileGroups(statuses, timeline.filterCompletedAndCompactionInstants(), false);
    applyDeltaFileSlicesToPartitionView(partition, fileGroups, DeltaApplyMode.REMOVE);
  } else {
    LOG.warn("Skipping partition (" + partition + ") when syncing instant (" + instant + ") as it is not loaded");
  }
}
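The method above only needs the paths of the removed files, so it wraps each path in a bare FileStatus instead of re-listing the file system. A minimal standalone sketch of that conversion, with hypothetical paths:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

public class PathsToStatuses {
  public static void main(String[] args) {
    // Hypothetical file paths reported as removed by an instant's metadata.
    List<String> paths = Arrays.asList(
        "/tmp/hudi/2020/06/27/f1_1-0-1_001.parquet",
        "/tmp/hudi/2020/06/27/f2_1-0-1_001.parquet");
    // Wrap each path in a FileStatus; only the path field is populated,
    // which is all the downstream grouping logic needs.
    FileStatus[] statuses = paths.stream().map(p -> {
      FileStatus status = new FileStatus();
      status.setPath(new Path(p));
      return status;
    }).toArray(FileStatus[]::new);
    System.out.println(statuses.length + " statuses built");
  }
}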
Use of org.apache.hudi.common.model.HoodieFileGroup in the Apache Hudi project.
The class IncrementalTimelineSyncFileSystemView, method addPendingCompactionInstant.
/**
* Add newly found compaction instant.
*
* @param timeline Hoodie Timeline
* @param instant Compaction Instant
*/
private void addPendingCompactionInstant(HoodieTimeline timeline, HoodieInstant instant) throws IOException {
  LOG.info("Syncing pending compaction instant (" + instant + ")");
  HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(metaClient, instant.getTimestamp());
  List<Pair<String, CompactionOperation>> pendingOps =
      CompactionUtils.getPendingCompactionOperations(instant, compactionPlan)
          .map(p -> Pair.of(p.getValue().getKey(),
              CompactionOperation.convertFromAvroRecordInstance(p.getValue().getValue())))
          .collect(Collectors.toList());
  // First, update pending compaction instants
  addPendingCompactionOperations(pendingOps.stream());
  Map<String, List<Pair<String, HoodieFileGroup>>> partitionToFileGroups = pendingOps.stream().map(opPair -> {
    String compactionInstantTime = opPair.getKey();
    HoodieFileGroup fileGroup = new HoodieFileGroup(opPair.getValue().getFileGroupId(), timeline);
    fileGroup.addNewFileSliceAtInstant(compactionInstantTime);
    return Pair.of(compactionInstantTime, fileGroup);
  }).collect(Collectors.groupingBy(x -> x.getValue().getPartitionPath()));
  partitionToFileGroups.entrySet().forEach(entry -> {
    if (isPartitionAvailableInStore(entry.getKey())) {
      applyDeltaFileSlicesToPartitionView(entry.getKey(),
          entry.getValue().stream().map(Pair::getValue).collect(Collectors.toList()), DeltaApplyMode.ADD);
    }
  });
}
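The lambda in the middle is the interesting part: for each pending operation it builds a HoodieFileGroup keyed by the target file group id and marks a new, still-empty file slice at the compaction instant, so readers of the view treat the compaction as in-flight. A sketch of that step in isolation (the helper name and standalone shape are illustrative; the HoodieFileGroup and HoodieFileGroupId calls are the ones used above):

import org.apache.hudi.common.model.HoodieFileGroup;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.table.timeline.HoodieTimeline;

// Given a pending compaction's target file group and instant time, register
// a new (empty) file slice at that instant on a fresh HoodieFileGroup.
static HoodieFileGroup pendingCompactionFileGroup(HoodieTimeline timeline, String partitionPath, String fileId,
    String compactionInstantTime) {
  HoodieFileGroup fileGroup = new HoodieFileGroup(new HoodieFileGroupId(partitionPath, fileId), timeline);
  fileGroup.addNewFileSliceAtInstant(compactionInstantTime);
  return fileGroup;
}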
Use of org.apache.hudi.common.model.HoodieFileGroup in the Apache Hudi project.
The class SpillableMapBasedFileSystemView, method createPartitionToFileGroups.
@Override
protected Map<String, List<HoodieFileGroup>> createPartitionToFileGroups() {
  try {
    LOG.info("Creating Partition To File groups map using external spillable Map. Max Mem="
        + maxMemoryForFileGroupMap + ", BaseDir=" + baseStoreDir);
    new File(baseStoreDir).mkdirs();
    return (Map<String, List<HoodieFileGroup>>) (new ExternalSpillableMap<>(maxMemoryForFileGroupMap, baseStoreDir,
        new DefaultSizeEstimator(), new DefaultSizeEstimator<>(), diskMapType, isBitCaskDiskMapCompressionEnabled));
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
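ExternalSpillableMap behaves like an ordinary java.util.Map but keeps entries on the heap only until the configured memory budget is exceeded, after which it spills to files under the base directory. A sketch of standalone usage, assuming the constructor shape shown above and the BITCASK disk map type (all parameter values are illustrative):

import java.io.IOException;
import org.apache.hudi.common.util.DefaultSizeEstimator;
import org.apache.hudi.common.util.collection.ExternalSpillableMap;

static void spillableMapDemo() throws IOException {
  ExternalSpillableMap<String, String> map = new ExternalSpillableMap<>(
      1024 * 1024L, // in-memory budget in bytes; overflow spills to disk
      "/tmp/spillable", // base directory for spill files
      new DefaultSizeEstimator<>(), // key size estimator
      new DefaultSizeEstimator<>(), // value size estimator
      ExternalSpillableMap.DiskMapType.BITCASK, // assumed disk map type
      false); // disk map compression disabled
  map.put("2020/06/27", "file-groups-for-partition");
  System.out.println(map.get("2020/06/27"));
}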
Use of org.apache.hudi.common.model.HoodieFileGroup in the Apache Hudi project.
The class TestHoodieTableFileSystemView, method testReplaceFileIdIsExcludedInView.
@Test
public void testReplaceFileIdIsExcludedInView() throws IOException {
  String partitionPath1 = "2020/06/27";
  String partitionPath2 = "2020/07/14";
  new File(basePath + "/" + partitionPath1).mkdirs();
  new File(basePath + "/" + partitionPath2).mkdirs();
  // create two file ids in partition1 - fileId1 is replaced later on.
  String fileId1 = UUID.randomUUID().toString();
  String fileId2 = UUID.randomUUID().toString();
  // create two file ids in partition2 - fileId3 and fileId4 are replaced later on.
  String fileId3 = UUID.randomUUID().toString();
  String fileId4 = UUID.randomUUID().toString();
  assertFalse(roView.getLatestBaseFiles(partitionPath1)
      .anyMatch(dfile -> dfile.getFileId().equals(fileId1) || dfile.getFileId().equals(fileId2)),
      "No commit, should not find any data file");
  assertFalse(roView.getLatestBaseFiles(partitionPath2)
      .anyMatch(dfile -> dfile.getFileId().equals(fileId3) || dfile.getFileId().equals(fileId4)),
      "No commit, should not find any data file");
  // Only one commit
  String commitTime1 = "1";
  String fileName1 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1);
  String fileName2 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2);
  String fileName3 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId3);
  String fileName4 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId4);
  new File(basePath + "/" + partitionPath1 + "/" + fileName1).createNewFile();
  new File(basePath + "/" + partitionPath1 + "/" + fileName2).createNewFile();
  new File(basePath + "/" + partitionPath2 + "/" + fileName3).createNewFile();
  new File(basePath + "/" + partitionPath2 + "/" + fileName4).createNewFile();
  Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
  List<String> replacedFileIdsP1 = new ArrayList<>();
  replacedFileIdsP1.add(fileId1);
  partitionToReplaceFileIds.put(partitionPath1, replacedFileIdsP1);
  List<String> replacedFileIdsP2 = new ArrayList<>();
  replacedFileIdsP2.add(fileId3);
  replacedFileIdsP2.add(fileId4);
  partitionToReplaceFileIds.put(partitionPath2, replacedFileIdsP2);
  HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(Collections.emptyList(), partitionToReplaceFileIds,
      Option.empty(), WriteOperationType.INSERT_OVERWRITE, "", HoodieTimeline.REPLACE_COMMIT_ACTION);
  HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
  HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, commitTime1);
  saveAsComplete(commitTimeline, instant1, Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
  refreshFsView();
  assertEquals(0, roView.getLatestBaseFiles(partitionPath1)
      .filter(dfile -> dfile.getFileId().equals(fileId1)).count());
  assertEquals(fileName2, roView.getLatestBaseFiles(partitionPath1)
      .filter(dfile -> dfile.getFileId().equals(fileId2)).findFirst().get().getFileName());
  assertEquals(0, roView.getLatestBaseFiles(partitionPath2)
      .filter(dfile -> dfile.getFileId().equals(fileId3)).count());
  assertEquals(0, roView.getLatestBaseFiles(partitionPath2)
      .filter(dfile -> dfile.getFileId().equals(fileId4)).count());
  // ensure getReplacedFileGroupsBeforeOrOn works with all instants
  List<HoodieFileGroup> replacedOnInstant1 =
      fsView.getReplacedFileGroupsBeforeOrOn("0", partitionPath1).collect(Collectors.toList());
  assertEquals(0, replacedOnInstant1.size());
  List<HoodieFileGroup> allReplaced =
      fsView.getReplacedFileGroupsBeforeOrOn("2", partitionPath1).collect(Collectors.toList());
  allReplaced.addAll(fsView.getReplacedFileGroupsBeforeOrOn("2", partitionPath2).collect(Collectors.toList()));
  assertEquals(3, allReplaced.size());
  Set<String> allReplacedFileIds = allReplaced.stream()
      .map(fg -> fg.getFileGroupId().getFileId()).collect(Collectors.toSet());
  Set<String> expectedReplacedFileIds = Stream.of(fileId1, fileId3, fileId4).collect(Collectors.toSet());
  assertEquals(expectedReplacedFileIds, allReplacedFileIds);
}
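A note on the query semantics the last assertions rely on: getReplacedFileGroupsBeforeOrOn filters replaced file groups by the replace instant, and the bound is inclusive. With the single replacecommit at instant "1" set up above:

// Nothing was replaced before the replacecommit at "1":
long beforeAny = fsView.getReplacedFileGroupsBeforeOrOn("0", partitionPath1).count(); // 0
// The bound is "or on", i.e. inclusive, so querying at exactly "1" should
// already include fileId1's group (a hedge: the test above only asserts the
// looser bound "2"):
long atReplaceInstant = fsView.getReplacedFileGroupsBeforeOrOn("1", partitionPath1).count(); // expected 1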
Use of org.apache.hudi.common.model.HoodieFileGroup in the Apache Hudi project.
The class TestHoodieTableFileSystemView, method testStreamEveryVersionInPartition.
protected void testStreamEveryVersionInPartition(boolean isLatestFileSliceOnly) throws IOException {
  // Put some files in the partition
  String fullPartitionPath = basePath + "/2016/05/01/";
  new File(fullPartitionPath).mkdirs();
  String commitTime1 = "1";
  String commitTime2 = "2";
  String commitTime3 = "3";
  String commitTime4 = "4";
  String fileId1 = UUID.randomUUID().toString();
  String fileId2 = UUID.randomUUID().toString();
  String fileId3 = UUID.randomUUID().toString();
  new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)).createNewFile();
  new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)).createNewFile();
  new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)).createNewFile();
  new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)).createNewFile();
  new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)).createNewFile();
  new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)).createNewFile();
  new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)).createNewFile();
  new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
  new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
  new File(basePath + "/.hoodie/" + commitTime3 + ".commit").createNewFile();
  new File(basePath + "/.hoodie/" + commitTime4 + ".commit").createNewFile();
  // Now we list the entire partition
  FileStatus[] statuses = metaClient.getFs().listStatus(new Path(fullPartitionPath));
  assertEquals(7, statuses.length);
  refreshFsView();
  List<HoodieFileGroup> fileGroups = fsView.getAllFileGroups("2016/05/01").collect(Collectors.toList());
  assertEquals(3, fileGroups.size());
  for (HoodieFileGroup fileGroup : fileGroups) {
    String fileId = fileGroup.getFileGroupId().getFileId();
    Set<String> filenames = new HashSet<>();
    fileGroup.getAllBaseFiles().forEach(dataFile -> {
      assertEquals(fileId, dataFile.getFileId(), "All same fileId should be grouped");
      filenames.add(dataFile.getFileName());
    });
    Set<String> expFileNames = new HashSet<>();
    if (fileId.equals(fileId1)) {
      if (!isLatestFileSliceOnly) {
        expFileNames.add(FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1));
      }
      expFileNames.add(FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1));
      assertEquals(expFileNames, filenames);
    } else if (fileId.equals(fileId2)) {
      if (!isLatestFileSliceOnly) {
        expFileNames.add(FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2));
        expFileNames.add(FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId2));
      }
      expFileNames.add(FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2));
      assertEquals(expFileNames, filenames);
    } else {
      if (!isLatestFileSliceOnly) {
        expFileNames.add(FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3));
      }
      expFileNames.add(FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3));
      assertEquals(expFileNames, filenames);
    }
  }
}
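The traversal pattern in this test is the general way to stream every version in a partition: one HoodieFileGroup per fileId, and within each group one base file per commit that wrote that fileId. Stripped of the assertions, the walk looks like this (using only calls exercised above):

// Print every base file version in the partition, grouped by file id.
fsView.getAllFileGroups("2016/05/01").forEach(fileGroup -> {
  String fileId = fileGroup.getFileGroupId().getFileId();
  fileGroup.getAllBaseFiles()
      .forEach(baseFile -> System.out.println(fileId + " -> " + baseFile.getFileName()));
});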