Use of org.apache.hudi.common.util.collection.Pair in project hudi by apache.
The class BaseHoodieWriteClient, method getInflightTimelineExcludeCompactionAndClustering.
/**
 * Get the inflight timeline, excluding compaction and clustering instants.
 *
 * @param metaClient Hoodie table meta client
 * @return inflight timeline without pending compaction and clustering instants
 */
private HoodieTimeline getInflightTimelineExcludeCompactionAndClustering(HoodieTableMetaClient metaClient) {
  HoodieTimeline inflightTimelineWithReplaceCommit = metaClient.getCommitsTimeline().filterPendingExcludingCompaction();
  HoodieTimeline inflightTimelineExcludeClusteringCommit = inflightTimelineWithReplaceCommit.filter(instant -> {
    if (instant.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION)) {
      Option<Pair<HoodieInstant, HoodieClusteringPlan>> instantPlan = ClusteringUtils.getClusteringPlan(metaClient, instant);
      return !instantPlan.isPresent();
    } else {
      return true;
    }
  });
  return inflightTimelineExcludeClusteringCommit;
}
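The filter above keeps a replace-commit instant only when no clustering plan can be resolved for it, i.e. when the Option wrapping the (instant, plan) Pair is empty. Below is a minimal, standalone sketch of that presence check, assuming only hudi-common on the classpath; lookupPlan and the instant strings are hypothetical stand-ins, not Hudi APIs.

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;

public class PlanFilterSketch {
  // Hypothetical lookup: only instant "0002" has an associated plan.
  static Option<Pair<String, String>> lookupPlan(String instantTime) {
    if ("0002".equals(instantTime)) {
      return Option.of(Pair.of(instantTime, "plan-for-0002"));
    }
    return Option.empty();
  }

  public static void main(String[] args) {
    List<String> instants = Arrays.asList("0001", "0002", "0003");
    // Keep only instants that do NOT have a plan, mirroring the filter above.
    List<String> withoutPlan = instants.stream()
        .filter(t -> !lookupPlan(t).isPresent())
        .collect(Collectors.toList());
    System.out.println(withoutPlan); // [0001, 0003]
  }
}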
Use of org.apache.hudi.common.util.collection.Pair in project hudi by apache.
The class CleanMetadataV1MigrationHandler, method downgradeFrom.
@Override
public HoodieCleanMetadata downgradeFrom(HoodieCleanMetadata input) {
  ValidationUtils.checkArgument(input.getVersion() == 2, "Input version is " + input.getVersion() + ". Must be 2");
  final Path basePath = new Path(metaClient.getBasePath());
  final Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap = input.getPartitionMetadata().entrySet().stream()
      .map(entry -> {
        final String partitionPath = entry.getKey();
        final HoodieCleanPartitionMetadata partitionMetadata = entry.getValue();
        HoodieCleanPartitionMetadata cleanPartitionMetadata = HoodieCleanPartitionMetadata.newBuilder()
            .setPartitionPath(partitionPath)
            .setPolicy(partitionMetadata.getPolicy())
            .setDeletePathPatterns(partitionMetadata.getDeletePathPatterns().stream()
                .map(path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path)).collect(Collectors.toList()))
            .setSuccessDeleteFiles(partitionMetadata.getSuccessDeleteFiles().stream()
                .map(path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path)).collect(Collectors.toList()))
            .setFailedDeleteFiles(partitionMetadata.getFailedDeleteFiles().stream()
                .map(path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path)).collect(Collectors.toList()))
            .build();
        return Pair.of(partitionPath, cleanPartitionMetadata);
      })
      .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
  return HoodieCleanMetadata.newBuilder()
      .setEarliestCommitToRetain(input.getEarliestCommitToRetain())
      .setStartCleanTime(input.getStartCleanTime())
      .setTimeTakenInMillis(input.getTimeTakenInMillis())
      .setTotalFilesDeleted(input.getTotalFilesDeleted())
      .setPartitionMetadata(partitionMetadataMap)
      .setVersion(getManagedVersion())
      .build();
}
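The map-building idiom used above, mapping each entry to a Pair and then collecting with Pair::getKey / Pair::getValue, can be reproduced in isolation. A minimal sketch, assuming only hudi-common on the classpath; the partition strings and the derived value are made up for illustration.

import java.util.Arrays;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hudi.common.util.collection.Pair;

public class PairToMapSketch {
  public static void main(String[] args) {
    // Derive a value per key, carry both through the stream as a Pair, then collect to a Map.
    Map<String, Integer> lengthsByPartition = Arrays.asList("2021/01/01", "2021/01/02").stream()
        .map(partition -> Pair.of(partition, partition.length()))
        .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    System.out.println(lengthsByPartition); // e.g. {2021/01/01=10, 2021/01/02=10}
  }
}

This works because Hudi's Pair implements Map.Entry, so getKey() and getValue() are the left and right components.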
Use of org.apache.hudi.common.util.collection.Pair in project hudi by apache.
The class AbstractTableFileSystemView, method getLatestUnCompactedFileSlices.
@Override
public final Stream<FileSlice> getLatestUnCompactedFileSlices(String partitionStr) {
  try {
    readLock.lock();
    String partitionPath = formatPartitionKey(partitionStr);
    ensurePartitionLoadedCorrectly(partitionPath);
    return fetchAllStoredFileGroups(partitionPath)
        .filter(fg -> !isFileGroupReplaced(fg.getFileGroupId()))
        .map(fileGroup -> {
          FileSlice fileSlice = fileGroup.getLatestFileSlice().get();
          // if the file-group is under compaction, pick the latest before compaction instant time.
          Option<Pair<String, CompactionOperation>> compactionWithInstantPair =
              getPendingCompactionOperationWithInstant(fileSlice.getFileGroupId());
          if (compactionWithInstantPair.isPresent()) {
            String compactionInstantTime = compactionWithInstantPair.get().getLeft();
            return fileGroup.getLatestFileSliceBefore(compactionInstantTime);
          }
          return Option.of(fileSlice);
        }).map(Option::get).map(this::addBootstrapBaseFileIfPresent);
  } finally {
    readLock.unlock();
  }
}
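The pending-compaction lookup above returns an Option<Pair<instantTime, CompactionOperation>>, and only the left component (the instant time) is consulted. A standalone sketch of that unwrap-or-fallback pattern, assuming hudi-common on the classpath; the plain string "compaction-operation" is a hypothetical stand-in for a CompactionOperation.

import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;

public class PendingCompactionSketch {
  public static void main(String[] args) {
    // Pretend a file group has a pending compaction scheduled at instant "0005".
    Option<Pair<String, String>> pending = Option.of(Pair.of("0005", "compaction-operation"));

    // If a compaction is pending, use its instant time; otherwise fall back to the latest slice.
    String decision = pending.isPresent()
        ? "use latest file slice before instant " + pending.get().getLeft()
        : "use latest file slice";
    System.out.println(decision);
  }
}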
Use of org.apache.hudi.common.util.collection.Pair in project hudi by apache.
The class AbstractTableFileSystemView, method buildFileGroups.
protected List<HoodieFileGroup> buildFileGroups(Stream<HoodieBaseFile> baseFileStream, Stream<HoodieLogFile> logFileStream,
    HoodieTimeline timeline, boolean addPendingCompactionFileSlice) {
  // Group base files and log files by (partition path, file id) so they can be stitched into file groups.
  Map<Pair<String, String>, List<HoodieBaseFile>> baseFiles = baseFileStream.collect(Collectors.groupingBy(baseFile -> {
    String partitionPathStr = getPartitionPathFromFilePath(baseFile.getPath());
    return Pair.of(partitionPathStr, baseFile.getFileId());
  }));
  Map<Pair<String, String>, List<HoodieLogFile>> logFiles = logFileStream.collect(Collectors.groupingBy(logFile -> {
    String partitionPathStr = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), logFile.getPath().getParent());
    return Pair.of(partitionPathStr, logFile.getFileId());
  }));
  Set<Pair<String, String>> fileIdSet = new HashSet<>(baseFiles.keySet());
  fileIdSet.addAll(logFiles.keySet());
  List<HoodieFileGroup> fileGroups = new ArrayList<>();
  fileIdSet.forEach(pair -> {
    String fileId = pair.getValue();
    HoodieFileGroup group = new HoodieFileGroup(pair.getKey(), fileId, timeline);
    if (baseFiles.containsKey(pair)) {
      baseFiles.get(pair).forEach(group::addBaseFile);
    }
    if (logFiles.containsKey(pair)) {
      logFiles.get(pair).forEach(group::addLogFile);
    }
    if (addPendingCompactionFileSlice) {
      Option<Pair<String, CompactionOperation>> pendingCompaction = getPendingCompactionOperationWithInstant(group.getFileGroupId());
      if (pendingCompaction.isPresent()) {
        // If there is no delta-commit after the compaction request, this step ensures a new file-slice appears
        // so that any new ingestion uses the correct base instant.
        group.addNewFileSliceAtInstant(pendingCompaction.get().getKey());
      }
    }
    fileGroups.add(group);
  });
  return fileGroups;
}
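buildFileGroups relies on Pair as a composite map key (partition path plus file id), which works because Pair defines equals and hashCode over both components. A minimal sketch of that grouping, assuming hudi-common on the classpath; the file-name layout and parsing are made up in place of HoodieBaseFile.

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hudi.common.util.collection.Pair;

public class CompositeKeyGroupingSketch {
  public static void main(String[] args) {
    // Each entry is "partitionPath/fileId-version"; group by (partitionPath, fileId).
    List<String> files = Arrays.asList("2021/01/01/f1-v1", "2021/01/01/f1-v2", "2021/01/02/f2-v1");
    Map<Pair<String, String>, List<String>> byPartitionAndFileId = files.stream()
        .collect(Collectors.groupingBy(f -> {
          String[] parts = f.split("/");
          String partition = String.join("/", parts[0], parts[1], parts[2]);
          String fileId = parts[3].split("-")[0];
          return Pair.of(partition, fileId);
        }));
    System.out.println(byPartitionAndFileId.keySet()); // e.g. [(2021/01/01,f1), (2021/01/02,f2)]
  }
}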
Use of org.apache.hudi.common.util.collection.Pair in project hudi by apache.
The class IncrementalTimelineSyncFileSystemView, method addRestoreInstant.
/**
 * Add a newly found restore instant.
 *
 * @param timeline Hoodie timeline
 * @param instant Restore instant
 */
private void addRestoreInstant(HoodieTimeline timeline, HoodieInstant instant) throws IOException {
  LOG.info("Syncing restore instant (" + instant + ")");
  HoodieRestoreMetadata metadata =
      TimelineMetadataUtils.deserializeAvroMetadata(timeline.getInstantDetails(instant).get(), HoodieRestoreMetadata.class);
  // Collect (partition, file) pairs for every file the restore successfully deleted, grouped by partition.
  Map<String, List<Pair<String, String>>> partitionFiles = metadata.getHoodieRestoreMetadata().entrySet().stream()
      .flatMap(entry -> entry.getValue().stream()
          .flatMap(e -> e.getPartitionMetadata().entrySet().stream()
              .flatMap(e2 -> e2.getValue().getSuccessDeleteFiles().stream().map(x -> Pair.of(e2.getKey(), x)))))
      .collect(Collectors.groupingBy(Pair::getKey));
  partitionFiles.forEach((partition, files) ->
      removeFileSlicesForPartition(timeline, instant, partition,
          files.stream().map(Pair::getValue).collect(Collectors.toList())));
  if (metadata.getRestoreInstantInfo() != null) {
    Set<String> rolledbackInstants = metadata.getRestoreInstantInfo().stream()
        .filter(instantInfo -> HoodieTimeline.REPLACE_COMMIT_ACTION.equals(instantInfo.getAction()))
        .map(instantInfo -> instantInfo.getCommitTime())
        .collect(Collectors.toSet());
    removeReplacedFileIdsAtInstants(rolledbackInstants);
  }
  LOG.info("Done Syncing restore instant (" + instant + ")");
}
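The restore handler above first flattens the restore metadata into (partition, file) pairs, then groups them by partition and finally strips each group back to bare file names. A small standalone sketch of that group-then-unwrap pattern, assuming hudi-common on the classpath; the partition and file names are made up.

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hudi.common.util.collection.Pair;

public class GroupByPartitionSketch {
  public static void main(String[] args) {
    List<Pair<String, String>> deletedFiles = Arrays.asList(
        Pair.of("2021/01/01", "file-a.parquet"),
        Pair.of("2021/01/01", "file-b.parquet"),
        Pair.of("2021/01/02", "file-c.parquet"));

    // Group by partition (the left/key side of each Pair).
    Map<String, List<Pair<String, String>>> byPartition = deletedFiles.stream()
        .collect(Collectors.groupingBy(Pair::getKey));

    // For each partition, keep only the file names (the right/value side).
    byPartition.forEach((partition, pairs) ->
        System.out.println(partition + " -> "
            + pairs.stream().map(Pair::getValue).collect(Collectors.toList())));
  }
}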