use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.
the class ScheduleCompactionActionExecutor method scheduleCompaction.
/**
 * Produces a compaction plan for the table if the configured inline-compaction trigger
 * strategy reports that compaction is due; otherwise returns an empty plan.
 *
 * <p>File groups that already have a pending compaction operation or are part of a pending
 * clustering are gathered into one set and passed to the compactor together with the
 * table, the write config and the instant time.
 *
 * @return the plan generated by the compactor, or a fresh empty {@link HoodieCompactionPlan}
 *         when {@code needCompact} returns {@code false}
 * @throws HoodieCompactionException if plan generation fails with an {@link IOException}
 */
private HoodieCompactionPlan scheduleCompaction() {
  LOG.info("Checking if compaction needs to be run on " + config.getBasePath());

  // Ask the trigger strategy (delta-commit count and/or elapsed time) whether to compact at all.
  if (!needCompact(config.getInlineCompactTriggerStrategy())) {
    return new HoodieCompactionPlan();
  }

  LOG.info("Generating compaction plan for merge on read table " + config.getBasePath());
  try {
    SyncableFileSystemView sliceView = (SyncableFileSystemView) table.getSliceView();

    // Start with the file groups that already have a compaction pending ...
    Set<HoodieFileGroupId> busyFileGroups = sliceView.getPendingCompactionOperations()
        .map(pendingOp -> pendingOp.getValue().getFileGroupId())
        .collect(Collectors.toSet());

    // ... then add the file groups in pending clustering so they are excluded from compaction too.
    Set<HoodieFileGroupId> clusteringFileGroups = sliceView.getFileGroupsInPendingClustering()
        .map(Pair::getLeft)
        .collect(Collectors.toSet());
    busyFileGroups.addAll(clusteringFileGroups);

    context.setJobStatus(this.getClass().getSimpleName(), "Compaction: generating compaction plan");
    return compactor.generateCompactionPlan(context, table, config, instantTime, busyFileGroups);
  } catch (IOException e) {
    throw new HoodieCompactionException("Could not schedule compaction " + config.getBasePath(), e);
  }
}
use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.
the class SpillableMapBasedFileSystemView method createFileIdToPendingCompactionMap.
/**
 * Creates the map from file group id to its pending compaction (instant time, operation)
 * using an {@link ExternalSpillableMap} instead of a plain in-memory map.
 *
 * <p>The map is constructed with {@code maxMemoryForPendingCompaction} and
 * {@code baseStoreDir}, and is pre-populated with every entry of the supplied map.
 *
 * @param fgIdToPendingCompaction entries to copy into the newly created map
 * @return the newly created map containing all entries of {@code fgIdToPendingCompaction}
 * @throws RuntimeException wrapping the {@link IOException} if the spillable map cannot be created
 */
@Override
protected Map<HoodieFileGroupId, Pair<String, CompactionOperation>> createFileIdToPendingCompactionMap(Map<HoodieFileGroupId, Pair<String, CompactionOperation>> fgIdToPendingCompaction) {
  try {
    LOG.info("Creating Pending Compaction map using external spillable Map. Max Mem=" + maxMemoryForPendingCompaction + ", BaseDir=" + baseStoreDir);
    // Create the base directory up front; the result is intentionally not checked
    // (mkdirs() also returns false when the directory already exists).
    new File(baseStoreDir).mkdirs();
    // Both size estimators use the diamond operator so no raw type / unchecked conversion is involved.
    Map<HoodieFileGroupId, Pair<String, CompactionOperation>> pendingMap = new ExternalSpillableMap<>(
        maxMemoryForPendingCompaction, baseStoreDir, new DefaultSizeEstimator<>(), new DefaultSizeEstimator<>(),
        diskMapType, isBitCaskDiskMapCompressionEnabled);
    pendingMap.putAll(fgIdToPendingCompaction);
    return pendingMap;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.
the class SpillableMapBasedFileSystemView method createFileIdToPendingClusteringMap.
/**
 * Creates the map from file group id to the instant of its pending clustering
 * using an {@link ExternalSpillableMap} instead of a plain in-memory map.
 *
 * <p>The map is constructed with {@code maxMemoryForClusteringFileGroups} and
 * {@code baseStoreDir}, and is pre-populated with every entry of the supplied map.
 *
 * @param fileGroupsInClustering entries to copy into the newly created map
 * @return the newly created map containing all entries of {@code fileGroupsInClustering}
 * @throws RuntimeException wrapping the {@link IOException} if the spillable map cannot be created
 */
@Override
protected Map<HoodieFileGroupId, HoodieInstant> createFileIdToPendingClusteringMap(final Map<HoodieFileGroupId, HoodieInstant> fileGroupsInClustering) {
  try {
    LOG.info("Creating file group id to clustering instant map using external spillable Map. Max Mem=" + maxMemoryForClusteringFileGroups + ", BaseDir=" + baseStoreDir);
    // Create the base directory up front; the result is intentionally not checked
    // (mkdirs() also returns false when the directory already exists).
    new File(baseStoreDir).mkdirs();
    // Both size estimators use the diamond operator so no raw type / unchecked conversion is involved.
    Map<HoodieFileGroupId, HoodieInstant> pendingMap = new ExternalSpillableMap<>(
        maxMemoryForClusteringFileGroups, baseStoreDir, new DefaultSizeEstimator<>(), new DefaultSizeEstimator<>(),
        diskMapType, isBitCaskDiskMapCompressionEnabled);
    pendingMap.putAll(fileGroupsInClustering);
    return pendingMap;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.
the class SpillableMapBasedFileSystemView method createFileIdToReplaceInstantMap.
/**
 * Creates the map from file group id to the instant that replaced it
 * using an {@link ExternalSpillableMap} instead of a plain in-memory map.
 *
 * <p>The map is constructed with {@code maxMemoryForReplaceFileGroups} and
 * {@code baseStoreDir}, and is pre-populated with every entry of the supplied map.
 *
 * @param replacedFileGroups entries to copy into the newly created map
 * @return the newly created map containing all entries of {@code replacedFileGroups}
 * @throws RuntimeException wrapping the {@link IOException} if the spillable map cannot be created
 */
@Override
protected Map<HoodieFileGroupId, HoodieInstant> createFileIdToReplaceInstantMap(final Map<HoodieFileGroupId, HoodieInstant> replacedFileGroups) {
  try {
    LOG.info("Creating file group id to replace instant map using external spillable Map. Max Mem=" + maxMemoryForReplaceFileGroups + ", BaseDir=" + baseStoreDir);
    // Create the base directory up front; the result is intentionally not checked
    // (mkdirs() also returns false when the directory already exists).
    new File(baseStoreDir).mkdirs();
    // Both size estimators use the diamond operator so no raw type / unchecked conversion is involved.
    Map<HoodieFileGroupId, HoodieInstant> pendingMap = new ExternalSpillableMap<>(
        maxMemoryForReplaceFileGroups, baseStoreDir, new DefaultSizeEstimator<>(), new DefaultSizeEstimator<>(),
        diskMapType, isBitCaskDiskMapCompressionEnabled);
    pendingMap.putAll(replacedFileGroups);
    return pendingMap;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.
the class AbstractTableFileSystemView method resetFileGroupsReplaced.
/**
 * Rebuilds the mapping from replaced file group to the replace instant that replaced it,
 * by reading the metadata of every completed replace instant on the given timeline.
 *
 * <p>For each instant the replace-commit metadata is loaded, and every
 * (partition path, file id) pair it lists is turned into a {@link HoodieFileGroupId} mapped
 * to that instant. The collected map is handed to {@code resetReplacedFileGroups}.
 *
 * <p>If reading an instant fails with a {@link HoodieIOException} whose underlying exception
 * is a {@link FileNotFoundException}, the instant is skipped with a warning; any other
 * failure is propagated.
 *
 * @param timeline timeline whose completed replace instants are scanned
 * @throws HoodieIOException if the metadata of an instant cannot be read (other than the
 *         file-not-found case described above)
 */
private void resetFileGroupsReplaced(HoodieTimeline timeline) {
  HoodieTimer hoodieTimer = new HoodieTimer();
  hoodieTimer.startTimer();
  // for each REPLACE instant, get map of (partitionPath -> deleteFileGroup)
  HoodieTimeline replacedTimeline = timeline.getCompletedReplaceTimeline();
  Stream<Map.Entry<HoodieFileGroupId, HoodieInstant>> resultStream = replacedTimeline.getInstants().flatMap(instant -> {
    try {
      HoodieReplaceCommitMetadata replaceMetadata = HoodieReplaceCommitMetadata.fromBytes(
          metaClient.getActiveTimeline().getInstantDetails(instant).get(), HoodieReplaceCommitMetadata.class);
      // get replace instant mapping for each partition, fileId
      return replaceMetadata.getPartitionToReplaceFileIds().entrySet().stream()
          .flatMap(entry -> entry.getValue().stream()
              .map(e -> new AbstractMap.SimpleEntry<>(new HoodieFileGroupId(entry.getKey(), e), instant)));
    } catch (HoodieIOException ex) {
      if (ex.getIOException() instanceof FileNotFoundException) {
        // The replace instant could have been deleted by archival, in which case a
        // FileNotFoundException can be thrown from getInstantDetails. Log it and
        // continue with the remaining instants instead of failing the whole reset.
        LOG.warn(ex.getMessage());
        return Stream.empty();
      } else {
        throw ex;
      }
    } catch (IOException e) {
      // Chain the original exception so the root cause is not lost.
      throw new HoodieIOException("error reading commit metadata for " + instant, e);
    }
  });
  // NOTE: Collectors.toMap without a merge function throws IllegalStateException on duplicate keys,
  // i.e. if the same file group id were listed by more than one replace instant.
  Map<HoodieFileGroupId, HoodieInstant> replacedFileGroups = resultStream.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
  resetReplacedFileGroups(replacedFileGroups);
  LOG.info("Took " + hoodieTimer.endTimer() + " ms to read " + replacedTimeline.countInstants() + " instants, " + replacedFileGroups.size() + " replaced file groups");
}
Aggregations