Search in sources :

Example 21 with HoodieFileGroupId

use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.

From the class ScheduleCompactionActionExecutor, method scheduleCompaction.

/**
 * Produces a compaction plan for the table, or an empty plan when the configured
 * inline-compaction trigger strategy says no compaction is needed.
 *
 * <p>File groups that already have a pending compaction operation or that are part of a
 * pending clustering are collected and handed to the compactor together with the plan request.
 *
 * @return the plan generated by the compactor, or a new empty {@link HoodieCompactionPlan}
 * @throws HoodieCompactionException if an {@link IOException} occurs while building the plan
 */
private HoodieCompactionPlan scheduleCompaction() {
    LOG.info("Checking if compaction needs to be run on " + config.getBasePath());
    // judge if we need to compact according to num delta commits and time elapsed
    if (!needCompact(config.getInlineCompactTriggerStrategy())) {
        // Nothing to do: hand back an empty plan.
        return new HoodieCompactionPlan();
    }

    LOG.info("Generating compaction plan for merge on read table " + config.getBasePath());
    try {
        SyncableFileSystemView sliceView = (SyncableFileSystemView) table.getSliceView();

        // Start with the file groups that already have a pending compaction operation ...
        Set<HoodieFileGroupId> busyFileGroups = sliceView.getPendingCompactionOperations()
            .map(pendingOp -> pendingOp.getValue().getFileGroupId())
            .collect(Collectors.toSet());

        // ... then add the file groups in pending clustering so they are excluded from compaction too.
        Set<HoodieFileGroupId> clusteringFileGroups = sliceView.getFileGroupsInPendingClustering()
            .map(Pair::getLeft)
            .collect(Collectors.toSet());
        busyFileGroups.addAll(clusteringFileGroups);

        context.setJobStatus(this.getClass().getSimpleName(), "Compaction: generating compaction plan");
        return compactor.generateCompactionPlan(context, table, config, instantTime, busyFileGroups);
    } catch (IOException ioe) {
        throw new HoodieCompactionException("Could not schedule compaction " + config.getBasePath(), ioe);
    }
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) BaseActionExecutor(org.apache.hudi.table.action.BaseActionExecutor) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Option(org.apache.hudi.common.util.Option) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) Logger(org.apache.log4j.Logger) Map(java.util.Map) ParseException(java.text.ParseException) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) SyncableFileSystemView(org.apache.hudi.common.table.view.SyncableFileSystemView) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) HoodieCompactionException(org.apache.hudi.exception.HoodieCompactionException) List(java.util.List) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) EngineType(org.apache.hudi.common.engine.EngineType) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieCompactionException(org.apache.hudi.exception.HoodieCompactionException) SyncableFileSystemView(org.apache.hudi.common.table.view.SyncableFileSystemView) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) Pair(org.apache.hudi.common.util.collection.Pair)

Example 22 with HoodieFileGroupId

use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.

From the class SpillableMapBasedFileSystemView, method createFileIdToPendingCompactionMap.

/**
 * Creates the file-group-id to pending-compaction map, backed by an {@link ExternalSpillableMap}
 * rooted at {@code baseStoreDir}, and copies the supplied entries into it.
 *
 * @param fgIdToPendingCompaction entries to copy into the newly created map
 * @return a new {@link ExternalSpillableMap} holding all entries of {@code fgIdToPendingCompaction}
 * @throws RuntimeException wrapping the {@link IOException} if the spillable map cannot be created
 */
@Override
protected Map<HoodieFileGroupId, Pair<String, CompactionOperation>> createFileIdToPendingCompactionMap(Map<HoodieFileGroupId, Pair<String, CompactionOperation>> fgIdToPendingCompaction) {
    try {
        LOG.info("Creating Pending Compaction map using external spillable Map. Max Mem=" + maxMemoryForPendingCompaction + ", BaseDir=" + baseStoreDir);
        // Make sure the base directory exists before the map is constructed on top of it.
        new File(baseStoreDir).mkdirs();
        // Both estimators use the diamond so key and value types are inferred instead of falling back to a raw type.
        Map<HoodieFileGroupId, Pair<String, CompactionOperation>> pendingMap = new ExternalSpillableMap<>(maxMemoryForPendingCompaction, baseStoreDir, new DefaultSizeEstimator<>(), new DefaultSizeEstimator<>(), diskMapType, isBitCaskDiskMapCompressionEnabled);
        pendingMap.putAll(fgIdToPendingCompaction);
        return pendingMap;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) ExternalSpillableMap(org.apache.hudi.common.util.collection.ExternalSpillableMap) IOException(java.io.IOException) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) File(java.io.File) Pair(org.apache.hudi.common.util.collection.Pair)

Example 23 with HoodieFileGroupId

use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.

From the class SpillableMapBasedFileSystemView, method createFileIdToPendingClusteringMap.

/**
 * Creates the file-group-id to pending-clustering-instant map, backed by an
 * {@link ExternalSpillableMap} rooted at {@code baseStoreDir}, and copies the supplied entries into it.
 *
 * @param fileGroupsInClustering entries to copy into the newly created map
 * @return a new {@link ExternalSpillableMap} holding all entries of {@code fileGroupsInClustering}
 * @throws RuntimeException wrapping the {@link IOException} if the spillable map cannot be created
 */
@Override
protected Map<HoodieFileGroupId, HoodieInstant> createFileIdToPendingClusteringMap(final Map<HoodieFileGroupId, HoodieInstant> fileGroupsInClustering) {
    try {
        LOG.info("Creating file group id to clustering instant map using external spillable Map. Max Mem=" + maxMemoryForClusteringFileGroups + ", BaseDir=" + baseStoreDir);
        // Make sure the base directory exists before the map is constructed on top of it.
        new File(baseStoreDir).mkdirs();
        // Both estimators use the diamond so key and value types are inferred instead of falling back to a raw type.
        Map<HoodieFileGroupId, HoodieInstant> pendingMap = new ExternalSpillableMap<>(maxMemoryForClusteringFileGroups, baseStoreDir, new DefaultSizeEstimator<>(), new DefaultSizeEstimator<>(), diskMapType, isBitCaskDiskMapCompressionEnabled);
        pendingMap.putAll(fileGroupsInClustering);
        return pendingMap;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) ExternalSpillableMap(org.apache.hudi.common.util.collection.ExternalSpillableMap) IOException(java.io.IOException) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) File(java.io.File)

Example 24 with HoodieFileGroupId

use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.

From the class SpillableMapBasedFileSystemView, method createFileIdToReplaceInstantMap.

/**
 * Creates the file-group-id to replace-instant map, backed by an {@link ExternalSpillableMap}
 * rooted at {@code baseStoreDir}, and copies the supplied entries into it.
 *
 * @param replacedFileGroups entries to copy into the newly created map
 * @return a new {@link ExternalSpillableMap} holding all entries of {@code replacedFileGroups}
 * @throws RuntimeException wrapping the {@link IOException} if the spillable map cannot be created
 */
@Override
protected Map<HoodieFileGroupId, HoodieInstant> createFileIdToReplaceInstantMap(final Map<HoodieFileGroupId, HoodieInstant> replacedFileGroups) {
    try {
        LOG.info("Creating file group id to replace instant map using external spillable Map. Max Mem=" + maxMemoryForReplaceFileGroups + ", BaseDir=" + baseStoreDir);
        // Make sure the base directory exists before the map is constructed on top of it.
        new File(baseStoreDir).mkdirs();
        // Both estimators use the diamond so key and value types are inferred instead of falling back to a raw type.
        Map<HoodieFileGroupId, HoodieInstant> pendingMap = new ExternalSpillableMap<>(maxMemoryForReplaceFileGroups, baseStoreDir, new DefaultSizeEstimator<>(), new DefaultSizeEstimator<>(), diskMapType, isBitCaskDiskMapCompressionEnabled);
        pendingMap.putAll(replacedFileGroups);
        return pendingMap;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) ExternalSpillableMap(org.apache.hudi.common.util.collection.ExternalSpillableMap) IOException(java.io.IOException) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) File(java.io.File)

Example 25 with HoodieFileGroupId

use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.

From the class AbstractTableFileSystemView, method resetFileGroupsReplaced.

/**
 * Get replaced instant for each file group by looking at all commit instants.
 *
 * <p>Reads the replace-commit metadata of every completed replace instant on the given timeline,
 * builds a map from each replaced file group to the instant that replaced it, and passes that map
 * to {@code resetReplacedFileGroups}.
 *
 * @param timeline timeline whose completed replace instants are inspected
 * @throws HoodieIOException if the metadata of an instant cannot be read for a reason other than
 *         the instant file being missing
 */
private void resetFileGroupsReplaced(HoodieTimeline timeline) {
    HoodieTimer hoodieTimer = new HoodieTimer();
    hoodieTimer.startTimer();
    // for each REPLACE instant, get map of (partitionPath -> deleteFileGroup)
    HoodieTimeline replacedTimeline = timeline.getCompletedReplaceTimeline();
    Stream<Map.Entry<HoodieFileGroupId, HoodieInstant>> resultStream = replacedTimeline.getInstants().flatMap(instant -> {
        try {
            HoodieReplaceCommitMetadata replaceMetadata = HoodieReplaceCommitMetadata.fromBytes(metaClient.getActiveTimeline().getInstantDetails(instant).get(), HoodieReplaceCommitMetadata.class);
            // get replace instant mapping for each partition, fileId
            return replaceMetadata.getPartitionToReplaceFileIds().entrySet().stream().flatMap(entry -> entry.getValue().stream().map(e -> new AbstractMap.SimpleEntry<>(new HoodieFileGroupId(entry.getKey(), e), instant)));
        } catch (HoodieIOException ex) {
            if (ex.getIOException() instanceof FileNotFoundException) {
                // The replace instant may have been deleted by archival, in which case getInstantDetails
                // throws a FileNotFoundException. Log it and skip this instant instead of failing.
                LOG.warn(ex.getMessage());
                return Stream.empty();
            } else {
                throw ex;
            }
        } catch (IOException e) {
            // Keep the original exception as the cause so the root failure is not lost.
            throw new HoodieIOException("error reading commit metadata for " + instant, e);
        }
    });
    Map<HoodieFileGroupId, HoodieInstant> replacedFileGroups = resultStream.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    resetReplacedFileGroups(replacedFileGroups);
    LOG.info("Took " + hoodieTimer.endTimer() + " ms to read  " + replacedTimeline.countInstants() + " instants, " + replacedFileGroups.size() + " replaced file groups");
}
Also used : BootstrapBaseFileMapping(org.apache.hudi.common.model.BootstrapBaseFileMapping) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) ReadLock(java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) WriteLock(java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock) Predicate(java.util.function.Predicate) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) Serializable(java.io.Serializable) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) AbstractMap(java.util.AbstractMap) List(java.util.List) GREATER_THAN_OR_EQUALS(org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS) Stream(java.util.stream.Stream) 
ClusteringUtils(org.apache.hudi.common.util.ClusteringUtils) HoodieIOException(org.apache.hudi.exception.HoodieIOException) METADATA_BOOTSTRAP_INSTANT_TS(org.apache.hudi.common.table.timeline.HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS) LogManager(org.apache.log4j.LogManager) Comparator(java.util.Comparator) GREATER_THAN(org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN) FSUtils(org.apache.hudi.common.fs.FSUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) FileNotFoundException(java.io.FileNotFoundException) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) AbstractMap(java.util.AbstractMap) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata)

Aggregations

HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId)35 Pair (org.apache.hudi.common.util.collection.Pair)24 IOException (java.io.IOException)23 List (java.util.List)20 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)20 Collectors (java.util.stream.Collectors)19 Map (java.util.Map)18 ArrayList (java.util.ArrayList)17 Option (org.apache.hudi.common.util.Option)17 LogManager (org.apache.log4j.LogManager)17 Logger (org.apache.log4j.Logger)17 FileSlice (org.apache.hudi.common.model.FileSlice)16 HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile)16 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)16 Set (java.util.Set)15 Path (org.apache.hadoop.fs.Path)15 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)15 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)14 Arrays (java.util.Arrays)12 FSUtils (org.apache.hudi.common.fs.FSUtils)12