
Example 1 with LESSER_THAN

Use of org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN in project hudi by apache.

From class HoodieTimelineArchiver, method getCommitInstantsToArchive:

private Stream<HoodieInstant> getCommitInstantsToArchive() {
    // TODO (na) : Add a way to return actions associated with a timeline and then merge/unify
    // with logic above to avoid Stream.concat
    HoodieTimeline commitTimeline = table.getCompletedCommitsTimeline();
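    // Find the earliest pending compaction/replace instant and the earliest inflight
    // commit; instants at or after these boundaries must stay on the active timeline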
    Option<HoodieInstant> oldestPendingCompactionAndReplaceInstant = table.getActiveTimeline()
        .getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.COMPACTION_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION))
        .filter(s -> !s.isCompleted())
        .firstInstant();
    Option<HoodieInstant> oldestInflightCommitInstant = table.getActiveTimeline()
        .getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION))
        .filterInflights()
        .firstInstant();
    // We cannot have any holes in the commit timeline. We cannot archive any commits which are
    // made after the first savepoint present.
    Option<HoodieInstant> firstSavepoint = table.getCompletedSavepointTimeline().firstInstant();
    if (!commitTimeline.empty() && commitTimeline.countInstants() > maxInstantsToKeep) {
        // For a Merge-On-Read table with inline or async compaction enabled,
        // we need to make sure that there are enough delta commits in the active
        // timeline to trigger compaction scheduling, when the compaction trigger
        // strategy is NUM_COMMITS or NUM_AND_TIME.
        Option<HoodieInstant> oldestInstantToRetainForCompaction =
            (metaClient.getTableType() == HoodieTableType.MERGE_ON_READ
                && (config.getInlineCompactTriggerStrategy() == CompactionTriggerStrategy.NUM_COMMITS
                    || config.getInlineCompactTriggerStrategy() == CompactionTriggerStrategy.NUM_AND_TIME))
                ? CompactionUtils.getOldestInstantToRetainForCompaction(table.getActiveTimeline(), config.getInlineCompactDeltaCommitMax())
                : Option.empty();
        // Build the stream of commit instants that are eligible to be archived
        Stream<HoodieInstant> instantToArchiveStream = commitTimeline.getInstants().filter(s -> {
            // if no savepoint is present, don't filter
            return !(firstSavepoint.isPresent()
                && HoodieTimeline.compareTimestamps(firstSavepoint.get().getTimestamp(), LESSER_THAN_OR_EQUALS, s.getTimestamp()));
        }).filter(s -> {
            // Ensure commits >= the oldest pending compaction/replace commit are retained
            return oldestPendingCompactionAndReplaceInstant
                .map(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
                .orElse(true);
        }).filter(s -> {
            // With lazy cleaning of failed writes, make sure eligible instants don't
            // get archived, i.e., instants after the oldest inflight commit are retained on the timeline
            if (config.getFailedWritesCleanPolicy() == HoodieFailedWritesCleaningPolicy.LAZY) {
                return oldestInflightCommitInstant
                    .map(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
                    .orElse(true);
            }
            return true;
        }).filter(s -> oldestInstantToRetainForCompaction
            .map(instantToRetain -> HoodieTimeline.compareTimestamps(s.getTimestamp(), LESSER_THAN, instantToRetain.getTimestamp()))
            .orElse(true));
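        // Cap the result so that at least minInstantsToKeep instants remain on the active timeline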
        return instantToArchiveStream.limit(commitTimeline.countInstants() - minInstantsToKeep);
    } else {
        return Stream.empty();
    }
}
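
For context, LESSER_THAN is one of the BiPredicate<String, String> timestamp comparators declared on the HoodieTimeline interface, and HoodieTimeline.compareTimestamps simply applies the given predicate to two instant timestamps. The following is a minimal sketch of that semantics (the LesserThanDemo class and the sample timestamps are illustrative, assuming a Hudi 0.x dependency on the classpath):

import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN;

import org.apache.hudi.common.table.timeline.HoodieTimeline;

public class LesserThanDemo {

    public static void main(String[] args) {
        // Instant timestamps are strings such as yyyyMMddHHmmss, so lexicographic
        // comparison matches chronological order.
        String earlier = "20220101120000";
        String later = "20220102120000";

        System.out.println(HoodieTimeline.compareTimestamps(earlier, LESSER_THAN, later));   // true
        System.out.println(HoodieTimeline.compareTimestamps(later, LESSER_THAN, earlier));   // false
    }
}

This is why the archiver above can express all of its retention boundaries as plain timestamp comparisons: any instant at or past a boundary (first savepoint, oldest pending compaction/replace, oldest inflight commit, oldest instant to retain for compaction) is filtered out of the archive candidates.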
Also used:

java.io.FileNotFoundException, java.io.IOException
java.util.ArrayList, java.util.Arrays, java.util.Collection, java.util.Comparator, java.util.HashMap, java.util.List, java.util.Map
java.util.stream.Collectors, java.util.stream.Stream
org.apache.avro.Schema, org.apache.avro.generic.IndexedRecord
org.apache.hadoop.fs.FileStatus, org.apache.hadoop.fs.FileSystem, org.apache.hadoop.fs.Path
org.apache.hudi.avro.model.HoodieArchivedMetaEntry, org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan
org.apache.hudi.client.utils.MetadataConversionUtils
org.apache.hudi.common.engine.HoodieEngineContext
org.apache.hudi.common.fs.FSUtils, org.apache.hudi.common.fs.HoodieWrapperFileSystem, org.apache.hudi.common.fs.StorageSchemes
org.apache.hudi.common.model.HoodieArchivedLogFile, org.apache.hudi.common.model.HoodieAvroPayload, org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy, org.apache.hudi.common.model.HoodieLogFile, org.apache.hudi.common.model.HoodieTableType
org.apache.hudi.common.table.HoodieTableMetaClient
org.apache.hudi.common.table.log.HoodieLogFormat, org.apache.hudi.common.table.log.HoodieLogFormat.Writer, org.apache.hudi.common.table.log.block.HoodieAvroDataBlock, org.apache.hudi.common.table.log.block.HoodieLogBlock, org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType
org.apache.hudi.common.table.timeline.HoodieActiveTimeline, org.apache.hudi.common.table.timeline.HoodieArchivedTimeline, org.apache.hudi.common.table.timeline.HoodieInstant, org.apache.hudi.common.table.timeline.HoodieTimeline, org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN, org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN, org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS, org.apache.hudi.common.table.timeline.TimelineMetadataUtils
org.apache.hudi.common.table.view.FileSystemViewStorageConfig
org.apache.hudi.common.util.CollectionUtils, org.apache.hudi.common.util.CompactionUtils, org.apache.hudi.common.util.FileIOUtils, org.apache.hudi.common.util.Option, org.apache.hudi.common.util.collection.Pair
org.apache.hudi.config.HoodieWriteConfig
org.apache.hudi.exception.HoodieCommitException, org.apache.hudi.exception.HoodieException, org.apache.hudi.exception.HoodieIOException
org.apache.hudi.metadata.HoodieTableMetadata
org.apache.hudi.table.HoodieTable, org.apache.hudi.table.action.compact.CompactionTriggerStrategy, org.apache.hudi.table.marker.WriteMarkers, org.apache.hudi.table.marker.WriteMarkersFactory
org.apache.log4j.LogManager, org.apache.log4j.Logger
