Example 1 with GREATER_THAN

Use of org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN in project hudi by apache.

From class HoodieTimelineArchiver, method getCommitInstantsToArchive:

private Stream<HoodieInstant> getCommitInstantsToArchive() {
    // TODO (na) : Add a way to return actions associated with a timeline and then merge/unify
    // with logic above to avoid Stream.concat
    HoodieTimeline commitTimeline = table.getCompletedCommitsTimeline();
    Option<HoodieInstant> oldestPendingCompactionAndReplaceInstant = table.getActiveTimeline()
        .getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.COMPACTION_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION))
        .filter(s -> !s.isCompleted())
        .firstInstant();
    Option<HoodieInstant> oldestInflightCommitInstant = table.getActiveTimeline()
        .getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION))
        .filterInflights()
        .firstInstant();
    // We cannot have any holes in the commit timeline. We cannot archive any commits which are
    // made after the first savepoint present.
    Option<HoodieInstant> firstSavepoint = table.getCompletedSavepointTimeline().firstInstant();
    if (!commitTimeline.empty() && commitTimeline.countInstants() > maxInstantsToKeep) {
        // For a Merge-On-Read table with inline or async compaction enabled, we need to make
        // sure that there are enough delta commits in the active timeline to trigger compaction
        // scheduling when the compaction trigger strategy is NUM_COMMITS or NUM_AND_TIME.
        Option<HoodieInstant> oldestInstantToRetainForCompaction =
            (metaClient.getTableType() == HoodieTableType.MERGE_ON_READ
                && (config.getInlineCompactTriggerStrategy() == CompactionTriggerStrategy.NUM_COMMITS
                    || config.getInlineCompactTriggerStrategy() == CompactionTriggerStrategy.NUM_AND_TIME))
                ? CompactionUtils.getOldestInstantToRetainForCompaction(table.getActiveTimeline(), config.getInlineCompactDeltaCommitMax())
                : Option.empty();
        // Collect the instants that are eligible for archiving
        Stream<HoodieInstant> instantToArchiveStream = commitTimeline.getInstants().filter(s -> {
            // if no savepoint is present, don't filter; otherwise keep every commit made at or
            // after the first savepoint
            return !(firstSavepoint.isPresent()
                && HoodieTimeline.compareTimestamps(firstSavepoint.get().getTimestamp(), LESSER_THAN_OR_EQUALS, s.getTimestamp()));
        }).filter(s -> {
            // Ensure commits at or after the oldest pending compaction/replace commit are retained
            return oldestPendingCompactionAndReplaceInstant
                .map(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
                .orElse(true);
        }).filter(s -> {
            // With LAZY failed-writes cleaning, commits at or after the oldest inflight commit must not
            // get archived, i.e., instants after the oldestInflight are retained on the timeline
            if (config.getFailedWritesCleanPolicy() == HoodieFailedWritesCleaningPolicy.LAZY) {
                return oldestInflightCommitInstant
                    .map(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
                    .orElse(true);
            }
            return true;
        }).filter(s ->
            // Retain all delta commits at or after the oldest instant needed for compaction scheduling
            oldestInstantToRetainForCompaction
                .map(instantToRetain -> HoodieTimeline.compareTimestamps(s.getTimestamp(), LESSER_THAN, instantToRetain.getTimestamp()))
                .orElse(true));
        return instantToArchiveStream.limit(commitTimeline.countInstants() - minInstantsToKeep);
    } else {
        return Stream.empty();
    }
}
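
Why these filters work: Hudi instant timestamps are plain strings whose lexicographic order matches chronological order, so GREATER_THAN and its siblings boil down to string comparisons. Below is a minimal, self-contained sketch of those comparison semantics; the BiPredicate constant mirrors what HoodieTimeline defines, while the class name and the timestamps are hypothetical examples, not taken from the code above.

import java.util.List;
import java.util.function.BiPredicate;
import java.util.stream.Collectors;

public class TimestampFilterSketch {

    // Mirrors the semantics of HoodieTimeline's comparator constants:
    // instant timestamps compare lexicographically, which matches commit order.
    static final BiPredicate<String, String> GREATER_THAN = (a, b) -> a.compareTo(b) > 0;

    static boolean compareTimestamps(String ts1, BiPredicate<String, String> op, String ts2) {
        return op.test(ts1, ts2);
    }

    public static void main(String[] args) {
        // Hypothetical instant timestamps in Hudi's yyyyMMddHHmmss layout
        List<String> instants = List.of("20220101080000", "20220101090000", "20220101100000");
        String oldestInflight = "20220101090000";
        // Keep only instants strictly older than the oldest inflight commit,
        // the same guard getCommitInstantsToArchive applies under LAZY cleaning
        List<String> archivable = instants.stream()
            .filter(ts -> compareTimestamps(oldestInflight, GREATER_THAN, ts))
            .collect(Collectors.toList());
        System.out.println(archivable); // prints [20220101080000]
    }
}
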
Also used (grouped by package):
java.io: FileNotFoundException, IOException
java.util: ArrayList, Arrays, Collection, Comparator, HashMap, List, Map
java.util.stream: Collectors, Stream
org.apache.avro: Schema
org.apache.avro.generic: IndexedRecord
org.apache.hadoop.fs: FileStatus, FileSystem, Path
org.apache.hudi.avro.model: HoodieArchivedMetaEntry, HoodieMergeArchiveFilePlan
org.apache.hudi.client.utils: MetadataConversionUtils
org.apache.hudi.common.engine: HoodieEngineContext
org.apache.hudi.common.fs: FSUtils, HoodieWrapperFileSystem, StorageSchemes
org.apache.hudi.common.model: HoodieArchivedLogFile, HoodieAvroPayload, HoodieFailedWritesCleaningPolicy, HoodieLogFile, HoodieTableType
org.apache.hudi.common.table: HoodieTableMetaClient
org.apache.hudi.common.table.log: HoodieLogFormat, HoodieLogFormat.Writer
org.apache.hudi.common.table.log.block: HoodieAvroDataBlock, HoodieLogBlock, HoodieLogBlock.HeaderMetadataType
org.apache.hudi.common.table.timeline: HoodieActiveTimeline, HoodieArchivedTimeline, HoodieInstant, HoodieTimeline, TimelineMetadataUtils
org.apache.hudi.common.table.timeline.HoodieTimeline: GREATER_THAN, LESSER_THAN, LESSER_THAN_OR_EQUALS
org.apache.hudi.common.table.view: FileSystemViewStorageConfig
org.apache.hudi.common.util: CollectionUtils, CompactionUtils, FileIOUtils, Option
org.apache.hudi.common.util.collection: Pair
org.apache.hudi.config: HoodieWriteConfig
org.apache.hudi.exception: HoodieCommitException, HoodieException, HoodieIOException
org.apache.hudi.metadata: HoodieTableMetadata
org.apache.hudi.table: HoodieTable
org.apache.hudi.table.action.compact: CompactionTriggerStrategy
org.apache.hudi.table.marker: WriteMarkers, WriteMarkersFactory
org.apache.log4j: LogManager, Logger

Example 2 with GREATER_THAN

Use of org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN in project hudi by apache.

From class IncrementalInputSplits, method filterInstantsWithRange:

/**
 * Returns the instants to consume, starting after the given issuedInstant when one is provided,
 * otherwise bounded by the configured start/end commits.
 *
 * @param commitTimeline The completed commits timeline
 * @param issuedInstant  The last issued instant that has already been delivered to downstream
 * @return the filtered hoodie instants
 */
private List<HoodieInstant> filterInstantsWithRange(HoodieTimeline commitTimeline, final String issuedInstant) {
    HoodieTimeline completedTimeline = commitTimeline.filterCompletedInstants();
    if (issuedInstant != null) {
        // returns early for streaming mode
        return maySkipCompaction(completedTimeline.getInstants())
            .filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), GREATER_THAN, issuedInstant))
            .collect(Collectors.toList());
    }
    Stream<HoodieInstant> instantStream = completedTimeline.getInstants();
    if (this.conf.getOptional(FlinkOptions.READ_START_COMMIT).isPresent()
        && !this.conf.get(FlinkOptions.READ_START_COMMIT).equalsIgnoreCase(FlinkOptions.START_COMMIT_EARLIEST)) {
        final String startCommit = this.conf.get(FlinkOptions.READ_START_COMMIT);
        instantStream = instantStream.filter(s ->
            HoodieTimeline.compareTimestamps(s.getTimestamp(), GREATER_THAN_OR_EQUALS, startCommit));
    }
    if (this.conf.getOptional(FlinkOptions.READ_END_COMMIT).isPresent()) {
        final String endCommit = this.conf.get(FlinkOptions.READ_END_COMMIT);
        instantStream = instantStream.filter(s ->
            HoodieTimeline.compareTimestamps(s.getTimestamp(), LESSER_THAN_OR_EQUALS, endCommit));
    }
    return maySkipCompaction(instantStream).collect(Collectors.toList());
}
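
The boundary handling is the interesting part here: READ_START_COMMIT is inclusive (GREATER_THAN_OR_EQUALS), while the issuedInstant resume path is exclusive (GREATER_THAN), so a restarted stream never re-delivers the last instant it already emitted. Below is a minimal sketch of that range logic with the Flink/Hudi dependencies stripped out; the class name, helper, and timestamps are hypothetical, and plain string comparison stands in for HoodieTimeline.compareTimestamps.

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class RangeFilterSketch {

    // Lexicographic comparison stands in for HoodieTimeline.compareTimestamps
    static List<String> filterWithRange(List<String> completed, String issuedInstant,
                                        String startCommit, String endCommit) {
        if (issuedInstant != null) {
            // streaming resume: strictly after the last instant already delivered
            return completed.stream()
                .filter(ts -> ts.compareTo(issuedInstant) > 0)   // GREATER_THAN
                .collect(Collectors.toList());
        }
        Stream<String> s = completed.stream();
        if (startCommit != null) {
            s = s.filter(ts -> ts.compareTo(startCommit) >= 0);  // GREATER_THAN_OR_EQUALS
        }
        if (endCommit != null) {
            s = s.filter(ts -> ts.compareTo(endCommit) <= 0);    // LESSER_THAN_OR_EQUALS
        }
        return s.collect(Collectors.toList());
    }

    public static void main(String[] args) {
        List<String> timeline = List.of("001", "002", "003", "004");
        System.out.println(filterWithRange(timeline, "002", null, null));   // [003, 004]
        System.out.println(filterWithRange(timeline, null, "002", "003"));  // [002, 003]
    }
}
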
Also used (grouped by package):
java.util: ArrayList, Collection, Collections, HashSet, List, Objects, Set
java.util.concurrent.atomic: AtomicInteger
java.util.stream: Collectors, Stream
javax.annotation: Nullable
org.apache.flink.configuration: Configuration
org.apache.flink.core.fs: Path
org.apache.hadoop.fs: FileStatus
org.apache.hudi.common.model: BaseFile, HoodieCommitMetadata, HoodieLogFile
org.apache.hudi.common.table: HoodieTableMetaClient
org.apache.hudi.common.table.log: InstantRange
org.apache.hudi.common.table.timeline: HoodieArchivedTimeline, HoodieInstant, HoodieTimeline
org.apache.hudi.common.table.timeline.HoodieTimeline: GREATER_THAN, GREATER_THAN_OR_EQUALS, LESSER_THAN_OR_EQUALS
org.apache.hudi.common.table.view: HoodieTableFileSystemView
org.apache.hudi.common.util: Option
org.apache.hudi.configuration: FlinkOptions
org.apache.hudi.hadoop.utils: HoodieInputFormatUtils
org.apache.hudi.sink.partitioner.profile: WriteProfiles
org.apache.hudi.table.format.mor: MergeOnReadInputSplit
org.slf4j: Logger, LoggerFactory
scala: Serializable

Aggregations

ArrayList (java.util.ArrayList): 2
Collection (java.util.Collection): 2
List (java.util.List): 2
Collectors (java.util.stream.Collectors): 2
Stream (java.util.stream.Stream): 2
FileStatus (org.apache.hadoop.fs.FileStatus): 2
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 2
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 2
HoodieArchivedTimeline (org.apache.hudi.common.table.timeline.HoodieArchivedTimeline): 2
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 2
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 2
GREATER_THAN (org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN): 2
LESSER_THAN_OR_EQUALS (org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS): 2
Option (org.apache.hudi.common.util.Option): 2
FileNotFoundException (java.io.FileNotFoundException): 1
IOException (java.io.IOException): 1
Arrays (java.util.Arrays): 1
Collections (java.util.Collections): 1
Comparator (java.util.Comparator): 1
HashMap (java.util.HashMap): 1