Use of org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN in project hudi by apache.
The class HoodieTimelineArchiver, method getCommitInstantsToArchive.
private Stream<HoodieInstant> getCommitInstantsToArchive() {
  // TODO (na) : Add a way to return actions associated with a timeline and then merge/unify
  // with logic above to avoid Stream.concat
  HoodieTimeline commitTimeline = table.getCompletedCommitsTimeline();
  Option<HoodieInstant> oldestPendingCompactionAndReplaceInstant = table.getActiveTimeline()
      .getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.COMPACTION_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION))
      .filter(s -> !s.isCompleted())
      .firstInstant();
  Option<HoodieInstant> oldestInflightCommitInstant = table.getActiveTimeline()
      .getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION))
      .filterInflights()
      .firstInstant();
  // We cannot have any holes in the commit timeline. We cannot archive any commits
  // made after the first savepoint present.
  Option<HoodieInstant> firstSavepoint = table.getCompletedSavepointTimeline().firstInstant();
  if (!commitTimeline.empty() && commitTimeline.countInstants() > maxInstantsToKeep) {
    // For a Merge-On-Read table with inline or async compaction enabled, we need to make
    // sure there are enough delta commits in the active timeline to trigger compaction
    // scheduling when the compaction trigger strategy is NUM_COMMITS or NUM_AND_TIME.
    Option<HoodieInstant> oldestInstantToRetainForCompaction =
        (metaClient.getTableType() == HoodieTableType.MERGE_ON_READ
            && (config.getInlineCompactTriggerStrategy() == CompactionTriggerStrategy.NUM_COMMITS
                || config.getInlineCompactTriggerStrategy() == CompactionTriggerStrategy.NUM_AND_TIME))
        ? CompactionUtils.getOldestInstantToRetainForCompaction(table.getActiveTimeline(), config.getInlineCompactDeltaCommitMax())
        : Option.empty();
    // Build the stream of instants that are eligible for archiving
    Stream<HoodieInstant> instantToArchiveStream = commitTimeline.getInstants().filter(s -> {
      // if no savepoint is present, don't filter
      return !(firstSavepoint.isPresent() && HoodieTimeline.compareTimestamps(firstSavepoint.get().getTimestamp(), LESSER_THAN_OR_EQUALS, s.getTimestamp()));
    }).filter(s -> {
      // ensure commits at or after the oldest pending compaction/replace instant are retained
      return oldestPendingCompactionAndReplaceInstant
          .map(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
          .orElse(true);
    }).filter(s -> {
      // with the LAZY failed-writes clean policy, eligible instants must not
      // get archived, i.e., instants after the oldest inflight commit are retained on the timeline
      if (config.getFailedWritesCleanPolicy() == HoodieFailedWritesCleaningPolicy.LAZY) {
        return oldestInflightCommitInstant
            .map(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
            .orElse(true);
      }
      return true;
    }).filter(s -> oldestInstantToRetainForCompaction
        .map(instantToRetain -> HoodieTimeline.compareTimestamps(s.getTimestamp(), LESSER_THAN, instantToRetain.getTimestamp()))
        .orElse(true));
    return instantToArchiveStream.limit(commitTimeline.countInstants() - minInstantsToKeep);
  } else {
    return Stream.empty();
  }
}
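For context on the comparisons above: GREATER_THAN and its siblings are BiPredicate<String, String> constants on HoodieTimeline, and compareTimestamps simply applies the predicate to two instant timestamps; because Hudi instant timestamps are fixed-width digit strings, lexicographic order matches chronological order. Below is a minimal sketch of those semantics, not taken from the Hudi code base: the class name and timestamp values are made up for illustration, and it assumes a Hudi 0.x API where compareTimestamps takes the predicate as its middle argument, as in the calls above.

import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS;

import org.apache.hudi.common.table.timeline.HoodieTimeline;

public class TimestampComparisonSketch { // hypothetical class, for illustration only
  public static void main(String[] args) {
    // Hypothetical instant timestamps (fixed-width yyyyMMddHHmmss-style strings)
    String savepoint = "20230401120000";
    String candidate = "20230401130000";

    // A commit made at or after the first savepoint must not be archived:
    boolean afterSavepoint =
        HoodieTimeline.compareTimestamps(savepoint, LESSER_THAN_OR_EQUALS, candidate);
    System.out.println("made after savepoint, retained: " + afterSavepoint); // true

    // A candidate is archivable only while the oldest pending compaction/replace
    // instant is strictly newer than it:
    String oldestPendingCompaction = "20230401125959";
    boolean archivable =
        HoodieTimeline.compareTimestamps(oldestPendingCompaction, GREATER_THAN, candidate);
    System.out.println("archivable: " + archivable); // false, so it stays on the timeline
  }
}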
Use of org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN in project hudi by apache.
The class IncrementalInputSplits, method filterInstantsWithRange.
/**
* Returns the instants to consume, starting from the given issuedInstant.
*
* @param commitTimeline The completed commits timeline
* @param issuedInstant The last issued instant that has already been delivered to downstream
* @return the filtered hoodie instants
*/
private List<HoodieInstant> filterInstantsWithRange(HoodieTimeline commitTimeline, final String issuedInstant) {
  HoodieTimeline completedTimeline = commitTimeline.filterCompletedInstants();
  if (issuedInstant != null) {
    // returns early for streaming mode: only instants strictly after issuedInstant
    return maySkipCompaction(completedTimeline.getInstants())
        .filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), GREATER_THAN, issuedInstant))
        .collect(Collectors.toList());
  }
  Stream<HoodieInstant> instantStream = completedTimeline.getInstants();
  if (this.conf.getOptional(FlinkOptions.READ_START_COMMIT).isPresent()
      && !this.conf.get(FlinkOptions.READ_START_COMMIT).equalsIgnoreCase(FlinkOptions.START_COMMIT_EARLIEST)) {
    final String startCommit = this.conf.get(FlinkOptions.READ_START_COMMIT);
    instantStream = instantStream.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), GREATER_THAN_OR_EQUALS, startCommit));
  }
  if (this.conf.getOptional(FlinkOptions.READ_END_COMMIT).isPresent()) {
    final String endCommit = this.conf.get(FlinkOptions.READ_END_COMMIT);
    instantStream = instantStream.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), LESSER_THAN_OR_EQUALS, endCommit));
  }
  return maySkipCompaction(instantStream).collect(Collectors.toList());
}
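The start and end bounds above come straight from the Flink job configuration; when issuedInstant is non-null (streaming mode) they are ignored entirely. As a hedged usage sketch showing how a reader could pin that range, using the FlinkOptions keys referenced in the method with hypothetical timestamp values and a made-up class name:

import org.apache.flink.configuration.Configuration;
import org.apache.hudi.configuration.FlinkOptions;

public class ReadRangeSketch { // hypothetical class, for illustration only
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Bound the incremental read to [startCommit, endCommit]; both filters in
    // filterInstantsWithRange are inclusive (GREATER_THAN_OR_EQUALS / LESSER_THAN_OR_EQUALS).
    conf.set(FlinkOptions.READ_START_COMMIT, "20230401000000"); // hypothetical timestamp
    conf.set(FlinkOptions.READ_END_COMMIT, "20230402000000");   // hypothetical timestamp

    System.out.println("reading instants in ["
        + conf.get(FlinkOptions.READ_START_COMMIT) + ", "
        + conf.get(FlinkOptions.READ_END_COMMIT) + "]");
  }
}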