Use of org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS in project hudi by apache.
The class HoodieTimelineArchiver, method deleteAllInstantsOlderOrEqualsInAuxMetaFolder.
/**
 * Remove older instants from the auxiliary meta folder.
 *
 * @param thresholdInstant the threshold instant; files for instants older than or equal to it are deleted
 * @return true if all eligible files were deleted successfully
 * @throws IOException in case of error
 */
private boolean deleteAllInstantsOlderOrEqualsInAuxMetaFolder(HoodieInstant thresholdInstant) throws IOException {
  List<HoodieInstant> instants = null;
  boolean success = true;
  try {
    instants = metaClient.scanHoodieInstantsFromFileSystem(
        new Path(metaClient.getMetaAuxiliaryPath()),
        HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE,
        false);
  } catch (FileNotFoundException e) {
    /*
     * On some file systems, deleting all files in a directory can automatically remove the
     * directory itself. GCS is one example, as it doesn't have real directories and
     * subdirectories: when a client removes all the files from a "folder" on GCS, it has to
     * create a special "/" entry to keep the folder around. If this doesn't happen (timeout,
     * misconfigured client, ...), the folder will be deleted, and in that case we should not
     * break when the aux folder is not found.
     * GCS information: https://cloud.google.com/storage/docs/gsutil/addlhelp/HowSubdirectoriesWork
     */
    LOG.warn("Aux path not found. Skipping: " + metaClient.getMetaAuxiliaryPath());
    return true;
  }
  List<HoodieInstant> instantsToBeDeleted = instants.stream()
      .filter(instant1 -> HoodieTimeline.compareTimestamps(instant1.getTimestamp(), LESSER_THAN_OR_EQUALS, thresholdInstant.getTimestamp()))
      .collect(Collectors.toList());
  for (HoodieInstant deleteInstant : instantsToBeDeleted) {
    LOG.info("Deleting instant " + deleteInstant + " in auxiliary meta path " + metaClient.getMetaAuxiliaryPath());
    Path metaFile = new Path(metaClient.getMetaAuxiliaryPath(), deleteInstant.getFileName());
    if (metaClient.getFs().exists(metaFile)) {
      success &= metaClient.getFs().delete(metaFile, false);
      LOG.info("Deleted instant file in auxiliary meta path : " + metaFile);
    }
  }
  return success;
}
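For context, here is a minimal, self-contained sketch (not from the Hudi codebase) of the comparison driving the filter above: instant timestamps are lexicographically ordered strings, and LESSER_THAN_OR_EQUALS is a BiPredicate<String, String> constant on HoodieTimeline that compareTimestamps applies. The class name and timestamp values below are hypothetical.

import org.apache.hudi.common.table.timeline.HoodieTimeline;

public class CompareTimestampsSketch {
  public static void main(String[] args) {
    // Hypothetical instant timestamps in Hudi's yyyyMMddHHmmss format.
    String threshold = "20220101120000";
    String candidate = "20211231090000";
    // compareTimestamps simply applies the predicate, so this evaluates
    // candidate <= threshold as a lexicographic string comparison.
    boolean eligible = HoodieTimeline.compareTimestamps(
        candidate, HoodieTimeline.LESSER_THAN_OR_EQUALS, threshold);
    System.out.println(eligible); // true: the candidate is at or before the threshold
  }
}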
Use of org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS in project hudi by apache.
The class HoodieTimelineArchiver, method getCommitInstantsToArchive.
private Stream<HoodieInstant> getCommitInstantsToArchive() {
  // TODO (na) : Add a way to return actions associated with a timeline and then merge/unify
  // with logic above to avoid Stream.concat
  HoodieTimeline commitTimeline = table.getCompletedCommitsTimeline();
  Option<HoodieInstant> oldestPendingCompactionAndReplaceInstant = table.getActiveTimeline()
      .getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.COMPACTION_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION))
      .filter(s -> !s.isCompleted())
      .firstInstant();
  Option<HoodieInstant> oldestInflightCommitInstant = table.getActiveTimeline()
      .getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION))
      .filterInflights()
      .firstInstant();
  // We cannot have any holes in the commit timeline, so we cannot archive any commits
  // made after the first savepoint present.
  Option<HoodieInstant> firstSavepoint = table.getCompletedSavepointTimeline().firstInstant();
  if (!commitTimeline.empty() && commitTimeline.countInstants() > maxInstantsToKeep) {
    // For a Merge-On-Read table with inline or async compaction enabled, we need to make
    // sure there are enough delta commits in the active timeline to trigger compaction
    // scheduling when the compaction trigger strategy is NUM_COMMITS or NUM_AND_TIME.
    Option<HoodieInstant> oldestInstantToRetainForCompaction =
        (metaClient.getTableType() == HoodieTableType.MERGE_ON_READ
            && (config.getInlineCompactTriggerStrategy() == CompactionTriggerStrategy.NUM_COMMITS
                || config.getInlineCompactTriggerStrategy() == CompactionTriggerStrategy.NUM_AND_TIME))
        ? CompactionUtils.getOldestInstantToRetainForCompaction(table.getActiveTimeline(), config.getInlineCompactDeltaCommitMax())
        : Option.empty();
    // Build the stream of instants eligible to be archived
    Stream<HoodieInstant> instantToArchiveStream = commitTimeline.getInstants().filter(s -> {
      // if no savepoint is present, don't filter
      return !(firstSavepoint.isPresent()
          && HoodieTimeline.compareTimestamps(firstSavepoint.get().getTimestamp(), LESSER_THAN_OR_EQUALS, s.getTimestamp()));
    }).filter(s -> {
      // ensure commits at or after the oldest pending compaction/replace commit are retained
      return oldestPendingCompactionAndReplaceInstant
          .map(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
          .orElse(true);
    }).filter(s -> {
      // under the LAZY failed-writes cleaning policy, instants at or after the oldest
      // inflight commit are retained on the timeline rather than archived
      if (config.getFailedWritesCleanPolicy() == HoodieFailedWritesCleaningPolicy.LAZY) {
        return oldestInflightCommitInstant
            .map(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
            .orElse(true);
      }
      return true;
    }).filter(s -> oldestInstantToRetainForCompaction
        .map(instantToRetain -> HoodieTimeline.compareTimestamps(s.getTimestamp(), LESSER_THAN, instantToRetain.getTimestamp()))
        .orElse(true));
    return instantToArchiveStream.limit(commitTimeline.countInstants() - minInstantsToKeep);
  } else {
    return Stream.empty();
  }
}
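To make the archiving window concrete, here is a hypothetical, self-contained illustration of the limit arithmetic above, using plain timestamp strings instead of HoodieInstant objects; all names and values are made up for the sketch. With 5 completed commits, maxInstantsToKeep = 4 and minInstantsToKeep = 2, at most 5 - 2 = 3 of the oldest eligible instants are archived, so at least 2 remain active.

import java.util.List;
import java.util.stream.Collectors;

public class ArchiveWindowSketch {
  public static void main(String[] args) {
    int maxInstantsToKeep = 4;   // assumed config value
    int minInstantsToKeep = 2;   // assumed config value
    List<String> completed = List.of("001", "002", "003", "004", "005");
    String firstSavepoint = "004"; // commits at or after this must be retained
    if (completed.size() > maxInstantsToKeep) {
      List<String> toArchive = completed.stream()
          .filter(ts -> ts.compareTo(firstSavepoint) < 0)  // savepoint guard
          .limit(completed.size() - minInstantsToKeep)     // keep at least the minimum
          .collect(Collectors.toList());
      System.out.println(toArchive); // [001, 002, 003]
    }
  }
}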
Use of org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS in project hudi by apache.
The class IncrementalInputSplits, method filterInstantsWithRange.
/**
 * Returns the instants to consume, starting after the given issuedInstant.
 *
 * @param commitTimeline The completed commits timeline
 * @param issuedInstant  The last issued instant that has already been delivered downstream
 * @return the filtered hoodie instants
 */
private List<HoodieInstant> filterInstantsWithRange(HoodieTimeline commitTimeline, final String issuedInstant) {
  HoodieTimeline completedTimeline = commitTimeline.filterCompletedInstants();
  if (issuedInstant != null) {
    // returns early for streaming mode
    return maySkipCompaction(completedTimeline.getInstants())
        .filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), GREATER_THAN, issuedInstant))
        .collect(Collectors.toList());
  }
  Stream<HoodieInstant> instantStream = completedTimeline.getInstants();
  if (this.conf.getOptional(FlinkOptions.READ_START_COMMIT).isPresent()
      && !this.conf.get(FlinkOptions.READ_START_COMMIT).equalsIgnoreCase(FlinkOptions.START_COMMIT_EARLIEST)) {
    final String startCommit = this.conf.get(FlinkOptions.READ_START_COMMIT);
    instantStream = instantStream.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), GREATER_THAN_OR_EQUALS, startCommit));
  }
  if (this.conf.getOptional(FlinkOptions.READ_END_COMMIT).isPresent()) {
    final String endCommit = this.conf.get(FlinkOptions.READ_END_COMMIT);
    instantStream = instantStream.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), LESSER_THAN_OR_EQUALS, endCommit));
  }
  return maySkipCompaction(instantStream).collect(Collectors.toList());
}
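For reference, a minimal sketch of a Flink configuration that would exercise the range filters above; the class name and commit timestamps are hypothetical. With these bounds set, filterInstantsWithRange keeps only instants whose timestamp is at or after READ_START_COMMIT and, via LESSER_THAN_OR_EQUALS, at or before READ_END_COMMIT.

import org.apache.flink.configuration.Configuration;
import org.apache.hudi.configuration.FlinkOptions;

public class ReadRangeConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Hypothetical bounds: keep instants in ["20220101000000", "20220131235959"].
    conf.set(FlinkOptions.READ_START_COMMIT, "20220101000000");
    conf.set(FlinkOptions.READ_END_COMMIT, "20220131235959");
  }
}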