Search in sources :

Example 1 with HoodieDeltaStreamerException

Use of org.apache.hudi.utilities.exception.HoodieDeltaStreamerException in the Apache Hudi project.

From the class DeltaSync, method getCheckpointToResume.

/**
 * Determines the checkpoint to resume from, based on the metadata of previous commits and the
 * checkpoint configured by the user ({@code cfg.checkpoint}).
 *
 * <p>Resolution order when commit metadata carrying checkpoint info is found:
 * <ol>
 *   <li>the user-configured checkpoint, if it is set and differs from (or there is no) value stored
 *       under {@code CHECKPOINT_RESET_KEY} in the commit metadata;</li>
 *   <li>otherwise the non-empty value stored under {@code CHECKPOINT_KEY};</li>
 *   <li>otherwise an exception is raised if the last commit is later than
 *       {@code HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS}; if it is not, no checkpoint is returned.</li>
 * </ol>
 * When no such commit metadata is found, the user-configured checkpoint (if any) is used.
 *
 * <p>Side effect: when the commit metadata has a non-empty {@code CHECKPOINT_RESET_KEY} value, the
 * {@code KAFKA_CHECKPOINT_TYPE} property is removed from {@code props}.
 *
 * @param commitTimelineOpt commit timeline of interest; an empty option yields an empty result.
 * @return the checkpoint to resume from if applicable, otherwise an empty option.
 * @throws IOException declared for failures while serializing commit metadata to JSON for the error
 *                     message (and possibly by the commit metadata lookup helper, which is not visible here).
 * @throws HoodieDeltaStreamerException if no previous checkpoint can be found for a table whose last
 *                                      commit is later than the full bootstrap instant.
 */
private Option<String> getCheckpointToResume(Option<HoodieTimeline> commitTimelineOpt) throws IOException {
    // Without a timeline there is nothing to resume from; avoid calling get() on an empty option.
    if (!commitTimelineOpt.isPresent()) {
        return Option.empty();
    }
    HoodieTimeline commitTimeline = commitTimelineOpt.get();
    Option<String> resumeCheckpointStr = Option.empty();
    Option<HoodieInstant> lastCommit = commitTimeline.lastInstant();
    if (lastCommit.isPresent()) {
        // if previous commit metadata did not have the checkpoint key, try traversing previous commits until we find one.
        Option<HoodieCommitMetadata> commitMetadataOption = getLatestCommitMetadataWithValidCheckpointInfo(commitTimeline);
        if (commitMetadataOption.isPresent()) {
            HoodieCommitMetadata commitMetadata = commitMetadataOption.get();
            String checkpointFromReset = commitMetadata.getMetadata(CHECKPOINT_RESET_KEY);
            String checkpointFromCommit = commitMetadata.getMetadata(CHECKPOINT_KEY);
            boolean hasResetCheckpoint = !StringUtils.isNullOrEmpty(checkpointFromReset);
            LOG.debug("Checkpoint reset from metadata: " + checkpointFromReset);
            if (cfg.checkpoint != null && (!hasResetCheckpoint || !cfg.checkpoint.equals(checkpointFromReset))) {
                // The user supplied a checkpoint that has not been applied yet: it takes precedence.
                resumeCheckpointStr = Option.of(cfg.checkpoint);
            } else if (!StringUtils.isNullOrEmpty(checkpointFromCommit)) {
                resumeCheckpointStr = Option.of(checkpointFromCommit);
            } else if (HoodieTimeline.compareTimestamps(HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS, HoodieTimeline.LESSER_THAN, lastCommit.get().getTimestamp())) {
                throw new HoodieDeltaStreamerException("Unable to find previous checkpoint. Please double check if this table " + "was indeed built via delta streamer. Last Commit :" + lastCommit + ", Instants :" + commitTimeline.getInstants().collect(Collectors.toList()) + ", CommitMetadata=" + commitMetadata.toJsonString());
            }
            // Otherwise the previous checkpoint is empty and the last commit is not later than the
            // bootstrap instant: skip resume and keep Option.empty().

            // KAFKA_CHECKPOINT_TYPE will be honored only for first batch.
            if (hasResetCheckpoint) {
                props.remove(KafkaOffsetGen.Config.KAFKA_CHECKPOINT_TYPE.key());
            }
        } else if (cfg.checkpoint != null) {
            // No commit metadata with checkpoint info was found; fall back to the user-configured checkpoint.
            resumeCheckpointStr = Option.of(cfg.checkpoint);
        }
    }
    return resumeCheckpointStr;
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieDeltaStreamerException(org.apache.hudi.utilities.exception.HoodieDeltaStreamerException)

Aggregations

HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 1, HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 1, HoodieDeltaStreamerException (org.apache.hudi.utilities.exception.HoodieDeltaStreamerException): 1