Search in sources :

Example 1 with WithMasterCheckpointHook

Use of org.apache.flink.streaming.api.checkpoint.WithMasterCheckpointHook in the Apache Flink project.

The method configureCheckpointing of the class StreamingJobGraphGenerator.

/**
 * Derives the {@link JobCheckpointingSettings} from the stream graph's checkpoint configuration
 * and installs them on the job graph as its snapshot settings.
 *
 * <p>The master-side checkpoint hooks, the state backend and the checkpoint storage may contain
 * user-defined code, so each of them is eagerly wrapped in a {@link SerializedValue}. A missing
 * state backend, a missing checkpoint storage or an empty hook list is passed on as {@code null}.
 *
 * @throws IllegalStateException if externalized checkpoints are enabled but no cleanup mode is
 *     configured
 * @throws FlinkRuntimeException if the hooks, the state backend or the checkpoint storage cannot
 *     be serialized
 */
private void configureCheckpointing() {
    final CheckpointConfig checkpointConfig = streamGraph.getCheckpointConfig();

    // An interval of Long.MAX_VALUE means that periodic checkpointing is disabled.
    final long configuredInterval = checkpointConfig.getCheckpointInterval();
    final long effectiveInterval =
            configuredInterval < MINIMAL_CHECKPOINT_TIME ? Long.MAX_VALUE : configuredInterval;

    // --- retention policy after job termination ---
    final CheckpointRetentionPolicy retentionPolicy;
    if (!checkpointConfig.isExternalizedCheckpointsEnabled()) {
        retentionPolicy = CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION;
    } else {
        final CheckpointConfig.ExternalizedCheckpointCleanup cleanupMode =
                checkpointConfig.getExternalizedCheckpointCleanup();
        // Sanity check: externalized checkpoints require a cleanup mode.
        if (cleanupMode == null) {
            throw new IllegalStateException("Externalized checkpoints enabled, but no cleanup mode configured.");
        }
        if (cleanupMode.deleteOnCancellation()) {
            retentionPolicy = CheckpointRetentionPolicy.RETAIN_ON_FAILURE;
        } else {
            retentionPolicy = CheckpointRetentionPolicy.RETAIN_ON_CANCELLATION;
        }
    }

    // --- master-side checkpoint hooks contributed by user functions ---
    final ArrayList<MasterTriggerRestoreHook.Factory> hookFactories = new ArrayList<>();
    for (StreamNode streamNode : streamGraph.getStreamNodes()) {
        final Object operatorFactory = streamNode.getOperatorFactory();
        if (!(operatorFactory instanceof UdfStreamOperatorFactory)) {
            continue;
        }
        final Function userFunction = ((UdfStreamOperatorFactory) operatorFactory).getUserFunction();
        if (userFunction instanceof WithMasterCheckpointHook) {
            hookFactories.add(
                    new FunctionMasterCheckpointHookFactory((WithMasterCheckpointHook<?>) userFunction));
        }
    }

    // Hooks can carry user-defined code, hence they are shipped as an eagerly serialized value.
    final SerializedValue<MasterTriggerRestoreHook.Factory[]> serializedHooks;
    if (!hookFactories.isEmpty()) {
        try {
            final MasterTriggerRestoreHook.Factory[] hookArray =
                    hookFactories.toArray(new MasterTriggerRestoreHook.Factory[0]);
            serializedHooks = new SerializedValue<>(hookArray);
        } catch (IOException e) {
            throw new FlinkRuntimeException("Trigger/restore hook is not serializable", e);
        }
    } else {
        serializedHooks = null;
    }

    // The state backend can carry user-defined code as well, so it is serialized eagerly too.
    final StateBackend stateBackend = streamGraph.getStateBackend();
    final SerializedValue<StateBackend> serializedStateBackend;
    if (stateBackend != null) {
        try {
            serializedStateBackend = new SerializedValue<>(stateBackend);
        } catch (IOException e) {
            throw new FlinkRuntimeException("State backend is not serializable", e);
        }
    } else {
        serializedStateBackend = null;
    }

    // Same treatment for the checkpoint storage, which can also carry user-defined code.
    final CheckpointStorage checkpointStorage = streamGraph.getCheckpointStorage();
    final SerializedValue<CheckpointStorage> serializedCheckpointStorage;
    if (checkpointStorage != null) {
        try {
            serializedCheckpointStorage = new SerializedValue<>(checkpointStorage);
        } catch (IOException e) {
            throw new FlinkRuntimeException("Checkpoint storage is not serializable", e);
        }
    } else {
        serializedCheckpointStorage = null;
    }

    // --- assemble the coordinator configuration, then the job-level settings ---
    final CheckpointCoordinatorConfiguration coordinatorConfiguration =
            CheckpointCoordinatorConfiguration.builder()
                    .setCheckpointInterval(effectiveInterval)
                    .setCheckpointTimeout(checkpointConfig.getCheckpointTimeout())
                    .setMinPauseBetweenCheckpoints(checkpointConfig.getMinPauseBetweenCheckpoints())
                    .setMaxConcurrentCheckpoints(checkpointConfig.getMaxConcurrentCheckpoints())
                    .setCheckpointRetentionPolicy(retentionPolicy)
                    .setExactlyOnce(
                            getCheckpointingMode(checkpointConfig) == CheckpointingMode.EXACTLY_ONCE)
                    .setTolerableCheckpointFailureNumber(
                            checkpointConfig.getTolerableCheckpointFailureNumber())
                    .setUnalignedCheckpointsEnabled(checkpointConfig.isUnalignedCheckpointsEnabled())
                    .setCheckpointIdOfIgnoredInFlightData(
                            checkpointConfig.getCheckpointIdOfIgnoredInFlightData())
                    .setAlignedCheckpointTimeout(
                            checkpointConfig.getAlignedCheckpointTimeout().toMillis())
                    .setEnableCheckpointsAfterTasksFinish(
                            streamGraph.isEnableCheckpointsAfterTasksFinish())
                    .build();

    jobGraph.setSnapshotSettings(
            new JobCheckpointingSettings(
                    coordinatorConfiguration,
                    serializedStateBackend,
                    streamGraph.isChangelogStateBackendEnabled(),
                    serializedCheckpointStorage,
                    serializedHooks));
}
Also used : UdfStreamOperatorFactory(org.apache.flink.streaming.api.operators.UdfStreamOperatorFactory) WithMasterCheckpointHook(org.apache.flink.streaming.api.checkpoint.WithMasterCheckpointHook) CheckpointConfig(org.apache.flink.streaming.api.environment.CheckpointConfig) ArrayList(java.util.ArrayList) YieldingOperatorFactory(org.apache.flink.streaming.api.operators.YieldingOperatorFactory) LoggerFactory(org.slf4j.LoggerFactory) UdfStreamOperatorFactory(org.apache.flink.streaming.api.operators.UdfStreamOperatorFactory) StreamOperatorFactory(org.apache.flink.streaming.api.operators.StreamOperatorFactory) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) IOException(java.io.IOException) MasterTriggerRestoreHook(org.apache.flink.runtime.checkpoint.MasterTriggerRestoreHook) StateBackend(org.apache.flink.runtime.state.StateBackend) Function(org.apache.flink.api.common.functions.Function) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) CheckpointStorage(org.apache.flink.runtime.state.CheckpointStorage) CheckpointRetentionPolicy(org.apache.flink.runtime.checkpoint.CheckpointRetentionPolicy)

Aggregations

IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Function (org.apache.flink.api.common.functions.Function)1 CheckpointRetentionPolicy (org.apache.flink.runtime.checkpoint.CheckpointRetentionPolicy)1 MasterTriggerRestoreHook (org.apache.flink.runtime.checkpoint.MasterTriggerRestoreHook)1 JobCheckpointingSettings (org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings)1 CheckpointStorage (org.apache.flink.runtime.state.CheckpointStorage)1 StateBackend (org.apache.flink.runtime.state.StateBackend)1 WithMasterCheckpointHook (org.apache.flink.streaming.api.checkpoint.WithMasterCheckpointHook)1 CheckpointConfig (org.apache.flink.streaming.api.environment.CheckpointConfig)1 SourceOperatorFactory (org.apache.flink.streaming.api.operators.SourceOperatorFactory)1 StreamOperatorFactory (org.apache.flink.streaming.api.operators.StreamOperatorFactory)1 UdfStreamOperatorFactory (org.apache.flink.streaming.api.operators.UdfStreamOperatorFactory)1 YieldingOperatorFactory (org.apache.flink.streaming.api.operators.YieldingOperatorFactory)1 FlinkRuntimeException (org.apache.flink.util.FlinkRuntimeException)1 LoggerFactory (org.slf4j.LoggerFactory)1