Search in sources :

Example 11 with JobCheckpointingSettings

use of org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings in project flink by apache.

the class StreamingJobGraphGeneratorTest method testDisabledCheckpointing.

/**
 * Verifies the job graph generated for a pipeline on which checkpointing was never enabled:
 * the coordinator configuration must report an interval of Long.MAX_VALUE with exactly-once
 * switched off, and the stream config of the first vertex must carry
 * {@link CheckpointingMode#AT_LEAST_ONCE}.
 */
@Test
public void testDisabledCheckpointing() throws Exception {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    environment.fromElements(0).print();

    // precondition: the stream graph itself must not have checkpointing turned on
    StreamGraph graph = environment.getStreamGraph();
    boolean checkpointingEnabled = graph.getCheckpointConfig().isCheckpointingEnabled();
    assertFalse("Checkpointing enabled", checkpointingEnabled);

    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(graph);
    JobCheckpointingSettings checkpointingSettings = jobGraph.getCheckpointingSettings();

    long configuredInterval =
            checkpointingSettings.getCheckpointCoordinatorConfiguration().getCheckpointInterval();
    assertEquals(Long.MAX_VALUE, configuredInterval);

    boolean exactlyOnce =
            checkpointingSettings.getCheckpointCoordinatorConfiguration().isExactlyOnce();
    assertFalse(exactlyOnce);

    // the first vertex in topological order is inspected for the checkpoint mode
    JobVertex firstVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(0);
    StreamConfig firstVertexConfig = new StreamConfig(firstVertex.getConfiguration());
    assertEquals(CheckpointingMode.AT_LEAST_ONCE, firstVertexConfig.getCheckpointMode());
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 12 with JobCheckpointingSettings

use of org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings in project flink by apache.

the class DefaultSchedulerCheckpointCoordinatorTest method createSchedulerAndEnableCheckpointing.

/**
 * Builds a {@link DefaultScheduler} for a streaming job graph consisting of one vertex named
 * "MockVertex", with checkpointing configured (interval and timeout both 100).
 *
 * @param counter checkpoint ID counter handed to the testing checkpoint recovery factory
 * @param store completed checkpoint store handed to the testing checkpoint recovery factory
 * @return the scheduler built on the main-thread executor adapter with an RPC timeout of one day
 */
private DefaultScheduler createSchedulerAndEnableCheckpointing(CheckpointIDCounter counter, CompletedCheckpointStore store) throws Exception {
    final JobVertex mockVertex = new JobVertex("MockVertex");
    mockVertex.setInvokableClass(AbstractInvokable.class);

    final CheckpointCoordinatorConfiguration coordinatorConfig =
            CheckpointCoordinatorConfiguration.builder()
                    .setCheckpointInterval(100L)
                    .setCheckpointTimeout(100L)
                    .build();

    final JobGraph jobGraph =
            JobGraphBuilder.newStreamingJobGraphBuilder()
                    .addJobVertex(mockVertex)
                    .setJobCheckpointingSettings(new JobCheckpointingSettings(coordinatorConfig, null))
                    .build();

    return SchedulerTestingUtils.newSchedulerBuilder(
                    jobGraph, ComponentMainThreadExecutorServiceAdapter.forMainThread())
            .setCheckpointRecoveryFactory(new TestingCheckpointRecoveryFactory(store, counter))
            .setRpcTimeout(Time.days(1L))
            .build();
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) Time(org.apache.flink.api.common.time.Time) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration)

Example 13 with JobCheckpointingSettings

use of org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings in project flink by apache.

the class StreamingJobGraphGenerator method configureCheckpointing.

/**
 * Assembles the {@link JobCheckpointingSettings} from the stream graph's checkpoint config,
 * master-side hooks, state backend and checkpoint storage, and installs them on the job graph
 * as its snapshot settings.
 *
 * @throws IllegalStateException if externalized checkpoints are enabled without a cleanup mode
 * @throws FlinkRuntimeException if the hooks, the state backend or the checkpoint storage
 *     cannot be serialized
 */
private void configureCheckpointing() {
    final CheckpointConfig cfg = streamGraph.getCheckpointConfig();

    // an interval of Long.MAX_VALUE means that periodic checkpointing is disabled
    final long configuredInterval = cfg.getCheckpointInterval();
    final long interval =
            configuredInterval < MINIMAL_CHECKPOINT_TIME ? Long.MAX_VALUE : configuredInterval;

    // --- retention policy ---
    final CheckpointRetentionPolicy retentionPolicy;
    if (!cfg.isExternalizedCheckpointsEnabled()) {
        retentionPolicy = CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION;
    } else {
        final CheckpointConfig.ExternalizedCheckpointCleanup cleanupMode =
                cfg.getExternalizedCheckpointCleanup();
        // externalized checkpoints without a cleanup mode are an inconsistent configuration
        if (cleanupMode == null) {
            throw new IllegalStateException("Externalized checkpoints enabled, but no cleanup mode configured.");
        }
        if (cleanupMode.deleteOnCancellation()) {
            retentionPolicy = CheckpointRetentionPolicy.RETAIN_ON_FAILURE;
        } else {
            retentionPolicy = CheckpointRetentionPolicy.RETAIN_ON_CANCELLATION;
        }
    }

    // --- master-side checkpoint hooks contributed by user functions ---
    final ArrayList<MasterTriggerRestoreHook.Factory> hookFactories = new ArrayList<>();
    for (StreamNode streamNode : streamGraph.getStreamNodes()) {
        if (!(streamNode.getOperatorFactory() instanceof UdfStreamOperatorFactory)) {
            continue;
        }
        Function userFunction =
                ((UdfStreamOperatorFactory) streamNode.getOperatorFactory()).getUserFunction();
        if (userFunction instanceof WithMasterCheckpointHook) {
            hookFactories.add(
                    new FunctionMasterCheckpointHookFactory((WithMasterCheckpointHook<?>) userFunction));
        }
    }

    // hooks may contain user-defined code, hence they are serialized eagerly
    final SerializedValue<MasterTriggerRestoreHook.Factory[]> serializedHooks;
    if (!hookFactories.isEmpty()) {
        final MasterTriggerRestoreHook.Factory[] hookArray =
                hookFactories.toArray(new MasterTriggerRestoreHook.Factory[0]);
        try {
            serializedHooks = new SerializedValue<>(hookArray);
        } catch (IOException e) {
            throw new FlinkRuntimeException("Trigger/restore hook is not serializable", e);
        }
    } else {
        serializedHooks = null;
    }

    // the state backend may contain user-defined code, hence it is serialized eagerly
    final StateBackend stateBackend = streamGraph.getStateBackend();
    final SerializedValue<StateBackend> serializedStateBackend;
    if (stateBackend != null) {
        try {
            serializedStateBackend = new SerializedValue<>(stateBackend);
        } catch (IOException e) {
            throw new FlinkRuntimeException("State backend is not serializable", e);
        }
    } else {
        serializedStateBackend = null;
    }

    // the checkpoint storage may contain user-defined code, hence it is serialized eagerly
    final CheckpointStorage checkpointStorage = streamGraph.getCheckpointStorage();
    final SerializedValue<CheckpointStorage> serializedCheckpointStorage;
    if (checkpointStorage != null) {
        try {
            serializedCheckpointStorage = new SerializedValue<>(checkpointStorage);
        } catch (IOException e) {
            throw new FlinkRuntimeException("Checkpoint storage is not serializable", e);
        }
    } else {
        serializedCheckpointStorage = null;
    }

    // --- assemble the coordinator configuration and the final settings ---
    final CheckpointCoordinatorConfiguration coordinatorConfig =
            CheckpointCoordinatorConfiguration.builder()
                    .setCheckpointInterval(interval)
                    .setCheckpointTimeout(cfg.getCheckpointTimeout())
                    .setMinPauseBetweenCheckpoints(cfg.getMinPauseBetweenCheckpoints())
                    .setMaxConcurrentCheckpoints(cfg.getMaxConcurrentCheckpoints())
                    .setCheckpointRetentionPolicy(retentionPolicy)
                    .setExactlyOnce(getCheckpointingMode(cfg) == CheckpointingMode.EXACTLY_ONCE)
                    .setTolerableCheckpointFailureNumber(cfg.getTolerableCheckpointFailureNumber())
                    .setUnalignedCheckpointsEnabled(cfg.isUnalignedCheckpointsEnabled())
                    .setCheckpointIdOfIgnoredInFlightData(cfg.getCheckpointIdOfIgnoredInFlightData())
                    .setAlignedCheckpointTimeout(cfg.getAlignedCheckpointTimeout().toMillis())
                    .setEnableCheckpointsAfterTasksFinish(streamGraph.isEnableCheckpointsAfterTasksFinish())
                    .build();

    jobGraph.setSnapshotSettings(
            new JobCheckpointingSettings(
                    coordinatorConfig,
                    serializedStateBackend,
                    streamGraph.isChangelogStateBackendEnabled(),
                    serializedCheckpointStorage,
                    serializedHooks));
}
Also used : UdfStreamOperatorFactory(org.apache.flink.streaming.api.operators.UdfStreamOperatorFactory) WithMasterCheckpointHook(org.apache.flink.streaming.api.checkpoint.WithMasterCheckpointHook) CheckpointConfig(org.apache.flink.streaming.api.environment.CheckpointConfig) ArrayList(java.util.ArrayList) YieldingOperatorFactory(org.apache.flink.streaming.api.operators.YieldingOperatorFactory) LoggerFactory(org.slf4j.LoggerFactory) UdfStreamOperatorFactory(org.apache.flink.streaming.api.operators.UdfStreamOperatorFactory) StreamOperatorFactory(org.apache.flink.streaming.api.operators.StreamOperatorFactory) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) IOException(java.io.IOException) MasterTriggerRestoreHook(org.apache.flink.runtime.checkpoint.MasterTriggerRestoreHook) StateBackend(org.apache.flink.runtime.state.StateBackend) Function(org.apache.flink.api.common.functions.Function) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) CheckpointStorage(org.apache.flink.runtime.state.CheckpointStorage) CheckpointRetentionPolicy(org.apache.flink.runtime.checkpoint.CheckpointRetentionPolicy)

Example 14 with JobCheckpointingSettings

use of org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings in project flink by apache.

the class ArchivedExecutionGraphTest method testCheckpointSettingsArchiving.

/**
 * Creates a sparse archived execution graph from default checkpoint coordinator settings and
 * checks it with {@code assertContainsCheckpointSettings}.
 */
@Test
public void testCheckpointSettingsArchiving() {
    final JobID jobId = new JobID();
    final JobCheckpointingSettings checkpointingSettings =
            new JobCheckpointingSettings(CheckpointCoordinatorConfiguration.builder().build(), null);

    final ArchivedExecutionGraph sparseGraph =
            ArchivedExecutionGraph.createSparseArchivedExecutionGraph(
                    jobId,
                    "TestJob",
                    JobStatus.INITIALIZING,
                    null,
                    checkpointingSettings,
                    System.currentTimeMillis());

    assertContainsCheckpointSettings(sparseGraph);
}
Also used : JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 15 with JobCheckpointingSettings

use of org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings in project flink by apache.

the class ArchivedExecutionGraphTest method setupExecutionGraph.

/**
 * Builds the shared runtime execution graph for this test class: a two-vertex streaming job
 * with checkpointing settings and a customized execution config. After scheduling has started,
 * the current attempt of the first execution vertex is reported as FAILED.
 */
@BeforeClass
public static void setupExecutionGraph() throws Exception {
    // --- job vertices ---
    JobVertex firstVertex = new JobVertex("v1", new JobVertexID());
    firstVertex.setParallelism(1);
    firstVertex.setInvokableClass(AbstractInvokable.class);

    JobVertex secondVertex = new JobVertex("v2", new JobVertexID());
    secondVertex.setParallelism(2);
    secondVertex.setInvokableClass(AbstractInvokable.class);

    // --- execution config ---
    ExecutionConfig executionConfig = new ExecutionConfig();
    executionConfig.setRestartStrategy(new RestartStrategies.NoRestartStrategyConfiguration());
    executionConfig.setParallelism(4);
    executionConfig.enableObjectReuse();
    executionConfig.setGlobalJobParameters(new TestJobParameters());

    // --- checkpointing settings ---
    CheckpointCoordinatorConfiguration coordinatorConfig =
            new CheckpointCoordinatorConfiguration(
                    100,
                    100,
                    100,
                    1,
                    CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
                    true,
                    false,
                    0,
                    0);
    JobCheckpointingSettings settings = new JobCheckpointingSettings(coordinatorConfig, null);

    // --- job graph and scheduler ---
    final JobGraph jobGraph =
            JobGraphBuilder.newStreamingJobGraphBuilder()
                    .addJobVertices(asList(firstVertex, secondVertex))
                    .setJobCheckpointingSettings(settings)
                    .setExecutionConfig(executionConfig)
                    .build();
    SchedulerBase scheduler =
            SchedulerTestingUtils.createScheduler(
                    jobGraph, ComponentMainThreadExecutorServiceAdapter.forMainThread());

    // the graph reference is captured before scheduling starts
    runtimeGraph = scheduler.getExecutionGraph();
    scheduler.startScheduling();

    // report a local failure for the current attempt of the first execution vertex
    TaskExecutionState localFailure =
            new TaskExecutionState(
                    runtimeGraph
                            .getAllExecutionVertices()
                            .iterator()
                            .next()
                            .getCurrentExecutionAttempt()
                            .getAttemptId(),
                    ExecutionState.FAILED,
                    new RuntimeException("Local failure"));
    scheduler.updateTaskExecutionState(localFailure);
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) ArchivedExecutionConfig(org.apache.flink.api.common.ArchivedExecutionConfig) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) BeforeClass(org.junit.BeforeClass)

Aggregations

JobCheckpointingSettings (org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings)20 CheckpointCoordinatorConfiguration (org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration)15 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)11 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)11 Test (org.junit.Test)7 IOException (java.io.IOException)5 JobID (org.apache.flink.api.common.JobID)4 CheckpointStorage (org.apache.flink.runtime.state.CheckpointStorage)4 StateBackend (org.apache.flink.runtime.state.StateBackend)4 Duration (java.time.Duration)3 ArrayList (java.util.ArrayList)3 List (java.util.List)3 CompletableFuture (java.util.concurrent.CompletableFuture)3 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)3 JobStatus (org.apache.flink.api.common.JobStatus)3 Configuration (org.apache.flink.configuration.Configuration)3 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)2 BlockingQueue (java.util.concurrent.BlockingQueue)2 CountDownLatch (java.util.concurrent.CountDownLatch)2 TimeUnit (java.util.concurrent.TimeUnit)2