use of org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings in project flink by apache.
the class ChangelogStateBackendLoadingTest method assertStateBackendAndChangelogInEnvironmentAndStreamGraphAndJobGraph.
/**
 * Checks that the changelog flag and the root state backend class agree at three levels: the
 * execution environment, the stream graph obtained from it, and the checkpointing settings of
 * the job graph derived from that stream graph.
 *
 * <p>When {@code rootStateBackendClass} is {@code null}, every level is expected to report no
 * state backend at all. Otherwise every level must report a backend of exactly that class; for
 * the job graph the serialized default backend is deserialized with {@code cl} (defined outside
 * this method) and checked both directly and after passing it through
 * {@code unwrapFromDelegatingStateBackend}.
 *
 * @param env environment under test
 * @param isChangelogEnabled changelog flag expected at every level
 * @param rootStateBackendClass expected state backend class, or {@code null} if none is expected
 * @throws Exception propagated from any of the calls made here
 */
private void assertStateBackendAndChangelogInEnvironmentAndStreamGraphAndJobGraph(
        StreamExecutionEnvironment env,
        TernaryBoolean isChangelogEnabled,
        Class<?> rootStateBackendClass)
        throws Exception {
    // --- execution environment ---
    assertEquals(isChangelogEnabled, env.isChangelogStateBackendEnabled());
    if (rootStateBackendClass != null) {
        assertSame(rootStateBackendClass, env.getStateBackend().getClass());
    } else {
        assertNull(env.getStateBackend());
    }

    // --- stream graph ---
    final StreamGraph graph = env.getStreamGraph(false);
    assertEquals(isChangelogEnabled, graph.isChangelogStateBackendEnabled());
    if (rootStateBackendClass != null) {
        assertSame(rootStateBackendClass, graph.getStateBackend().getClass());
    } else {
        assertNull(graph.getStateBackend());
    }

    // --- job graph checkpointing settings ---
    final JobCheckpointingSettings settings = graph.getJobGraph().getCheckpointingSettings();
    assertEquals(isChangelogEnabled, settings.isChangelogStateBackendEnabled());
    if (rootStateBackendClass != null) {
        // The backend is stored in serialized form here, so it has to be deserialized first.
        assertSame(
                rootStateBackendClass,
                settings.getDefaultStateBackend().deserializeValue(cl).getClass());
        assertSame(
                rootStateBackendClass,
                unwrapFromDelegatingStateBackend(
                                settings.getDefaultStateBackend().deserializeValue(cl))
                        .getClass());
    } else {
        assertNull(settings.getDefaultStateBackend());
    }
}
use of org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings in project flink by apache.
the class AdaptiveSchedulerTest method testExceptionHistoryWithTaskFailureFromStopWithSavepoint.
/**
 * Triggers stop-with-savepoint, then reports one execution attempt as {@code FAILED} with a
 * local exception, and expects the exception history returned by
 * {@code runExceptionHistoryTests} to contain exactly one entry carrying that exception.
 */
@Test
public void testExceptionHistoryWithTaskFailureFromStopWithSavepoint() throws Exception {
    final Exception expectedException = new Exception("Expected Local Exception");

    // Job graph hook: attach checkpointing settings built from the default coordinator config.
    final Consumer<JobGraph> setupJobGraph =
            jobGraph -> {
                jobGraph.setSnapshotSettings(
                        new JobCheckpointingSettings(
                                CheckpointCoordinatorConfiguration.builder().build(), null));
            };

    // Scheduler hook: plug in a standalone checkpoint store / id counter and a cleaner.
    final TestingCheckpointRecoveryFactory recoveryFactory =
            new TestingCheckpointRecoveryFactory(
                    new StandaloneCompletedCheckpointStore(1),
                    new StandaloneCheckpointIDCounter());
    final CheckpointsCleaner cleaner = new CheckpointsCleaner();
    final Consumer<AdaptiveSchedulerBuilder> setupScheduler =
            builder -> {
                builder.setCheckpointRecoveryFactory(recoveryFactory)
                        .setCheckpointCleaner(cleaner);
            };

    // Actual scenario: pick the attempt, start stop-with-savepoint, then fail that attempt.
    final BiConsumer<AdaptiveScheduler, List<ExecutionAttemptID>> testLogic =
            (scheduler, attemptIds) -> {
                final ExecutionAttemptID failingAttempt = attemptIds.get(1);
                scheduler.stopWithSavepoint(
                        "file:///tmp/target", true, SavepointFormatType.CANONICAL);
                final TaskExecutionState failedState =
                        new TaskExecutionState(
                                failingAttempt, ExecutionState.FAILED, expectedException);
                scheduler.updateTaskExecutionState(
                        new TaskExecutionStateTransition(failedState));
            };

    final Iterable<RootExceptionHistoryEntry> history =
            runExceptionHistoryTests(testLogic, setupScheduler, setupJobGraph);

    assertThat(history).hasSize(1);
    final RootExceptionHistoryEntry onlyEntry = history.iterator().next();
    assertThat(onlyEntry.getException().deserializeError(classLoader))
            .isEqualTo(expectedException);
}
use of org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings in project flink by apache.
the class AdaptiveSchedulerClusterITCase method testCheckpointStatsPersistedAcrossRescale.
/**
 * Submits a checkpointing job, waits for at least one completed checkpoint, terminates task
 * manager 0, waits until the vertex reaches the parallelism offered by the remaining task
 * managers, and finally asserts that the last entry of the checkpoint history has id 1.
 */
@Test
public void testCheckpointStatsPersistedAcrossRescale() throws Exception {
    final MiniCluster miniCluster = miniClusterResource.getMiniCluster();

    final JobVertex vertex = new JobVertex("jobVertex", JOB_VERTEX_ID);
    vertex.setInvokableClass(CheckpointingNoOpInvokable.class);
    vertex.setParallelism(PARALLELISM);

    final JobGraph jobGraph = JobGraphTestUtils.streamingJobGraph(vertex);
    final CheckpointCoordinatorConfiguration coordinatorConfig =
            CheckpointCoordinatorConfiguration.builder()
                    .setCheckpointInterval(100)
                    .setCheckpointTimeout(1000)
                    .build();
    jobGraph.setSnapshotSettings(new JobCheckpointingSettings(coordinatorConfig, null));

    miniCluster.submitJob(jobGraph).join();

    // Block until the job reports at least one completed checkpoint.
    CommonTestUtils.waitUntilCondition(
            () ->
                    miniCluster
                            .getExecutionGraph(jobGraph.getJobID())
                            .thenApply(
                                    graph ->
                                            graph.getCheckpointStatsSnapshot()
                                                            .getCounts()
                                                            .getNumberOfCompletedCheckpoints()
                                                    > 0)
                            .get(),
            Deadline.fromNow(Duration.ofHours(1)));

    // Take one task manager away and wait for the job to settle on the reduced slot count.
    miniCluster.terminateTaskManager(0);
    waitUntilParallelismForVertexReached(
            jobGraph.getJobID(),
            JOB_VERTEX_ID,
            NUMBER_SLOTS_PER_TASK_MANAGER * (NUMBER_TASK_MANAGERS - 1));

    // The very first checkpoint (id 1) must still be present at the tail of the history.
    final List<AbstractCheckpointStats> history =
            miniCluster
                    .getExecutionGraph(jobGraph.getJobID())
                    .thenApply(
                            graph ->
                                    graph.getCheckpointStatsSnapshot()
                                            .getHistory()
                                            .getCheckpoints())
                    .get();
    final AbstractCheckpointStats lastEntry = history.get(history.size() - 1);
    assertThat(lastEntry.getCheckpointId(), is(1L));
}
use of org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings in project flink by apache.
the class JMXJobManagerMetricTest method testJobManagerJMXMetricAccess.
/**
 * Verifies that a JobManager metric can be read through JMX.
 *
 * <p>A single-vertex job named {@code TestingJob} is submitted with checkpointing settings.
 * Once the job is {@code RUNNING}, the platform MBean server must expose exactly one
 * {@code lastCheckpointSize} bean for that job, and its {@code Value} attribute must be
 * {@code -1}.
 */
@Test
void testJobManagerJMXMetricAccess(@InjectClusterClient ClusterClient<?> client)
        throws Exception {
    final Deadline deadline = Deadline.now().plus(Duration.ofMinutes(2));

    try {
        final JobVertex source = new JobVertex("Source");
        source.setInvokableClass(BlockingInvokable.class);
        source.setParallelism(1);

        final CheckpointCoordinatorConfiguration coordinatorConfig =
                new CheckpointCoordinatorConfiguration(
                        500,
                        500,
                        50,
                        5,
                        CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
                        true,
                        false,
                        0,
                        0);
        final JobCheckpointingSettings checkpointingSettings =
                new JobCheckpointingSettings(coordinatorConfig, null);

        final JobGraph jobGraph =
                JobGraphBuilder.newStreamingJobGraphBuilder()
                        .setJobName("TestingJob")
                        .addJobVertex(source)
                        .setJobCheckpointingSettings(checkpointingSettings)
                        .build();

        client.submitJob(jobGraph).get();

        // Poll the job status every 10 ms until it is RUNNING or the deadline expires.
        FutureUtils.retrySuccessfulWithDelay(
                        () -> client.getJobStatus(jobGraph.getJobID()),
                        Time.milliseconds(10),
                        deadline,
                        status -> status == JobStatus.RUNNING,
                        TestingUtils.defaultScheduledExecutor())
                .get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);

        final MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer();
        final ObjectName metricPattern =
                new ObjectName(
                        "org.apache.flink.jobmanager.job.lastCheckpointSize:job_name=TestingJob,*");
        final Set<ObjectName> matches = mBeanServer.queryNames(metricPattern, null);
        assertThat(matches).hasSize(1);

        final Object metricValue = mBeanServer.getAttribute(matches.iterator().next(), "Value");
        assertThat(metricValue).isEqualTo(-1L);

        BlockingInvokable.unblock();
    } finally {
        // Repeated here so the invokable is released even if something above throws.
        BlockingInvokable.unblock();
    }
}
use of org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings in project flink by apache.
the class SchedulerTestingUtils method enableCheckpointing.
/**
 * Attaches checkpointing settings to the given job graph.
 *
 * <p>The coordinator configuration uses {@code Long.MAX_VALUE} as its first argument to disable
 * periodical checkpointing. The state backend and checkpoint storage, when provided, are
 * wrapped in {@link SerializedValue}s; a {@code null} argument is passed on as {@code null}.
 *
 * @param jobGraph job graph that receives the snapshot settings
 * @param stateBackend state backend to serialize into the settings, may be {@code null}
 * @param checkpointStorage checkpoint storage to serialize into the settings, may be
 *     {@code null}
 * @throws RuntimeException if serializing either argument fails with an {@link IOException}
 */
public static void enableCheckpointing(
        final JobGraph jobGraph,
        @Nullable StateBackend stateBackend,
        @Nullable CheckpointStorage checkpointStorage) {
    final CheckpointCoordinatorConfiguration config =
            new CheckpointCoordinatorConfiguration(
                    Long.MAX_VALUE, // disable periodical checkpointing
                    DEFAULT_CHECKPOINT_TIMEOUT_MS,
                    0,
                    1,
                    CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
                    false,
                    false,
                    0,
                    0);

    final SerializedValue<StateBackend> serializedStateBackend =
            serializeIfPresent(stateBackend, "could not serialize state backend");
    final SerializedValue<CheckpointStorage> serializedCheckpointStorage =
            serializeIfPresent(checkpointStorage, "could not serialize checkpoint storage");

    jobGraph.setSnapshotSettings(
            new JobCheckpointingSettings(
                    config,
                    serializedStateBackend,
                    TernaryBoolean.UNDEFINED,
                    serializedCheckpointStorage,
                    null));
}

/**
 * Wraps {@code value} in a {@link SerializedValue}, or returns {@code null} for a {@code null}
 * input. An {@link IOException} during serialization is rethrown as a
 * {@link RuntimeException} carrying {@code failureMessage} and the original cause.
 */
@Nullable
private static <T> SerializedValue<T> serializeIfPresent(
        @Nullable final T value, final String failureMessage) {
    if (value == null) {
        return null;
    }
    try {
        return new SerializedValue<>(value);
    } catch (IOException e) {
        throw new RuntimeException(failureMessage, e);
    }
}
Aggregations