use of org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration in project flink by apache.
the class CheckpointCoordinatorTest method testExternalizedCheckpoints.
/**
 * Tests that the externalized checkpoint configuration is respected: a checkpoint triggered by a
 * coordinator configured with {@link CheckpointRetentionPolicy#RETAIN_ON_FAILURE} must carry
 * the {@link CheckpointProperties} derived from that retention policy.
 */
@Test
public void testExternalizedCheckpoints() throws Exception {
    ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(new JobVertexID())
                    .build();

    // set up the coordinator with the retention policy under test
    CheckpointCoordinatorConfiguration chkConfig =
            new CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder()
                    .setCheckpointRetentionPolicy(CheckpointRetentionPolicy.RETAIN_ON_FAILURE)
                    .build();
    CheckpointCoordinator checkpointCoordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(graph)
                    .setCheckpointCoordinatorConfiguration(chkConfig)
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();

    CompletableFuture<CompletedCheckpoint> checkpointFuture =
            checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    // surface a failed trigger immediately instead of silently continuing
    FutureUtils.throwIfCompletedExceptionally(checkpointFuture);

    // Guard against a vacuous pass: without a pending checkpoint the loop below would not
    // execute a single assertion and the test would succeed without checking anything.
    assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());

    CheckpointProperties expected =
            CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.RETAIN_ON_FAILURE);
    for (PendingCheckpoint checkpoint : checkpointCoordinator.getPendingCheckpoints().values()) {
        assertEquals(expected, checkpoint.getProps());
    }

    // no acknowledgement is sent in this test; just release the coordinator
    checkpointCoordinator.shutdown();
}
use of org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration in project flink by apache.
the class CheckpointCoordinatorTest method testMaxConcurrentAttempts.
/**
 * Verifies that the periodic scheduler does not trigger more than {@code maxConcurrentAttempts}
 * checkpoints while none of them is acknowledged, and that acknowledging one checkpoint allows
 * exactly one further checkpoint to be triggered.
 *
 * @param maxConcurrentAttempts the value passed to {@code setMaxConcurrentCheckpoints}
 */
private void testMaxConcurrentAttempts(int maxConcurrentAttempts) {
    try {
        JobVertexID jobVertexID1 = new JobVertexID();

        CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway gateway =
                new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();

        ExecutionGraph graph =
                new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                        .addJobVertex(jobVertexID1)
                        .setTaskManagerGateway(gateway)
                        .build();

        ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
        ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();

        CheckpointCoordinatorConfiguration chkConfig =
                new CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder()
                        .setCheckpointInterval(10) // periodic interval is 10 ms
                        .setCheckpointTimeout(200000) // timeout is very long (200 s)
                        .setMinPauseBetweenCheckpoints(0L) // no extra delay
                        .setMaxConcurrentCheckpoints(maxConcurrentAttempts)
                        .build();
        CheckpointCoordinator checkpointCoordinator =
                new CheckpointCoordinatorBuilder()
                        .setExecutionGraph(graph)
                        .setCheckpointCoordinatorConfiguration(chkConfig)
                        .setCompletedCheckpointStore(new StandaloneCompletedCheckpointStore(2))
                        .setTimer(manuallyTriggeredScheduledExecutor)
                        .build();

        checkpointCoordinator.startCheckpointScheduler();

        // fire the periodic trigger once per allowed concurrent checkpoint
        for (int i = 0; i < maxConcurrentAttempts; i++) {
            manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
            manuallyTriggeredScheduledExecutor.triggerAll();
        }

        assertEquals(maxConcurrentAttempts, gateway.getTriggeredCheckpoints(attemptID1).size());
        assertEquals(0, gateway.getNotifiedCompletedCheckpoints(attemptID1).size());

        // now, once we acknowledge one checkpoint, it should trigger the next one
        checkpointCoordinator.receiveAcknowledgeMessage(
                new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, 1L),
                TASK_MANAGER_LOCATION_INFO);

        final Collection<ScheduledFuture<?>> periodicScheduledTasks =
                manuallyTriggeredScheduledExecutor.getActivePeriodicScheduledTask();
        assertEquals(1, periodicScheduledTasks.size());

        manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
        manuallyTriggeredScheduledExecutor.triggerAll();

        assertEquals(
                maxConcurrentAttempts + 1, gateway.getTriggeredCheckpoints(attemptID1).size());

        // no further checkpoints should happen
        manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
        manuallyTriggeredScheduledExecutor.triggerAll();

        assertEquals(
                maxConcurrentAttempts + 1, gateway.getTriggeredCheckpoints(attemptID1).size());

        checkpointCoordinator.shutdown();
    } catch (Exception e) {
        // Fail the test with the original exception attached as the cause, so the test report
        // contains the full stack trace and a meaningful message even when e.getMessage() is
        // null. The method signature deliberately stays free of a throws clause.
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
use of org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration in project flink by apache.
the class FailoverStrategyCheckpointCoordinatorTest method testAbortPendingCheckpointsWithTriggerValidation.
/**
 * Verifies that {@link CheckpointCoordinator#abortPendingCheckpoints(CheckpointException)}, as
 * invoked on job failover, copes with the case where {@code currentPeriodicTrigger} is null.
 */
@Test
public void testAbortPendingCheckpointsWithTriggerValidation() throws Exception {
    final int maxConcurrentCheckpoints = ThreadLocalRandom.current().nextInt(10) + 1;

    ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(new JobVertexID())
                    .setTransitToRunning(false)
                    .build();

    CheckpointCoordinatorConfiguration coordinatorConfig =
            new CheckpointCoordinatorConfiguration(
                    Integer.MAX_VALUE,
                    Integer.MAX_VALUE,
                    0,
                    maxConcurrentCheckpoints,
                    CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
                    true,
                    false,
                    0,
                    0);

    // collaborators that are derived from the execution graph
    DefaultCheckpointPlanCalculator planCalculator =
            new DefaultCheckpointPlanCalculator(
                    graph.getJobID(),
                    new ExecutionGraphCheckpointPlanCalculatorContext(graph),
                    graph.getVerticesTopologically(),
                    false);
    ExecutionAttemptMappingProvider attemptMappingProvider =
            new ExecutionAttemptMappingProvider(graph.getAllExecutionVertices());

    CheckpointCoordinator checkpointCoordinator =
            new CheckpointCoordinator(
                    graph.getJobID(),
                    coordinatorConfig,
                    Collections.emptyList(),
                    new StandaloneCheckpointIDCounter(),
                    new StandaloneCompletedCheckpointStore(1),
                    new MemoryStateBackend(),
                    Executors.directExecutor(),
                    new CheckpointsCleaner(),
                    manualThreadExecutor,
                    mock(CheckpointFailureManager.class),
                    planCalculator,
                    attemptMappingProvider,
                    mock(CheckpointStatsTracker.class));

    // move the graph and every current execution attempt to RUNNING so that checkpoints can
    // be triggered
    graph.transitionToRunning();
    graph.getAllExecutionVertices()
            .forEach(
                    task ->
                            task.getCurrentExecutionAttempt()
                                    .transitionState(ExecutionState.RUNNING));

    checkpointCoordinator.startCheckpointScheduler();
    assertTrue(checkpointCoordinator.isCurrentPeriodicTriggerAvailable());

    // fire the periodic trigger first; a cancellation is scheduled as well, which is why the
    // periodic tasks are triggered separately before running everything that is queued
    manualThreadExecutor.triggerPeriodicScheduledTasks();
    manualThreadExecutor.triggerAll();
    assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());

    // fill up the remaining concurrent-checkpoint slots one by one
    for (int expectedPending = 2;
            expectedPending <= maxConcurrentCheckpoints;
            expectedPending++) {
        checkpointCoordinator.triggerCheckpoint(false);
        manualThreadExecutor.triggerAll();
        assertEquals(expectedPending, checkpointCoordinator.getNumberOfPendingCheckpoints());
        assertTrue(checkpointCoordinator.isCurrentPeriodicTriggerAvailable());
    }

    // Only a limited number of concurrent checkpoints is supported. Triggering one more than
    // the limit leaves the pending count unchanged; at this point the currentPeriodicTrigger
    // is expected to have been set to null.
    checkpointCoordinator.triggerCheckpoint(false);
    manualThreadExecutor.triggerAll();
    assertEquals(
            maxConcurrentCheckpoints, checkpointCoordinator.getNumberOfPendingCheckpoints());

    checkpointCoordinator.abortPendingCheckpoints(
            new CheckpointException(CheckpointFailureReason.JOB_FAILOVER_REGION));

    // after the abort the periodic trigger must be available again and nothing is pending
    assertTrue(checkpointCoordinator.isCurrentPeriodicTriggerAvailable());
    assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
}
use of org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration in project flink by apache.
the class CheckpointSettingsSerializableTest method testDeserializationOfUserCodeWithUserClassLoader.
/**
 * Tests that checkpointing settings which embed user code (a master hook factory, a state
 * backend and a checkpoint storage wrapping an object obtained from {@code
 * ClassLoaderUtils.createSerializableObjectFromNewClassLoader()}) survive a serialization round
 * trip of the job graph and can be used to build an execution graph with the user class loader.
 */
@Test
public void testDeserializationOfUserCodeWithUserClassLoader() throws Exception {
    final ClassLoaderUtils.ObjectAndClassLoader<Serializable> outsideClassLoading =
            ClassLoaderUtils.createSerializableObjectFromNewClassLoader();
    final ClassLoader classLoader = outsideClassLoading.getClassLoader();
    final Serializable outOfClassPath = outsideClassLoading.getObject();

    final MasterTriggerRestoreHook.Factory[] hooks = {new TestFactory(outOfClassPath)};
    final SerializedValue<MasterTriggerRestoreHook.Factory[]> serHooks =
            new SerializedValue<>(hooks);

    final JobCheckpointingSettings checkpointingSettings =
            new JobCheckpointingSettings(
                    new CheckpointCoordinatorConfiguration(
                            1000L,
                            10000L,
                            0L,
                            1,
                            CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
                            true,
                            false,
                            0,
                            0),
                    new SerializedValue<StateBackend>(new CustomStateBackend(outOfClassPath)),
                    TernaryBoolean.UNDEFINED,
                    new SerializedValue<CheckpointStorage>(
                            new CustomCheckpointStorage(outOfClassPath)),
                    serHooks);

    final JobGraph jobGraph =
            JobGraphBuilder.newStreamingJobGraphBuilder()
                    .setJobCheckpointingSettings(checkpointingSettings)
                    .build();

    // to serialize/deserialize the job graph to see if the behavior is correct under
    // distributed execution
    final JobGraph copy = CommonTestUtils.createCopySerializable(jobGraph);

    final ExecutionGraph eg =
            TestingDefaultExecutionGraphBuilder.newBuilder()
                    .setJobGraph(copy)
                    .setUserClassLoader(classLoader)
                    .build();

    assertEquals(1, eg.getCheckpointCoordinator().getNumberOfRegisteredMasterHooks());

    // Check the state backend on the round-tripped copy rather than on the original graph:
    // the copy is what the execution graph above was built from, and asserting on the
    // original would not exercise the serialization round trip at all.
    assertTrue(
            copy.getCheckpointingSettings()
                            .getDefaultStateBackend()
                            .deserializeValue(classLoader)
                    instanceof CustomStateBackend);
}
use of org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration in project flink by apache.
the class CheckpointCoordinatorMasterHooksTest method instantiateCheckpointCoordinator.
/**
 * Builds a {@link CheckpointCoordinator} for the given execution graph, using standalone
 * checkpoint ID counter and completed-checkpoint store, a memory state backend and a direct
 * executor.
 *
 * @param graph the execution graph supplying the job ID, the vertices and the plan calculator
 *     context
 * @param testingScheduledExecutor the scheduled executor handed to the coordinator
 * @return the newly created coordinator
 */
private CheckpointCoordinator instantiateCheckpointCoordinator(
        ExecutionGraph graph, ScheduledExecutor testingScheduledExecutor) {

    final CheckpointCoordinatorConfiguration coordinatorConfig =
            new CheckpointCoordinatorConfiguration(
                    10000000L,
                    600000L,
                    0L,
                    1,
                    CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
                    true,
                    false,
                    0,
                    0);

    // collaborators that are derived from the execution graph
    final DefaultCheckpointPlanCalculator planCalculator =
            new DefaultCheckpointPlanCalculator(
                    graph.getJobID(),
                    new ExecutionGraphCheckpointPlanCalculatorContext(graph),
                    graph.getVerticesTopologically(),
                    false);
    final ExecutionAttemptMappingProvider attemptMappingProvider =
            new ExecutionAttemptMappingProvider(graph.getAllExecutionVertices());

    // failure handling and statistics
    final CheckpointFailureManager failureManager =
            new CheckpointFailureManager(0, NoOpFailJobCall.INSTANCE);
    final CheckpointStatsTracker statsTracker =
            new CheckpointStatsTracker(1, new DummyMetricGroup());

    return new CheckpointCoordinator(
            graph.getJobID(),
            coordinatorConfig,
            Collections.emptyList(),
            new StandaloneCheckpointIDCounter(),
            new StandaloneCompletedCheckpointStore(10),
            new MemoryStateBackend(),
            Executors.directExecutor(),
            new CheckpointsCleaner(),
            testingScheduledExecutor,
            failureManager,
            planCalculator,
            attemptMappingProvider,
            statsTracker);
}
Aggregations