Use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.
From the class CheckpointCoordinatorTest, the method testExternalizedCheckpoints.
/**
* Tests that the externalized checkpoint configuration is respected.
*/
@Test
public void testExternalizedCheckpoints() throws Exception {
ExecutionGraph graph =
        new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                .addJobVertex(new JobVertexID())
                .build();
// set up the coordinator and validate the initial state
CheckpointCoordinatorConfiguration chkConfig =
        new CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder()
                .setCheckpointRetentionPolicy(CheckpointRetentionPolicy.RETAIN_ON_FAILURE)
                .build();
CheckpointCoordinator checkpointCoordinator =
        new CheckpointCoordinatorBuilder()
                .setExecutionGraph(graph)
                .setCheckpointCoordinatorConfiguration(chkConfig)
                .setTimer(manuallyTriggeredScheduledExecutor)
                .build();
CompletableFuture<CompletedCheckpoint> checkpointFuture = checkpointCoordinator.triggerCheckpoint(false);
manuallyTriggeredScheduledExecutor.triggerAll();
FutureUtils.throwIfCompletedExceptionally(checkpointFuture);
for (PendingCheckpoint checkpoint : checkpointCoordinator.getPendingCheckpoints().values()) {
CheckpointProperties props = checkpoint.getProps();
CheckpointProperties expected = CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.RETAIN_ON_FAILURE);
assertEquals(expected, props);
}
// no acknowledgements were sent, so the checkpoint above is still pending; shut down the coordinator
checkpointCoordinator.shutdown();
}
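For reference, a rough sketch of the user-facing counterpart of the retention policy exercised above, assuming the classic CheckpointConfig API (ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION keeps checkpoints only if the job fails, which matches the RETAIN_ON_FAILURE retention policy used in the test; the interval is illustrative):

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// checkpoint every 10 seconds (illustrative value)
env.enableCheckpointing(10_000L);
// retain externalized checkpoints if the job fails, delete them on user cancellation
env.getCheckpointConfig()
        .enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION);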
Use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.
From the class CheckpointCoordinatorTest, the method testCheckpointAbortsIfTriggerTasksAreFinishedAndIOException.
@Test
public void testCheckpointAbortsIfTriggerTasksAreFinishedAndIOException() throws Exception {
JobVertexID jobVertexID1 = new JobVertexID();
JobVertexID jobVertexID2 = new JobVertexID();
ExecutionGraph graph =
        new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                .addJobVertex(jobVertexID1)
                .addJobVertex(jobVertexID2, false)
                .build();
// set up the coordinator
CheckpointCoordinator checkpointCoordinator =
        new CheckpointCoordinatorBuilder()
                .setExecutionGraph(graph)
                .setCheckpointStorage(new IOExceptionCheckpointStorage())
                .setTimer(manuallyTriggeredScheduledExecutor)
                .build();
Arrays.stream(graph.getJobVertex(jobVertexID1).getTaskVertices()).forEach(task -> task.getCurrentExecutionAttempt().markFinished());
// nothing should be happening
assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
checkpointCoordinator.startCheckpointScheduler();
// trigger the first checkpoint. this should not succeed
final CompletableFuture<CompletedCheckpoint> checkpointFuture = checkpointCoordinator.triggerCheckpoint(false);
manuallyTriggeredScheduledExecutor.triggerAll();
assertTrue(checkpointFuture.isCompletedExceptionally());
// still, nothing should be happening
assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
checkpointCoordinator.shutdown();
}
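IOExceptionCheckpointStorage is a helper defined in the Flink test sources and its body is not shown here. A hypothetical sketch of such a failing storage, assuming the runtime CheckpointStorage interface with resolveCheckpoint(String) and createCheckpointStorage(JobID) (the class name and error message are illustrative):

private static class FailingCheckpointStorage implements CheckpointStorage {

    @Override
    public CompletedCheckpointStorageLocation resolveCheckpoint(String externalPointer) throws IOException {
        throw new IOException("checkpoint storage is unavailable");
    }

    @Override
    public CheckpointStorageAccess createCheckpointStorage(JobID jobId) throws IOException {
        // simulate a broken storage backend so that triggering a checkpoint fails with an I/O error
        throw new IOException("checkpoint storage is unavailable");
    }
}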
Use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.
From the class CheckpointCoordinatorTest, the method testCheckpointTimeoutIsolated.
@Test
public void testCheckpointTimeoutIsolated() throws Exception {
JobVertexID jobVertexID1 = new JobVertexID();
JobVertexID jobVertexID2 = new JobVertexID();
CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway gateway =
        new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
ExecutionGraph graph =
        new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                .addJobVertex(jobVertexID1)
                .addJobVertex(jobVertexID2, false)
                .setTaskManagerGateway(gateway)
                .build();
ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
ExecutionVertex vertex2 = graph.getJobVertex(jobVertexID2).getTaskVertices()[0];
ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();
// set up the coordinator
CheckpointCoordinator checkpointCoordinator =
        new CheckpointCoordinatorBuilder()
                .setExecutionGraph(graph)
                .setCompletedCheckpointStore(new StandaloneCompletedCheckpointStore(2))
                .setTimer(manuallyTriggeredScheduledExecutor)
                .build();
// trigger a checkpoint, partially acknowledged
final CompletableFuture<CompletedCheckpoint> checkpointFuture = checkpointCoordinator.triggerCheckpoint(false);
manuallyTriggeredScheduledExecutor.triggerAll();
FutureUtils.throwIfCompletedExceptionally(checkpointFuture);
assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());
PendingCheckpoint checkpoint = checkpointCoordinator.getPendingCheckpoints().values().iterator().next();
assertFalse(checkpoint.isDisposed());
OperatorID opID1 = vertex1.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
TaskStateSnapshot taskOperatorSubtaskStates1 = spy(new TaskStateSnapshot());
OperatorSubtaskState subtaskState1 = mock(OperatorSubtaskState.class);
taskOperatorSubtaskStates1.putSubtaskStateByOperatorID(opID1, subtaskState1);
checkpointCoordinator.receiveAcknowledgeMessage(
        new AcknowledgeCheckpoint(
                graph.getJobID(),
                attemptID1,
                checkpoint.getCheckpointId(),
                new CheckpointMetrics(),
                taskOperatorSubtaskStates1),
        TASK_MANAGER_LOCATION_INFO);
// triggers cancelling
manuallyTriggeredScheduledExecutor.triggerScheduledTasks();
assertTrue("Checkpoint was not canceled by the timeout", checkpoint.isDisposed());
assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
// validate that the received states have been discarded
verify(subtaskState1, times(1)).discardState();
// no confirmation message should have been sent
for (ExecutionVertex vertex : Arrays.asList(vertex1, vertex2)) {
ExecutionAttemptID attemptId = vertex.getCurrentExecutionAttempt().getAttemptId();
assertEquals(0, gateway.getNotifiedCompletedCheckpoints(attemptId).size());
}
checkpointCoordinator.shutdown();
}
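The cancellation above is driven by the checkpoint timeout held in the coordinator configuration (this test relies on the builder's default). A sketch of configuring it explicitly, with an illustrative value, which would then be passed to the CheckpointCoordinatorBuilder via setCheckpointCoordinatorConfiguration:

CheckpointCoordinatorConfiguration timeoutConfig =
        new CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder()
                // pending checkpoints that are not fully acknowledged within 500 ms are aborted
                .setCheckpointTimeout(500L)
                .build();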
Use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.
From the class CheckpointCoordinatorTest, the method testMaxConcurrentAttempts.
private void testMaxConcurrentAttempts(int maxConcurrentAttempts) {
try {
JobVertexID jobVertexID1 = new JobVertexID();
CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway gateway =
        new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
ExecutionGraph graph =
        new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                .addJobVertex(jobVertexID1)
                .setTaskManagerGateway(gateway)
                .build();
ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();
CheckpointCoordinatorConfiguration chkConfig =
        new CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder()
                // periodic interval is 10 ms
                .setCheckpointInterval(10)
                // timeout is very long (200 s)
                .setCheckpointTimeout(200000)
                // no extra delay
                .setMinPauseBetweenCheckpoints(0L)
                .setMaxConcurrentCheckpoints(maxConcurrentAttempts)
                .build();
CheckpointCoordinator checkpointCoordinator =
        new CheckpointCoordinatorBuilder()
                .setExecutionGraph(graph)
                .setCheckpointCoordinatorConfiguration(chkConfig)
                .setCompletedCheckpointStore(new StandaloneCompletedCheckpointStore(2))
                .setTimer(manuallyTriggeredScheduledExecutor)
                .build();
checkpointCoordinator.startCheckpointScheduler();
for (int i = 0; i < maxConcurrentAttempts; i++) {
manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
manuallyTriggeredScheduledExecutor.triggerAll();
}
assertEquals(maxConcurrentAttempts, gateway.getTriggeredCheckpoints(attemptID1).size());
assertEquals(0, gateway.getNotifiedCompletedCheckpoints(attemptID1).size());
// now, once we acknowledge one checkpoint, it should trigger the next one
checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, 1L), TASK_MANAGER_LOCATION_INFO);
final Collection<ScheduledFuture<?>> periodicScheduledTasks = manuallyTriggeredScheduledExecutor.getActivePeriodicScheduledTask();
assertEquals(1, periodicScheduledTasks.size());
manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
manuallyTriggeredScheduledExecutor.triggerAll();
assertEquals(maxConcurrentAttempts + 1, gateway.getTriggeredCheckpoints(attemptID1).size());
// no further checkpoints should happen
manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
manuallyTriggeredScheduledExecutor.triggerAll();
assertEquals(maxConcurrentAttempts + 1, gateway.getTriggeredCheckpoints(attemptID1).size());
checkpointCoordinator.shutdown();
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
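The helper above is parameterized by maxConcurrentAttempts; in the surrounding test class it is presumably driven by small @Test wrappers along these lines (the wrapper names are illustrative):

@Test
public void testMaxConcurrentAttemptsWithOnePendingCheckpoint() {
    testMaxConcurrentAttempts(1);
}

@Test
public void testMaxConcurrentAttemptsWithTwoPendingCheckpoints() {
    testMaxConcurrentAttempts(2);
}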
Use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.
From the class CheckpointCoordinatorTest, the method testHandleMessagesForNonExistingCheckpoints.
@Test
public void testHandleMessagesForNonExistingCheckpoints() throws Exception {
// create some mock execution vertices and trigger some checkpoint
JobVertexID jobVertexID1 = new JobVertexID();
JobVertexID jobVertexID2 = new JobVertexID();
CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway gateway =
        new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
ExecutionGraph graph =
        new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                .addJobVertex(jobVertexID1)
                .addJobVertex(jobVertexID2, false)
                .setTaskManagerGateway(gateway)
                .build();
ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();
CheckpointCoordinator checkpointCoordinator =
        new CheckpointCoordinatorBuilder()
                .setExecutionGraph(graph)
                .setCompletedCheckpointStore(new StandaloneCompletedCheckpointStore(2))
                .setTimer(manuallyTriggeredScheduledExecutor)
                .build();
final CompletableFuture<CompletedCheckpoint> checkpointFuture = checkpointCoordinator.triggerCheckpoint(false);
manuallyTriggeredScheduledExecutor.triggerAll();
FutureUtils.throwIfCompletedExceptionally(checkpointFuture);
long checkpointId = checkpointCoordinator.getPendingCheckpoints().keySet().iterator().next();
// send some messages that belong neither to the job nor to any of the
// vertices that need to be acknowledged.
// none of the messages should throw an exception
// wrong job id
checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(new JobID(), attemptID1, checkpointId), TASK_MANAGER_LOCATION_INFO);
// unknown checkpoint
checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, 1L), TASK_MANAGER_LOCATION_INFO);
// unknown ack vertex
checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), new ExecutionAttemptID(), checkpointId), TASK_MANAGER_LOCATION_INFO);
checkpointCoordinator.shutdown();
}
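As a hedged extension (not part of the snippet above), one could additionally assert, just before the shutdown() call, that the bogus messages left the single legitimate pending checkpoint untouched:

assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());
assertFalse(checkpointCoordinator.getPendingCheckpoints().get(checkpointId).isDisposed());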