Search in sources :

Example 26 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class CheckpointCoordinatorTest method testExternalizedCheckpoints.

/**
 * Tests that the externalized checkpoint configuration is respected.
 */
@Test
public void testExternalizedCheckpoints() throws Exception {
    ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(new JobVertexID()).build();
    // set up the coordinator and validate the initial state
    CheckpointCoordinatorConfiguration chkConfig = new CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder().setCheckpointRetentionPolicy(CheckpointRetentionPolicy.RETAIN_ON_FAILURE).build();
    CheckpointCoordinator checkpointCoordinator = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setCheckpointCoordinatorConfiguration(chkConfig).setTimer(manuallyTriggeredScheduledExecutor).build();
    CompletableFuture<CompletedCheckpoint> checkpointFuture = checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    FutureUtils.throwIfCompletedExceptionally(checkpointFuture);
    for (PendingCheckpoint checkpoint : checkpointCoordinator.getPendingCheckpoints().values()) {
        CheckpointProperties props = checkpoint.getProps();
        CheckpointProperties expected = CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.RETAIN_ON_FAILURE);
        assertEquals(expected, props);
    }
    // the now we should have a completed checkpoint
    checkpointCoordinator.shutdown();
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) Test(org.junit.Test)

Example 27 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class CheckpointCoordinatorTest method testCheckpointAbortsIfTriggerTasksAreFinishedAndIOException.

@Test
public void testCheckpointAbortsIfTriggerTasksAreFinishedAndIOException() throws Exception {
    JobVertexID jobVertexID1 = new JobVertexID();
    JobVertexID jobVertexID2 = new JobVertexID();
    ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1).addJobVertex(jobVertexID2, false).build();
    // set up the coordinator
    CheckpointCoordinator checkpointCoordinator = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setCheckpointStorage(new IOExceptionCheckpointStorage()).setTimer(manuallyTriggeredScheduledExecutor).build();
    Arrays.stream(graph.getJobVertex(jobVertexID1).getTaskVertices()).forEach(task -> task.getCurrentExecutionAttempt().markFinished());
    // nothing should be happening
    assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
    assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
    checkpointCoordinator.startCheckpointScheduler();
    // trigger the first checkpoint. this should not succeed
    final CompletableFuture<CompletedCheckpoint> checkpointFuture = checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    assertTrue(checkpointFuture.isCompletedExceptionally());
    // still, nothing should be happening
    assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
    assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
    checkpointCoordinator.shutdown();
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) Test(org.junit.Test)

Example 28 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class CheckpointCoordinatorTest method testCheckpointTimeoutIsolated.

@Test
public void testCheckpointTimeoutIsolated() throws Exception {
    JobVertexID jobVertexID1 = new JobVertexID();
    JobVertexID jobVertexID2 = new JobVertexID();
    CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway gateway = new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
    ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1).addJobVertex(jobVertexID2, false).setTaskManagerGateway(gateway).build();
    ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
    ExecutionVertex vertex2 = graph.getJobVertex(jobVertexID2).getTaskVertices()[0];
    ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();
    // set up the coordinator
    CheckpointCoordinator checkpointCoordinator = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setCompletedCheckpointStore(new StandaloneCompletedCheckpointStore(2)).setTimer(manuallyTriggeredScheduledExecutor).build();
    // trigger a checkpoint, partially acknowledged
    final CompletableFuture<CompletedCheckpoint> checkpointFuture = checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    FutureUtils.throwIfCompletedExceptionally(checkpointFuture);
    assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());
    PendingCheckpoint checkpoint = checkpointCoordinator.getPendingCheckpoints().values().iterator().next();
    assertFalse(checkpoint.isDisposed());
    OperatorID opID1 = vertex1.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
    TaskStateSnapshot taskOperatorSubtaskStates1 = spy(new TaskStateSnapshot());
    OperatorSubtaskState subtaskState1 = mock(OperatorSubtaskState.class);
    taskOperatorSubtaskStates1.putSubtaskStateByOperatorID(opID1, subtaskState1);
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, checkpoint.getCheckpointId(), new CheckpointMetrics(), taskOperatorSubtaskStates1), TASK_MANAGER_LOCATION_INFO);
    // triggers cancelling
    manuallyTriggeredScheduledExecutor.triggerScheduledTasks();
    assertTrue("Checkpoint was not canceled by the timeout", checkpoint.isDisposed());
    assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
    assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
    // validate that the received states have been discarded
    verify(subtaskState1, times(1)).discardState();
    // no confirm message must have been sent
    for (ExecutionVertex vertex : Arrays.asList(vertex1, vertex2)) {
        ExecutionAttemptID attemptId = vertex.getCurrentExecutionAttempt().getAttemptId();
        assertEquals(0, gateway.getNotifiedCompletedCheckpoints(attemptId).size());
    }
    checkpointCoordinator.shutdown();
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 29 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class CheckpointCoordinatorTest method testMaxConcurrentAttempts.

private void testMaxConcurrentAttempts(int maxConcurrentAttempts) {
    try {
        JobVertexID jobVertexID1 = new JobVertexID();
        CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway gateway = new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
        ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1).setTaskManagerGateway(gateway).build();
        ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
        ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();
        CheckpointCoordinatorConfiguration chkConfig = new CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder().setCheckpointInterval(// periodic interval is 10 ms
        10).setCheckpointTimeout(// timeout is very long (200 s)
        200000).setMinPauseBetweenCheckpoints(// no extra delay
        0L).setMaxConcurrentCheckpoints(maxConcurrentAttempts).build();
        CheckpointCoordinator checkpointCoordinator = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setCheckpointCoordinatorConfiguration(chkConfig).setCompletedCheckpointStore(new StandaloneCompletedCheckpointStore(2)).setTimer(manuallyTriggeredScheduledExecutor).build();
        checkpointCoordinator.startCheckpointScheduler();
        for (int i = 0; i < maxConcurrentAttempts; i++) {
            manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
            manuallyTriggeredScheduledExecutor.triggerAll();
        }
        assertEquals(maxConcurrentAttempts, gateway.getTriggeredCheckpoints(attemptID1).size());
        assertEquals(0, gateway.getNotifiedCompletedCheckpoints(attemptID1).size());
        // now, once we acknowledge one checkpoint, it should trigger the next one
        checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, 1L), TASK_MANAGER_LOCATION_INFO);
        final Collection<ScheduledFuture<?>> periodicScheduledTasks = manuallyTriggeredScheduledExecutor.getActivePeriodicScheduledTask();
        assertEquals(1, periodicScheduledTasks.size());
        manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
        manuallyTriggeredScheduledExecutor.triggerAll();
        assertEquals(maxConcurrentAttempts + 1, gateway.getTriggeredCheckpoints(attemptID1).size());
        // no further checkpoints should happen
        manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
        manuallyTriggeredScheduledExecutor.triggerAll();
        assertEquals(maxConcurrentAttempts + 1, gateway.getTriggeredCheckpoints(attemptID1).size());
        checkpointCoordinator.shutdown();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) ScheduledFuture(java.util.concurrent.ScheduledFuture) TriFunctionWithException(org.apache.flink.util.function.TriFunctionWithException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) RpcException(org.apache.flink.runtime.rpc.exceptions.RpcException) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph)

Example 30 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class CheckpointCoordinatorTest method testHandleMessagesForNonExistingCheckpoints.

@Test
public void testHandleMessagesForNonExistingCheckpoints() throws Exception {
    // create some mock execution vertices and trigger some checkpoint
    JobVertexID jobVertexID1 = new JobVertexID();
    JobVertexID jobVertexID2 = new JobVertexID();
    CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway gateway = new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
    ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1).addJobVertex(jobVertexID2, false).setTaskManagerGateway(gateway).build();
    ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
    ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();
    CheckpointCoordinator checkpointCoordinator = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setCompletedCheckpointStore(new StandaloneCompletedCheckpointStore(2)).setTimer(manuallyTriggeredScheduledExecutor).build();
    final CompletableFuture<CompletedCheckpoint> checkpointFuture = checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    FutureUtils.throwIfCompletedExceptionally(checkpointFuture);
    long checkpointId = checkpointCoordinator.getPendingCheckpoints().keySet().iterator().next();
    // send some messages that do not belong to either the job or the any
    // of the vertices that need to be acknowledged.
    // non of the messages should throw an exception
    // wrong job id
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(new JobID(), attemptID1, checkpointId), TASK_MANAGER_LOCATION_INFO);
    // unknown checkpoint
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, 1L), TASK_MANAGER_LOCATION_INFO);
    // unknown ack vertex
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), new ExecutionAttemptID(), checkpointId), TASK_MANAGER_LOCATION_INFO);
    checkpointCoordinator.shutdown();
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)120 Test (org.junit.Test)96 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)77 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)53 CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder)40 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)36 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)35 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)31 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)24 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)24 HashMap (java.util.HashMap)20 CompletableFuture (java.util.concurrent.CompletableFuture)19 JobID (org.apache.flink.api.common.JobID)19 ArrayList (java.util.ArrayList)17 HashSet (java.util.HashSet)17 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)17 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)17 ExecutionException (java.util.concurrent.ExecutionException)13 Executor (java.util.concurrent.Executor)13 IOException (java.io.IOException)12