Search in sources :

Example 26 with CheckpointCoordinatorBuilder

use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.

the class CheckpointStateRestoreTest method testSetState.

/**
 * Tests that on restore the task state is reset for each stateful task.
 */
@Test
public void testSetState() {
    try {
        KeyGroupRange keyGroupRange = KeyGroupRange.of(0, 0);
        List<SerializableObject> testStates = Collections.singletonList(new SerializableObject());
        final KeyedStateHandle serializedKeyGroupStates = CheckpointCoordinatorTestingUtils.generateKeyGroupState(keyGroupRange, testStates);
        final JobVertexID statefulId = new JobVertexID();
        final JobVertexID statelessId = new JobVertexID();
        ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(statefulId, 3, 256).addJobVertex(statelessId, 2, 256).build();
        ExecutionJobVertex stateful = graph.getJobVertex(statefulId);
        ExecutionJobVertex stateless = graph.getJobVertex(statelessId);
        ExecutionVertex stateful1 = stateful.getTaskVertices()[0];
        ExecutionVertex stateful2 = stateful.getTaskVertices()[1];
        ExecutionVertex stateful3 = stateful.getTaskVertices()[2];
        ExecutionVertex stateless1 = stateless.getTaskVertices()[0];
        ExecutionVertex stateless2 = stateless.getTaskVertices()[1];
        Execution statefulExec1 = stateful1.getCurrentExecutionAttempt();
        Execution statefulExec2 = stateful2.getCurrentExecutionAttempt();
        Execution statefulExec3 = stateful3.getCurrentExecutionAttempt();
        Execution statelessExec1 = stateless1.getCurrentExecutionAttempt();
        Execution statelessExec2 = stateless2.getCurrentExecutionAttempt();
        ManuallyTriggeredScheduledExecutor manuallyTriggeredScheduledExecutor = new ManuallyTriggeredScheduledExecutor();
        CheckpointCoordinator coord = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setTimer(manuallyTriggeredScheduledExecutor).build();
        // create ourselves a checkpoint with state
        coord.triggerCheckpoint(false);
        manuallyTriggeredScheduledExecutor.triggerAll();
        PendingCheckpoint pending = coord.getPendingCheckpoints().values().iterator().next();
        final long checkpointId = pending.getCheckpointId();
        final TaskStateSnapshot subtaskStates = new TaskStateSnapshot();
        subtaskStates.putSubtaskStateByOperatorID(OperatorID.fromJobVertexID(statefulId), OperatorSubtaskState.builder().setManagedKeyedState(serializedKeyGroupStates).build());
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), statefulExec1.getAttemptId(), checkpointId, new CheckpointMetrics(), subtaskStates), TASK_MANAGER_LOCATION_INFO);
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), statefulExec2.getAttemptId(), checkpointId, new CheckpointMetrics(), subtaskStates), TASK_MANAGER_LOCATION_INFO);
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), statefulExec3.getAttemptId(), checkpointId, new CheckpointMetrics(), subtaskStates), TASK_MANAGER_LOCATION_INFO);
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), statelessExec1.getAttemptId(), checkpointId), TASK_MANAGER_LOCATION_INFO);
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), statelessExec2.getAttemptId(), checkpointId), TASK_MANAGER_LOCATION_INFO);
        assertEquals(1, coord.getNumberOfRetainedSuccessfulCheckpoints());
        assertEquals(0, coord.getNumberOfPendingCheckpoints());
        // let the coordinator inject the state
        assertTrue(coord.restoreLatestCheckpointedStateToAll(new HashSet<>(Arrays.asList(stateful, stateless)), false));
        // verify that each stateful vertex got the state
        assertEquals(subtaskStates, statefulExec1.getTaskRestore().getTaskStateSnapshot());
        assertEquals(subtaskStates, statefulExec2.getTaskRestore().getTaskStateSnapshot());
        assertEquals(subtaskStates, statefulExec3.getTaskRestore().getTaskStateSnapshot());
        assertNull(statelessExec1.getTaskRestore());
        assertNull(statelessExec2.getTaskRestore());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) SerializableObject(org.apache.flink.util.SerializableObject) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) Execution(org.apache.flink.runtime.executiongraph.Execution) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 27 with CheckpointCoordinatorBuilder

use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.

the class CheckpointStateRestoreTest method testNoCheckpointAvailable.

@Test
public void testNoCheckpointAvailable() {
    try {
        CheckpointCoordinator coord = new CheckpointCoordinatorBuilder().build();
        final boolean restored = coord.restoreLatestCheckpointedStateToAll(Collections.emptySet(), false);
        assertFalse(restored);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) Test(org.junit.Test)

Example 28 with CheckpointCoordinatorBuilder

use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.

the class CheckpointCoordinatorTest method testTriggerCheckpointAfterStopping.

/**
 * Tests that do not trigger checkpoint when stop the coordinator after the eager pre-check.
 */
@Test
public void testTriggerCheckpointAfterStopping() throws Exception {
    StoppingCheckpointIDCounter testingCounter = new StoppingCheckpointIDCounter();
    CheckpointCoordinator checkpointCoordinator = new CheckpointCoordinatorBuilder().setCheckpointIDCounter(testingCounter).setTimer(manuallyTriggeredScheduledExecutor).build();
    testingCounter.setOwner(checkpointCoordinator);
    testTriggerCheckpoint(checkpointCoordinator, PERIODIC_SCHEDULER_SHUTDOWN);
}
Also used : CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) Test(org.junit.Test)

Example 29 with CheckpointCoordinatorBuilder

use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.

the class CheckpointCoordinatorTest method testMinDelayBetweenSavepoints.

/**
 * Tests that no minimum delay between savepoints is enforced.
 */
@Test
public void testMinDelayBetweenSavepoints() throws Exception {
    CheckpointCoordinatorConfiguration chkConfig = new CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder().setMinPauseBetweenCheckpoints(// very long min delay => should not affect savepoints
    100000000L).setMaxConcurrentCheckpoints(1).build();
    CheckpointCoordinator checkpointCoordinator = new CheckpointCoordinatorBuilder().setCheckpointCoordinatorConfiguration(chkConfig).setCompletedCheckpointStore(new StandaloneCompletedCheckpointStore(2)).setTimer(manuallyTriggeredScheduledExecutor).build();
    String savepointDir = tmpFolder.newFolder().getAbsolutePath();
    CompletableFuture<CompletedCheckpoint> savepoint0 = checkpointCoordinator.triggerSavepoint(savepointDir, SavepointFormatType.CANONICAL);
    assertFalse("Did not trigger savepoint", savepoint0.isDone());
    CompletableFuture<CompletedCheckpoint> savepoint1 = checkpointCoordinator.triggerSavepoint(savepointDir, SavepointFormatType.CANONICAL);
    assertFalse("Did not trigger savepoint", savepoint1.isDone());
}
Also used : CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) Test(org.junit.Test)

Example 30 with CheckpointCoordinatorBuilder

use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.

the class CheckpointCoordinatorTest method testReportLatestCompletedCheckpointIdWithAbort.

@Test
public void testReportLatestCompletedCheckpointIdWithAbort() throws Exception {
    JobVertexID jobVertexID = new JobVertexID();
    ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID).setTransitToRunning(false).build();
    ExecutionVertex task = graph.getJobVertex(jobVertexID).getTaskVertices()[0];
    AtomicLong reportedCheckpointId = new AtomicLong(-1);
    LogicalSlot slot = new TestingLogicalSlotBuilder().setTaskManagerGateway(new SimpleAckingTaskManagerGateway() {

        @Override
        public void notifyCheckpointAborted(ExecutionAttemptID executionAttemptID, JobID jobId, long checkpointId, long latestCompletedCheckpointId, long timestamp) {
            reportedCheckpointId.set(latestCompletedCheckpointId);
        }
    }).createTestingLogicalSlot();
    ExecutionGraphTestUtils.setVertexResource(task, slot);
    task.getCurrentExecutionAttempt().transitionState(ExecutionState.RUNNING);
    CheckpointCoordinator checkpointCoordinator = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setTimer(manuallyTriggeredScheduledExecutor).setAllowCheckpointsAfterTasksFinished(true).build();
    // Trigger a successful checkpoint
    CompletableFuture<CompletedCheckpoint> result = checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    long completedCheckpointId = checkpointCoordinator.getPendingCheckpoints().entrySet().iterator().next().getKey();
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), task.getCurrentExecutionAttempt().getAttemptId(), completedCheckpointId, new CheckpointMetrics(), new TaskStateSnapshot()), "localhost");
    assertTrue(result.isDone());
    assertFalse(result.isCompletedExceptionally());
    result = checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    long abortedCheckpointId = checkpointCoordinator.getPendingCheckpoints().entrySet().iterator().next().getKey();
    checkpointCoordinator.receiveDeclineMessage(new DeclineCheckpoint(graph.getJobID(), task.getCurrentExecutionAttempt().getAttemptId(), abortedCheckpointId, new CheckpointException(CHECKPOINT_EXPIRED)), "localhost");
    assertTrue(result.isCompletedExceptionally());
    assertEquals(completedCheckpointId, reportedCheckpointId.get());
}
Also used : DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) LogicalSlot(org.apache.flink.runtime.jobmaster.LogicalSlot) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) SimpleAckingTaskManagerGateway(org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) AtomicLong(java.util.concurrent.atomic.AtomicLong) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) TestingLogicalSlotBuilder(org.apache.flink.runtime.jobmaster.TestingLogicalSlotBuilder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder)46 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)41 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)40 Test (org.junit.Test)37 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)30 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)27 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)22 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)15 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)13 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)13 HashSet (java.util.HashSet)12 CheckpointCoordinatorConfiguration (org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration)12 ManuallyTriggeredScheduledExecutor (org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor)10 CompletableFuture (java.util.concurrent.CompletableFuture)9 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)9 HashMap (java.util.HashMap)8 CheckpointCoordinatorConfigurationBuilder (org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder)8 List (java.util.List)7 ExecutionException (java.util.concurrent.ExecutionException)7 JobID (org.apache.flink.api.common.JobID)7