Search in sources :

Example 1 with NonPersistentMetadataCheckpointStorageLocation

Use of org.apache.flink.runtime.state.memory.NonPersistentMetadataCheckpointStorageLocation in the Apache Flink project.

From the class CheckpointCoordinatorTest, method testCompleteCheckpointFailureWithExternallyInducedSource:

/**
 * Checks that a checkpoint fails cleanly when the checkpoint storage cannot create the metadata
 * output stream.
 *
 * <p>Scenario built by this test:
 *
 * <ol>
 *   <li>A mock operator coordinator checkpoint context completes its checkpoint immediately.
 *   <li>A master hook, from inside {@code triggerCheckpoint}, acknowledges the checkpoint for
 *       both task attempts, so the task acknowledgements arrive before the master hook returns.
 *   <li>The checkpoint storage location throws an {@link IOException} from {@code
 *       createMetadataOutputStream()}.
 * </ol>
 *
 * <p>Expected outcome: the future returned by {@code triggerCheckpoint} is completed
 * exceptionally and the coordinator reports no successful checkpoints.
 */
@Test
public void testCompleteCheckpointFailureWithExternallyInducedSource() throws Exception {
    // Two job vertices; the first subtask of each is used as the acknowledging task below.
    JobVertexID jobVertexID1 = new JobVertexID();
    JobVertexID jobVertexID2 = new JobVertexID();
    ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1).addJobVertex(jobVertexID2).build();
    ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
    ExecutionVertex vertex2 = graph.getJobVertex(jobVertexID2).getTaskVertices()[0];
    ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();
    ExecutionAttemptID attemptID2 = vertex2.getCurrentExecutionAttempt().getAttemptId();
    OperatorID opID1 = vertex1.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
    OperatorID opID2 = vertex2.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
    // One snapshot per task, each holding a default-built subtask state for that task's first
    // operator. These are the payloads of the acknowledge messages sent from the master hook.
    TaskStateSnapshot taskOperatorSubtaskStates1 = new TaskStateSnapshot();
    TaskStateSnapshot taskOperatorSubtaskStates2 = new TaskStateSnapshot();
    OperatorSubtaskState subtaskState1 = OperatorSubtaskState.builder().build();
    OperatorSubtaskState subtaskState2 = OperatorSubtaskState.builder().build();
    taskOperatorSubtaskStates1.putSubtaskStateByOperatorID(opID1, subtaskState1);
    taskOperatorSubtaskStates2.putSubtaskStateByOperatorID(opID2, subtaskState2);
    // Create a mock OperatorCoordinatorCheckpointContext which completes the checkpoint
    // immediately. The flag lets the master hook assert that the coordinator checkpoint ran
    // before the hook was triggered.
    AtomicBoolean coordCheckpointDone = new AtomicBoolean(false);
    OperatorCoordinatorCheckpointContext coordinatorCheckpointContext = new CheckpointCoordinatorTestingUtils.MockOperatorCheckpointCoordinatorContextBuilder().setOnCallingCheckpointCoordinator((checkpointId, result) -> {
        coordCheckpointDone.set(true);
        result.complete(new byte[0]);
    }).setOperatorID(opID1).build();
    // Set up the coordinator under test with a checkpoint storage whose storage location always
    // fails when asked for a metadata output stream.
    CheckpointCoordinator checkpointCoordinator = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setCheckpointCoordinatorConfiguration(CheckpointCoordinatorConfiguration.builder().setMaxConcurrentCheckpoints(Integer.MAX_VALUE).build()).setTimer(manuallyTriggeredScheduledExecutor).setCoordinatorsToCheckpoint(Collections.singleton(coordinatorCheckpointContext)).setCheckpointStorage(new JobManagerCheckpointStorage() {

        private static final long serialVersionUID = 8134582566514272546L;

        // Throw exception when finalizing the checkpoint.
        @Override
        public CheckpointStorageAccess createCheckpointStorage(JobID jobId) throws IOException {
            return new MemoryBackendCheckpointStorageAccess(jobId, null, null, 100) {

                @Override
                public CheckpointStorageLocation initializeLocationForCheckpoint(long checkpointId) throws IOException {
                    return new NonPersistentMetadataCheckpointStorageLocation(1000) {

                        // The injected failure: the metadata stream can never be created.
                        @Override
                        public CheckpointMetadataOutputStream createMetadataOutputStream() throws IOException {
                            throw new IOException("Artificial Exception");
                        }
                    };
                }
            };
        }
    }).build();
    // NOTE(review): this reference is written in the master hook below but never read in this
    // method; it looks like a leftover and is a candidate for removal.
    AtomicReference<Long> checkpointIdRef = new AtomicReference<>();
    // Add a master hook which triggers and acks the task checkpoint immediately.
    // In this case the task checkpoints would complete before the job master checkpoint
    // completes.
    checkpointCoordinator.addMasterHook(new MasterTriggerRestoreHook<Integer>() {

        @Override
        public String getIdentifier() {
            return "anything";
        }

        @Override
        @Nullable
        public CompletableFuture<Integer> triggerCheckpoint(long checkpointId, long timestamp, Executor executor) throws Exception {
            // Ordering check: the operator coordinator must already have been checkpointed.
            assertTrue("The coordinator checkpoint should have finished.", coordCheckpointDone.get());
            // Acknowledge the checkpoint from inside the master hook so the task snapshots
            // complete before the master state snapshot completes.
            checkpointIdRef.set(checkpointId);
            AcknowledgeCheckpoint acknowledgeCheckpoint1 = new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, checkpointId, new CheckpointMetrics(), taskOperatorSubtaskStates1);
            AcknowledgeCheckpoint acknowledgeCheckpoint2 = new AcknowledgeCheckpoint(graph.getJobID(), attemptID2, checkpointId, new CheckpointMetrics(), taskOperatorSubtaskStates2);
            checkpointCoordinator.receiveAcknowledgeMessage(acknowledgeCheckpoint1, TASK_MANAGER_LOCATION_INFO);
            checkpointCoordinator.receiveAcknowledgeMessage(acknowledgeCheckpoint2, TASK_MANAGER_LOCATION_INFO);
            // No future is returned; presumably this tells the coordinator the hook has no
            // state of its own to store -- confirm against the MasterTriggerRestoreHook contract.
            return null;
        }

        @Override
        public void restoreCheckpoint(long checkpointId, Integer checkpointData) throws Exception {
            // Intentionally a no-op.
        }

        @Override
        public SimpleVersionedSerializer<Integer> createCheckpointDataSerializer() {
            // Stub serializer: ignores its input and returns fixed values.
            return new SimpleVersionedSerializer<Integer>() {

                @Override
                public int getVersion() {
                    return 0;
                }

                @Override
                public byte[] serialize(Integer obj) throws IOException {
                    return new byte[0];
                }

                @Override
                public Integer deserialize(int version, byte[] serialized) throws IOException {
                    return 1;
                }
            };
        }
    });
    // Trigger a checkpoint. The storage location throws the artificial IOException when the
    // metadata stream is requested, so this checkpoint is expected to fail, not succeed.
    final CompletableFuture<CompletedCheckpoint> checkpointFuture = checkpointCoordinator.triggerCheckpoint(false);
    // Execute the tasks queued on the manually triggered executor before checking the outcome.
    manuallyTriggeredScheduledExecutor.triggerAll();
    // The failure must surface on the returned future, and nothing may be recorded as a
    // successful checkpoint.
    assertTrue(checkpointFuture.isCompletedExceptionally());
    assertTrue(checkpointCoordinator.getSuccessfulCheckpoints().isEmpty());
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) NonPersistentMetadataCheckpointStorageLocation(org.apache.flink.runtime.state.memory.NonPersistentMetadataCheckpointStorageLocation) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CompletableFuture(java.util.concurrent.CompletableFuture) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) Executor(java.util.concurrent.Executor) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) SimpleVersionedSerializer(org.apache.flink.core.io.SimpleVersionedSerializer) MemoryBackendCheckpointStorageAccess(org.apache.flink.runtime.state.memory.MemoryBackendCheckpointStorageAccess) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) JobManagerCheckpointStorage(org.apache.flink.runtime.state.storage.JobManagerCheckpointStorage) TriFunctionWithException(org.apache.flink.util.function.TriFunctionWithException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) RpcException(org.apache.flink.runtime.rpc.exceptions.RpcException) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) AtomicLong(java.util.concurrent.atomic.AtomicLong) ArgumentMatchers.anyLong(org.mockito.ArgumentMatchers.anyLong) JobID(org.apache.flink.api.common.JobID) 
Nullable(javax.annotation.Nullable) Test(org.junit.Test)

Aggregations

IOException (java.io.IOException)1 CompletableFuture (java.util.concurrent.CompletableFuture)1 ExecutionException (java.util.concurrent.ExecutionException)1 Executor (java.util.concurrent.Executor)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 Nullable (javax.annotation.Nullable)1 JobID (org.apache.flink.api.common.JobID)1 SimpleVersionedSerializer (org.apache.flink.core.io.SimpleVersionedSerializer)1 CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder)1 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)1 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)1 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)1 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)1 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)1 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)1 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)1 RpcException (org.apache.flink.runtime.rpc.exceptions.RpcException)1 MemoryBackendCheckpointStorageAccess (org.apache.flink.runtime.state.memory.MemoryBackendCheckpointStorageAccess)1