Search in sources :

Example 91 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

From the class CheckpointCoordinatorTest, method testSharedStateRegistrationOnRestore.

@Test
public void testSharedStateRegistrationOnRestore() throws Exception {
    // Build a graph with a single vertex (parallelism 2, max parallelism 4) so that
    // incremental checkpoints produce per-subtask shared state handles.
    JobVertexID jobVertexID1 = new JobVertexID();
    int parallelism1 = 2;
    int maxParallelism1 = 4;
    ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1, parallelism1, maxParallelism1).build();
    ExecutionJobVertex jobVertex1 = graph.getJobVertex(jobVertexID1);
    List<CompletedCheckpoint> checkpoints = Collections.emptyList();
    SharedStateRegistry firstInstance = SharedStateRegistry.DEFAULT_FACTORY.create(org.apache.flink.util.concurrent.Executors.directExecutor(), checkpoints);
    final EmbeddedCompletedCheckpointStore store = new EmbeddedCompletedCheckpointStore(10, checkpoints, firstInstance);
    // set up the coordinator and validate the initial state
    final CheckpointCoordinatorBuilder coordinatorBuilder = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setTimer(manuallyTriggeredScheduledExecutor);
    final CheckpointCoordinator coordinator = coordinatorBuilder.setCompletedCheckpointStore(store).build();
    final int numCheckpoints = 3;
    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    // Take three incremental checkpoints; later ones reference shared state of earlier ones.
    for (int i = 0; i < numCheckpoints; ++i) {
        performIncrementalCheckpoint(graph.getJobID(), coordinator, jobVertex1, keyGroupPartitions1, i);
    }
    List<CompletedCheckpoint> completedCheckpoints = coordinator.getSuccessfulCheckpoints();
    assertEquals(numCheckpoints, completedCheckpoints.size());
    int sharedHandleCount = 0;
    // Collect the shared handles per checkpoint so we can later verify their discard behavior.
    List<Map<StateHandleID, StreamStateHandle>> sharedHandlesByCheckpoint = new ArrayList<>(numCheckpoints);
    for (int i = 0; i < numCheckpoints; ++i) {
        sharedHandlesByCheckpoint.add(new HashMap<>(2));
    }
    int cp = 0;
    for (CompletedCheckpoint completedCheckpoint : completedCheckpoints) {
        for (OperatorState taskState : completedCheckpoint.getOperatorStates().values()) {
            for (OperatorSubtaskState subtaskState : taskState.getStates()) {
                for (KeyedStateHandle keyedStateHandle : subtaskState.getManagedKeyedState()) {
                    // test we are once registered with the current registry
                    verify(keyedStateHandle, times(1)).registerSharedStates(firstInstance, completedCheckpoint.getCheckpointID());
                    IncrementalRemoteKeyedStateHandle incrementalKeyedStateHandle = (IncrementalRemoteKeyedStateHandle) keyedStateHandle;
                    sharedHandlesByCheckpoint.get(cp).putAll(incrementalKeyedStateHandle.getSharedState());
                    for (StreamStateHandle streamStateHandle : incrementalKeyedStateHandle.getSharedState().values()) {
                        // registration must have replaced placeholders with real handles
                        assertFalse(streamStateHandle instanceof PlaceholderStreamStateHandle);
                        verify(streamStateHandle, never()).discardState();
                        ++sharedHandleCount;
                    }
                    for (StreamStateHandle streamStateHandle : incrementalKeyedStateHandle.getPrivateState().values()) {
                        verify(streamStateHandle, never()).discardState();
                    }
                    verify(incrementalKeyedStateHandle.getMetaStateHandle(), never()).discardState();
                }
                verify(subtaskState, never()).discardState();
            }
        }
        ++cp;
    }
    // 2 (parallelism) x (1 (CP0) + 2 (CP1) + 2 (CP2)) = 10
    assertEquals(10, sharedHandleCount);
    // discard CP0
    store.removeOldestCheckpoint();
    // CP1: shared state is still referenced by later checkpoints, so nothing may be discarded yet
    for (Map<StateHandleID, StreamStateHandle> cpList : sharedHandlesByCheckpoint) {
        for (StreamStateHandle streamStateHandle : cpList.values()) {
            verify(streamStateHandle, never()).discardState();
        }
    }
    // shutdown the store
    store.shutdown(JobStatus.SUSPENDED, new CheckpointsCleaner());
    // restore the store
    Set<ExecutionJobVertex> tasks = new HashSet<>();
    tasks.add(jobVertex1);
    assertEquals(JobStatus.SUSPENDED, store.getShutdownStatus().orElse(null));
    // Simulate recovery: a fresh registry + store seeded with the surviving checkpoints.
    SharedStateRegistry secondInstance = SharedStateRegistry.DEFAULT_FACTORY.create(org.apache.flink.util.concurrent.Executors.directExecutor(), store.getAllCheckpoints());
    final EmbeddedCompletedCheckpointStore secondStore = new EmbeddedCompletedCheckpointStore(10, store.getAllCheckpoints(), secondInstance);
    final CheckpointCoordinator secondCoordinator = coordinatorBuilder.setCompletedCheckpointStore(secondStore).build();
    assertTrue(secondCoordinator.restoreLatestCheckpointedStateToAll(tasks, false));
    // validate that all shared states are registered again after the recovery.
    cp = 0;
    for (CompletedCheckpoint completedCheckpoint : completedCheckpoints) {
        for (OperatorState taskState : completedCheckpoint.getOperatorStates().values()) {
            for (OperatorSubtaskState subtaskState : taskState.getStates()) {
                for (KeyedStateHandle keyedStateHandle : subtaskState.getManagedKeyedState()) {
                    VerificationMode verificationMode;
                    // test we are once registered with the new registry; CP0 was removed
                    // before shutdown and must not be re-registered.
                    if (cp > 0) {
                        verificationMode = times(1);
                    } else {
                        verificationMode = never();
                    }
                    // check that all are registered with the new registry
                    verify(keyedStateHandle, verificationMode).registerSharedStates(secondInstance, completedCheckpoint.getCheckpointID());
                }
            }
        }
        ++cp;
    }
    // discard CP1
    secondStore.removeOldestCheckpoint();
    // we expect that all shared state from CP0 is no longer referenced and discarded. CP2 is
    // still live and also
    // references the state from CP1, so we expect they are not discarded.
    verifyDiscard(sharedHandlesByCheckpoint, cpId -> cpId == 0 ? times(1) : never());
    // discard CP2
    secondStore.removeOldestCheckpoint();
    // still expect shared state not to be discarded because it may be used in later checkpoints
    verifyDiscard(sharedHandlesByCheckpoint, cpId -> cpId == 1 ? never() : atLeast(0));
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ArrayList(java.util.ArrayList) IncrementalRemoteKeyedStateHandle(org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) SharedStateRegistry(org.apache.flink.runtime.state.SharedStateRegistry) PlaceholderStreamStateHandle(org.apache.flink.runtime.state.PlaceholderStreamStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) TestingStreamStateHandle(org.apache.flink.runtime.state.TestingStreamStateHandle) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) IncrementalRemoteKeyedStateHandle(org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle) HashSet(java.util.HashSet) PlaceholderStreamStateHandle(org.apache.flink.runtime.state.PlaceholderStreamStateHandle) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) VerificationMode(org.mockito.verification.VerificationMode) StateHandleID(org.apache.flink.runtime.state.StateHandleID) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Map(java.util.Map) HashMap(java.util.HashMap) Collections.singletonMap(java.util.Collections.singletonMap) Test(org.junit.Test)

Example 92 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

From the class CheckpointCoordinatorTest, method testConcurrentSavepoints.

/**
 * Tests that the savepoints can be triggered concurrently.
 */
@Test
public void testConcurrentSavepoints() throws Exception {
    final int numSavepoints = 5;
    final JobVertexID jobVertexID1 = new JobVertexID();

    final ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(jobVertexID1)
                    .build();
    final ExecutionVertex vertex = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
    final ExecutionAttemptID attemptID1 = vertex.getCurrentExecutionAttempt().getAttemptId();

    final StandaloneCheckpointIDCounter checkpointIDCounter = new StandaloneCheckpointIDCounter();
    // max one checkpoint at a time => should not affect savepoints
    final CheckpointCoordinatorConfiguration chkConfig =
            new CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder()
                    .setMaxConcurrentCheckpoints(1)
                    .build();
    final CheckpointCoordinator checkpointCoordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(graph)
                    .setCheckpointCoordinatorConfiguration(chkConfig)
                    .setCheckpointIDCounter(checkpointIDCounter)
                    .setCompletedCheckpointStore(new StandaloneCompletedCheckpointStore(2))
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();

    final String savepointDir = tmpFolder.newFolder().getAbsolutePath();

    // Trigger savepoints
    final List<CompletableFuture<CompletedCheckpoint>> savepointFutures = new ArrayList<>();
    for (int i = 0; i < numSavepoints; i++) {
        savepointFutures.add(
                checkpointCoordinator.triggerSavepoint(savepointDir, SavepointFormatType.CANONICAL));
    }

    // After triggering multiple savepoints, all should be in progress
    for (CompletableFuture<CompletedCheckpoint> savepointFuture : savepointFutures) {
        assertFalse(savepointFuture.isDone());
    }

    manuallyTriggeredScheduledExecutor.triggerAll();

    // ACK all savepoints, starting from the most recently assigned checkpoint id
    long checkpointId = checkpointIDCounter.getLast();
    for (int i = 0; i < numSavepoints; i++, checkpointId--) {
        checkpointCoordinator.receiveAcknowledgeMessage(
                new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, checkpointId),
                TASK_MANAGER_LOCATION_INFO);
    }

    // After ACKs, all should be completed
    for (CompletableFuture<CompletedCheckpoint> savepointFuture : savepointFutures) {
        assertNotNull(savepointFuture.get());
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ArrayList(java.util.ArrayList) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) CompletableFuture(java.util.concurrent.CompletableFuture) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 93 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

From the class CheckpointCoordinatorTest, method testIOExceptionCheckpointExceedsTolerableFailureNumber.

@Test
public void testIOExceptionCheckpointExceedsTolerableFailureNumber() throws Exception {
    // create some mock Execution vertices that receive the checkpoint trigger messages
    final ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(new JobVertexID())
                    .addJobVertex(new JobVertexID())
                    .build();

    final String expectedErrorMessage = "Expected Error Message";
    final CheckpointFailureManager failureManager = getCheckpointFailureManager(expectedErrorMessage);
    final CheckpointCoordinator coordinator = getCheckpointCoordinator(graph, failureManager);

    try {
        coordinator.triggerCheckpoint(false);
        manuallyTriggeredScheduledExecutor.triggerAll();
        // Aborting with an IO exception must exceed the tolerable failure count
        // and surface the failure manager's error.
        coordinator.abortPendingCheckpoints(new CheckpointException(IO_EXCEPTION));
        fail("Test failed.");
    } catch (Exception e) {
        ExceptionUtils.assertThrowableWithMessage(e, expectedErrorMessage);
    } finally {
        coordinator.shutdown();
    }
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) TriFunctionWithException(org.apache.flink.util.function.TriFunctionWithException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) RpcException(org.apache.flink.runtime.rpc.exceptions.RpcException) Test(org.junit.Test)

Example 94 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

From the class CheckpointCoordinatorTest, method testNotifyCheckpointAbortionInOperatorCoordinator.

@Test
public void testNotifyCheckpointAbortionInOperatorCoordinator() throws Exception {
    JobVertexID jobVertexID = new JobVertexID();
    ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID).build();
    ExecutionVertex executionVertex = graph.getJobVertex(jobVertexID).getTaskVertices()[0];
    ExecutionAttemptID attemptID = executionVertex.getCurrentExecutionAttempt().getAttemptId();
    // Operator coordinator context that immediately completes its checkpoint future.
    CheckpointCoordinatorTestingUtils.MockOperatorCoordinatorCheckpointContext context = new CheckpointCoordinatorTestingUtils.MockOperatorCheckpointCoordinatorContextBuilder().setOperatorID(new OperatorID()).setOnCallingCheckpointCoordinator((ignored, future) -> future.complete(new byte[0])).build();
    // set up the coordinator and validate the initial state
    CheckpointCoordinator checkpointCoordinator = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setCheckpointCoordinatorConfiguration(CheckpointCoordinatorConfiguration.builder().setMaxConcurrentCheckpoints(Integer.MAX_VALUE).build()).setTimer(manuallyTriggeredScheduledExecutor).setCoordinatorsToCheckpoint(Collections.singleton(context)).build();
    try {
        // Trigger checkpoint 1.
        checkpointCoordinator.triggerCheckpoint(false);
        manuallyTriggeredScheduledExecutor.triggerAll();
        long checkpointId1 = Collections.max(checkpointCoordinator.getPendingCheckpoints().keySet());
        // Trigger checkpoint 2.
        checkpointCoordinator.triggerCheckpoint(false);
        manuallyTriggeredScheduledExecutor.triggerAll();
        // Acknowledge checkpoint 2. This should abort checkpoint 1.
        long checkpointId2 = Collections.max(checkpointCoordinator.getPendingCheckpoints().keySet());
        // Renamed from acknowledgeCheckpoint1: this message acknowledges checkpoint 2.
        AcknowledgeCheckpoint acknowledgeCheckpoint2 = new AcknowledgeCheckpoint(graph.getJobID(), attemptID, checkpointId2, new CheckpointMetrics(), null);
        checkpointCoordinator.receiveAcknowledgeMessage(acknowledgeCheckpoint2, "");
        // OperatorCoordinator should have been notified of the abortion of checkpoint 1.
        // The counter starts at 1, so checkpointId1 == 1L and checkpointId2 == 2L here.
        assertEquals(Collections.singletonList(checkpointId1), context.getAbortedCheckpoints());
        assertEquals(Collections.singletonList(checkpointId2), context.getCompletedCheckpoints());
    } finally {
        checkpointCoordinator.shutdown();
    }
}
Also used : ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Arrays(java.util.Arrays) UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) Tuple2(org.apache.flink.api.java.tuple.Tuple2) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) Future(java.util.concurrent.Future) Path(org.apache.flink.core.fs.Path) Map(java.util.Map) CHECKPOINT_DECLINED(org.apache.flink.runtime.checkpoint.CheckpointFailureReason.CHECKPOINT_DECLINED) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) Set(java.util.Set) FsStateBackend(org.apache.flink.runtime.state.filesystem.FsStateBackend) ArgumentMatchers.anyList(org.mockito.ArgumentMatchers.anyList) Executors(java.util.concurrent.Executors) VerificationMode(org.mockito.verification.VerificationMode) Matchers.any(org.mockito.Matchers.any) PlaceholderStreamStateHandle(org.apache.flink.runtime.state.PlaceholderStreamStateHandle) SimpleVersionedSerializer(org.apache.flink.core.io.SimpleVersionedSerializer) Assert.assertFalse(org.junit.Assert.assertFalse) MemoryBackendCheckpointStorageAccess(org.apache.flink.runtime.state.memory.MemoryBackendCheckpointStorageAccess) Mockito.eq(org.mockito.Mockito.eq) Mockito.mock(org.mockito.Mockito.mock) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) StateHandleID(org.apache.flink.runtime.state.StateHandleID) JobStatus(org.apache.flink.api.common.JobStatus) Mockito.spy(org.mockito.Mockito.spy) ArrayList(java.util.ArrayList) DirectScheduledExecutorService(org.apache.flink.runtime.testutils.DirectScheduledExecutorService) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) Iterables(org.apache.flink.shaded.guava30.com.google.common.collect.Iterables) Nullable(javax.annotation.Nullable) Before(org.junit.Before) 
NonPersistentMetadataCheckpointStorageLocation(org.apache.flink.runtime.state.memory.NonPersistentMetadataCheckpointStorageLocation) CheckpointStorageLocation(org.apache.flink.runtime.state.CheckpointStorageLocation) Executor(java.util.concurrent.Executor) TriFunctionWithException(org.apache.flink.util.function.TriFunctionWithException) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) Mockito.times(org.mockito.Mockito.times) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) SimpleAckingTaskManagerGateway(org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway) File(java.io.File) Assert.assertNotEquals(org.junit.Assert.assertNotEquals) ExecutionException(java.util.concurrent.ExecutionException) AtomicLong(java.util.concurrent.atomic.AtomicLong) Mockito.never(org.mockito.Mockito.never) JobID(org.apache.flink.api.common.JobID) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) Assert(org.junit.Assert) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) JobManagerCheckpointStorage(org.apache.flink.runtime.state.storage.JobManagerCheckpointStorage) Mockito.reset(org.mockito.Mockito.reset) Assert.assertEquals(org.junit.Assert.assertEquals) ComponentMainThreadExecutorServiceAdapter(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutorServiceAdapter) ScheduledFuture(java.util.concurrent.ScheduledFuture) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) SharedStateRegistry(org.apache.flink.runtime.state.SharedStateRegistry) ExceptionUtils(org.apache.flink.util.ExceptionUtils) Random(java.util.Random) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) 
CheckpointStorage(org.apache.flink.runtime.state.CheckpointStorage) IncrementalRemoteKeyedStateHandle(org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle) TestingLogicalSlotBuilder(org.apache.flink.runtime.jobmaster.TestingLogicalSlotBuilder) RpcException(org.apache.flink.runtime.rpc.exceptions.RpcException) TestLogger(org.apache.flink.util.TestLogger) Mockito.atLeast(org.mockito.Mockito.atLeast) CheckpointStorageAccess(org.apache.flink.runtime.state.CheckpointStorageAccess) Assert.fail(org.junit.Assert.fail) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) Collection(java.util.Collection) CHECKPOINT_ASYNC_EXCEPTION(org.apache.flink.runtime.checkpoint.CheckpointFailureReason.CHECKPOINT_ASYNC_EXCEPTION) UUID(java.util.UUID) MemoryStateBackend(org.apache.flink.runtime.state.memory.MemoryStateBackend) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) IO_EXCEPTION(org.apache.flink.runtime.checkpoint.CheckpointFailureReason.IO_EXCEPTION) List(java.util.List) CheckpointMetadataOutputStream(org.apache.flink.runtime.state.CheckpointMetadataOutputStream) FileSystem(org.apache.flink.core.fs.FileSystem) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) Optional(java.util.Optional) KeyGroupRangeAssignment(org.apache.flink.runtime.state.KeyGroupRangeAssignment) CHECKPOINT_EXPIRED(org.apache.flink.runtime.checkpoint.CheckpointFailureReason.CHECKPOINT_EXPIRED) ArgumentMatchers.anyLong(org.mockito.ArgumentMatchers.anyLong) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) FileStateHandle(org.apache.flink.runtime.state.filesystem.FileStateHandle) TestCompletedCheckpointStorageLocation(org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) 
AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) Execution(org.apache.flink.runtime.executiongraph.Execution) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) HashSet(java.util.HashSet) PERIODIC_SCHEDULER_SHUTDOWN(org.apache.flink.runtime.checkpoint.CheckpointFailureReason.PERIODIC_SCHEDULER_SHUTDOWN) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) Collections.singletonMap(java.util.Collections.singletonMap) Preconditions.checkState(org.apache.flink.util.Preconditions.checkState) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Iterator(java.util.Iterator) Assert.assertNotNull(org.junit.Assert.assertNotNull) ExecutionGraphTestUtils(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils) LogicalSlot(org.apache.flink.runtime.jobmaster.LogicalSlot) Mockito.when(org.mockito.Mockito.when) INVALID_CHECKPOINT_ID(org.apache.flink.runtime.checkpoint.CheckpointStoreUtil.INVALID_CHECKPOINT_ID) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) TestingStreamStateHandle(org.apache.flink.runtime.state.TestingStreamStateHandle) Mockito.verify(org.mockito.Mockito.verify) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) Rule(org.junit.Rule) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) 
CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 95 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

From the class CheckpointCoordinatorFailureTest, method testStoringFailureHandling.

/**
 * Triggers a checkpoint whose completed-checkpoint store throws {@code failure} on add,
 * and verifies both the surfaced CheckpointException reason and how often the cleaner runs.
 */
private void testStoringFailureHandling(Exception failure, int expectedCleanupCalls) throws Exception {
    final JobVertexID jobVertexID1 = new JobVertexID();
    final ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(jobVertexID1)
                    .build();
    final ExecutionVertex vertex = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
    final ExecutionAttemptID attemptId = vertex.getCurrentExecutionAttempt().getAttemptId();

    final StandaloneCheckpointIDCounter checkpointIDCounter = new StandaloneCheckpointIDCounter();
    final ManuallyTriggeredScheduledExecutor manuallyTriggeredScheduledExecutor =
            new ManuallyTriggeredScheduledExecutor();
    // Store that fails when the completed checkpoint is added.
    final CompletedCheckpointStore completedCheckpointStore = new FailingCompletedCheckpointStore(failure);

    // Count invocations of the failed-storing cleanup hook.
    final AtomicInteger cleanupCallCount = new AtomicInteger(0);
    final CheckpointsCleaner countingCleaner =
            new CheckpointsCleaner() {

                private static final long serialVersionUID = 2029876992397573325L;

                @Override
                public void cleanCheckpointOnFailedStoring(CompletedCheckpoint completedCheckpoint, Executor executor) {
                    cleanupCallCount.incrementAndGet();
                    super.cleanCheckpointOnFailedStoring(completedCheckpoint, executor);
                }
            };
    final CheckpointCoordinator checkpointCoordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(graph)
                    .setCheckpointIDCounter(checkpointIDCounter)
                    .setCheckpointsCleaner(countingCleaner)
                    .setCompletedCheckpointStore(completedCheckpointStore)
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();

    checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();

    try {
        checkpointCoordinator.receiveAcknowledgeMessage(
                new AcknowledgeCheckpoint(graph.getJobID(), attemptId, checkpointIDCounter.getLast()),
                "unknown location");
        fail("CheckpointException should have been thrown.");
    } catch (CheckpointException e) {
        assertThat(e.getCheckpointFailureReason(), is(CheckpointFailureReason.FINALIZE_CHECKPOINT_FAILURE));
    }

    assertThat(cleanupCallCount.get(), is(expectedCleanupCalls));
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Executor(java.util.concurrent.Executor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph)

Aggregations

ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)120 Test (org.junit.Test)96 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)77 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)53 CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder)40 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)36 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)35 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)31 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)24 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)24 HashMap (java.util.HashMap)20 CompletableFuture (java.util.concurrent.CompletableFuture)19 JobID (org.apache.flink.api.common.JobID)19 ArrayList (java.util.ArrayList)17 HashSet (java.util.HashSet)17 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)17 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)17 ExecutionException (java.util.concurrent.ExecutionException)13 Executor (java.util.concurrent.Executor)13 IOException (java.io.IOException)12