Search in sources :

Example 11 with ManuallyTriggeredScheduledExecutor

use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.

the class CheckpointStateRestoreTest method testSetState.

/**
 * Tests that on restore the task state is reset for each stateful task.
 *
 * <p>The test completes one checkpoint in which only the executions of the "stateful" job
 * vertex report a {@link TaskStateSnapshot}, restores the latest checkpoint to all vertices,
 * and then checks that exactly those executions carry the snapshot while the executions of the
 * "stateless" vertex have no restore information.
 *
 * <p>Any unexpected exception fails the test with the original exception attached as the cause,
 * so the full stack trace shows up in the test report.
 */
@Test
public void testSetState() {
    try {
        KeyGroupRange keyGroupRange = KeyGroupRange.of(0, 0);
        List<SerializableObject> testStates = Collections.singletonList(new SerializableObject());
        final KeyedStateHandle serializedKeyGroupStates = CheckpointCoordinatorTestingUtils.generateKeyGroupState(keyGroupRange, testStates);
        final JobVertexID statefulId = new JobVertexID();
        final JobVertexID statelessId = new JobVertexID();
        // NOTE(review): the numeric arguments are presumably parallelism and max parallelism;
        // the test reads three task vertices from the first job vertex and two from the second.
        ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                .addJobVertex(statefulId, 3, 256)
                .addJobVertex(statelessId, 2, 256)
                .build();
        ExecutionJobVertex stateful = graph.getJobVertex(statefulId);
        ExecutionJobVertex stateless = graph.getJobVertex(statelessId);
        ExecutionVertex stateful1 = stateful.getTaskVertices()[0];
        ExecutionVertex stateful2 = stateful.getTaskVertices()[1];
        ExecutionVertex stateful3 = stateful.getTaskVertices()[2];
        ExecutionVertex stateless1 = stateless.getTaskVertices()[0];
        ExecutionVertex stateless2 = stateless.getTaskVertices()[1];
        Execution statefulExec1 = stateful1.getCurrentExecutionAttempt();
        Execution statefulExec2 = stateful2.getCurrentExecutionAttempt();
        Execution statefulExec3 = stateful3.getCurrentExecutionAttempt();
        Execution statelessExec1 = stateless1.getCurrentExecutionAttempt();
        Execution statelessExec2 = stateless2.getCurrentExecutionAttempt();
        ManuallyTriggeredScheduledExecutor manuallyTriggeredScheduledExecutor = new ManuallyTriggeredScheduledExecutor();
        CheckpointCoordinator coord = new CheckpointCoordinatorBuilder()
                .setExecutionGraph(graph)
                .setTimer(manuallyTriggeredScheduledExecutor)
                .build();

        // create ourselves a checkpoint with state; the timer is manual, so the queued work
        // only runs once triggerAll() is called
        coord.triggerCheckpoint(false);
        manuallyTriggeredScheduledExecutor.triggerAll();
        PendingCheckpoint pending = coord.getPendingCheckpoints().values().iterator().next();
        final long checkpointId = pending.getCheckpointId();
        final TaskStateSnapshot subtaskStates = new TaskStateSnapshot();
        subtaskStates.putSubtaskStateByOperatorID(
                OperatorID.fromJobVertexID(statefulId),
                OperatorSubtaskState.builder().setManagedKeyedState(serializedKeyGroupStates).build());

        // the stateful executions acknowledge with a snapshot, the stateless ones without
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), statefulExec1.getAttemptId(), checkpointId, new CheckpointMetrics(), subtaskStates), TASK_MANAGER_LOCATION_INFO);
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), statefulExec2.getAttemptId(), checkpointId, new CheckpointMetrics(), subtaskStates), TASK_MANAGER_LOCATION_INFO);
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), statefulExec3.getAttemptId(), checkpointId, new CheckpointMetrics(), subtaskStates), TASK_MANAGER_LOCATION_INFO);
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), statelessExec1.getAttemptId(), checkpointId), TASK_MANAGER_LOCATION_INFO);
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), statelessExec2.getAttemptId(), checkpointId), TASK_MANAGER_LOCATION_INFO);
        assertEquals(1, coord.getNumberOfRetainedSuccessfulCheckpoints());
        assertEquals(0, coord.getNumberOfPendingCheckpoints());

        // let the coordinator inject the state
        assertTrue(coord.restoreLatestCheckpointedStateToAll(new HashSet<>(Arrays.asList(stateful, stateless)), false));

        // verify that each stateful vertex got the state
        assertEquals(subtaskStates, statefulExec1.getTaskRestore().getTaskStateSnapshot());
        assertEquals(subtaskStates, statefulExec2.getTaskRestore().getTaskStateSnapshot());
        assertEquals(subtaskStates, statefulExec3.getTaskRestore().getTaskStateSnapshot());
        // ... and that the stateless executions got nothing to restore
        assertNull(statelessExec1.getTaskRestore());
        assertNull(statelessExec2.getTaskRestore());
    } catch (Exception e) {
        // Fail with the exception attached as the cause: fail(e.getMessage()) lost the stack
        // trace and produced a null message for exceptions created without one.
        throw new AssertionError("Unexpected exception in testSetState: " + e, e);
    }
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) SerializableObject(org.apache.flink.util.SerializableObject) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) Execution(org.apache.flink.runtime.executiongraph.Execution) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 12 with ManuallyTriggeredScheduledExecutor

use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.

the class MemoryExecutionGraphInfoStoreTest method testExecutionGraphExpiration.

/**
 * Verifies that the execution graph store drops an entry once its expiration time has passed
 * and the scheduled cleanup has been run.
 */
@Test
public void testExecutionGraphExpiration() throws Exception {
    final ManualTicker ticker = new ManualTicker();
    final ManuallyTriggeredScheduledExecutor cleanupExecutor = new ManuallyTriggeredScheduledExecutor();
    final Time timeToLive = Time.milliseconds(1L);

    try (final MemoryExecutionGraphInfoStore store =
            new MemoryExecutionGraphInfoStore(timeToLive, Integer.MAX_VALUE, cleanupExecutor, ticker)) {
        final ExecutionGraphInfo finishedJob =
                new ExecutionGraphInfo(
                        new ArchivedExecutionGraphBuilder().setState(JobStatus.FINISHED).build());
        store.put(finishedJob);

        // exactly one execution graph is stored at this point
        assertThat(store.size(), Matchers.equalTo(1));

        // move the manual clock forward by the full expiration time ...
        ticker.advanceTime(timeToLive.toMilliseconds(), TimeUnit.MILLISECONDS);

        // ... and run the scheduled tasks, which should clean up the expired entry
        cleanupExecutor.triggerScheduledTasks();

        assertThat(store.size(), Matchers.equalTo(0));
        assertThat(store.get(finishedJob.getJobId()), Matchers.nullValue());

        // the lookup above must not have changed the size: the store is still empty
        assertThat(store.size(), Matchers.equalTo(0));
    }
}
Also used : ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) Time(org.apache.flink.api.common.time.Time) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder) ManualTicker(org.apache.flink.runtime.util.ManualTicker) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Test(org.junit.Test)

Example 13 with ManuallyTriggeredScheduledExecutor

use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.

the class CheckpointCoordinatorTest method testNotifyCheckpointAbortionInOperatorCoordinator.

/**
 * Triggers two checkpoints, acknowledges only the second one, and checks that the operator
 * coordinator context recorded checkpoint 1 as aborted and checkpoint 2 as completed.
 */
@Test
public void testNotifyCheckpointAbortionInOperatorCoordinator() throws Exception {
    JobVertexID vertexId = new JobVertexID();
    ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId)
                    .build();
    ExecutionVertex firstSubtask = executionGraph.getJobVertex(vertexId).getTaskVertices()[0];
    ExecutionAttemptID attemptId = firstSubtask.getCurrentExecutionAttempt().getAttemptId();

    // operator coordinator context whose checkpoint future is completed with an empty byte array
    CheckpointCoordinatorTestingUtils.MockOperatorCoordinatorCheckpointContext coordinatorContext =
            new CheckpointCoordinatorTestingUtils.MockOperatorCheckpointCoordinatorContextBuilder()
                    .setOperatorID(new OperatorID())
                    .setOnCallingCheckpointCoordinator(
                            (ignored, future) -> future.complete(new byte[0]))
                    .build();

    // build the coordinator with the maximum possible number of concurrent checkpoints
    CheckpointCoordinatorConfiguration coordinatorConfig =
            CheckpointCoordinatorConfiguration.builder()
                    .setMaxConcurrentCheckpoints(Integer.MAX_VALUE)
                    .build();
    CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setCheckpointCoordinatorConfiguration(coordinatorConfig)
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .setCoordinatorsToCheckpoint(Collections.singleton(coordinatorContext))
                    .build();

    try {
        // First checkpoint.
        coordinator.triggerCheckpoint(false);
        manuallyTriggeredScheduledExecutor.triggerAll();
        // Not used afterwards; Collections.max throws if no checkpoint is pending at this point.
        long firstCheckpointId = Collections.max(coordinator.getPendingCheckpoints().keySet());

        // Second checkpoint.
        coordinator.triggerCheckpoint(false);
        manuallyTriggeredScheduledExecutor.triggerAll();
        long secondCheckpointId = Collections.max(coordinator.getPendingCheckpoints().keySet());

        // Acknowledging the second checkpoint should abort the first one.
        AcknowledgeCheckpoint ackForSecondCheckpoint =
                new AcknowledgeCheckpoint(
                        executionGraph.getJobID(),
                        attemptId,
                        secondCheckpointId,
                        new CheckpointMetrics(),
                        null);
        coordinator.receiveAcknowledgeMessage(ackForSecondCheckpoint, "");

        // The context must have seen the abort of checkpoint 1 and the completion of checkpoint 2.
        assertEquals(Collections.singletonList(1L), coordinatorContext.getAbortedCheckpoints());
        assertEquals(Collections.singletonList(2L), coordinatorContext.getCompletedCheckpoints());
    } finally {
        coordinator.shutdown();
    }
}
Also used : ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Arrays(java.util.Arrays) UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) Tuple2(org.apache.flink.api.java.tuple.Tuple2) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) Future(java.util.concurrent.Future) Path(org.apache.flink.core.fs.Path) Map(java.util.Map) CHECKPOINT_DECLINED(org.apache.flink.runtime.checkpoint.CheckpointFailureReason.CHECKPOINT_DECLINED) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) Set(java.util.Set) FsStateBackend(org.apache.flink.runtime.state.filesystem.FsStateBackend) ArgumentMatchers.anyList(org.mockito.ArgumentMatchers.anyList) Executors(java.util.concurrent.Executors) VerificationMode(org.mockito.verification.VerificationMode) Matchers.any(org.mockito.Matchers.any) PlaceholderStreamStateHandle(org.apache.flink.runtime.state.PlaceholderStreamStateHandle) SimpleVersionedSerializer(org.apache.flink.core.io.SimpleVersionedSerializer) Assert.assertFalse(org.junit.Assert.assertFalse) MemoryBackendCheckpointStorageAccess(org.apache.flink.runtime.state.memory.MemoryBackendCheckpointStorageAccess) Mockito.eq(org.mockito.Mockito.eq) Mockito.mock(org.mockito.Mockito.mock) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) StateHandleID(org.apache.flink.runtime.state.StateHandleID) JobStatus(org.apache.flink.api.common.JobStatus) Mockito.spy(org.mockito.Mockito.spy) ArrayList(java.util.ArrayList) DirectScheduledExecutorService(org.apache.flink.runtime.testutils.DirectScheduledExecutorService) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) Iterables(org.apache.flink.shaded.guava30.com.google.common.collect.Iterables) Nullable(javax.annotation.Nullable) Before(org.junit.Before) 
NonPersistentMetadataCheckpointStorageLocation(org.apache.flink.runtime.state.memory.NonPersistentMetadataCheckpointStorageLocation) CheckpointStorageLocation(org.apache.flink.runtime.state.CheckpointStorageLocation) Executor(java.util.concurrent.Executor) TriFunctionWithException(org.apache.flink.util.function.TriFunctionWithException) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) Mockito.times(org.mockito.Mockito.times) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) SimpleAckingTaskManagerGateway(org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway) File(java.io.File) Assert.assertNotEquals(org.junit.Assert.assertNotEquals) ExecutionException(java.util.concurrent.ExecutionException) AtomicLong(java.util.concurrent.atomic.AtomicLong) Mockito.never(org.mockito.Mockito.never) JobID(org.apache.flink.api.common.JobID) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) Assert(org.junit.Assert) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) JobManagerCheckpointStorage(org.apache.flink.runtime.state.storage.JobManagerCheckpointStorage) Mockito.reset(org.mockito.Mockito.reset) Assert.assertEquals(org.junit.Assert.assertEquals) ComponentMainThreadExecutorServiceAdapter(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutorServiceAdapter) ScheduledFuture(java.util.concurrent.ScheduledFuture) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) SharedStateRegistry(org.apache.flink.runtime.state.SharedStateRegistry) ExceptionUtils(org.apache.flink.util.ExceptionUtils) Random(java.util.Random) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) 
CheckpointStorage(org.apache.flink.runtime.state.CheckpointStorage) IncrementalRemoteKeyedStateHandle(org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle) TestingLogicalSlotBuilder(org.apache.flink.runtime.jobmaster.TestingLogicalSlotBuilder) RpcException(org.apache.flink.runtime.rpc.exceptions.RpcException) TestLogger(org.apache.flink.util.TestLogger) Mockito.atLeast(org.mockito.Mockito.atLeast) CheckpointStorageAccess(org.apache.flink.runtime.state.CheckpointStorageAccess) Assert.fail(org.junit.Assert.fail) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) Collection(java.util.Collection) CHECKPOINT_ASYNC_EXCEPTION(org.apache.flink.runtime.checkpoint.CheckpointFailureReason.CHECKPOINT_ASYNC_EXCEPTION) UUID(java.util.UUID) MemoryStateBackend(org.apache.flink.runtime.state.memory.MemoryStateBackend) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) IO_EXCEPTION(org.apache.flink.runtime.checkpoint.CheckpointFailureReason.IO_EXCEPTION) List(java.util.List) CheckpointMetadataOutputStream(org.apache.flink.runtime.state.CheckpointMetadataOutputStream) FileSystem(org.apache.flink.core.fs.FileSystem) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) Optional(java.util.Optional) KeyGroupRangeAssignment(org.apache.flink.runtime.state.KeyGroupRangeAssignment) CHECKPOINT_EXPIRED(org.apache.flink.runtime.checkpoint.CheckpointFailureReason.CHECKPOINT_EXPIRED) ArgumentMatchers.anyLong(org.mockito.ArgumentMatchers.anyLong) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) FileStateHandle(org.apache.flink.runtime.state.filesystem.FileStateHandle) TestCompletedCheckpointStorageLocation(org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) 
AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) Execution(org.apache.flink.runtime.executiongraph.Execution) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) HashSet(java.util.HashSet) PERIODIC_SCHEDULER_SHUTDOWN(org.apache.flink.runtime.checkpoint.CheckpointFailureReason.PERIODIC_SCHEDULER_SHUTDOWN) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) Collections.singletonMap(java.util.Collections.singletonMap) Preconditions.checkState(org.apache.flink.util.Preconditions.checkState) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Iterator(java.util.Iterator) Assert.assertNotNull(org.junit.Assert.assertNotNull) ExecutionGraphTestUtils(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils) LogicalSlot(org.apache.flink.runtime.jobmaster.LogicalSlot) Mockito.when(org.mockito.Mockito.when) INVALID_CHECKPOINT_ID(org.apache.flink.runtime.checkpoint.CheckpointStoreUtil.INVALID_CHECKPOINT_ID) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) TestingStreamStateHandle(org.apache.flink.runtime.state.TestingStreamStateHandle) Mockito.verify(org.mockito.Mockito.verify) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) Rule(org.junit.Rule) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) 
CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 14 with ManuallyTriggeredScheduledExecutor

use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.

the class CheckpointCoordinatorFailureTest method testStoringFailureHandling.

/**
 * Runs a single checkpoint against a completed-checkpoint store built from {@code failure} and
 * checks how often the cleaner's failed-storing hook is invoked.
 *
 * @param failure exception handed to the {@code FailingCompletedCheckpointStore}
 * @param expectedCleanupCalls expected number of {@code cleanCheckpointOnFailedStoring} calls
 * @throws Exception if setting up the graph or coordinator, or triggering the checkpoint, fails
 */
private void testStoringFailureHandling(Exception failure, int expectedCleanupCalls) throws Exception {
    final JobVertexID vertexId = new JobVertexID();
    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId)
                    .build();
    final ExecutionVertex firstSubtask = executionGraph.getJobVertex(vertexId).getTaskVertices()[0];
    final ExecutionAttemptID attempt = firstSubtask.getCurrentExecutionAttempt().getAttemptId();

    final StandaloneCheckpointIDCounter idCounter = new StandaloneCheckpointIDCounter();
    final ManuallyTriggeredScheduledExecutor timer = new ManuallyTriggeredScheduledExecutor();
    final CompletedCheckpointStore failingStore = new FailingCompletedCheckpointStore(failure);
    final AtomicInteger observedCleanups = new AtomicInteger(0);

    // Cleaner that counts each failed-storing cleanup request before delegating to the default.
    final CheckpointsCleaner countingCleaner = new CheckpointsCleaner() {

        private static final long serialVersionUID = 2029876992397573325L;

        @Override
        public void cleanCheckpointOnFailedStoring(CompletedCheckpoint completedCheckpoint, Executor executor) {
            observedCleanups.incrementAndGet();
            super.cleanCheckpointOnFailedStoring(completedCheckpoint, executor);
        }
    };

    final CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setCheckpointIDCounter(idCounter)
                    .setCheckpointsCleaner(countingCleaner)
                    .setCompletedCheckpointStore(failingStore)
                    .setTimer(timer)
                    .build();

    coordinator.triggerCheckpoint(false);
    timer.triggerAll();

    try {
        // Acknowledge the most recently issued checkpoint id; this is expected to throw.
        final AcknowledgeCheckpoint ack =
                new AcknowledgeCheckpoint(executionGraph.getJobID(), attempt, idCounter.getLast());
        coordinator.receiveAcknowledgeMessage(ack, "unknown location");
        fail("CheckpointException should have been thrown.");
    } catch (CheckpointException e) {
        assertThat(e.getCheckpointFailureReason(), is(CheckpointFailureReason.FINALIZE_CHECKPOINT_FAILURE));
    }

    assertThat(observedCleanups.get(), is(expectedCleanupCalls));
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Executor(java.util.concurrent.Executor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph)

Example 15 with ManuallyTriggeredScheduledExecutor

use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.

the class CheckpointCoordinatorFailureTest method testFailingCompletedCheckpointStoreAdd.

/**
 * Tests that a failure while storing a completed checkpoint in the completed checkpoint store
 * will properly fail the originating pending checkpoint and clean up the completed checkpoint.
 *
 * <p>The test acknowledges a single pending checkpoint with a spied {@link TaskStateSnapshot}
 * against a coordinator that uses a {@code FailingCompletedCheckpointStore}, expects a
 * {@link CheckpointException}, and then verifies that the pending checkpoint is disposed and
 * that {@code discardState()} was called on the reported state and on each of its handles.
 */
@Test
public void testFailingCompletedCheckpointStoreAdd() throws Exception {
    JobVertexID jobVertexId = new JobVertexID();
    final ManuallyTriggeredScheduledExecutor manuallyTriggeredScheduledExecutor = new ManuallyTriggeredScheduledExecutor();
    ExecutionGraph testGraph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexId).build();
    ExecutionVertex vertex = testGraph.getJobVertex(jobVertexId).getTaskVertices()[0];
    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinatorBuilder().setExecutionGraph(testGraph).setCompletedCheckpointStore(new FailingCompletedCheckpointStore(new Exception("The failing completed checkpoint store failed again... :-("))).setTimer(manuallyTriggeredScheduledExecutor).build();
    // trigger one checkpoint; the timer is manual, so run its queued work explicitly
    coord.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    assertEquals(1, coord.getNumberOfPendingCheckpoints());
    PendingCheckpoint pendingCheckpoint = coord.getPendingCheckpoints().values().iterator().next();
    assertFalse(pendingCheckpoint.isDisposed());
    final long checkpointId = coord.getPendingCheckpoints().keySet().iterator().next();
    // mocked state handles, so that discardState() calls can be verified below
    KeyedStateHandle managedKeyedHandle = mock(KeyedStateHandle.class);
    KeyedStateHandle rawKeyedHandle = mock(KeyedStateHandle.class);
    OperatorStateHandle managedOpHandle = mock(OperatorStreamStateHandle.class);
    OperatorStateHandle rawOpHandle = mock(OperatorStreamStateHandle.class);
    // channel state handles are real objects wrapping mocked stream handles (their delegates)
    InputChannelStateHandle inputChannelStateHandle = new InputChannelStateHandle(new InputChannelInfo(0, 1), mock(StreamStateHandle.class), Collections.singletonList(1L));
    ResultSubpartitionStateHandle resultSubpartitionStateHandle = new ResultSubpartitionStateHandle(new ResultSubpartitionInfo(0, 1), mock(StreamStateHandle.class), Collections.singletonList(1L));
    // spy on the real subtask state so its own discardState() call can be verified as well
    final OperatorSubtaskState operatorSubtaskState = spy(OperatorSubtaskState.builder().setManagedOperatorState(managedOpHandle).setRawOperatorState(rawOpHandle).setManagedKeyedState(managedKeyedHandle).setRawKeyedState(rawKeyedHandle).setInputChannelState(StateObjectCollection.singleton(inputChannelStateHandle)).setResultSubpartitionState(StateObjectCollection.singleton(resultSubpartitionStateHandle)).build());
    TaskStateSnapshot subtaskState = spy(new TaskStateSnapshot());
    // the state is registered under a fresh OperatorID ...
    subtaskState.putSubtaskStateByOperatorID(new OperatorID(), operatorSubtaskState);
    // ... and the spy is additionally stubbed to return it for the operator ID derived from the vertex
    when(subtaskState.getSubtaskStateByOperatorID(OperatorID.fromJobVertexID(vertex.getJobvertexId()))).thenReturn(operatorSubtaskState);
    AcknowledgeCheckpoint acknowledgeMessage = new AcknowledgeCheckpoint(testGraph.getJobID(), vertex.getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), subtaskState);
    try {
        coord.receiveAcknowledgeMessage(acknowledgeMessage, "Unknown location");
        fail("Expected a checkpoint exception because the completed checkpoint store could not " + "store the completed checkpoint.");
    } catch (CheckpointException e) {
    // ignore because we expected this exception
    }
    // make sure that the pending checkpoint has been discarded after we could not complete it
    assertTrue(pendingCheckpoint.isDisposed());
    // make sure that the subtask state has been discarded after we could not complete it.
    verify(operatorSubtaskState).discardState();
    // each individual handle inside the subtask state must have been discarded too
    verify(operatorSubtaskState.getManagedOperatorState().iterator().next()).discardState();
    verify(operatorSubtaskState.getRawOperatorState().iterator().next()).discardState();
    verify(operatorSubtaskState.getManagedKeyedState().iterator().next()).discardState();
    verify(operatorSubtaskState.getRawKeyedState().iterator().next()).discardState();
    // for the channel state handles, the check is made on the mocked delegate stream handle
    verify(operatorSubtaskState.getInputChannelState().iterator().next().getDelegate()).discardState();
    verify(operatorSubtaskState.getResultSubpartitionState().iterator().next().getDelegate()).discardState();
}
Also used : InputChannelInfo(org.apache.flink.runtime.checkpoint.channel.InputChannelInfo) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) PossibleInconsistentStateException(org.apache.flink.runtime.persistence.PossibleInconsistentStateException) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ResultSubpartitionInfo(org.apache.flink.runtime.checkpoint.channel.ResultSubpartitionInfo) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) ResultSubpartitionStateHandle(org.apache.flink.runtime.state.ResultSubpartitionStateHandle) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) InputChannelStateHandle(org.apache.flink.runtime.state.InputChannelStateHandle) Test(org.junit.Test)

Aggregations

ManuallyTriggeredScheduledExecutor (org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor)19 Test (org.junit.Test)18 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)11 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)11 CompletableFuture (java.util.concurrent.CompletableFuture)10 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)10 Executor (java.util.concurrent.Executor)9 CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder)9 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)8 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)7 ScheduledExecutor (org.apache.flink.util.concurrent.ScheduledExecutor)7 HashSet (java.util.HashSet)6 List (java.util.List)6 ExecutionException (java.util.concurrent.ExecutionException)5 Nullable (javax.annotation.Nullable)5 JobID (org.apache.flink.api.common.JobID)5 Arrays (java.util.Arrays)4 TimeUnit (java.util.concurrent.TimeUnit)4 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)4 JobStatus (org.apache.flink.api.common.JobStatus)4