Search in sources :

Example 16 with OperatorSnapshotFutures

use of org.apache.flink.streaming.api.operators.OperatorSnapshotFutures in project flink by apache.

the class StreamTaskTest method testAsyncCheckpointingConcurrentCloseAfterAcknowledge.

/**
 * FLINK-5667
 *
 * <p>Tests that a concurrent cancel operation does not discard the state handles of an
 * acknowledged checkpoint. The situation can only happen if the cancel call is executed after
 * Environment.acknowledgeCheckpoint() and before the CloseableRegistry.unregisterClosable()
 * call.
 */
@Test
public void testAsyncCheckpointingConcurrentCloseAfterAcknowledge() throws Exception {
    final OneShotLatch acknowledgeCheckpointLatch = new OneShotLatch();
    final OneShotLatch completeAcknowledge = new OneShotLatch();
    CheckpointResponder checkpointResponder = mock(CheckpointResponder.class);
    doAnswer(new Answer() {

        @Override
        public Object answer(InvocationOnMock invocation) {
            acknowledgeCheckpointLatch.trigger();
            // block here so that we can issue the concurrent cancel call
            while (true) {
                try {
                    // wait until we successfully await (no pun intended)
                    completeAcknowledge.await();
                    // when await() returns normally, we break out of the loop
                    break;
                } catch (InterruptedException e) {
                // survive interruptions that arise from thread pool
                // shutdown
                // production code cannot actually throw
                // InterruptedException from
                // checkpoint acknowledgement
                }
            }
            return null;
        }
    }).when(checkpointResponder).acknowledgeCheckpoint(any(JobID.class), any(ExecutionAttemptID.class), anyLong(), any(CheckpointMetrics.class), any(TaskStateSnapshot.class));
    TaskStateManager taskStateManager = new TaskStateManagerImpl(new JobID(1L, 2L), new ExecutionAttemptID(), mock(TaskLocalStateStoreImpl.class), new InMemoryStateChangelogStorage(), null, checkpointResponder);
    KeyedStateHandle managedKeyedStateHandle = mock(KeyedStateHandle.class);
    KeyedStateHandle rawKeyedStateHandle = mock(KeyedStateHandle.class);
    OperatorStateHandle managedOperatorStateHandle = mock(OperatorStreamStateHandle.class);
    OperatorStateHandle rawOperatorStateHandle = mock(OperatorStreamStateHandle.class);
    OperatorSnapshotFutures operatorSnapshotResult = new OperatorSnapshotFutures(DoneFuture.of(SnapshotResult.of(managedKeyedStateHandle)), DoneFuture.of(SnapshotResult.of(rawKeyedStateHandle)), DoneFuture.of(SnapshotResult.of(managedOperatorStateHandle)), DoneFuture.of(SnapshotResult.of(rawOperatorStateHandle)), DoneFuture.of(SnapshotResult.empty()), DoneFuture.of(SnapshotResult.empty()));
    try (MockEnvironment mockEnvironment = new MockEnvironmentBuilder().setTaskName("mock-task").setTaskStateManager(taskStateManager).build()) {
        RunningTask<MockStreamTask> task = runTask(() -> createMockStreamTask(mockEnvironment, operatorChain(streamOperatorWithSnapshot(operatorSnapshotResult))));
        MockStreamTask streamTask = task.streamTask;
        waitTaskIsRunning(streamTask, task.invocationFuture);
        final long checkpointId = 42L;
        streamTask.triggerCheckpointAsync(new CheckpointMetaData(checkpointId, 1L), CheckpointOptions.forCheckpointWithDefaultLocation());
        acknowledgeCheckpointLatch.await();
        ArgumentCaptor<TaskStateSnapshot> subtaskStateCaptor = ArgumentCaptor.forClass(TaskStateSnapshot.class);
        // check that the checkpoint has been completed
        verify(checkpointResponder).acknowledgeCheckpoint(any(JobID.class), any(ExecutionAttemptID.class), eq(checkpointId), any(CheckpointMetrics.class), subtaskStateCaptor.capture());
        TaskStateSnapshot subtaskStates = subtaskStateCaptor.getValue();
        OperatorSubtaskState subtaskState = subtaskStates.getSubtaskStateMappings().iterator().next().getValue();
        // check that the subtask state contains the expected state handles
        assertEquals(singleton(managedKeyedStateHandle), subtaskState.getManagedKeyedState());
        assertEquals(singleton(rawKeyedStateHandle), subtaskState.getRawKeyedState());
        assertEquals(singleton(managedOperatorStateHandle), subtaskState.getManagedOperatorState());
        assertEquals(singleton(rawOperatorStateHandle), subtaskState.getRawOperatorState());
        // check that the state handles have not been discarded
        verify(managedKeyedStateHandle, never()).discardState();
        verify(rawKeyedStateHandle, never()).discardState();
        verify(managedOperatorStateHandle, never()).discardState();
        verify(rawOperatorStateHandle, never()).discardState();
        streamTask.cancel();
        completeAcknowledge.trigger();
        // canceling the stream task after it has acknowledged the checkpoint should not discard
        // the state handles
        verify(managedKeyedStateHandle, never()).discardState();
        verify(rawKeyedStateHandle, never()).discardState();
        verify(managedOperatorStateHandle, never()).discardState();
        verify(rawOperatorStateHandle, never()).discardState();
        task.waitForTaskCompletion(true);
    }
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) TaskStateManagerImpl(org.apache.flink.runtime.state.TaskStateManagerImpl) MockEnvironmentBuilder(org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) InMemoryStateChangelogStorage(org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) TaskLocalStateStoreImpl(org.apache.flink.runtime.state.TaskLocalStateStoreImpl) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) TaskStateManager(org.apache.flink.runtime.state.TaskStateManager) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 17 with OperatorSnapshotFutures

use of org.apache.flink.streaming.api.operators.OperatorSnapshotFutures in project flink by apache.

the class StreamTaskTest method testUncaughtExceptionInAsynchronousCheckpointingOperation.

/**
 * Tests that uncaught exceptions in the async part of a checkpoint operation are forwarded to
 * the uncaught exception handler. See <a
 * href="https://issues.apache.org/jira/browse/FLINK-12889">FLINK-12889</a>.
 */
@Test
public void testUncaughtExceptionInAsynchronousCheckpointingOperation() throws Exception {
    final RuntimeException failingCause = new RuntimeException("Test exception");
    FailingDummyEnvironment failingDummyEnvironment = new FailingDummyEnvironment(failingCause);
    // mock the returned snapshots
    OperatorSnapshotFutures operatorSnapshotResult = new OperatorSnapshotFutures(ExceptionallyDoneFuture.of(failingCause), DoneFuture.of(SnapshotResult.empty()), DoneFuture.of(SnapshotResult.empty()), DoneFuture.of(SnapshotResult.empty()), DoneFuture.of(SnapshotResult.empty()), DoneFuture.of(SnapshotResult.empty()));
    final TestingUncaughtExceptionHandler uncaughtExceptionHandler = new TestingUncaughtExceptionHandler();
    RunningTask<MockStreamTask> task = runTask(() -> new MockStreamTask(failingDummyEnvironment, operatorChain(streamOperatorWithSnapshot(operatorSnapshotResult)), uncaughtExceptionHandler));
    MockStreamTask streamTask = task.streamTask;
    waitTaskIsRunning(streamTask, task.invocationFuture);
    streamTask.triggerCheckpointAsync(new CheckpointMetaData(42L, 1L), CheckpointOptions.forCheckpointWithDefaultLocation());
    final Throwable uncaughtException = uncaughtExceptionHandler.waitForUncaughtException();
    assertThat(uncaughtException, is(failingCause));
    streamTask.finishInput();
    task.waitForTaskCompletion(false);
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) TestingUncaughtExceptionHandler(org.apache.flink.util.concurrent.TestingUncaughtExceptionHandler) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) Test(org.junit.Test)

Example 18 with OperatorSnapshotFutures

use of org.apache.flink.streaming.api.operators.OperatorSnapshotFutures in project flink by apache.

the class StreamTaskTest method testEmptySubtaskStateLeadsToStatelessAcknowledgment.

/**
 * FLINK-5985
 *
 * <p>This test ensures that empty snapshots (no op/keyed stated whatsoever) will be reported as
 * stateless tasks. This happens by translating an empty {@link SubtaskState} into reporting
 * 'null' to #acknowledgeCheckpoint.
 */
@Test
public void testEmptySubtaskStateLeadsToStatelessAcknowledgment() throws Exception {
    // latch blocks until the async checkpoint thread acknowledges
    final OneShotLatch checkpointCompletedLatch = new OneShotLatch();
    final List<SubtaskState> checkpointResult = new ArrayList<>(1);
    CheckpointResponder checkpointResponder = mock(CheckpointResponder.class);
    doAnswer(new Answer() {

        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
            SubtaskState subtaskState = invocation.getArgument(4);
            checkpointResult.add(subtaskState);
            checkpointCompletedLatch.trigger();
            return null;
        }
    }).when(checkpointResponder).acknowledgeCheckpoint(any(JobID.class), any(ExecutionAttemptID.class), anyLong(), any(CheckpointMetrics.class), nullable(TaskStateSnapshot.class));
    TaskStateManager taskStateManager = new TaskStateManagerImpl(new JobID(1L, 2L), new ExecutionAttemptID(), mock(TaskLocalStateStoreImpl.class), new InMemoryStateChangelogStorage(), null, checkpointResponder);
    // mock the operator with empty snapshot result (all state handles are null)
    OneInputStreamOperator<String, String> statelessOperator = streamOperatorWithSnapshot(new OperatorSnapshotFutures());
    try (MockEnvironment mockEnvironment = new MockEnvironmentBuilder().setTaskStateManager(taskStateManager).build()) {
        RunningTask<MockStreamTask> task = runTask(() -> createMockStreamTask(mockEnvironment, operatorChain(statelessOperator)));
        waitTaskIsRunning(task.streamTask, task.invocationFuture);
        task.streamTask.triggerCheckpointAsync(new CheckpointMetaData(42L, 1L), CheckpointOptions.forCheckpointWithDefaultLocation());
        checkpointCompletedLatch.await(30, TimeUnit.SECONDS);
        // ensure that 'null' was acknowledged as subtask state
        Assert.assertNull(checkpointResult.get(0));
        task.streamTask.cancel();
        task.waitForTaskCompletion(true);
    }
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) TaskStateManagerImpl(org.apache.flink.runtime.state.TaskStateManagerImpl) MockEnvironmentBuilder(org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder) ArrayList(java.util.ArrayList) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) InMemoryStateChangelogStorage(org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) TaskLocalStateStoreImpl(org.apache.flink.runtime.state.TaskLocalStateStoreImpl) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) TaskStateManager(org.apache.flink.runtime.state.TaskStateManager) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Answer(org.mockito.stubbing.Answer) SubtaskState(org.apache.flink.runtime.checkpoint.SubtaskState) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) InvocationOnMock(org.mockito.invocation.InvocationOnMock) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 19 with OperatorSnapshotFutures

use of org.apache.flink.streaming.api.operators.OperatorSnapshotFutures in project flink by apache.

the class StreamTaskTest method testRestorePerformedFromInvoke.

@Test
public void testRestorePerformedFromInvoke() throws Exception {
    // given: the operator with empty snapshot result (all state handles are null)
    OneInputStreamOperator<String, String> statelessOperator = streamOperatorWithSnapshot(new OperatorSnapshotFutures());
    DummyEnvironment dummyEnvironment = new DummyEnvironment();
    // when: Launch the task.
    RunningTask<MockStreamTask> task = runTask(() -> createMockStreamTask(dummyEnvironment, operatorChain(statelessOperator)));
    waitTaskIsRunning(task.streamTask, task.invocationFuture);
    task.streamTask.cancel();
    // then: 'restore' was called even without explicit 'restore' invocation.
    assertThat(task.streamTask.restoreInvocationCount, is(1));
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) DummyEnvironment(org.apache.flink.runtime.operators.testutils.DummyEnvironment) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) Test(org.junit.Test)

Example 20 with OperatorSnapshotFutures

use of org.apache.flink.streaming.api.operators.OperatorSnapshotFutures in project flink by apache.

the class SubtaskCheckpointCoordinatorTest method testNotifyCheckpointAbortedBeforeAsyncPhase.

@Test
public void testNotifyCheckpointAbortedBeforeAsyncPhase() throws Exception {
    TestTaskStateManager stateManager = new TestTaskStateManager();
    MockEnvironment mockEnvironment = MockEnvironment.builder().setTaskStateManager(stateManager).build();
    try (SubtaskCheckpointCoordinatorImpl subtaskCheckpointCoordinator = (SubtaskCheckpointCoordinatorImpl) new MockSubtaskCheckpointCoordinatorBuilder().setEnvironment(mockEnvironment).setUnalignedCheckpointEnabled(true).build()) {
        CheckpointOperator checkpointOperator = new CheckpointOperator(new OperatorSnapshotFutures());
        final OperatorChain<String, AbstractStreamOperator<String>> operatorChain = operatorChain(checkpointOperator);
        long checkpointId = 42L;
        // notify checkpoint aborted before execution.
        subtaskCheckpointCoordinator.notifyCheckpointAborted(checkpointId, operatorChain, () -> true);
        assertEquals(1, subtaskCheckpointCoordinator.getAbortedCheckpointSize());
        subtaskCheckpointCoordinator.getChannelStateWriter().start(checkpointId, CheckpointOptions.forCheckpointWithDefaultLocation());
        subtaskCheckpointCoordinator.checkpointState(new CheckpointMetaData(checkpointId, System.currentTimeMillis()), CheckpointOptions.forCheckpointWithDefaultLocation(), new CheckpointMetricsBuilder(), operatorChain, false, () -> false);
        assertFalse(checkpointOperator.isCheckpointed());
        assertEquals(-1, stateManager.getReportedCheckpointId());
        assertEquals(0, subtaskCheckpointCoordinator.getAbortedCheckpointSize());
        assertEquals(0, subtaskCheckpointCoordinator.getAsyncCheckpointRunnableSize());
    }
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) TestTaskStateManager(org.apache.flink.runtime.state.TestTaskStateManager) CheckpointMetricsBuilder(org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) Test(org.junit.Test)

Aggregations

OperatorSnapshotFutures (org.apache.flink.streaming.api.operators.OperatorSnapshotFutures)21 Test (org.junit.Test)12 CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData)9 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)6 CheckpointException (org.apache.flink.runtime.checkpoint.CheckpointException)5 MockEnvironment (org.apache.flink.runtime.operators.testutils.MockEnvironment)5 HashMap (java.util.HashMap)4 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)4 TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)4 FlinkRuntimeException (org.apache.flink.util.FlinkRuntimeException)4 CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString)4 IOException (java.io.IOException)3 TimeoutException (java.util.concurrent.TimeoutException)3 CheckpointMetricsBuilder (org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder)3 DummyEnvironment (org.apache.flink.runtime.operators.testutils.DummyEnvironment)3 MockEnvironmentBuilder (org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder)3 AsynchronousException (org.apache.flink.runtime.taskmanager.AsynchronousException)3 Map (java.util.Map)2 ExecutionException (java.util.concurrent.ExecutionException)2 JobID (org.apache.flink.api.common.JobID)2