Search in sources :

Example 6 with SnapshotResult

use of org.apache.flink.runtime.state.SnapshotResult in project flink by apache.

the class StreamOperatorStateHandlerTest method testFailingBackendSnapshotMethod.

/**
 * Verifies the cleanup performed when the operator's {@code snapshotState} call throws: every
 * future handed to the OperatorSnapshotFutures must end up cancelled, and so must the raw keyed
 * and raw operator stream futures exposed by the StateSnapshotContextSynchronousImpl. After
 * disposing the handler, the backends are expected to report no registered states.
 */
@Test
public void testFailingBackendSnapshotMethod() throws Exception {
    final long checkpointId = 42L;
    final long timestamp = 1L;
    try (CloseableRegistry registry = new CloseableRegistry()) {
        // One cancelable future for each slot of the OperatorSnapshotFutures.
        RunnableFuture<SnapshotResult<KeyedStateHandle>> managedKeyedFuture =
                new CancelableFuture<>();
        RunnableFuture<SnapshotResult<KeyedStateHandle>> rawKeyedFuture =
                new CancelableFuture<>();
        RunnableFuture<SnapshotResult<OperatorStateHandle>> managedOperatorFuture =
                new CancelableFuture<>();
        RunnableFuture<SnapshotResult<OperatorStateHandle>> rawOperatorFuture =
                new CancelableFuture<>();
        RunnableFuture<SnapshotResult<StateObjectCollection<InputChannelStateHandle>>>
                inputChannelFuture = new CancelableFuture<>();
        RunnableFuture<SnapshotResult<StateObjectCollection<ResultSubpartitionStateHandle>>>
                resultSubpartitionFuture = new CancelableFuture<>();
        OperatorSnapshotFutures snapshotFutures =
                new OperatorSnapshotFutures(
                        managedKeyedFuture,
                        rawKeyedFuture,
                        managedOperatorFuture,
                        rawOperatorFuture,
                        inputChannelFuture,
                        resultSubpartitionFuture);

        StateSnapshotContextSynchronousImpl snapshotContext =
                new TestStateSnapshotContextSynchronousImpl(checkpointId, timestamp, registry);
        // Request both raw outputs up front; their stream futures are checked further below.
        snapshotContext.getRawKeyedOperatorStateOutput();
        snapshotContext.getRawOperatorStateOutput();

        StreamTaskStateInitializerImpl initializer =
                new StreamTaskStateInitializerImpl(
                        new MockEnvironmentBuilder().build(), new MemoryStateBackend());
        StreamOperatorStateContext operatorStateContext =
                initializer.streamOperatorStateContext(
                        new OperatorID(),
                        "whatever",
                        new TestProcessingTimeService(),
                        new UnUsedKeyContext(),
                        IntSerializer.INSTANCE,
                        registry,
                        new InterceptingOperatorMetricGroup(),
                        1.0,
                        false);
        StreamOperatorStateHandler handler =
                new StreamOperatorStateHandler(
                        operatorStateContext, new ExecutionConfig(), registry);

        final String keyedStateField = "keyedStateField";
        final String operatorStateField = "operatorStateField";
        CheckpointedStreamOperator failingOperator =
                new CheckpointedStreamOperator() {

                    @Override
                    public void initializeState(StateInitializationContext context)
                            throws Exception {
                        // Register one keyed and one operator state so the backends are
                        // non-empty.
                        context.getKeyedStateStore()
                                .getState(
                                        new ValueStateDescriptor<>(
                                                keyedStateField, LongSerializer.INSTANCE))
                                .update(42L);
                        context.getOperatorStateStore()
                                .getListState(
                                        new ListStateDescriptor<>(
                                                operatorStateField, LongSerializer.INSTANCE))
                                .add(42L);
                    }

                    @Override
                    public void snapshotState(StateSnapshotContext context) throws Exception {
                        throw new ExpectedTestException();
                    }
                };

        handler.setCurrentKey("44");
        handler.initializeOperatorState(failingOperator);

        // Sanity check: state got registered during initialization.
        assertThat(
                operatorStateContext.operatorStateBackend().getRegisteredStateNames(),
                is(not(empty())));
        assertThat(
                ((AbstractKeyedStateBackend<?>) operatorStateContext.keyedStateBackend())
                        .numKeyValueStatesByName(),
                equalTo(1));

        try {
            handler.snapshotState(
                    failingOperator,
                    Optional.of(operatorStateContext.internalTimerServiceManager()),
                    "42",
                    42,
                    42,
                    CheckpointOptions.forCheckpointWithDefaultLocation(),
                    new MemCheckpointStreamFactory(1024),
                    snapshotFutures,
                    snapshotContext,
                    false);
            fail("Exception expected.");
        } catch (CheckpointException e) {
            // CheckpointException wraps its cause in a SerializedThrowable, so the expected
            // failure has to be located by message rather than by type.
            boolean causedByExpectedFailure =
                    ExceptionUtils.findThrowableWithMessage(e, ExpectedTestException.MESSAGE)
                            .isPresent();
            if (!causedByExpectedFailure) {
                throw e;
            }
        }

        // Every future, including those owned by the snapshot context, must be cancelled.
        assertTrue(managedKeyedFuture.isCancelled());
        assertTrue(rawKeyedFuture.isCancelled());
        assertTrue(snapshotContext.getKeyedStateStreamFuture().isCancelled());
        assertTrue(managedOperatorFuture.isCancelled());
        assertTrue(rawOperatorFuture.isCancelled());
        assertTrue(snapshotContext.getOperatorStateStreamFuture().isCancelled());
        assertTrue(inputChannelFuture.isCancelled());
        assertTrue(resultSubpartitionFuture.isCancelled());

        handler.dispose();

        // After disposal no state may remain registered in either backend.
        assertThat(
                operatorStateContext.operatorStateBackend().getRegisteredBroadcastStateNames(),
                is(empty()));
        assertThat(
                operatorStateContext.operatorStateBackend().getRegisteredStateNames(),
                is(empty()));
        assertThat(
                ((AbstractKeyedStateBackend<?>) operatorStateContext.keyedStateBackend())
                        .numKeyValueStatesByName(),
                equalTo(0));
    }
}
Also used : MockEnvironmentBuilder(org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder) StateSnapshotContextSynchronousImpl(org.apache.flink.runtime.state.StateSnapshotContextSynchronousImpl) CheckpointedStreamOperator(org.apache.flink.streaming.api.operators.StreamOperatorStateHandler.CheckpointedStreamOperator) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) MemoryStateBackend(org.apache.flink.runtime.state.memory.MemoryStateBackend) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) CloseableRegistry(org.apache.flink.core.fs.CloseableRegistry) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) MemCheckpointStreamFactory(org.apache.flink.runtime.state.memory.MemCheckpointStreamFactory) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) InputChannelStateHandle(org.apache.flink.runtime.state.InputChannelStateHandle) ExpectedTestException(org.apache.flink.runtime.operators.testutils.ExpectedTestException) SnapshotResult(org.apache.flink.runtime.state.SnapshotResult) StateSnapshotContext(org.apache.flink.runtime.state.StateSnapshotContext) InterceptingOperatorMetricGroup(org.apache.flink.runtime.metrics.util.InterceptingOperatorMetricGroup) ResultSubpartitionStateHandle(org.apache.flink.runtime.state.ResultSubpartitionStateHandle) TestProcessingTimeService(org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService) Test(org.junit.Test)

Example 7 with SnapshotResult

use of org.apache.flink.runtime.state.SnapshotResult in project flink by apache.

the class BackendRestorerProcedureTest method testRestoreProcedureOrderAndFailure.

/**
 * Tests that the restore procedure tries the restore options in iteration order and, when an
 * attempt fails, retries with the next option as long as more options remain.
 */
@Test
public void testRestoreProcedureOrderAndFailure() throws Exception {
    CloseableRegistry registry = new CloseableRegistry();
    CheckpointStreamFactory streamFactory = new MemCheckpointStreamFactory(1024);
    ListStateDescriptor<Integer> descriptor =
            new ListStateDescriptor<>("test-state", Integer.class);

    // Fill a fresh backend with the values 0..3 and snapshot it.
    OperatorStateBackend sourceBackend = backendSupplier.apply(Collections.emptyList());
    SnapshotResult<OperatorStateHandle> snapshotOutcome;
    try {
        ListState<Integer> sourceState = sourceBackend.getListState(descriptor);
        for (int value = 0; value < 4; value++) {
            sourceState.add(value);
        }
        RunnableFuture<SnapshotResult<OperatorStateHandle>> snapshotFuture =
                sourceBackend.snapshot(
                        0L,
                        0L,
                        streamFactory,
                        CheckpointOptions.forCheckpointWithDefaultLocation());
        snapshotFuture.run();
        snapshotOutcome = snapshotFuture.get();
    } finally {
        sourceBackend.close();
        sourceBackend.dispose();
    }

    // Three restore options: a bare mock, a spy on the real snapshot handle, and another bare
    // mock that the verifications below expect to stay untouched.
    OperatorStateHandle firstFailHandle = mock(OperatorStateHandle.class);
    OperatorStateHandle secondSuccessHandle = spy(snapshotOutcome.getJobManagerOwnedSnapshot());
    OperatorStateHandle thirdNotUsedHandle = mock(OperatorStateHandle.class);
    List<StateObjectCollection<OperatorStateHandle>> restoreOptions =
            Arrays.asList(
                    new StateObjectCollection<>(Collections.singletonList(firstFailHandle)),
                    new StateObjectCollection<>(Collections.singletonList(secondSuccessHandle)),
                    new StateObjectCollection<>(Collections.singletonList(thirdNotUsedHandle)));

    BackendRestorerProcedure<OperatorStateBackend, OperatorStateHandle> procedure =
            new BackendRestorerProcedure<>(backendSupplier, registry, "test op state backend");
    OperatorStateBackend restoredBackend = procedure.createAndRestore(restoreOptions);
    Assert.assertNotNull(restoredBackend);

    try {
        verify(firstFailHandle).openInputStream();
        verify(secondSuccessHandle).openInputStream();
        verifyZeroInteractions(thirdNotUsedHandle);

        // The restored state must contain exactly 0..3, in insertion order.
        ListState<Integer> restoredState = restoredBackend.getListState(descriptor);
        Iterator<Integer> restoredValues = restoredState.get().iterator();
        for (int expected = 0; expected < 4; expected++) {
            Assert.assertEquals(expected, (int) restoredValues.next());
        }
        Assert.assertFalse(restoredValues.hasNext());
    } finally {
        restoredBackend.close();
        restoredBackend.dispose();
    }
}
Also used : MemCheckpointStreamFactory(org.apache.flink.runtime.state.memory.MemCheckpointStreamFactory) CheckpointStreamFactory(org.apache.flink.runtime.state.CheckpointStreamFactory) SnapshotResult(org.apache.flink.runtime.state.SnapshotResult) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) CloseableRegistry(org.apache.flink.core.fs.CloseableRegistry) StateObjectCollection(org.apache.flink.runtime.checkpoint.StateObjectCollection) OperatorStateBackend(org.apache.flink.runtime.state.OperatorStateBackend) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) Test(org.junit.Test)

Example 8 with SnapshotResult

use of org.apache.flink.runtime.state.SnapshotResult in project flink by apache.

the class SavepointStateBackendSwitchTestBase method takeSavepoint.

/**
 * Fills the given keyed backend with map, value and list state under several keys and
 * namespaces, registers a few timers, takes a canonical-format savepoint and serializes the
 * resulting snapshot to {@code pathToWrite}.
 */
private void takeSavepoint(CheckpointableKeyedStateBackend<String> keyedBackend, File pathToWrite, MapStateDescriptor<Long, Long> stateDescr, ValueStateDescriptor<Long> valueStateDescriptor, ListStateDescriptor<Long> listStateDescriptor, Integer namespace1, Integer namespace2, Integer namespace3, Integer namespace4) throws Exception {
    InternalMapState<String, Integer, Long, Long> mapState =
            keyedBackend.createInternalState(IntSerializer.INSTANCE, stateDescr);
    InternalValueState<String, Integer, Long> valueState =
            keyedBackend.createInternalState(IntSerializer.INSTANCE, valueStateDescriptor);
    InternalListState<String, Integer, Long> listState =
            keyedBackend.createInternalState(IntSerializer.INSTANCE, listStateDescriptor);

    // --- key "abc" ---
    keyedBackend.setCurrentKey("abc");

    mapState.setCurrentNamespace(namespace1);
    mapState.put(33L, 33L);
    mapState.put(55L, 55L);

    mapState.setCurrentNamespace(namespace2);
    mapState.put(22L, 22L);
    mapState.put(11L, 11L);

    listState.setCurrentNamespace(namespace2);
    for (long element = 4L; element <= 6L; element++) {
        listState.add(element);
    }

    mapState.setCurrentNamespace(namespace3);
    mapState.put(44L, 44L);

    // --- key "mno" ---
    keyedBackend.setCurrentKey("mno");

    mapState.setCurrentNamespace(namespace3);
    for (long entry = 11L; entry <= 55L; entry += 11L) {
        mapState.put(entry, entry);
    }

    valueState.setCurrentNamespace(namespace3);
    valueState.update(1239L);

    listState.setCurrentNamespace(namespace3);
    for (long element = 1L; element <= 3L; element++) {
        listState.add(element);
    }

    mapState.setCurrentNamespace(namespace4);
    mapState.put(1L, 1L);
    // HEAP state backend will keep an empty map as an entry in the underlying State Table;
    // we should skip such entries when serializing, so empty the map again here.
    for (Iterator<Map.Entry<Long, Long>> entries = mapState.iterator(); entries.hasNext(); ) {
        entries.next();
        entries.remove();
    }

    // Timers registered under the "event-time" queue.
    KeyGroupedInternalPriorityQueue<TimerHeapInternalTimer<String, Integer>> timers =
            keyedBackend.create(
                    "event-time",
                    new TimerSerializer<>(
                            keyedBackend.getKeySerializer(), IntSerializer.INSTANCE));
    timers.add(new TimerHeapInternalTimer<>(1234L, "mno", namespace3));
    timers.add(new TimerHeapInternalTimer<>(2345L, "mno", namespace2));
    timers.add(new TimerHeapInternalTimer<>(3456L, "mno", namespace3));

    SnapshotStrategyRunner<KeyedStateHandle, ? extends FullSnapshotResources<?>> runner =
            StreamOperatorStateHandler.prepareCanonicalSavepoint(
                    keyedBackend, new CloseableRegistry());
    RunnableFuture<SnapshotResult<KeyedStateHandle>> savepointFuture =
            runner.snapshot(
                    0L,
                    0L,
                    new MemCheckpointStreamFactory(4 * 1024 * 1024),
                    new CheckpointOptions(
                            SavepointType.savepoint(SavepointFormatType.CANONICAL),
                            CheckpointStorageLocationReference.getDefault()));
    savepointFuture.run();

    // Persist the snapshot result so it can be read back later from the file.
    try (BufferedOutputStream fileOut =
            new BufferedOutputStream(new FileOutputStream(pathToWrite))) {
        InstantiationUtil.serializeObject(fileOut, savepointFuture.get());
    }
}
Also used : TimerHeapInternalTimer(org.apache.flink.streaming.api.operators.TimerHeapInternalTimer) SnapshotResult(org.apache.flink.runtime.state.SnapshotResult) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) CloseableRegistry(org.apache.flink.core.fs.CloseableRegistry) MemCheckpointStreamFactory(org.apache.flink.runtime.state.memory.MemCheckpointStreamFactory) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) FileOutputStream(java.io.FileOutputStream) BufferedOutputStream(java.io.BufferedOutputStream)

Example 9 with SnapshotResult

use of org.apache.flink.runtime.state.SnapshotResult in project flink by apache.

the class StreamTaskTest method testAsyncCheckpointingConcurrentCloseBeforeAcknowledge.

/**
 * FLINK-5667
 *
 * <p>Checks that cancelling the task while a checkpoint has not yet been acknowledged discards
 * that checkpoint's state handles and that no acknowledge message reaches the
 * CheckpointCoordinator. This situation can only arise when the cancel call runs before
 * Environment.acknowledgeCheckpoint().
 */
@Test
public void testAsyncCheckpointingConcurrentCloseBeforeAcknowledge() throws Exception {
    final TestingKeyedStateHandle managedKeyedHandle = new TestingKeyedStateHandle();
    final TestingKeyedStateHandle rawKeyedHandle = new TestingKeyedStateHandle();
    final TestingOperatorStateHandle managedOperatorHandle = new TestingOperatorStateHandle();
    final TestingOperatorStateHandle rawOperatorHandle = new TestingOperatorStateHandle();

    // Only the raw keyed state uses a blocking future; all other slots are already done.
    final BlockingRunnableFuture<SnapshotResult<KeyedStateHandle>> blockingRawKeyedFuture =
            new BlockingRunnableFuture<>(2, SnapshotResult.of(rawKeyedHandle));
    OperatorSnapshotFutures snapshotFutures =
            new OperatorSnapshotFutures(
                    DoneFuture.of(SnapshotResult.of(managedKeyedHandle)),
                    blockingRawKeyedFuture,
                    DoneFuture.of(SnapshotResult.of(managedOperatorHandle)),
                    DoneFuture.of(SnapshotResult.of(rawOperatorHandle)),
                    DoneFuture.of(SnapshotResult.empty()),
                    DoneFuture.of(SnapshotResult.empty()));

    final OneInputStreamOperator<String, String> operator =
            streamOperatorWithSnapshot(snapshotFutures);
    final AcknowledgeDummyEnvironment environment = new AcknowledgeDummyEnvironment();
    RunningTask<MockStreamTask> runningTask =
            runTask(() -> createMockStreamTask(environment, operatorChain(operator)));
    waitTaskIsRunning(runningTask.streamTask, runningTask.invocationFuture);

    final long checkpointId = 42L;
    runningTask.streamTask.triggerCheckpointAsync(
            new CheckpointMetaData(checkpointId, 1L),
            CheckpointOptions.forCheckpointWithDefaultLocation());

    // Cancel the task only after the blocking future reports that it is being run.
    blockingRawKeyedFuture.awaitRun();
    runningTask.streamTask.cancel();

    final FutureUtils.ConjunctFuture<Void> allDiscarded =
            FutureUtils.waitForAll(
                    asList(
                            managedKeyedHandle.getDiscardFuture(),
                            rawKeyedHandle.getDiscardFuture(),
                            managedOperatorHandle.getDiscardFuture(),
                            rawOperatorHandle.getDiscardFuture()));
    // blocks until every state handle has been discarded
    allDiscarded.get();

    try {
        environment.getAcknowledgeCheckpointFuture().get(10L, TimeUnit.MILLISECONDS);
        fail("The checkpoint should not get acknowledged.");
    } catch (TimeoutException expected) {
        // expected: the acknowledge future must not have completed
    }
    runningTask.waitForTaskCompletion(true);
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) SnapshotResult(org.apache.flink.runtime.state.SnapshotResult) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)

Example 10 with SnapshotResult

use of org.apache.flink.runtime.state.SnapshotResult in project flink by apache.

the class StreamTaskTest method testFailingAsyncCheckpointRunnable.

/**
 * Tests that when the AsyncCheckpointRunnable fails, every operator snapshot result is
 * cancelled and all non partitioned state handles are discarded.
 */
@Test
public void testFailingAsyncCheckpointRunnable() throws Exception {
    // Mocked snapshot futures for the three chained operators.
    OperatorSnapshotFutures firstSnapshot = mock(OperatorSnapshotFutures.class);
    OperatorSnapshotFutures secondSnapshot = mock(OperatorSnapshotFutures.class);
    OperatorSnapshotFutures thirdSnapshot = mock(OperatorSnapshotFutures.class);

    // The third operator's raw operator-state future throws when queried for its result.
    RunnableFuture<SnapshotResult<OperatorStateHandle>> failingFuture =
            mock(RunnableFuture.class);
    when(failingFuture.get())
            .thenThrow(new ExecutionException(new Exception("Test exception")));
    when(thirdSnapshot.getOperatorStateRawFuture()).thenReturn(failingFuture);

    try (MockEnvironment environment = new MockEnvironmentBuilder().build()) {
        RunningTask<MockStreamTask> runningTask =
                runTask(
                        () ->
                                createMockStreamTask(
                                        environment,
                                        operatorChain(
                                                streamOperatorWithSnapshot(firstSnapshot),
                                                streamOperatorWithSnapshot(secondSnapshot),
                                                streamOperatorWithSnapshot(thirdSnapshot))));
        MockStreamTask mockTask = runningTask.streamTask;
        waitTaskIsRunning(mockTask, runningTask.invocationFuture);

        environment.setExpectedExternalFailureCause(Throwable.class);
        mockTask.triggerCheckpointAsync(
                        new CheckpointMetaData(42L, 1L),
                        CheckpointOptions.forCheckpointWithDefaultLocation())
                .get();

        // Shut the async pool down and wait, so the async checkpoint part has finished.
        ExecutorService asyncPool = mockTask.getAsyncOperationsThreadPool();
        asyncPool.shutdown();
        boolean terminatedInTime = asyncPool.awaitTermination(10000L, TimeUnit.MILLISECONDS);
        if (!terminatedInTime) {
            fail("Executor did not shut down within the given timeout. This indicates that the " + "checkpointing did not resume.");
        }

        assertTrue(environment.getActualExternalFailureCause().isPresent());

        // Each operator's snapshot futures must have been cancelled.
        verify(firstSnapshot).cancel();
        verify(secondSnapshot).cancel();
        verify(thirdSnapshot).cancel();

        mockTask.finishInput();
        runningTask.waitForTaskCompletion(false);
    }
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) MockEnvironmentBuilder(org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder) SnapshotResult(org.apache.flink.runtime.state.SnapshotResult) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) ExecutorService(java.util.concurrent.ExecutorService) ExecutionException(java.util.concurrent.ExecutionException) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) FunctionWithException(org.apache.flink.util.function.FunctionWithException) AsynchronousException(org.apache.flink.runtime.taskmanager.AsynchronousException) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) RunnableWithException(org.apache.flink.util.function.RunnableWithException) TimeoutException(java.util.concurrent.TimeoutException) ExpectedTestException(org.apache.flink.runtime.operators.testutils.ExpectedTestException) SupplierWithException(org.apache.flink.util.function.SupplierWithException) BiConsumerWithException(org.apache.flink.util.function.BiConsumerWithException) ExpectedException(org.junit.rules.ExpectedException) Test(org.junit.Test)

Aggregations

SnapshotResult (org.apache.flink.runtime.state.SnapshotResult)15 Test (org.junit.Test)13 KeyedStateHandle (org.apache.flink.runtime.state.KeyedStateHandle)8 IOException (java.io.IOException)4 CloseableRegistry (org.apache.flink.core.fs.CloseableRegistry)4 MemCheckpointStreamFactory (org.apache.flink.runtime.state.memory.MemCheckpointStreamFactory)4 Map (java.util.Map)3 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)3 StateObjectCollection (org.apache.flink.runtime.checkpoint.StateObjectCollection)3 InputChannelStateHandle (org.apache.flink.runtime.state.InputChannelStateHandle)3 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)3 OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle)3 ResultSubpartitionStateHandle (org.apache.flink.runtime.state.ResultSubpartitionStateHandle)3 SupplierWithException (org.apache.flink.util.function.SupplierWithException)3 HashMap (java.util.HashMap)2 ExecutionException (java.util.concurrent.ExecutionException)2 TimeoutException (java.util.concurrent.TimeoutException)2 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)2 ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor)2 CheckpointException (org.apache.flink.runtime.checkpoint.CheckpointException)2