Search in sources :

Example 11 with CheckpointStreamFactory

use of org.apache.flink.runtime.state.CheckpointStreamFactory in project flink by apache.

the class AbstractStreamOperator method snapshotState.

/**
 * Snapshots the state of this operator for the given checkpoint.
 *
 * <p>The raw keyed/operator state futures are taken from the snapshot context after the
 * {@code snapshotState(StateSnapshotContext)} hook has run; the managed state futures are
 * obtained from the operator state backend and the keyed state backend, when present.
 *
 * @param checkpointId      id of the checkpoint being taken
 * @param timestamp         timestamp of the checkpoint
 * @param checkpointOptions options used to obtain the stream factory and passed to the backends
 * @return the result object holding the (possibly still running) snapshot futures
 * @throws Exception if any step fails; the partially built result is cancelled first and a
 *                   failure of that cancellation is attached as a suppressed exception
 */
@Override
public final OperatorSnapshotResult snapshotState(long checkpointId, long timestamp, CheckpointOptions checkpointOptions) throws Exception {
    final KeyGroupRange keyGroupRange;
    if (keyedStateBackend != null) {
        keyGroupRange = keyedStateBackend.getKeyGroupRange();
    } else {
        // no keyed backend: the raw keyed stream covers an empty range
        keyGroupRange = KeyGroupRange.EMPTY_KEY_GROUP_RANGE;
    }

    final OperatorSnapshotResult result = new OperatorSnapshotResult();
    final CheckpointStreamFactory streamFactory = getCheckpointStreamFactory(checkpointOptions);

    try (StateSnapshotContextSynchronousImpl context = new StateSnapshotContextSynchronousImpl(
            checkpointId,
            timestamp,
            streamFactory,
            keyGroupRange,
            getContainingTask().getCancelables())) {

        // user/operator hook writes raw state through the context
        snapshotState(context);

        result.setKeyedStateRawFuture(context.getKeyedStateStreamFuture());
        result.setOperatorStateRawFuture(context.getOperatorStateStreamFuture());

        if (operatorStateBackend != null) {
            result.setOperatorStateManagedFuture(
                operatorStateBackend.snapshot(checkpointId, timestamp, streamFactory, checkpointOptions));
        }

        if (keyedStateBackend != null) {
            result.setKeyedStateManagedFuture(
                keyedStateBackend.snapshot(checkpointId, timestamp, streamFactory, checkpointOptions));
        }
    } catch (Exception cause) {
        // release whatever has been started so far; keep the original failure as the primary one
        try {
            result.cancel();
        } catch (Exception cancelFailure) {
            cause.addSuppressed(cancelFailure);
        }

        final String message = "Could not complete snapshot " + checkpointId + " for operator " + getOperatorName() + '.';
        throw new Exception(message, cause);
    }

    return result;
}
Also used : CheckpointStreamFactory(org.apache.flink.runtime.state.CheckpointStreamFactory) StateSnapshotContextSynchronousImpl(org.apache.flink.runtime.state.StateSnapshotContextSynchronousImpl) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ConcurrentModificationException(java.util.ConcurrentModificationException) IOException(java.io.IOException)

Example 12 with CheckpointStreamFactory

use of org.apache.flink.runtime.state.CheckpointStreamFactory in project flink by apache.

the class StreamTaskTest method testAsyncCheckpointingConcurrentCloseAfterAcknowledge.

/**
 * FLINK-5667
 *
 * <p>Tests that a concurrent cancel operation does not discard the state handles of an
 * acknowledged checkpoint. The situation can only happen if the cancel call is executed
 * after Environment.acknowledgeCheckpoint() and before the
 * CloseableRegistry.unregisterClosable() call.
 *
 * <p>Outline: the mocked environment blocks inside acknowledgeCheckpoint() on a latch, the
 * test cancels the task while that call is blocked, then releases the latch and verifies
 * that none of the four state handles had discardState() invoked.
 */
@Test
public void testAsyncCheckpointingConcurrentCloseAfterAcknowledge() throws Exception {
    final long checkpointId = 42L;
    final long timestamp = 1L;
    // triggered by the stubbed acknowledgeCheckpoint() once it has been entered
    final OneShotLatch acknowledgeCheckpointLatch = new OneShotLatch();
    // triggered by the test to let the stubbed acknowledgeCheckpoint() return
    final OneShotLatch completeAcknowledge = new OneShotLatch();
    TaskInfo mockTaskInfo = mock(TaskInfo.class);
    when(mockTaskInfo.getTaskNameWithSubtasks()).thenReturn("foobar");
    when(mockTaskInfo.getIndexOfThisSubtask()).thenReturn(0);
    Environment mockEnvironment = mock(Environment.class);
    when(mockEnvironment.getTaskInfo()).thenReturn(mockTaskInfo);
    // stub acknowledgeCheckpoint(): signal that it was reached, then block until released
    doAnswer(new Answer() {

        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
            acknowledgeCheckpointLatch.trigger();
            // block here so that we can issue the concurrent cancel call
            completeAcknowledge.await();
            return null;
        }
    }).when(mockEnvironment).acknowledgeCheckpoint(anyLong(), any(CheckpointMetrics.class), any(SubtaskState.class));
    // partial mock: real StreamTask methods are invoked, required fields are injected below
    StreamTask<?, AbstractStreamOperator<?>> streamTask = mock(StreamTask.class, Mockito.CALLS_REAL_METHODS);
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, timestamp);
    streamTask.setEnvironment(mockEnvironment);
    StreamOperator<?> streamOperator = mock(StreamOperator.class, withSettings().extraInterfaces(StreamCheckpointedOperator.class));
    // one mocked handle per state kind so that discardState() calls can be verified individually
    KeyGroupsStateHandle managedKeyedStateHandle = mock(KeyGroupsStateHandle.class);
    KeyGroupsStateHandle rawKeyedStateHandle = mock(KeyGroupsStateHandle.class);
    OperatorStateHandle managedOperatorStateHandle = mock(OperatorStateHandle.class);
    OperatorStateHandle rawOperatorStateHandle = mock(OperatorStateHandle.class);
    // the operator returns already-completed futures wrapping the mocked handles
    OperatorSnapshotResult operatorSnapshotResult = new OperatorSnapshotResult(new DoneFuture<>(managedKeyedStateHandle), new DoneFuture<>(rawKeyedStateHandle), new DoneFuture<>(managedOperatorStateHandle), new DoneFuture<>(rawOperatorStateHandle));
    when(streamOperator.snapshotState(anyLong(), anyLong(), any(CheckpointOptions.class))).thenReturn(operatorSnapshotResult);
    // operator chain consisting of the single mocked operator
    StreamOperator<?>[] streamOperators = { streamOperator };
    OperatorChain<Void, AbstractStreamOperator<Void>> operatorChain = mock(OperatorChain.class);
    when(operatorChain.getAllOperators()).thenReturn(streamOperators);
    // mocked state backend -> stream factory -> output stream -> stream state handle
    StreamStateHandle streamStateHandle = mock(StreamStateHandle.class);
    CheckpointStreamFactory.CheckpointStateOutputStream outStream = mock(CheckpointStreamFactory.CheckpointStateOutputStream.class);
    when(outStream.closeAndGetHandle()).thenReturn(streamStateHandle);
    CheckpointStreamFactory mockStreamFactory = mock(CheckpointStreamFactory.class);
    when(mockStreamFactory.createCheckpointStateOutputStream(anyLong(), anyLong())).thenReturn(outStream);
    AbstractStateBackend mockStateBackend = mock(AbstractStateBackend.class);
    when(mockStateBackend.createStreamFactory(any(JobID.class), anyString())).thenReturn(mockStreamFactory);
    // inject the internal StreamTask fields by name; the mock's constructor was never run
    Whitebox.setInternalState(streamTask, "isRunning", true);
    Whitebox.setInternalState(streamTask, "lock", new Object());
    Whitebox.setInternalState(streamTask, "operatorChain", operatorChain);
    Whitebox.setInternalState(streamTask, "cancelables", new CloseableRegistry());
    // NOTE(review): this single-thread executor is not shut down anywhere in this method;
    // presumably streamTask.cancel() or test teardown handles it - verify.
    Whitebox.setInternalState(streamTask, "asyncOperationsThreadPool", Executors.newFixedThreadPool(1));
    Whitebox.setInternalState(streamTask, "configuration", new StreamConfig(new Configuration()));
    Whitebox.setInternalState(streamTask, "stateBackend", mockStateBackend);
    streamTask.triggerCheckpoint(checkpointMetaData, CheckpointOptions.forFullCheckpoint());
    // wait until the stubbed acknowledgeCheckpoint() has been entered (it stays blocked)
    acknowledgeCheckpointLatch.await();
    ArgumentCaptor<SubtaskState> subtaskStateCaptor = ArgumentCaptor.forClass(SubtaskState.class);
    // check that the checkpoint has been completed
    verify(mockEnvironment).acknowledgeCheckpoint(eq(checkpointId), any(CheckpointMetrics.class), subtaskStateCaptor.capture());
    SubtaskState subtaskState = subtaskStateCaptor.getValue();
    // check that the subtask state contains the expected state handles
    assertEquals(managedKeyedStateHandle, subtaskState.getManagedKeyedState());
    assertEquals(rawKeyedStateHandle, subtaskState.getRawKeyedState());
    assertEquals(new ChainedStateHandle<>(Collections.singletonList(managedOperatorStateHandle)), subtaskState.getManagedOperatorState());
    assertEquals(new ChainedStateHandle<>(Collections.singletonList(rawOperatorStateHandle)), subtaskState.getRawOperatorState());
    // check that the state handles have not been discarded
    verify(managedKeyedStateHandle, never()).discardState();
    verify(rawKeyedStateHandle, never()).discardState();
    verify(managedOperatorStateHandle, never()).discardState();
    verify(rawOperatorStateHandle, never()).discardState();
    // cancel while acknowledgeCheckpoint() is still blocked, then let it return
    streamTask.cancel();
    completeAcknowledge.trigger();
    // canceling the stream task after it has acknowledged the checkpoint should not discard
    // the state handles
    // NOTE(review): these verifications run right after the latch is released, without
    // waiting for the checkpointing thread to finish; confirm a late discard cannot be missed.
    verify(managedKeyedStateHandle, never()).discardState();
    verify(rawKeyedStateHandle, never()).discardState();
    verify(managedOperatorStateHandle, never()).discardState();
    verify(rawOperatorStateHandle, never()).discardState();
}
Also used : Configuration(org.apache.flink.configuration.Configuration) OperatorSnapshotResult(org.apache.flink.streaming.api.operators.OperatorSnapshotResult) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CloseableRegistry(org.apache.flink.core.fs.CloseableRegistry) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) TaskInfo(org.apache.flink.api.common.TaskInfo) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) AbstractStateBackend(org.apache.flink.runtime.state.AbstractStateBackend) CheckpointStreamFactory(org.apache.flink.runtime.state.CheckpointStreamFactory) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) StreamCheckpointedOperator(org.apache.flink.streaming.api.operators.StreamCheckpointedOperator) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) SubtaskState(org.apache.flink.runtime.checkpoint.SubtaskState) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) Environment(org.apache.flink.runtime.execution.Environment) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest)

Example 13 with CheckpointStreamFactory

use of org.apache.flink.runtime.state.CheckpointStreamFactory in project flink by apache.

the class HeapKeyedStateBackend method snapshot.

/**
 * Writes a snapshot of all registered key/value state tables into one checkpoint stream.
 *
 * <p>The work is split in two parts. The synchronous part (executed by the caller of this
 * method) collects the serialization meta data and creates one snapshot per state table.
 * The second part - the returned task - opens the checkpoint stream, writes the meta data
 * followed by the contents of every key group, and closes the stream. The task is executed
 * inline before returning when {@code asynchronousSnapshots} is {@code false}; otherwise it is
 * returned unexecuted and is run by whoever receives it.
 *
 * <p>Because the task may run after this method has returned, it only works on the table
 * snapshots captured in the synchronous part and never reads the live {@code stateTables} map.
 *
 * @param checkpointId      id of the checkpoint, passed to the stream factory
 * @param timestamp         timestamp of the checkpoint, passed to the stream factory
 * @param streamFactory     factory used to create the checkpoint output stream
 * @param checkpointOptions not used by this implementation
 * @return a future yielding the handle to the written key groups, or an already completed
 *         future with a {@code null} value when {@code hasRegisteredState()} is {@code false}
 * @throws Exception if the synchronous part fails or, for synchronous snapshots, is otherwise
 *                   raised while setting up the task
 */
@Override
@SuppressWarnings("unchecked")
public RunnableFuture<KeyGroupsStateHandle> snapshot(final long checkpointId, final long timestamp, final CheckpointStreamFactory streamFactory, CheckpointOptions checkpointOptions) throws Exception {
    if (!hasRegisteredState()) {
        return DoneFuture.nullValue();
    }
    long syncStartTime = System.currentTimeMillis();
    // state ids are written as shorts into the stream (see writeShort below)
    Preconditions.checkState(stateTables.size() <= Short.MAX_VALUE, "Too many KV-States: " + stateTables.size() + ". Currently at most " + Short.MAX_VALUE + " states are supported");
    List<KeyedBackendSerializationProxy.StateMetaInfo<?, ?>> metaInfoProxyList = new ArrayList<>(stateTables.size());
    // Table snapshots in the same order as metaInfoProxyList; the list index of a snapshot is
    // the id of its state in the written stream.
    final List<StateTableSnapshot> cowStateStableSnapshots = new ArrayList<>(stateTables.size());
    for (StateTable<K, ?, ?> stateTable : stateTables.values()) {
        RegisteredBackendStateMetaInfo<?, ?> metaInfo = stateTable.getMetaInfo();
        KeyedBackendSerializationProxy.StateMetaInfo<?, ?> metaInfoProxy = new KeyedBackendSerializationProxy.StateMetaInfo(metaInfo.getStateType(), metaInfo.getName(), metaInfo.getNamespaceSerializer(), metaInfo.getStateSerializer());
        metaInfoProxyList.add(metaInfoProxy);
        cowStateStableSnapshots.add(stateTable.createSnapshot());
    }
    final KeyedBackendSerializationProxy serializationProxy = new KeyedBackendSerializationProxy(keySerializer, metaInfoProxyList);
    //--------------------------------------------------- this becomes the end of sync part
    // implementation of the async IO operation, based on FutureTask
    final AbstractAsyncIOCallable<KeyGroupsStateHandle, CheckpointStreamFactory.CheckpointStateOutputStream> ioCallable = new AbstractAsyncIOCallable<KeyGroupsStateHandle, CheckpointStreamFactory.CheckpointStateOutputStream>() {

        // true between a successful open of the stream and whoever closes it first
        // (performOperation on success, done() otherwise)
        private final AtomicBoolean open = new AtomicBoolean(false);

        @Override
        public CheckpointStreamFactory.CheckpointStateOutputStream openIOHandle() throws Exception {
            if (open.compareAndSet(false, true)) {
                CheckpointStreamFactory.CheckpointStateOutputStream stream = streamFactory.createCheckpointStateOutputStream(checkpointId, timestamp);
                try {
                    // register so that the stream can be closed from outside on cancellation
                    cancelStreamRegistry.registerClosable(stream);
                    return stream;
                } catch (Exception ex) {
                    open.set(false);
                    throw ex;
                }
            } else {
                throw new IOException("Operation already opened.");
            }
        }

        @Override
        public KeyGroupsStateHandle performOperation() throws Exception {
            long asyncStartTime = System.currentTimeMillis();
            CheckpointStreamFactory.CheckpointStateOutputStream stream = getIoHandle();
            DataOutputViewStreamWrapper outView = new DataOutputViewStreamWrapper(stream);
            serializationProxy.write(outView);
            final int numberOfKeyGroups = keyGroupRange.getNumberOfKeyGroups();
            final int numberOfStates = cowStateStableSnapshots.size();
            long[] keyGroupRangeOffsets = new long[numberOfKeyGroups];
            for (int keyGroupPos = 0; keyGroupPos < numberOfKeyGroups; ++keyGroupPos) {
                int keyGroupId = keyGroupRange.getKeyGroupId(keyGroupPos);
                // remember where this key group starts in the stream
                keyGroupRangeOffsets[keyGroupPos] = stream.getPos();
                outView.writeInt(keyGroupId);
                // iterate the captured snapshots only; the live state table map may have
                // changed since the synchronous part finished
                for (int stateId = 0; stateId < numberOfStates; ++stateId) {
                    outView.writeShort(stateId);
                    cowStateStableSnapshots.get(stateId).writeMappingsInKeyGroup(outView, keyGroupId);
                }
            }
            if (open.compareAndSet(true, false)) {
                StreamStateHandle streamStateHandle = stream.closeAndGetHandle();
                KeyGroupRangeOffsets offsets = new KeyGroupRangeOffsets(keyGroupRange, keyGroupRangeOffsets);
                final KeyGroupsStateHandle keyGroupsStateHandle = new KeyGroupsStateHandle(offsets, streamStateHandle);
                if (asynchronousSnapshots) {
                    LOG.info("Heap backend snapshot ({}, asynchronous part) in thread {} took {} ms.", streamFactory, Thread.currentThread(), (System.currentTimeMillis() - asyncStartTime));
                }
                return keyGroupsStateHandle;
            } else {
                throw new IOException("Checkpoint stream already closed.");
            }
        }

        @Override
        public void done(boolean canceled) {
            // only close here if performOperation did not already close the stream
            if (open.compareAndSet(true, false)) {
                CheckpointStreamFactory.CheckpointStateOutputStream stream = getIoHandle();
                if (null != stream) {
                    cancelStreamRegistry.unregisterClosable(stream);
                    IOUtils.closeQuietly(stream);
                }
            }
            for (StateTableSnapshot snapshot : cowStateStableSnapshots) {
                snapshot.release();
            }
        }
    };
    AsyncStoppableTaskWithCallback<KeyGroupsStateHandle> task = AsyncStoppableTaskWithCallback.from(ioCallable);
    if (!asynchronousSnapshots) {
        // synchronous mode: do all the IO right here, the caller receives a finished task
        task.run();
    }
    LOG.info("Heap backend snapshot ({}, synchronous part) in thread {} took {} ms.", streamFactory, Thread.currentThread(), (System.currentTimeMillis() - syncStartTime));
    return task;
}
Also used : RegisteredBackendStateMetaInfo(org.apache.flink.runtime.state.RegisteredBackendStateMetaInfo) HashMap(java.util.HashMap) KeyGroupRangeOffsets(org.apache.flink.runtime.state.KeyGroupRangeOffsets) ArrayList(java.util.ArrayList) KeyedBackendSerializationProxy(org.apache.flink.runtime.state.KeyedBackendSerializationProxy) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) CheckpointStreamFactory(org.apache.flink.runtime.state.CheckpointStreamFactory) IOException(java.io.IOException) AbstractAsyncIOCallable(org.apache.flink.runtime.io.async.AbstractAsyncIOCallable) IOException(java.io.IOException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) DataOutputViewStreamWrapper(org.apache.flink.core.memory.DataOutputViewStreamWrapper) HashedMap(org.apache.commons.collections.map.HashedMap) Map(java.util.Map) HashedMap(org.apache.commons.collections.map.HashedMap) HashMap(java.util.HashMap)

Aggregations

CheckpointStreamFactory (org.apache.flink.runtime.state.CheckpointStreamFactory)13 KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle)7 JobID (org.apache.flink.api.common.JobID)6 Test (org.junit.Test)6 IOException (java.io.IOException)5 CloseableRegistry (org.apache.flink.core.fs.CloseableRegistry)4 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)4 OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle)4 StateSnapshotContextSynchronousImpl (org.apache.flink.runtime.state.StateSnapshotContextSynchronousImpl)4 StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle)4 MemCheckpointStreamFactory (org.apache.flink.runtime.state.memory.MemCheckpointStreamFactory)4 PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest)4 CheckpointOptions (org.apache.flink.runtime.checkpoint.CheckpointOptions)3 Environment (org.apache.flink.runtime.execution.Environment)3 AbstractStateBackend (org.apache.flink.runtime.state.AbstractStateBackend)3 OperatorSnapshotResult (org.apache.flink.streaming.api.operators.OperatorSnapshotResult)3 StreamCheckpointedOperator (org.apache.flink.streaming.api.operators.StreamCheckpointedOperator)3 TaskInfo (org.apache.flink.api.common.TaskInfo)2 Configuration (org.apache.flink.configuration.Configuration)2 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)2