Search in sources :

Example 11 with OperatorStateHandle

use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.

the class PendingCheckpoint method acknowledgeTask.

/**
 * Registers the acknowledgement of a single task for this pending checkpoint and stores the
 * state it reported.
 *
 * <p>The whole operation runs while holding {@code lock}. Possible outcomes:
 * <ul>
 *   <li>{@code DISCARDED} - this pending checkpoint has already been discarded,</li>
 *   <li>{@code DUPLICATE} - the execution attempt was acknowledged before,</li>
 *   <li>{@code UNKNOWN} - the execution attempt is neither pending nor acknowledged,</li>
 *   <li>{@code SUCCESS} - the acknowledgement was recorded.</li>
 * </ul>
 *
 * @param executionAttemptId of the acknowledged task
 * @param subtaskState of the acknowledged task, may be {@code null} if the task reported no state
 * @param metrics Checkpoint metrics for the stats
 * @return TaskAcknowledgeResult of the operation
 */
public TaskAcknowledgeResult acknowledgeTask(ExecutionAttemptID executionAttemptId, SubtaskState subtaskState, CheckpointMetrics metrics) {
    synchronized (lock) {
        if (discarded) {
            return TaskAcknowledgeResult.DISCARDED;
        }

        // Move the attempt from the "pending" set to the "acknowledged" set.
        final ExecutionVertex ackedVertex = notYetAcknowledgedTasks.remove(executionAttemptId);
        if (ackedVertex == null) {
            // Not pending (any more): either it was acknowledged earlier or we never expected it.
            return acknowledgedTasks.contains(executionAttemptId)
                    ? TaskAcknowledgeResult.DUPLICATE
                    : TaskAcknowledgeResult.UNKNOWN;
        }
        acknowledgedTasks.add(executionAttemptId);

        final JobVertexID vertexId = ackedVertex.getJobvertexId();
        final int subtaskIdx = ackedVertex.getParallelSubtaskIndex();
        final long ackTime = System.currentTimeMillis();

        long reportedStateSize = 0;
        if (subtaskState != null) {
            TaskState stateOfVertex = taskStates.get(vertexId);
            if (stateOfVertex == null) {
                // First acknowledgement carrying state for this job vertex: create its TaskState.
                @SuppressWarnings("deprecation")
                ChainedStateHandle<StreamStateHandle> legacyChain = subtaskState.getLegacyOperatorState();
                ChainedStateHandle<OperatorStateHandle> managedChain = subtaskState.getManagedOperatorState();

                //TODO this should go away when we remove chained state, assigning state to operators directly instead
                // The chain length is taken from the legacy state if present, else from the
                // managed operator state, else it defaults to 1.
                final int chainLength = legacyChain != null
                        ? legacyChain.getLength()
                        : (managedChain != null ? managedChain.getLength() : 1);

                stateOfVertex = new TaskState(
                        vertexId,
                        ackedVertex.getTotalNumberOfParallelSubtasks(),
                        ackedVertex.getMaxParallelism(),
                        chainLength);
                taskStates.put(vertexId, stateOfVertex);
            }
            stateOfVertex.putState(subtaskIdx, subtaskState);
            reportedStateSize = subtaskState.getStateSize();
        }

        ++numAcknowledgedTasks;

        // Publish the checkpoint statistics. The field is copied into a local first so that a
        // concurrent modification of the field cannot cause a null pointer between check and use.
        final PendingCheckpointStats callback = this.statsCallback;
        if (callback != null) {
            // The web frontend works with milliseconds, so convert the alignment duration.
            final long alignmentMillis = metrics.getAlignmentDurationNanos() / 1_000_000;
            final SubtaskStateStats stats = new SubtaskStateStats(
                    subtaskIdx,
                    ackTime,
                    reportedStateSize,
                    metrics.getSyncDurationMillis(),
                    metrics.getAsyncDurationMillis(),
                    metrics.getBytesBufferedInAlignment(),
                    alignmentMillis);
            callback.reportSubtaskStats(vertexId, stats);
        }

        return TaskAcknowledgeResult.SUCCESS;
    }
}
Also used : StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) Savepoint(org.apache.flink.runtime.checkpoint.savepoint.Savepoint)

Example 12 with OperatorStateHandle

use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.

the class RoundRobinOperatorStateRepartitioner method repartition.

/**
 * Repartition all named states.
 *
 * <p>States registered under {@link OperatorStateHandle.Mode#SPLIT_DISTRIBUTE} are split along
 * their offsets and handed out round robin to the parallel instances, one named state at a time.
 * States registered under {@link OperatorStateHandle.Mode#BROADCAST} are afterwards added
 * unchanged to every parallel instance.
 *
 * @param nameToStateByMode the previous state, grouped by distribution mode and state name. The
 *                          split-distribute entries are consumed: their values are set to
 *                          {@code null} once processed.
 * @param parallelism the new parallelism; used as a divisor, so it must be greater than zero
 *                    whenever split-distribute state is present
 * @return a list with one entry per parallel instance; each entry maps a stream state handle to
 *         the single {@link OperatorStateHandle} that collects every named state read from it
 */
private List<Map<StreamStateHandle, OperatorStateHandle>> repartition(GroupByStateNameResults nameToStateByMode, int parallelism) {
    // We will use this to merge w.r.t. StreamStateHandles for each parallel subtask inside the maps
    List<Map<StreamStateHandle, OperatorStateHandle>> mergeMapList = new ArrayList<>(parallelism);
    // Initialize
    for (int i = 0; i < parallelism; ++i) {
        mergeMapList.add(new HashMap<StreamStateHandle, OperatorStateHandle>());
    }
    // Start with the state handles we distribute round robin by splitting by offsets
    Map<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> distributeNameToState = nameToStateByMode.getByMode(OperatorStateHandle.Mode.SPLIT_DISTRIBUTE);
    int startParallelOp = 0;
    // Iterate all named states and repartition one named state at a time per iteration
    for (Map.Entry<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> e : distributeNameToState.entrySet()) {
        List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>> current = e.getValue();
        // Determine actual number of partitions for this named state
        int totalPartitions = 0;
        for (Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo> offsets : current) {
            totalPartitions += offsets.f1.getOffsets().length;
        }
        // Repartition the state across the parallel operator instances.
        // lstIdx / offsetIdx form a cursor into 'current': which handle, and which offset inside it.
        int lstIdx = 0;
        int offsetIdx = 0;
        int baseFraction = totalPartitions / parallelism;
        int remainder = totalPartitions % parallelism;
        int newStartParallelOp = startParallelOp;
        for (int i = 0; i < parallelism; ++i) {
            // Preparation: calculate the actual index considering wrap around
            int parallelOpIdx = (i + startParallelOp) % parallelism;
            // Now calculate the number of partitions we will assign to the parallel instance in this round ...
            int numberOfPartitionsToAssign = baseFraction;
            // ... and distribute odd partitions while we still have some, one at a time
            if (remainder > 0) {
                ++numberOfPartitionsToAssign;
                --remainder;
            } else if (remainder == 0) {
                // We are out of odd partitions now and begin our next redistribution round with the current
                // parallel operator to ensure fair load balance.
                // Decrementing to -1 makes sure this branch is taken only once per named state.
                newStartParallelOp = parallelOpIdx;
                --remainder;
            }
            // All partitions collected in this round are merged into the map of this parallel instance
            Map<StreamStateHandle, OperatorStateHandle> mergeMap = mergeMapList.get(parallelOpIdx);
            while (numberOfPartitionsToAssign > 0) {
                Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo> handleWithOffsets = current.get(lstIdx);
                long[] offsets = handleWithOffsets.f1.getOffsets();
                int remaining = offsets.length - offsetIdx;
                // Repartition offsets
                long[] offs;
                if (remaining > numberOfPartitionsToAssign) {
                    // The handle has more offsets left than we need: take a slice and stay on this handle
                    offs = Arrays.copyOfRange(offsets, offsetIdx, offsetIdx + numberOfPartitionsToAssign);
                    offsetIdx += numberOfPartitionsToAssign;
                } else {
                    // The rest of this handle is consumed completely: take it and advance to the next handle
                    if (OPTIMIZE_MEMORY_USE) {
                        // GC
                        handleWithOffsets.f1 = null;
                    }
                    offs = Arrays.copyOfRange(offsets, offsetIdx, offsets.length);
                    offsetIdx = 0;
                    ++lstIdx;
                }
                // May drop below zero when only a slice of the handle was taken; the loop
                // condition treats that the same as "nothing left to assign".
                numberOfPartitionsToAssign -= remaining;
                // As a last step we merge partitions that use the same StreamStateHandle in a single
                // OperatorStateHandle
                OperatorStateHandle operatorStateHandle = mergeMap.get(handleWithOffsets.f0);
                if (operatorStateHandle == null) {
                    operatorStateHandle = new OperatorStateHandle(new HashMap<String, OperatorStateHandle.StateMetaInfo>(), handleWithOffsets.f0);
                    mergeMap.put(handleWithOffsets.f0, operatorStateHandle);
                }
                operatorStateHandle.getStateNameToPartitionOffsets().put(e.getKey(), new OperatorStateHandle.StateMetaInfo(offs, OperatorStateHandle.Mode.SPLIT_DISTRIBUTE));
            }
        }
        startParallelOp = newStartParallelOp;
        // Clear the processed entry's value (presumably to release the old list early - confirm)
        e.setValue(null);
    }
    // Now we also add the state handles marked for broadcast to all parallel instances
    Map<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> broadcastNameToState = nameToStateByMode.getByMode(OperatorStateHandle.Mode.BROADCAST);
    for (int i = 0; i < parallelism; ++i) {
        Map<StreamStateHandle, OperatorStateHandle> mergeMap = mergeMapList.get(i);
        for (Map.Entry<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> e : broadcastNameToState.entrySet()) {
            List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>> current = e.getValue();
            for (Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo> handleWithMetaInfo : current) {
                OperatorStateHandle operatorStateHandle = mergeMap.get(handleWithMetaInfo.f0);
                if (operatorStateHandle == null) {
                    operatorStateHandle = new OperatorStateHandle(new HashMap<String, OperatorStateHandle.StateMetaInfo>(), handleWithMetaInfo.f0);
                    mergeMap.put(handleWithMetaInfo.f0, operatorStateHandle);
                }
                // Broadcast state is not split: the original meta info is shared by every instance
                operatorStateHandle.getStateNameToPartitionOffsets().put(e.getKey(), handleWithMetaInfo.f1);
            }
        }
    }
    return mergeMapList;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) Tuple2(org.apache.flink.api.java.tuple.Tuple2) List(java.util.List) ArrayList(java.util.ArrayList) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) EnumMap(java.util.EnumMap) Map(java.util.Map) HashMap(java.util.HashMap)

Example 13 with OperatorStateHandle

use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.

the class AbstractStreamOperatorTest method testFailingBackendSnapshotMethod.

/**
 * Tests that a failing snapshot method call to the keyed state backend will trigger the closing
 * of the StateSnapshotContextSynchronousImpl and the cancellation of the
 * OperatorSnapshotResult. The latter is supposed to also cancel all assigned futures.
 */
@Test
public void testFailingBackendSnapshotMethod() throws Exception {
    final long checkpointId = 42L;
    final long timestamp = 1L;
    final Exception failingException = new Exception("Test exception");
    final CloseableRegistry closeableRegistry = new CloseableRegistry();
    // Futures handed out by the (mocked) snapshot context
    RunnableFuture<KeyGroupsStateHandle> futureKeyGroupStateHandle = mock(RunnableFuture.class);
    RunnableFuture<OperatorStateHandle> futureOperatorStateHandle = mock(RunnableFuture.class);
    StateSnapshotContextSynchronousImpl context = mock(StateSnapshotContextSynchronousImpl.class);
    when(context.getKeyedStateStreamFuture()).thenReturn(futureKeyGroupStateHandle);
    when(context.getOperatorStateStreamFuture()).thenReturn(futureOperatorStateHandle);
    OperatorSnapshotResult operatorSnapshotResult = spy(new OperatorSnapshotResult());
    // Make every construction of these classes inside the code under test return our instances
    whenNew(StateSnapshotContextSynchronousImpl.class).withAnyArguments().thenReturn(context);
    whenNew(OperatorSnapshotResult.class).withAnyArguments().thenReturn(operatorSnapshotResult);
    CheckpointStreamFactory streamFactory = mock(CheckpointStreamFactory.class);
    StreamTask<Void, AbstractStreamOperator<Void>> containingTask = mock(StreamTask.class);
    when(containingTask.getCancelables()).thenReturn(closeableRegistry);
    AbstractStreamOperator<Void> operator = mock(AbstractStreamOperator.class);
    when(operator.snapshotState(anyLong(), anyLong(), any(CheckpointOptions.class))).thenCallRealMethod();
    // The amount of mocking in this test makes it necessary to make the
    // getCheckpointStreamFactory method visible for the test and to
    // overwrite its behaviour.
    when(operator.getCheckpointStreamFactory(any(CheckpointOptions.class))).thenReturn(streamFactory);
    doReturn(containingTask).when(operator).getContainingTask();
    // The operator state backend succeeds and returns a future ...
    RunnableFuture<OperatorStateHandle> futureManagedOperatorStateHandle = mock(RunnableFuture.class);
    OperatorStateBackend operatorStateBackend = mock(OperatorStateBackend.class);
    when(operatorStateBackend.snapshot(eq(checkpointId), eq(timestamp), eq(streamFactory), any(CheckpointOptions.class))).thenReturn(futureManagedOperatorStateHandle);
    // ... whereas the keyed state backend fails
    AbstractKeyedStateBackend<?> keyedStateBackend = mock(AbstractKeyedStateBackend.class);
    when(keyedStateBackend.snapshot(eq(checkpointId), eq(timestamp), eq(streamFactory), eq(CheckpointOptions.forFullCheckpoint()))).thenThrow(failingException);
    Whitebox.setInternalState(operator, "operatorStateBackend", operatorStateBackend);
    Whitebox.setInternalState(operator, "keyedStateBackend", keyedStateBackend);
    Whitebox.setInternalState(operator, "checkpointStreamFactory", streamFactory);
    try {
        operator.snapshotState(checkpointId, timestamp, CheckpointOptions.forFullCheckpoint());
        fail("Exception expected.");
    } catch (Exception e) {
        // The backend failure is expected to be wrapped, with the original exception as cause
        assertEquals(failingException, e.getCause());
    }
    // verify that the context has been closed, the operator snapshot result has been cancelled
    // and that all futures have been cancelled.
    verify(context).close();
    verify(operatorSnapshotResult).cancel();
    verify(futureKeyGroupStateHandle).cancel(anyBoolean());
    verify(futureOperatorStateHandle).cancel(anyBoolean());
    // This used to re-verify futureKeyGroupStateHandle a second time, which left the future of
    // the operator state backend unchecked.
    // NOTE(review): assumes snapshotState assigns the operator state backend's future to the
    // snapshot result before the keyed state backend is invoked - confirm against the operator.
    verify(futureManagedOperatorStateHandle).cancel(anyBoolean());
}
Also used : CheckpointStreamFactory(org.apache.flink.runtime.state.CheckpointStreamFactory) StateSnapshotContextSynchronousImpl(org.apache.flink.runtime.state.StateSnapshotContextSynchronousImpl) CloseableRegistry(org.apache.flink.core.fs.CloseableRegistry) IOException(java.io.IOException) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) OperatorStateBackend(org.apache.flink.runtime.state.OperatorStateBackend) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 14 with OperatorStateHandle

use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.

the class StateInitializationContextImplTest method setUp.

/**
 * Builds the fixture: {@code NUM_HANDLES} keyed-state handles and {@code NUM_HANDLES}
 * operator-state handles backed by in-memory bytes, plus the initialization context under test.
 *
 * <p>While writing, the number of written key groups and the set of written operator state
 * values are recorded in {@code writtenKeyGroups} and {@code writtenOperatorStates}.
 */
@Before
public void setUp() throws Exception {
    this.writtenKeyGroups = 0;
    this.writtenOperatorStates = new HashSet<>();
    this.closableRegistry = new CloseableRegistry();

    OperatorStateStore stateStore = mock(OperatorStateStore.class);
    ByteArrayOutputStreamWithPos out = new ByteArrayOutputStreamWithPos(64);

    // --- keyed state: consecutive key-group ranges, handle #9 gets the empty range ---
    List<KeyGroupsStateHandle> keyGroupsStateHandles = new ArrayList<>(NUM_HANDLES);
    int nextRangeStart = 0;
    for (int handleIdx = 0; handleIdx < NUM_HANDLES; ++handleIdx) {
        out.reset();
        int rangeEnd = nextRangeStart + (handleIdx % 4);
        DataOutputView view = new DataOutputViewStreamWrapper(out);

        KeyGroupRange range;
        if (handleIdx == 9) {
            range = KeyGroupRange.EMPTY_KEY_GROUP_RANGE;
        } else {
            range = new KeyGroupRange(nextRangeStart, rangeEnd);
        }
        KeyGroupRangeOffsets rangeOffsets = new KeyGroupRangeOffsets(range);
        nextRangeStart = rangeEnd + 1;

        // Write each key group id at the current stream position and remember that position
        for (int keyGroup : rangeOffsets.getKeyGroupRange()) {
            rangeOffsets.setKeyGroupOffset(keyGroup, out.getPosition());
            view.writeInt(keyGroup);
            ++writtenKeyGroups;
        }

        keyGroupsStateHandles.add(
                new KeyGroupsStateHandle(rangeOffsets, new ByteStateHandleCloseChecking("kg-" + handleIdx, out.toByteArray())));
    }

    // --- operator state: handle #i carries (i % 4) int values under the default state name ---
    List<OperatorStateHandle> operatorStateHandles = new ArrayList<>(NUM_HANDLES);
    for (int handleIdx = 0; handleIdx < NUM_HANDLES; ++handleIdx) {
        int numValues = handleIdx % 4;
        out.reset();
        DataOutputView view = new DataOutputViewStreamWrapper(out);

        LongArrayList positions = new LongArrayList(numValues);
        for (int valueIdx = 0; valueIdx < numValues; ++valueIdx) {
            positions.add(out.getPosition());
            int value = handleIdx * NUM_HANDLES + valueIdx;
            view.writeInt(value);
            writtenOperatorStates.add(value);
        }

        OperatorStateHandle.StateMetaInfo metaInfo =
                new OperatorStateHandle.StateMetaInfo(positions.toArray(), OperatorStateHandle.Mode.SPLIT_DISTRIBUTE);
        Map<String, OperatorStateHandle.StateMetaInfo> offsetsByName = new HashMap<>();
        offsetsByName.put(DefaultOperatorStateBackend.DEFAULT_OPERATOR_STATE_NAME, metaInfo);

        operatorStateHandles.add(
                new OperatorStateHandle(offsetsByName, new ByteStateHandleCloseChecking("os-" + handleIdx, out.toByteArray())));
    }

    this.initializationContext = new StateInitializationContextImpl(
            true,
            stateStore,
            mock(KeyedStateStore.class),
            keyGroupsStateHandles,
            operatorStateHandles,
            closableRegistry);
}
Also used : OperatorStateStore(org.apache.flink.api.common.state.OperatorStateStore) HashMap(java.util.HashMap) KeyGroupRangeOffsets(org.apache.flink.runtime.state.KeyGroupRangeOffsets) LongArrayList(org.apache.flink.runtime.util.LongArrayList) ArrayList(java.util.ArrayList) LongArrayList(org.apache.flink.runtime.util.LongArrayList) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) DataOutputView(org.apache.flink.core.memory.DataOutputView) CloseableRegistry(org.apache.flink.core.fs.CloseableRegistry) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) DataOutputViewStreamWrapper(org.apache.flink.core.memory.DataOutputViewStreamWrapper) StateInitializationContextImpl(org.apache.flink.runtime.state.StateInitializationContextImpl) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) ByteArrayOutputStreamWithPos(org.apache.flink.core.memory.ByteArrayOutputStreamWithPos) Before(org.junit.Before)

Example 15 with OperatorStateHandle

use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.

the class SavepointV0Serializer method convertSubtaskState.

/**
 * Converts a subtask state of the legacy (migration) format into the current
 * {@code org.apache.flink.runtime.checkpoint.SubtaskState}.
 *
 * <p>For every non-null entry of the legacy operator chain the operator and function state is
 * converted into a stream state handle at the same chain position. At most one entry of the
 * chain may carry keyed state; it is converted into the keyed state of the result.
 *
 * @param subtaskState the legacy subtask state to convert
 * @param parallelInstanceIdx index of the parallel instance, passed on to the keyed state conversion
 * @param userClassLoader class loader used to deserialize the legacy state
 * @param checkpointID id of the checkpoint, passed on to the keyed state conversion
 * @return the converted subtask state
 * @throws Exception if deserialization or conversion fails, or if more than one chain entry
 *                   carries keyed state
 */
private org.apache.flink.runtime.checkpoint.SubtaskState convertSubtaskState(SubtaskState subtaskState, int parallelInstanceIdx, ClassLoader userClassLoader, long checkpointID) throws Exception {
    StreamTaskStateList legacyStateList =
            (StreamTaskStateList) subtaskState.getState().deserializeValue(userClassLoader);
    StreamTaskState[] legacyChain = legacyStateList.getState(userClassLoader);

    // One slot per chained operator; slots of operators without state stay null.
    StreamStateHandle[] convertedChain = new StreamStateHandle[legacyChain.length];
    KeyGroupsStateHandle convertedKeyedState = null;

    for (int chainIdx = 0; chainIdx < legacyChain.length; ++chainIdx) {
        StreamTaskState legacyState = legacyChain[chainIdx];
        if (legacyState != null) {
            convertedChain[chainIdx] = convertOperatorAndFunctionState(legacyState);

            HashMap<String, KvStateSnapshot<?, ?, ?, ?>> legacyKeyedState = legacyState.getKvStates();
            if (legacyKeyedState != null) {
                Preconditions.checkState(convertedKeyedState == null, "Found more than one keyed state in chain");
                convertedKeyedState = convertKeyedBackendState(legacyKeyedState, parallelInstanceIdx, checkpointID);
            }
        }
    }

    List<StreamStateHandle> convertedChainList = Arrays.asList(convertedChain);
    ChainedStateHandle<StreamStateHandle> newChainedState = new ChainedStateHandle<>(convertedChainList);

    // A chain of the same length holding only nulls; it is passed for two of the constructor
    // arguments below (presumably the managed and raw operator state - confirm against SubtaskState).
    List<OperatorStateHandle> emptyOperatorStates = Arrays.asList(new OperatorStateHandle[newChainedState.getLength()]);
    ChainedStateHandle<OperatorStateHandle> nopChain = new ChainedStateHandle<>(emptyOperatorStates);

    return new org.apache.flink.runtime.checkpoint.SubtaskState(newChainedState, nopChain, nopChain, convertedKeyedState, null);
}
Also used : KvStateSnapshot(org.apache.flink.migration.runtime.state.KvStateSnapshot) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) ChainedStateHandle(org.apache.flink.runtime.state.ChainedStateHandle) MigrationStreamStateHandle(org.apache.flink.migration.state.MigrationStreamStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) MultiStreamStateHandle(org.apache.flink.runtime.state.MultiStreamStateHandle) SubtaskState(org.apache.flink.migration.runtime.checkpoint.SubtaskState) StreamTaskStateList(org.apache.flink.migration.streaming.runtime.tasks.StreamTaskStateList) MigrationKeyGroupStateHandle(org.apache.flink.migration.state.MigrationKeyGroupStateHandle) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) ChainedStateHandle(org.apache.flink.runtime.state.ChainedStateHandle) FileStateHandle(org.apache.flink.runtime.state.filesystem.FileStateHandle) AbstractFileStateHandle(org.apache.flink.migration.runtime.state.filesystem.AbstractFileStateHandle) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) MigrationStreamStateHandle(org.apache.flink.migration.state.MigrationStreamStateHandle) SerializedStateHandle(org.apache.flink.migration.runtime.state.memory.SerializedStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) StateHandle(org.apache.flink.migration.runtime.state.StateHandle) MultiStreamStateHandle(org.apache.flink.runtime.state.MultiStreamStateHandle) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) StreamTaskState(org.apache.flink.migration.streaming.runtime.tasks.StreamTaskState)

Aggregations

OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle)30 KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle)20 StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle)18 ArrayList (java.util.ArrayList)15 HashMap (java.util.HashMap)11 Collection (java.util.Collection)8 ByteStreamStateHandle (org.apache.flink.runtime.state.memory.ByteStreamStateHandle)8 Test (org.junit.Test)8 JobID (org.apache.flink.api.common.JobID)7 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)7 List (java.util.List)6 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)6 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)6 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)6 Map (java.util.Map)5 CloseableRegistry (org.apache.flink.core.fs.CloseableRegistry)5 ChainedStateHandle (org.apache.flink.runtime.state.ChainedStateHandle)5 TaskStateHandles (org.apache.flink.runtime.state.TaskStateHandles)5 Configuration (org.apache.flink.configuration.Configuration)4 CheckpointOptions (org.apache.flink.runtime.checkpoint.CheckpointOptions)4