Search in sources :

Example 41 with KeyedStateHandle

use of org.apache.flink.runtime.state.KeyedStateHandle in project flink by apache.

the class RocksDBIncrementalCheckpointUtilsTest method testChooseTheBestStateHandleForInitial.

/**
 * Checks which handle {@code chooseTheBestStateHandleForInitial} selects for several target
 * key-group ranges, given three mocked handles covering [0, 3], [4, 7] and [8, 12].
 */
@Test
public void testChooseTheBestStateHandleForInitial() {
    // Three mocked candidates whose key-group ranges are adjacent and do not overlap.
    final KeyedStateHandle lowHandle = mock(KeyedStateHandle.class);
    when(lowHandle.getKeyGroupRange()).thenReturn(new KeyGroupRange(0, 3));
    final KeyedStateHandle midHandle = mock(KeyedStateHandle.class);
    when(midHandle.getKeyGroupRange()).thenReturn(new KeyGroupRange(4, 7));
    final KeyedStateHandle highHandle = mock(KeyedStateHandle.class);
    when(highHandle.getKeyGroupRange()).thenReturn(new KeyGroupRange(8, 12));

    final List<KeyedStateHandle> candidates = new ArrayList<>(3);
    candidates.add(lowHandle);
    candidates.add(midHandle);
    candidates.add(highHandle);

    // Target [3, 5]: no handle is expected to be chosen.
    Assert.assertNull(
            RocksDBIncrementalCheckpointUtils.chooseTheBestStateHandleForInitial(
                    candidates, new KeyGroupRange(3, 5)));

    // Target [3, 6]: the [4, 7] handle is expected, because its key-group range satisfies the
    // overlap fraction demand.
    Assert.assertEquals(
            midHandle,
            RocksDBIncrementalCheckpointUtils.chooseTheBestStateHandleForInitial(
                    candidates, new KeyGroupRange(3, 6)));

    // Target [5, 12]: both the [4, 7] and the [8, 12] handle satisfy the overlap fraction,
    // but the [8, 12] handle is the better match.
    Assert.assertEquals(
            highHandle,
            RocksDBIncrementalCheckpointUtils.chooseTheBestStateHandleForInitial(
                    candidates, new KeyGroupRange(5, 12)));

    // Target [4, 11]: the [4, 7] and the [8, 12] handle each intersect the target in 4 key
    // groups, but the overlap fraction of the [4, 7] handle is better.
    Assert.assertEquals(
            midHandle,
            RocksDBIncrementalCheckpointUtils.chooseTheBestStateHandleForInitial(
                    candidates, new KeyGroupRange(4, 11)));

    // Target [3, 12]: it covers both the [4, 7] and the [8, 12] handle completely; the
    // [8, 12] handle is expected because it spans more key groups.
    Assert.assertEquals(
            highHandle,
            RocksDBIncrementalCheckpointUtils.chooseTheBestStateHandleForInitial(
                    candidates, new KeyGroupRange(3, 12)));
}
Also used : ArrayList(java.util.ArrayList) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) Test(org.junit.Test)

Example 42 with KeyedStateHandle

use of org.apache.flink.runtime.state.KeyedStateHandle in project flink by apache.

the class CheckpointCoordinatorTest method testSharedStateRegistrationOnRestore.

/**
 * Performs three incremental checkpoints against a coordinator backed by a first
 * {@link SharedStateRegistry}, verifies that every keyed state handle registered its shared
 * state exactly once and that nothing was discarded, then shuts the store down, rebuilds it with
 * a second registry, restores, and verifies the re-registration and the discard behavior while
 * the oldest checkpoints are removed one by one.
 */
@Test
public void testSharedStateRegistrationOnRestore() throws Exception {
    JobVertexID jobVertexID1 = new JobVertexID();
    int parallelism1 = 2;
    int maxParallelism1 = 4;
    ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1, parallelism1, maxParallelism1).build();
    ExecutionJobVertex jobVertex1 = graph.getJobVertex(jobVertexID1);
    // the first registry and store start out without any completed checkpoints
    List<CompletedCheckpoint> checkpoints = Collections.emptyList();
    SharedStateRegistry firstInstance = SharedStateRegistry.DEFAULT_FACTORY.create(org.apache.flink.util.concurrent.Executors.directExecutor(), checkpoints);
    final EmbeddedCompletedCheckpointStore store = new EmbeddedCompletedCheckpointStore(10, checkpoints, firstInstance);
    // set up the coordinator and validate the initial state
    // (the builder is kept in a variable because it is reused below for the second coordinator)
    final CheckpointCoordinatorBuilder coordinatorBuilder = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setTimer(manuallyTriggeredScheduledExecutor);
    final CheckpointCoordinator coordinator = coordinatorBuilder.setCompletedCheckpointStore(store).build();
    final int numCheckpoints = 3;
    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    for (int i = 0; i < numCheckpoints; ++i) {
        performIncrementalCheckpoint(graph.getJobID(), coordinator, jobVertex1, keyGroupPartitions1, i);
    }
    List<CompletedCheckpoint> completedCheckpoints = coordinator.getSuccessfulCheckpoints();
    assertEquals(numCheckpoints, completedCheckpoints.size());
    int sharedHandleCount = 0;
    // one map of shared state handles per checkpoint, indexed by the checkpoint's position in
    // completedCheckpoints
    List<Map<StateHandleID, StreamStateHandle>> sharedHandlesByCheckpoint = new ArrayList<>(numCheckpoints);
    for (int i = 0; i < numCheckpoints; ++i) {
        sharedHandlesByCheckpoint.add(new HashMap<>(2));
    }
    // cp tracks the position of the checkpoint currently being inspected
    int cp = 0;
    for (CompletedCheckpoint completedCheckpoint : completedCheckpoints) {
        for (OperatorState taskState : completedCheckpoint.getOperatorStates().values()) {
            for (OperatorSubtaskState subtaskState : taskState.getStates()) {
                for (KeyedStateHandle keyedStateHandle : subtaskState.getManagedKeyedState()) {
                    // test we are once registered with the current registry
                    verify(keyedStateHandle, times(1)).registerSharedStates(firstInstance, completedCheckpoint.getCheckpointID());
                    IncrementalRemoteKeyedStateHandle incrementalKeyedStateHandle = (IncrementalRemoteKeyedStateHandle) keyedStateHandle;
                    // remember the shared handles of this checkpoint for the discard checks below
                    sharedHandlesByCheckpoint.get(cp).putAll(incrementalKeyedStateHandle.getSharedState());
                    for (StreamStateHandle streamStateHandle : incrementalKeyedStateHandle.getSharedState().values()) {
                        // shared state must not be a placeholder and must not have been discarded
                        assertTrue(!(streamStateHandle instanceof PlaceholderStreamStateHandle));
                        verify(streamStateHandle, never()).discardState();
                        ++sharedHandleCount;
                    }
                    for (StreamStateHandle streamStateHandle : incrementalKeyedStateHandle.getPrivateState().values()) {
                        verify(streamStateHandle, never()).discardState();
                    }
                    verify(incrementalKeyedStateHandle.getMetaStateHandle(), never()).discardState();
                }
                verify(subtaskState, never()).discardState();
            }
        }
        ++cp;
    }
    // 2 (parallelism) x (1 (CP0) + 2 (CP1) + 2 (CP2)) = 10
    assertEquals(10, sharedHandleCount);
    // discard CP0
    store.removeOldestCheckpoint();
    // after removing CP0, none of the shared state handles collected above may have been
    // discarded
    for (Map<StateHandleID, StreamStateHandle> cpList : sharedHandlesByCheckpoint) {
        for (StreamStateHandle streamStateHandle : cpList.values()) {
            verify(streamStateHandle, never()).discardState();
        }
    }
    // shutdown the store
    store.shutdown(JobStatus.SUSPENDED, new CheckpointsCleaner());
    // restore the store
    Set<ExecutionJobVertex> tasks = new HashSet<>();
    tasks.add(jobVertex1);
    assertEquals(JobStatus.SUSPENDED, store.getShutdownStatus().orElse(null));
    // the second registry and store are created from the checkpoints the first store still holds
    SharedStateRegistry secondInstance = SharedStateRegistry.DEFAULT_FACTORY.create(org.apache.flink.util.concurrent.Executors.directExecutor(), store.getAllCheckpoints());
    final EmbeddedCompletedCheckpointStore secondStore = new EmbeddedCompletedCheckpointStore(10, store.getAllCheckpoints(), secondInstance);
    final CheckpointCoordinator secondCoordinator = coordinatorBuilder.setCompletedCheckpointStore(secondStore).build();
    assertTrue(secondCoordinator.restoreLatestCheckpointedStateToAll(tasks, false));
    // validate that all shared states are registered again after the recovery.
    cp = 0;
    for (CompletedCheckpoint completedCheckpoint : completedCheckpoints) {
        for (OperatorState taskState : completedCheckpoint.getOperatorStates().values()) {
            for (OperatorSubtaskState subtaskState : taskState.getStates()) {
                for (KeyedStateHandle keyedStateHandle : subtaskState.getManagedKeyedState()) {
                    VerificationMode verificationMode;
                    // test we are once registered with the new registry
                    // (position 0 is CP0, which was removed from the first store before the
                    // restore, so no registration with the new registry is expected for it)
                    if (cp > 0) {
                        verificationMode = times(1);
                    } else {
                        verificationMode = never();
                    }
                    // check that all are registered with the new registry
                    verify(keyedStateHandle, verificationMode).registerSharedStates(secondInstance, completedCheckpoint.getCheckpointID());
                }
            }
        }
        ++cp;
    }
    // discard CP1
    secondStore.removeOldestCheckpoint();
    // we expect that all shared state from CP0 is no longer referenced and discarded. CP2 is
    // still live and also
    // references the state from CP1, so we expect they are not discarded.
    // NOTE(review): verifyDiscard is defined outside this view; presumably it applies the
    // returned verification mode to discardState() of each handle per checkpoint index - confirm.
    verifyDiscard(sharedHandlesByCheckpoint, cpId -> cpId == 0 ? times(1) : never());
    // discard CP2
    secondStore.removeOldestCheckpoint();
    // still expect shared state not to be discarded because it may be used in later checkpoints
    // (only index 1 is checked strictly; atLeast(0) accepts any number of calls for the others)
    verifyDiscard(sharedHandlesByCheckpoint, cpId -> cpId == 1 ? never() : atLeast(0));
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ArrayList(java.util.ArrayList) IncrementalRemoteKeyedStateHandle(org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) SharedStateRegistry(org.apache.flink.runtime.state.SharedStateRegistry) PlaceholderStreamStateHandle(org.apache.flink.runtime.state.PlaceholderStreamStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) TestingStreamStateHandle(org.apache.flink.runtime.state.TestingStreamStateHandle) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) IncrementalRemoteKeyedStateHandle(org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle) HashSet(java.util.HashSet) PlaceholderStreamStateHandle(org.apache.flink.runtime.state.PlaceholderStreamStateHandle) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) VerificationMode(org.mockito.verification.VerificationMode) StateHandleID(org.apache.flink.runtime.state.StateHandleID) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Map(java.util.Map) HashMap(java.util.HashMap) Collections.singletonMap(java.util.Collections.singletonMap) Test(org.junit.Test)

Example 43 with KeyedStateHandle

use of org.apache.flink.runtime.state.KeyedStateHandle in project flink by apache.

the class CheckpointCoordinatorFailureTest method testFailingCompletedCheckpointStoreAdd.

/**
 * Tests that a failure while storing a completed checkpoint in the completed checkpoint store
 * properly fails the originating pending checkpoint and cleans up the completed checkpoint,
 * i.e. the pending checkpoint is disposed and the acknowledged state is discarded.
 */
@Test
public void testFailingCompletedCheckpointStoreAdd() throws Exception {
    final JobVertexID vertexId = new JobVertexID();
    final ManuallyTriggeredScheduledExecutor timerExecutor = new ManuallyTriggeredScheduledExecutor();
    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId)
                    .build();
    final ExecutionVertex firstSubtask = executionGraph.getJobVertex(vertexId).getTaskVertices()[0];

    // Wire the coordinator to the FailingCompletedCheckpointStore and the manual timer.
    final CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setCompletedCheckpointStore(
                            new FailingCompletedCheckpointStore(
                                    new Exception("The failing completed checkpoint store failed again... :-(")))
                    .setTimer(timerExecutor)
                    .build();

    // Trigger one checkpoint; it must be pending and not yet disposed.
    coordinator.triggerCheckpoint(false);
    timerExecutor.triggerAll();
    assertEquals(1, coordinator.getNumberOfPendingCheckpoints());
    final PendingCheckpoint pending = coordinator.getPendingCheckpoints().values().iterator().next();
    assertFalse(pending.isDisposed());
    final long pendingId = coordinator.getPendingCheckpoints().keySet().iterator().next();

    // Build the state that the single subtask is going to acknowledge.
    final KeyedStateHandle keyedManaged = mock(KeyedStateHandle.class);
    final KeyedStateHandle keyedRaw = mock(KeyedStateHandle.class);
    final OperatorStateHandle operatorManaged = mock(OperatorStreamStateHandle.class);
    final OperatorStateHandle operatorRaw = mock(OperatorStreamStateHandle.class);
    final InputChannelStateHandle inputChannelState =
            new InputChannelStateHandle(
                    new InputChannelInfo(0, 1),
                    mock(StreamStateHandle.class),
                    Collections.singletonList(1L));
    final ResultSubpartitionStateHandle resultSubpartitionState =
            new ResultSubpartitionStateHandle(
                    new ResultSubpartitionInfo(0, 1),
                    mock(StreamStateHandle.class),
                    Collections.singletonList(1L));
    final OperatorSubtaskState spiedOperatorState =
            spy(
                    OperatorSubtaskState.builder()
                            .setManagedOperatorState(operatorManaged)
                            .setRawOperatorState(operatorRaw)
                            .setManagedKeyedState(keyedManaged)
                            .setRawKeyedState(keyedRaw)
                            .setInputChannelState(StateObjectCollection.singleton(inputChannelState))
                            .setResultSubpartitionState(StateObjectCollection.singleton(resultSubpartitionState))
                            .build());

    final TaskStateSnapshot spiedSnapshot = spy(new TaskStateSnapshot());
    spiedSnapshot.putSubtaskStateByOperatorID(new OperatorID(), spiedOperatorState);
    // Lookups by the operator id derived from the vertex id are stubbed to return the spied state.
    when(spiedSnapshot.getSubtaskStateByOperatorID(OperatorID.fromJobVertexID(firstSubtask.getJobvertexId())))
            .thenReturn(spiedOperatorState);

    final AcknowledgeCheckpoint ack =
            new AcknowledgeCheckpoint(
                    executionGraph.getJobID(),
                    firstSubtask.getCurrentExecutionAttempt().getAttemptId(),
                    pendingId,
                    new CheckpointMetrics(),
                    spiedSnapshot);

    try {
        coordinator.receiveAcknowledgeMessage(ack, "Unknown location");
        fail("Expected a checkpoint exception because the completed checkpoint store could not " + "store the completed checkpoint.");
    } catch (CheckpointException expected) {
        // this exception is exactly what the test expects; nothing to do
    }

    // The pending checkpoint must have been disposed once completing it failed.
    assertTrue(pending.isDisposed());

    // The acknowledged subtask state and each of its parts must have been discarded.
    verify(spiedOperatorState).discardState();
    verify(spiedOperatorState.getManagedOperatorState().iterator().next()).discardState();
    verify(spiedOperatorState.getRawOperatorState().iterator().next()).discardState();
    verify(spiedOperatorState.getManagedKeyedState().iterator().next()).discardState();
    verify(spiedOperatorState.getRawKeyedState().iterator().next()).discardState();
    verify(spiedOperatorState.getInputChannelState().iterator().next().getDelegate()).discardState();
    verify(spiedOperatorState.getResultSubpartitionState().iterator().next().getDelegate()).discardState();
}
Also used : InputChannelInfo(org.apache.flink.runtime.checkpoint.channel.InputChannelInfo) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) PossibleInconsistentStateException(org.apache.flink.runtime.persistence.PossibleInconsistentStateException) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ResultSubpartitionInfo(org.apache.flink.runtime.checkpoint.channel.ResultSubpartitionInfo) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) ResultSubpartitionStateHandle(org.apache.flink.runtime.state.ResultSubpartitionStateHandle) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) InputChannelStateHandle(org.apache.flink.runtime.state.InputChannelStateHandle) Test(org.junit.Test)

Example 44 with KeyedStateHandle

use of org.apache.flink.runtime.state.KeyedStateHandle in project flink by apache.

the class CheckpointCoordinatorRestoringTest method testRestoreLatestCheckpointedStateWithChangingParallelism.

/**
 * Tests the checkpoint restoration with changing parallelism of job vertex with partitioned
 * state.
 *
 * <p>A checkpoint is completed on a graph with two job vertices, then restored into a new graph
 * in which only the second vertex has a different parallelism. The restored keyed and operator
 * state of the second vertex is compared against state generated for the new partitioning.
 *
 * @param scaleOut if {@code true}, the second vertex goes from parallelism 2 to 13; otherwise
 *     it goes from parallelism 13 to 2
 * @throws Exception if any step of the checkpoint or restore fails
 */
private void testRestoreLatestCheckpointedStateWithChangingParallelism(boolean scaleOut) throws Exception {
    final JobVertexID jobVertexID1 = new JobVertexID();
    final JobVertexID jobVertexID2 = new JobVertexID();
    // vertex 1 keeps its parallelism; only vertex 2 is rescaled (2 <-> 13)
    int parallelism1 = 3;
    int parallelism2 = scaleOut ? 2 : 13;
    int maxParallelism1 = 42;
    int maxParallelism2 = 13;
    int newParallelism2 = scaleOut ? 13 : 2;
    // the same store instance is handed to both coordinators below
    CompletedCheckpointStore completedCheckpointStore = new EmbeddedCompletedCheckpointStore();
    final ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1, parallelism1, maxParallelism1).addJobVertex(jobVertexID2, parallelism2, maxParallelism2).build();
    final ExecutionJobVertex jobVertex1 = graph.getJobVertex(jobVertexID1);
    final ExecutionJobVertex jobVertex2 = graph.getJobVertex(jobVertexID2);
    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setCompletedCheckpointStore(completedCheckpointStore).setTimer(manuallyTriggeredScheduledExecutor).build();
    // trigger the checkpoint
    coord.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    assertEquals(1, coord.getPendingCheckpoints().size());
    long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
    // key-group partitions for the ORIGINAL parallelism of each vertex
    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
    // vertex 1
    // every subtask acknowledges managed operator state, managed and raw keyed state, and an
    // input channel state handle
    for (int index = 0; index < jobVertex1.getParallelism(); index++) {
        OperatorStateHandle opStateBackend = generatePartitionableStateHandle(jobVertexID1, index, 2, 8, false);
        KeyGroupsStateHandle keyedStateBackend = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
        KeyGroupsStateHandle keyedStateRaw = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), true);
        OperatorSubtaskState operatorSubtaskState = OperatorSubtaskState.builder().setManagedOperatorState(opStateBackend).setManagedKeyedState(keyedStateBackend).setRawKeyedState(keyedStateRaw).setInputChannelState(StateObjectCollection.singleton(createNewInputChannelStateHandle(3, new Random()))).build();
        TaskStateSnapshot taskOperatorSubtaskStates = new TaskStateSnapshot();
        taskOperatorSubtaskStates.putSubtaskStateByOperatorID(OperatorID.fromJobVertexID(jobVertexID1), operatorSubtaskState);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(graph.getJobID(), jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), taskOperatorSubtaskStates);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint, TASK_MANAGER_LOCATION_INFO);
    }
    // vertex 2
    // the acknowledged operator state handles are collected as the expectation for the
    // comparison after the restore
    final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesBackend = new ArrayList<>(jobVertex2.getParallelism());
    final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesRaw = new ArrayList<>(jobVertex2.getParallelism());
    for (int index = 0; index < jobVertex2.getParallelism(); index++) {
        KeyGroupsStateHandle keyedStateBackend = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
        KeyGroupsStateHandle keyedStateRaw = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), true);
        OperatorStateHandle opStateBackend = generatePartitionableStateHandle(jobVertexID2, index, 2, 8, false);
        OperatorStateHandle opStateRaw = generatePartitionableStateHandle(jobVertexID2, index, 2, 8, true);
        expectedOpStatesBackend.add(new ChainedStateHandle<>(singletonList(opStateBackend)));
        expectedOpStatesRaw.add(new ChainedStateHandle<>(singletonList(opStateRaw)));
        OperatorSubtaskState operatorSubtaskState = OperatorSubtaskState.builder().setManagedOperatorState(opStateBackend).setRawOperatorState(opStateRaw).setManagedKeyedState(keyedStateBackend).setRawKeyedState(keyedStateRaw).build();
        TaskStateSnapshot taskOperatorSubtaskStates = new TaskStateSnapshot();
        taskOperatorSubtaskStates.putSubtaskStateByOperatorID(OperatorID.fromJobVertexID(jobVertexID2), operatorSubtaskState);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(graph.getJobID(), jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), taskOperatorSubtaskStates);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint, TASK_MANAGER_LOCATION_INFO);
    }
    // after all subtasks of both vertices acknowledged, exactly one checkpoint must be complete
    List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
    assertEquals(1, completedCheckpoints.size());
    // key-group partitions of vertex 2 for its NEW parallelism
    List<KeyGroupRange> newKeyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, newParallelism2);
    // rescale vertex 2
    final ExecutionGraph newGraph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1, parallelism1, maxParallelism1).addJobVertex(jobVertexID2, newParallelism2, maxParallelism2).build();
    final ExecutionJobVertex newJobVertex1 = newGraph.getJobVertex(jobVertexID1);
    final ExecutionJobVertex newJobVertex2 = newGraph.getJobVertex(jobVertexID2);
    // set up the coordinator and validate the initial state
    CheckpointCoordinator newCoord = new CheckpointCoordinatorBuilder().setExecutionGraph(newGraph).setCompletedCheckpointStore(completedCheckpointStore).setTimer(manuallyTriggeredScheduledExecutor).build();
    Set<ExecutionJobVertex> tasks = new HashSet<>();
    tasks.add(newJobVertex1);
    tasks.add(newJobVertex2);
    assertTrue(newCoord.restoreLatestCheckpointedStateToAll(tasks, false));
    // verify the restored state
    // vertex 1 was not rescaled, so it is checked against its original partitions
    verifyStateRestore(jobVertexID1, newJobVertex1, keyGroupPartitions1);
    List<List<Collection<OperatorStateHandle>>> actualOpStatesBackend = new ArrayList<>(newJobVertex2.getParallelism());
    List<List<Collection<OperatorStateHandle>>> actualOpStatesRaw = new ArrayList<>(newJobVertex2.getParallelism());
    for (int i = 0; i < newJobVertex2.getParallelism(); i++) {
        List<OperatorIDPair> operatorIDs = newJobVertex2.getOperatorIDs();
        // expected keyed state for this new subtask, generated from the new partitioning
        KeyGroupsStateHandle originalKeyedStateBackend = generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), false);
        KeyGroupsStateHandle originalKeyedStateRaw = generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), true);
        JobManagerTaskRestore taskRestore = newJobVertex2.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskRestore();
        Assert.assertEquals(1L, taskRestore.getRestoreCheckpointId());
        TaskStateSnapshot taskStateHandles = taskRestore.getTaskStateSnapshot();
        // keyed state is only compared for the operator at the last index of operatorIDs
        final int headOpIndex = operatorIDs.size() - 1;
        List<Collection<OperatorStateHandle>> allParallelManagedOpStates = new ArrayList<>(operatorIDs.size());
        List<Collection<OperatorStateHandle>> allParallelRawOpStates = new ArrayList<>(operatorIDs.size());
        for (int idx = 0; idx < operatorIDs.size(); ++idx) {
            OperatorID operatorID = operatorIDs.get(idx).getGeneratedOperatorID();
            OperatorSubtaskState opState = taskStateHandles.getSubtaskStateByOperatorID(operatorID);
            Collection<OperatorStateHandle> opStateBackend = opState.getManagedOperatorState();
            Collection<OperatorStateHandle> opStateRaw = opState.getRawOperatorState();
            allParallelManagedOpStates.add(opStateBackend);
            allParallelRawOpStates.add(opStateRaw);
            if (idx == headOpIndex) {
                Collection<KeyedStateHandle> keyedStateBackend = opState.getManagedKeyedState();
                Collection<KeyedStateHandle> keyGroupStateRaw = opState.getRawKeyedState();
                compareKeyedState(singletonList(originalKeyedStateBackend), keyedStateBackend);
                compareKeyedState(singletonList(originalKeyedStateRaw), keyGroupStateRaw);
            }
        }
        actualOpStatesBackend.add(allParallelManagedOpStates);
        actualOpStatesRaw.add(allParallelRawOpStates);
    }
    // compare the operator state acknowledged before rescaling with what was restored
    comparePartitionableState(expectedOpStatesBackend, actualOpStatesBackend);
    comparePartitionableState(expectedOpStatesRaw, actualOpStatesRaw);
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ArrayList(java.util.ArrayList) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) ChainedStateHandle(org.apache.flink.runtime.state.ChainedStateHandle) Random(java.util.Random) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Collections.singletonList(java.util.Collections.singletonList) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Collection(java.util.Collection) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) OperatorIDPair(org.apache.flink.runtime.OperatorIDPair)

Example 45 with KeyedStateHandle

use of org.apache.flink.runtime.state.KeyedStateHandle in project flink by apache.

the class ChangelogKeyedStateBackend method completeRestore.

/**
 * Folds the restored handles into a single {@link ChangelogSnapshotState} and sets
 * {@code materializedId} to one past the highest restored materialization ID.
 *
 * @param stateHandles the restored handles; {@code null} elements are skipped
 * @return a snapshot state built from all materialized and non-materialized handles, the
 *     writer's initial sequence number, and the highest materialization ID found (0 if none)
 */
private ChangelogSnapshotState completeRestore(Collection<ChangelogStateBackendHandle> stateHandles) {
    final List<KeyedStateHandle> materializedHandles = new ArrayList<>();
    final List<ChangelogStateHandle> nonMaterializedHandles = new ArrayList<>();
    long highestMaterializationId = 0L;

    for (ChangelogStateBackendHandle handle : stateHandles) {
        if (handle == null) {
            continue;
        }
        materializedHandles.addAll(handle.getMaterializedStateHandles());
        nonMaterializedHandles.addAll(handle.getNonMaterializedStateHandles());

        // keep the largest materialization ID seen so far to handle rescaling
        final long candidateId = handle.getMaterializationID();
        if (candidateId > highestMaterializationId) {
            highestMaterializationId = candidateId;
        }
    }

    this.materializedId = highestMaterializationId + 1;
    return new ChangelogSnapshotState(
            materializedHandles,
            nonMaterializedHandles,
            stateChangelogWriter.initialSequenceNumber(),
            highestMaterializationId);
}
Also used : ChangelogStateBackendHandle(org.apache.flink.runtime.state.changelog.ChangelogStateBackendHandle) ChangelogStateHandle(org.apache.flink.runtime.state.changelog.ChangelogStateHandle) ArrayList(java.util.ArrayList) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle)

Aggregations

KeyedStateHandle (org.apache.flink.runtime.state.KeyedStateHandle)49 Test (org.junit.Test)16 OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle)15 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)14 ArrayList (java.util.ArrayList)10 KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle)10 IncrementalRemoteKeyedStateHandle (org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle)9 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)8 SnapshotResult (org.apache.flink.runtime.state.SnapshotResult)8 HashMap (java.util.HashMap)7 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)7 OperatorStreamStateHandle (org.apache.flink.runtime.state.OperatorStreamStateHandle)7 StateObjectCollection (org.apache.flink.runtime.checkpoint.StateObjectCollection)6 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)6 List (java.util.List)5 CloseableRegistry (org.apache.flink.core.fs.CloseableRegistry)5 InputChannelStateHandle (org.apache.flink.runtime.state.InputChannelStateHandle)5 ResultSubpartitionStateHandle (org.apache.flink.runtime.state.ResultSubpartitionStateHandle)5 StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle)5 Map (java.util.Map)4