Search in sources :

Example 1 with TestCompletedCheckpointStorageLocation

use of org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation in project flink by apache.

the class CheckpointCoordinatorRestoringTest method testStateRecoveryWithTopologyChange.

/**
 * Restores a checkpoint taken on an old topology into a re-chained new topology and verifies
 * the state assigned to every subtask of both new job vertices.
 *
 * <p>Old topology: [operator1,operator2] * parallelism1 -> [operator3,operator4] * parallelism2
 *
 * <p>New topology: [operator5,operator1,operator2] * newParallelism1 -> [operator3,operator6] *
 * newParallelism2
 *
 * @param scaleType selects the parallelism of the second new vertex: 20 for
 *     INCREASE_PARALLELISM, 8 for DECREASE_PARALLELISM, otherwise the old value (10)
 * @throws Exception propagated from any of the helper, coordinator or comparison calls
 */
public void testStateRecoveryWithTopologyChange(TestScaleType scaleType) throws Exception {
    /*
     * Old topology
     * CHAIN(op1 -> op2) * parallelism1 -> CHAIN(op3 -> op4) * parallelism2
     */
    Tuple2<JobVertexID, OperatorID> id1 = generateIDPair();
    Tuple2<JobVertexID, OperatorID> id2 = generateIDPair();
    int parallelism1 = 10;
    int maxParallelism1 = 64;
    Tuple2<JobVertexID, OperatorID> id3 = generateIDPair();
    Tuple2<JobVertexID, OperatorID> id4 = generateIDPair();
    int parallelism2 = 10;
    int maxParallelism2 = 64;
    // Key-group ranges of the second vertex at its OLD parallelism; used to build the
    // checkpointed keyed state below.
    List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
    Map<OperatorID, OperatorState> operatorStates = new HashMap<>();
    // prepare vertex1 state: every subtask of operator1 and operator2 gets managed and raw
    // operator state; no keyed state is set for this vertex.
    for (Tuple2<JobVertexID, OperatorID> id : Arrays.asList(id1, id2)) {
        OperatorState taskState = new OperatorState(id.f1, parallelism1, maxParallelism1);
        operatorStates.put(id.f1, taskState);
        for (int index = 0; index < taskState.getParallelism(); index++) {
            OperatorSubtaskState subtaskState = OperatorSubtaskState.builder().setManagedOperatorState(generatePartitionableStateHandle(id.f0, index, 2, 8, false)).setRawOperatorState(generatePartitionableStateHandle(id.f0, index, 2, 8, true)).build();
            taskState.putState(index, subtaskState);
        }
    }
    // One inner list per old operator of vertex2, in the order (operator3, operator4); only
    // index 0 (operator3) is compared at the end of the test.
    List<List<ChainedStateHandle<OperatorStateHandle>>> expectedManagedOperatorStates = new ArrayList<>();
    List<List<ChainedStateHandle<OperatorStateHandle>>> expectedRawOperatorStates = new ArrayList<>();
    // prepare vertex2 state
    for (Tuple2<JobVertexID, OperatorID> id : Arrays.asList(id3, id4)) {
        OperatorState operatorState = new OperatorState(id.f1, parallelism2, maxParallelism2);
        operatorStates.put(id.f1, operatorState);
        List<ChainedStateHandle<OperatorStateHandle>> expectedManagedOperatorState = new ArrayList<>();
        List<ChainedStateHandle<OperatorStateHandle>> expectedRawOperatorState = new ArrayList<>();
        expectedManagedOperatorStates.add(expectedManagedOperatorState);
        expectedRawOperatorStates.add(expectedRawOperatorState);
        for (int index = 0; index < operatorState.getParallelism(); index++) {
            final OperatorSubtaskState.Builder stateBuilder = OperatorSubtaskState.builder();
            OperatorStateHandle subManagedOperatorState = generateChainedPartitionableStateHandle(id.f0, index, 2, 8, false).get(0);
            OperatorStateHandle subRawOperatorState = generateChainedPartitionableStateHandle(id.f0, index, 2, 8, true).get(0);
            // Both guards test the same condition: keyed state (managed and raw) is attached
            // only to operator3, never to operator4.
            if (id.f0.equals(id3.f0)) {
                stateBuilder.setManagedKeyedState(generateKeyGroupState(id.f0, keyGroupPartitions2.get(index), false));
            }
            if (id.f0.equals(id3.f0)) {
                stateBuilder.setRawKeyedState(generateKeyGroupState(id.f0, keyGroupPartitions2.get(index), true));
            }
            expectedManagedOperatorState.add(ChainedStateHandle.wrapSingleHandle(subManagedOperatorState));
            expectedRawOperatorState.add(ChainedStateHandle.wrapSingleHandle(subRawOperatorState));
            OperatorSubtaskState subtaskState = stateBuilder.setManagedOperatorState(subManagedOperatorState).setRawOperatorState(subRawOperatorState).build();
            operatorState.putState(index, subtaskState);
        }
    }
    /*
     * New topology
     * CHAIN(op5 -> op1 -> op2) * newParallelism1 -> CHAIN(op3 -> op6) * newParallelism2
     */
    Tuple2<JobVertexID, OperatorID> id5 = generateIDPair();
    int newParallelism1 = 10;
    Tuple2<JobVertexID, OperatorID> id6 = generateIDPair();
    // Only the second vertex is rescaled; the first keeps parallelism 10.
    int newParallelism2 = parallelism2;
    if (scaleType == TestScaleType.INCREASE_PARALLELISM) {
        newParallelism2 = 20;
    } else if (scaleType == TestScaleType.DECREASE_PARALLELISM) {
        newParallelism2 = 8;
    }
    List<KeyGroupRange> newKeyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, newParallelism2);
    // Operator IDs are listed as (op2, op1, op5) and (op6, op3); the assertions below index
    // into these lists and treat the LAST entry as the head operator of the chain.
    ExecutionGraph newGraph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(id5.f0, newParallelism1, maxParallelism1, Stream.of(id2.f1, id1.f1, id5.f1).map(OperatorIDPair::generatedIDOnly).collect(Collectors.toList()), true).addJobVertex(id3.f0, newParallelism2, maxParallelism2, Stream.of(id6.f1, id3.f1).map(OperatorIDPair::generatedIDOnly).collect(Collectors.toList()), true).build();
    ExecutionJobVertex newJobVertex1 = newGraph.getJobVertex(id5.f0);
    ExecutionJobVertex newJobVertex2 = newGraph.getJobVertex(id3.f0);
    Set<ExecutionJobVertex> tasks = new HashSet<>();
    tasks.add(newJobVertex1);
    tasks.add(newJobVertex2);
    // Checkpoint id 2 is what the restore assertions below expect (2L).
    CompletedCheckpoint completedCheckpoint = new CompletedCheckpoint(newGraph.getJobID(), 2, System.currentTimeMillis(), System.currentTimeMillis() + 3000, operatorStates, Collections.<MasterState>emptyList(), CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION), new TestCompletedCheckpointStorageLocation());
    // set up the coordinator and validate the initial state
    SharedStateRegistry sharedStateRegistry = SharedStateRegistry.DEFAULT_FACTORY.create(Executors.directExecutor(), emptyList());
    CheckpointCoordinator coord = new CheckpointCoordinatorBuilder().setExecutionGraph(newGraph).setCompletedCheckpointStore(storeFor(sharedStateRegistry, () -> {
    }, completedCheckpoint)).setTimer(manuallyTriggeredScheduledExecutor).build();
    // NOTE(review): the second argument is presumably "allow non-restored state"; the
    // checkpoint contains state for operator4, which has no operator in the new graph —
    // confirm against the coordinator's signature.
    coord.restoreLatestCheckpointedStateToAll(tasks, true);
    // Verify the first new vertex: CHAIN(op5 -> op1 -> op2).
    for (int i = 0; i < newJobVertex1.getParallelism(); i++) {
        final List<OperatorIDPair> operatorIDs = newJobVertex1.getOperatorIDs();
        JobManagerTaskRestore taskRestore = newJobVertex1.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskRestore();
        Assert.assertEquals(2L, taskRestore.getRestoreCheckpointId());
        TaskStateSnapshot stateSnapshot = taskRestore.getTaskStateSnapshot();
        // The head operator (last list entry, operator5) must not receive keyed state.
        OperatorSubtaskState headOpState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIDs.size() - 1).getGeneratedOperatorID());
        assertTrue(headOpState.getManagedKeyedState().isEmpty());
        assertTrue(headOpState.getRawKeyedState().isEmpty());
        // operator5: new operator, no operator state in the checkpoint
        {
            int operatorIndexInChain = 2;
            OperatorSubtaskState opState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIndexInChain).getGeneratedOperatorID());
            assertTrue(opState.getManagedOperatorState().isEmpty());
            assertTrue(opState.getRawOperatorState().isEmpty());
        }
        // operator1: expects exactly the handle content generated for subtask i above
        {
            int operatorIndexInChain = 1;
            OperatorSubtaskState opState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIndexInChain).getGeneratedOperatorID());
            OperatorStateHandle expectedManagedOpState = generatePartitionableStateHandle(id1.f0, i, 2, 8, false);
            OperatorStateHandle expectedRawOpState = generatePartitionableStateHandle(id1.f0, i, 2, 8, true);
            Collection<OperatorStateHandle> managedOperatorState = opState.getManagedOperatorState();
            assertEquals(1, managedOperatorState.size());
            assertTrue(CommonTestUtils.isStreamContentEqual(expectedManagedOpState.openInputStream(), managedOperatorState.iterator().next().openInputStream()));
            Collection<OperatorStateHandle> rawOperatorState = opState.getRawOperatorState();
            assertEquals(1, rawOperatorState.size());
            assertTrue(CommonTestUtils.isStreamContentEqual(expectedRawOpState.openInputStream(), rawOperatorState.iterator().next().openInputStream()));
        }
        // operator2: same checks as operator1, against the handles generated for id2
        {
            int operatorIndexInChain = 0;
            OperatorSubtaskState opState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIndexInChain).getGeneratedOperatorID());
            OperatorStateHandle expectedManagedOpState = generatePartitionableStateHandle(id2.f0, i, 2, 8, false);
            OperatorStateHandle expectedRawOpState = generatePartitionableStateHandle(id2.f0, i, 2, 8, true);
            Collection<OperatorStateHandle> managedOperatorState = opState.getManagedOperatorState();
            assertEquals(1, managedOperatorState.size());
            assertTrue(CommonTestUtils.isStreamContentEqual(expectedManagedOpState.openInputStream(), managedOperatorState.iterator().next().openInputStream()));
            Collection<OperatorStateHandle> rawOperatorState = opState.getRawOperatorState();
            assertEquals(1, rawOperatorState.size());
            assertTrue(CommonTestUtils.isStreamContentEqual(expectedRawOpState.openInputStream(), rawOperatorState.iterator().next().openInputStream()));
        }
    }
    // Verify the second new vertex: CHAIN(op3 -> op6). Operator3's restored operator state is
    // collected per subtask and compared in bulk after the loop.
    List<List<Collection<OperatorStateHandle>>> actualManagedOperatorStates = new ArrayList<>(newJobVertex2.getParallelism());
    List<List<Collection<OperatorStateHandle>>> actualRawOperatorStates = new ArrayList<>(newJobVertex2.getParallelism());
    for (int i = 0; i < newJobVertex2.getParallelism(); i++) {
        final List<OperatorIDPair> operatorIDs = newJobVertex2.getOperatorIDs();
        JobManagerTaskRestore taskRestore = newJobVertex2.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskRestore();
        Assert.assertEquals(2L, taskRestore.getRestoreCheckpointId());
        TaskStateSnapshot stateSnapshot = taskRestore.getTaskStateSnapshot();
        // operator 3
        {
            int operatorIndexInChain = 1;
            OperatorSubtaskState opState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIndexInChain).getGeneratedOperatorID());
            List<Collection<OperatorStateHandle>> actualSubManagedOperatorState = new ArrayList<>(1);
            actualSubManagedOperatorState.add(opState.getManagedOperatorState());
            List<Collection<OperatorStateHandle>> actualSubRawOperatorState = new ArrayList<>(1);
            actualSubRawOperatorState.add(opState.getRawOperatorState());
            actualManagedOperatorStates.add(actualSubManagedOperatorState);
            actualRawOperatorStates.add(actualSubRawOperatorState);
        }
        // operator 6: new operator, no operator state in the checkpoint
        {
            int operatorIndexInChain = 0;
            OperatorSubtaskState opState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIndexInChain).getGeneratedOperatorID());
            assertTrue(opState.getManagedOperatorState().isEmpty());
            assertTrue(opState.getRawOperatorState().isEmpty());
        }
        // Expected keyed state is regenerated for the key-group range of subtask i at the NEW
        // parallelism and compared with what the head operator (operator3) received.
        KeyGroupsStateHandle originalKeyedStateBackend = generateKeyGroupState(id3.f0, newKeyGroupPartitions2.get(i), false);
        KeyGroupsStateHandle originalKeyedStateRaw = generateKeyGroupState(id3.f0, newKeyGroupPartitions2.get(i), true);
        OperatorSubtaskState headOpState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIDs.size() - 1).getGeneratedOperatorID());
        Collection<KeyedStateHandle> keyedStateBackend = headOpState.getManagedKeyedState();
        Collection<KeyedStateHandle> keyGroupStateRaw = headOpState.getRawKeyedState();
        compareKeyedState(singletonList(originalKeyedStateBackend), keyedStateBackend);
        compareKeyedState(singletonList(originalKeyedStateRaw), keyGroupStateRaw);
    }
    // Index 0 of the expected lists holds operator3's state (first entry of (id3, id4)).
    comparePartitionableState(expectedManagedOperatorStates.get(0), actualManagedOperatorStates);
    comparePartitionableState(expectedRawOperatorStates.get(0), actualRawOperatorStates);
}
Also used : HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ArrayList(java.util.ArrayList) TestCompletedCheckpointStorageLocation(org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) ChainedStateHandle(org.apache.flink.runtime.state.ChainedStateHandle) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) SharedStateRegistry(org.apache.flink.runtime.state.SharedStateRegistry) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Collections.singletonList(java.util.Collections.singletonList) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Collection(java.util.Collection) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) OperatorIDPair(org.apache.flink.runtime.OperatorIDPair)

Example 2 with TestCompletedCheckpointStorageLocation

use of org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation in project flink by apache.

the class CheckpointCoordinatorRestoringTest method testRestoreFinishedStateWithoutInFlightData.

/**
 * Restores a checkpoint (id 2) whose only operator state is a {@code
 * FullyFinishedOperatorState}, with the coordinator configured to ignore in-flight data of
 * checkpoint 2, and asserts that the restored task snapshot reports the task as deployed as
 * finished.
 */
@Test
public void testRestoreFinishedStateWithoutInFlightData() throws Exception {
    // given: a one-vertex graph whose single operator is recorded as fully finished.
    final OperatorIDPair finishedOperator = OperatorIDPair.generatedIDOnly(new OperatorID());
    final JobVertexID vertexId = new JobVertexID();
    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId, 1, 1, singletonList(finishedOperator), true)
                    .build();
    final CompletedCheckpointStore checkpointStore = new EmbeddedCompletedCheckpointStore();

    final Map<OperatorID, OperatorState> statesByOperator = new HashMap<>();
    final OperatorID finishedOperatorId = finishedOperator.getGeneratedOperatorID();
    statesByOperator.put(
            finishedOperatorId, new FullyFinishedOperatorState(finishedOperatorId, 1, 1));

    final long triggerTimestamp = System.currentTimeMillis();
    final long completionTimestamp = System.currentTimeMillis() + 3000;
    final CheckpointProperties checkpointProperties =
            CheckpointProperties.forCheckpoint(
                    CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION);
    final CompletedCheckpoint finishedCheckpoint =
            new CompletedCheckpoint(
                    executionGraph.getJobID(),
                    2,
                    triggerTimestamp,
                    completionTimestamp,
                    statesByOperator,
                    Collections.emptyList(),
                    checkpointProperties,
                    new TestCompletedCheckpointStorageLocation());
    checkpointStore.addCheckpointAndSubsumeOldestOne(
            finishedCheckpoint, new CheckpointsCleaner(), () -> {});

    // when: the coordinator ignores in-flight data of checkpoint 2 and restores from the store.
    final CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setCheckpointCoordinatorConfiguration(
                            new CheckpointCoordinatorConfigurationBuilder()
                                    .setCheckpointIdOfIgnoredInFlightData(2)
                                    .build())
                    .setCompletedCheckpointStore(checkpointStore)
                    .build();
    final ExecutionJobVertex restoredVertex = executionGraph.getJobVertex(vertexId);
    coordinator.restoreInitialCheckpointIfPresent(Collections.singleton(restoredVertex));

    // then: the only subtask is flagged as deployed-as-finished.
    final JobManagerTaskRestore taskRestore =
            restoredVertex.getTaskVertices()[0].getCurrentExecutionAttempt().getTaskRestore();
    final TaskStateSnapshot snapshot = taskRestore.getTaskStateSnapshot();
    assertTrue(snapshot.isTaskDeployedAsFinished());
}
Also used : HashMap(java.util.HashMap) CheckpointCoordinatorConfigurationBuilder(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TestCompletedCheckpointStorageLocation(org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) OperatorIDPair(org.apache.flink.runtime.OperatorIDPair) Test(org.junit.Test)

Example 3 with TestCompletedCheckpointStorageLocation

use of org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation in project flink by apache.

the class CheckpointCoordinatorRestoringTest method testJobGraphModificationsAreCheckedForInitialCheckpoint.

/**
 * Checks that restoring the initial checkpoint invokes {@code validateOperatorsFinishedState()}
 * on the checker produced by the configured vertex-finished-state-checker factory.
 */
@Test
public void testJobGraphModificationsAreCheckedForInitialCheckpoint() throws Exception {
    final JobVertexID vertexId = new JobVertexID();
    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId, 1, 1)
                    .build();

    // Store a stateless checkpoint (id 2) so that there is something to restore from.
    final CompletedCheckpointStore checkpointStore = new EmbeddedCompletedCheckpointStore();
    final long triggerTimestamp = System.currentTimeMillis();
    final long completionTimestamp = System.currentTimeMillis() + 3000;
    final CompletedCheckpoint emptyCheckpoint =
            new CompletedCheckpoint(
                    executionGraph.getJobID(),
                    2,
                    triggerTimestamp,
                    completionTimestamp,
                    Collections.emptyMap(),
                    Collections.emptyList(),
                    CheckpointProperties.forCheckpoint(
                            CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION),
                    new TestCompletedCheckpointStorageLocation());
    checkpointStore.addCheckpointAndSubsumeOldestOne(
            emptyCheckpoint, new CheckpointsCleaner(), () -> {});

    // Flag flipped by the overriding checker below; stays false if validation is skipped.
    final BooleanValue validationInvoked = new BooleanValue(false);
    final CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setCompletedCheckpointStore(checkpointStore)
                    .setVertexFinishedStateCheckerFactory(
                            (vertices, states) ->
                                    new VertexFinishedStateChecker(vertices, states) {

                                        @Override
                                        public void validateOperatorsFinishedState() {
                                            validationInvoked.set(true);
                                        }
                                    })
                    .build();

    final Set<ExecutionJobVertex> allVertices =
            new HashSet<>(executionGraph.getAllVertices().values());
    coordinator.restoreInitialCheckpointIfPresent(allVertices);

    assertTrue(
            "The finished states should be checked when job is restored on startup",
            validationInvoked.get());
}
Also used : CheckpointCoordinatorTestingUtils.generatePartitionableStateHandle(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.generatePartitionableStateHandle) BooleanValue(org.apache.flink.types.BooleanValue) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Arrays(java.util.Arrays) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) ChainedStateHandle(org.apache.flink.runtime.state.ChainedStateHandle) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ArgumentMatchers.eq(org.mockito.ArgumentMatchers.eq) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) SharedStateRegistry(org.apache.flink.runtime.state.SharedStateRegistry) Random(java.util.Random) CheckpointCoordinatorTestingUtils.mockSubtaskState(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.mockSubtaskState) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) Collections.singletonList(java.util.Collections.singletonList) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) StateHandleDummyUtil.createNewResultSubpartitionStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewResultSubpartitionStateHandle) Assert.fail(org.junit.Assert.fail) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) CheckpointCoordinatorConfigurationBuilder(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder) Collections.emptyList(java.util.Collections.emptyList) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) Collection(java.util.Collection) Set(java.util.Set) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) 
StateHandleDummyUtil.createNewInputChannelStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewInputChannelStateHandle) Stream(java.util.stream.Stream) Assert.assertFalse(org.junit.Assert.assertFalse) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) TestCompletedCheckpointStorageLocation(org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) CheckpointCoordinatorTestingUtils.generateChainedPartitionableStateHandle(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.generateChainedPartitionableStateHandle) JobStatus(org.apache.flink.api.common.JobStatus) ArrayList(java.util.ArrayList) Execution(org.apache.flink.runtime.executiongraph.Execution) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) HashSet(java.util.HashSet) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) Iterables(org.apache.flink.shaded.guava30.com.google.common.collect.Iterables) Before(org.junit.Before) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Mockito.times(org.mockito.Mockito.times) OperatorIDPair(org.apache.flink.runtime.OperatorIDPair) CheckpointCoordinatorTestingUtils.generateKeyGroupState(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.generateKeyGroupState) File(java.io.File) Mockito.verify(org.mockito.Mockito.verify) CheckpointCoordinatorTestingUtils.comparePartitionableState(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.comparePartitionableState) Executors(org.apache.flink.util.concurrent.Executors) Rule(org.junit.Rule) 
CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Assert(org.junit.Assert) CheckpointCoordinatorTestingUtils.compareKeyedState(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.compareKeyedState) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) Assert.assertEquals(org.junit.Assert.assertEquals) CheckpointCoordinatorTestingUtils.verifyStateRestore(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.verifyStateRestore) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TestCompletedCheckpointStorageLocation(org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) BooleanValue(org.apache.flink.types.BooleanValue) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 4 with TestCompletedCheckpointStorageLocation

use of org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation in project flink by apache.

the class CheckpointCoordinatorMasterHooksTest method testHooksAreCalledOnRestore.

/**
 * Registers two stateful master hooks and one stateless hook, restores a checkpoint carrying
 * serialized state for the two stateful hooks, and verifies that every hook's {@code
 * restoreCheckpoint} is called exactly once with the checkpoint id and its own state (or
 * {@code null} for the stateless hook).
 */
@Test
public void testHooksAreCalledOnRestore() throws Exception {
    final String id1 = "id1";
    final String id2 = "id2";

    // Hook states and their serialized forms as stored in the checkpoint.
    final String state1 = "the-test-string-state";
    final byte[] serializedStringState = new StringSerializer().serialize(state1);
    final long state2 = 987654321L;
    final byte[] serializedLongState = new LongSerializer().serialize(state2);
    final MasterState firstMasterState =
            new MasterState(id1, serializedStringState, StringSerializer.VERSION);
    final MasterState secondMasterState =
            new MasterState(id2, serializedLongState, LongSerializer.VERSION);
    final List<MasterState> masterHookStates =
            Arrays.asList(firstMasterState, secondMasterState);

    // Stateful hooks: triggering a checkpoint is not part of this test and would throw.
    final MasterTriggerRestoreHook<String> stringHook =
            mockGeneric(MasterTriggerRestoreHook.class);
    when(stringHook.getIdentifier()).thenReturn(id1);
    when(stringHook.createCheckpointDataSerializer()).thenReturn(new StringSerializer());
    when(stringHook.triggerCheckpoint(anyLong(), anyLong(), any(Executor.class)))
            .thenThrow(new Exception("not expected"));

    final MasterTriggerRestoreHook<Long> longHook =
            mockGeneric(MasterTriggerRestoreHook.class);
    when(longHook.getIdentifier()).thenReturn(id2);
    when(longHook.createCheckpointDataSerializer()).thenReturn(new LongSerializer());
    when(longHook.triggerCheckpoint(anyLong(), anyLong(), any(Executor.class)))
            .thenThrow(new Exception("not expected"));

    // Stateless hook: only its identifier is stubbed.
    final MasterTriggerRestoreHook<Void> voidHook =
            mockGeneric(MasterTriggerRestoreHook.class);
    when(voidHook.getIdentifier()).thenReturn("some-id");

    final JobID jobId = new JobID();
    final long checkpointId = 13L;
    final CompletedCheckpoint storedCheckpoint =
            new CompletedCheckpoint(
                    jobId,
                    checkpointId,
                    123L,
                    125L,
                    Collections.<OperatorID, OperatorState>emptyMap(),
                    masterHookStates,
                    CheckpointProperties.forCheckpoint(
                            CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION),
                    new TestCompletedCheckpointStorageLocation());

    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(new JobVertexID())
                    .build();
    final CheckpointCoordinator coordinator = instantiateCheckpointCoordinator(executionGraph);
    coordinator.addMasterHook(stringHook);
    coordinator.addMasterHook(voidHook);
    coordinator.addMasterHook(longHook);

    coordinator
            .getCheckpointStore()
            .addCheckpointAndSubsumeOldestOne(
                    storedCheckpoint, new CheckpointsCleaner(), () -> {});
    coordinator.restoreLatestCheckpointedStateToAll(Collections.emptySet(), false);

    // Each hook is restored exactly once with its own deserialized state.
    verify(stringHook, times(1)).restoreCheckpoint(eq(checkpointId), eq(state1));
    verify(longHook, times(1)).restoreCheckpoint(eq(checkpointId), eq(state2));
    verify(voidHook, times(1)).restoreCheckpoint(eq(checkpointId), isNull(Void.class));
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TestCompletedCheckpointStorageLocation(org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation) IOException(java.io.IOException) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) Executor(java.util.concurrent.Executor) Mockito.anyLong(org.mockito.Mockito.anyLong) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) StringSerializer(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.StringSerializer) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 5 with TestCompletedCheckpointStorageLocation

use of org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation in project flink by apache.

the class CheckpointStateRestoreTest method testNonRestoredState.

/**
 * Exercises the "allow non restored state" flag of {@code restoreLatestCheckpointedStateToAll}.
 *
 * <p>The flag is only relevant for state contained in the checkpoint: a vertex without state
 * in the checkpoint restores with either flag value, while checkpointed state without a
 * matching vertex restores only when the flag is {@code true} and otherwise raises an {@link
 * IllegalStateException}.
 */
@Test
public void testNonRestoredState() throws Exception {
    // --- (1) Build the tasks the checkpoint is restored onto ---
    JobVertexID jobVertexId1 = new JobVertexID();
    JobVertexID jobVertexId2 = new JobVertexID();
    OperatorID operatorId1 = OperatorID.fromJobVertexID(jobVertexId1);

    // first JobVertex: three subtasks
    ExecutionVertex subtask1Of1 = mockExecutionVertex(mockExecution(), jobVertexId1, 0, 3);
    ExecutionVertex subtask2Of1 = mockExecutionVertex(mockExecution(), jobVertexId1, 1, 3);
    ExecutionVertex subtask3Of1 = mockExecutionVertex(mockExecution(), jobVertexId1, 2, 3);
    // second JobVertex: two subtasks
    ExecutionVertex subtask1Of2 = mockExecutionVertex(mockExecution(), jobVertexId2, 0, 2);
    ExecutionVertex subtask2Of2 = mockExecutionVertex(mockExecution(), jobVertexId2, 1, 2);

    ExecutionJobVertex jobVertex1 =
            mockExecutionJobVertex(
                    jobVertexId1,
                    new ExecutionVertex[] { subtask1Of1, subtask2Of1, subtask3Of1 });
    ExecutionJobVertex jobVertex2 =
            mockExecutionJobVertex(
                    jobVertexId2, new ExecutionVertex[] { subtask1Of2, subtask2Of2 });
    Set<ExecutionJobVertex> tasks = new HashSet<>();
    tasks.add(jobVertex1);
    tasks.add(jobVertex2);

    CheckpointCoordinator coordinator = new CheckpointCoordinatorBuilder().build();

    // --- (2) Checkpoint has no state for the second jobVertex (restore should work) ---
    Map<OperatorID, OperatorState> checkpointTaskStates = new HashMap<>();
    OperatorState stateOfVertex1 = new OperatorState(operatorId1, 3, 3);
    for (int subtask = 0; subtask < 3; subtask++) {
        stateOfVertex1.putState(subtask, OperatorSubtaskState.builder().build());
    }
    checkpointTaskStates.put(operatorId1, stateOfVertex1);

    CompletedCheckpoint partialCheckpoint =
            new CompletedCheckpoint(
                    new JobID(),
                    0,
                    1,
                    2,
                    new HashMap<>(checkpointTaskStates),
                    Collections.<MasterState>emptyList(),
                    CheckpointProperties.forCheckpoint(
                            CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION),
                    new TestCompletedCheckpointStorageLocation());
    coordinator
            .getCheckpointStore()
            .addCheckpointAndSubsumeOldestOne(
                    partialCheckpoint, new CheckpointsCleaner(), () -> {});

    assertTrue(coordinator.restoreLatestCheckpointedStateToAll(tasks, false));
    assertTrue(coordinator.restoreLatestCheckpointedStateToAll(tasks, true));

    // --- (3) Checkpoint contains state for an operator that has no JobVertex ---
    JobVertexID newJobVertexID = new JobVertexID();
    OperatorID newOperatorID = OperatorID.fromJobVertexID(newJobVertexID);
    // No task exists for this operator state.
    OperatorState orphanedState = new OperatorState(newOperatorID, 1, 1);
    orphanedState.putState(0, OperatorSubtaskState.builder().build());
    checkpointTaskStates.put(newOperatorID, orphanedState);

    CompletedCheckpoint checkpointWithOrphanedState =
            new CompletedCheckpoint(
                    new JobID(),
                    1,
                    2,
                    3,
                    new HashMap<>(checkpointTaskStates),
                    Collections.<MasterState>emptyList(),
                    CheckpointProperties.forCheckpoint(
                            CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION),
                    new TestCompletedCheckpointStorageLocation());
    coordinator
            .getCheckpointStore()
            .addCheckpointAndSubsumeOldestOne(
                    checkpointWithOrphanedState, new CheckpointsCleaner(), () -> {});

    // (i) Non restored state allowed: restore succeeds.
    final boolean restored = coordinator.restoreLatestCheckpointedStateToAll(tasks, true);
    assertTrue(restored);

    // (ii) Non restored state not allowed: restore must fail.
    try {
        coordinator.restoreLatestCheckpointedStateToAll(tasks, false);
        fail("Did not throw the expected Exception.");
    } catch (IllegalStateException ignored) {
        // expected: the orphaned operator state cannot be mapped to any task
    }
}
Also used : HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TestCompletedCheckpointStorageLocation(org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) JobID(org.apache.flink.api.common.JobID) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

TestCompletedCheckpointStorageLocation (org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation)18 Test (org.junit.Test)15 JobID (org.apache.flink.api.common.JobID)14 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)8 HashMap (java.util.HashMap)7 ArrayList (java.util.ArrayList)6 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)6 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)5 CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder)4 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)4 SharedStateRegistry (org.apache.flink.runtime.state.SharedStateRegistry)4 HashSet (java.util.HashSet)3 OperatorIDPair (org.apache.flink.runtime.OperatorIDPair)3 ManuallyTriggeredScheduledExecutor (org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor)3 IOException (java.io.IOException)2 Collection (java.util.Collection)2 Collections.emptyList (java.util.Collections.emptyList)2 Collections.singletonList (java.util.Collections.singletonList)2 List (java.util.List)2 CountDownLatch (java.util.concurrent.CountDownLatch)2