Search in sources :

Example 16 with SharedStateRegistry

use of org.apache.flink.runtime.state.SharedStateRegistry in project flink by apache.

the class EmbeddedRocksDBStateBackendTest method testSharedIncrementalStateDeRegistration.

/**
 * Takes three snapshots of a keyed backend and checks, for each resulting incremental state
 * handle, that every entry of its shared state is registered with the (spied) shared state
 * registry under the checkpoint's id. Older handles are discarded as newer ones arrive.
 *
 * <p>The test body is skipped entirely when incremental checkpointing is not enabled.
 */
@Test
public void testSharedIncrementalStateDeRegistration() throws Exception {
    if (!enableIncrementalCheckpointing) {
        return;
    }

    CheckpointableKeyedStateBackend<Integer> backend = createKeyedBackend(IntSerializer.INSTANCE);
    try {
        ValueStateDescriptor<String> descriptor = new ValueStateDescriptor<>("id", String.class, null);
        descriptor.initializeSerializerUnlessSet(new ExecutionConfig());
        ValueState<String> valueState = backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, descriptor);

        Queue<IncrementalRemoteKeyedStateHandle> pendingHandles = new LinkedList<>();
        SharedStateRegistry registry = spy(new SharedStateRegistryImpl());

        for (int id = 0; id < 3; ++id) {
            // start every round with a clean spy so verify() only sees this round's calls
            reset(registry);
            backend.setCurrentKey(id);
            valueState.update("Hello-" + id);

            RunnableFuture<SnapshotResult<KeyedStateHandle>> snapshotFuture = backend.snapshot(id, id, createStreamFactory(), CheckpointOptions.forCheckpointWithDefaultLocation());
            snapshotFuture.run();
            IncrementalRemoteKeyedStateHandle handle = (IncrementalRemoteKeyedStateHandle) snapshotFuture.get().getJobManagerOwnedSnapshot();

            // copy the shared state before registration, then verify against the copy
            Map<StateHandleID, StreamStateHandle> sharedBeforeRegistration = new HashMap<>(handle.getSharedState());
            handle.registerSharedStates(registry, id);
            for (Map.Entry<StateHandleID, StreamStateHandle> entry : sharedBeforeRegistration.entrySet()) {
                verify(registry).registerReference(handle.createSharedStateRegistryKeyFromFileName(entry.getKey()), entry.getValue(), id);
            }

            pendingHandles.add(handle);
            ((CheckpointListener) backend).notifyCheckpointComplete(id);

            // keep at most one handle alive: drop the oldest once a newer one exists
            if (pendingHandles.size() > 1) {
                pendingHandles.remove().discardState();
            }
        }

        // discard whatever is still queued
        while (!pendingHandles.isEmpty()) {
            reset(registry);
            pendingHandles.remove().discardState();
        }
    } finally {
        IOUtils.closeQuietly(backend);
        backend.dispose();
    }
}
Also used : SnapshotResult(org.apache.flink.runtime.state.SnapshotResult) HashMap(java.util.HashMap) CheckpointListener(org.apache.flink.api.common.state.CheckpointListener) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) IncrementalRemoteKeyedStateHandle(org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) LinkedList(java.util.LinkedList) SharedStateRegistry(org.apache.flink.runtime.state.SharedStateRegistry) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) StateHandleID(org.apache.flink.runtime.state.StateHandleID) SharedStateRegistryImpl(org.apache.flink.runtime.state.SharedStateRegistryImpl) IncrementalRemoteKeyedStateHandle(org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 17 with SharedStateRegistry

use of org.apache.flink.runtime.state.SharedStateRegistry in project flink by apache.

the class CheckpointCoordinatorTest method testSharedStateRegistrationOnRestore.

/**
 * Performs three checkpoints through a coordinator backed by a first {@link SharedStateRegistry},
 * then shuts the store down, builds a second registry/store/coordinator from the remaining
 * checkpoints and restores from it.
 *
 * <p>Along the way Mockito verifications check which keyed state handles were registered with
 * which registry instance and which shared stream state handles have (not) been discarded.
 */
@Test
public void testSharedStateRegistrationOnRestore() throws Exception {
    JobVertexID jobVertexID1 = new JobVertexID();
    int parallelism1 = 2;
    int maxParallelism1 = 4;
    ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1, parallelism1, maxParallelism1).build();
    ExecutionJobVertex jobVertex1 = graph.getJobVertex(jobVertexID1);
    // the first registry and store start out without any completed checkpoints
    List<CompletedCheckpoint> checkpoints = Collections.emptyList();
    SharedStateRegistry firstInstance = SharedStateRegistry.DEFAULT_FACTORY.create(org.apache.flink.util.concurrent.Executors.directExecutor(), checkpoints);
    final EmbeddedCompletedCheckpointStore store = new EmbeddedCompletedCheckpointStore(10, checkpoints, firstInstance);
    // set up the coordinator and validate the initial state
    final CheckpointCoordinatorBuilder coordinatorBuilder = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setTimer(manuallyTriggeredScheduledExecutor);
    final CheckpointCoordinator coordinator = coordinatorBuilder.setCompletedCheckpointStore(store).build();
    final int numCheckpoints = 3;
    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    // NOTE(review): performIncrementalCheckpoint is defined elsewhere; presumably it triggers and
    // acknowledges one checkpoint whose handles are Mockito mocks/spies (they are verify()'d below)
    for (int i = 0; i < numCheckpoints; ++i) {
        performIncrementalCheckpoint(graph.getJobID(), coordinator, jobVertex1, keyGroupPartitions1, i);
    }
    List<CompletedCheckpoint> completedCheckpoints = coordinator.getSuccessfulCheckpoints();
    assertEquals(numCheckpoints, completedCheckpoints.size());
    // total number of shared stream state handles seen across all checkpoints
    int sharedHandleCount = 0;
    // one map of shared handles per checkpoint, indexed by iteration position (cp)
    List<Map<StateHandleID, StreamStateHandle>> sharedHandlesByCheckpoint = new ArrayList<>(numCheckpoints);
    for (int i = 0; i < numCheckpoints; ++i) {
        sharedHandlesByCheckpoint.add(new HashMap<>(2));
    }
    // position of the current checkpoint within completedCheckpoints
    int cp = 0;
    for (CompletedCheckpoint completedCheckpoint : completedCheckpoints) {
        for (OperatorState taskState : completedCheckpoint.getOperatorStates().values()) {
            for (OperatorSubtaskState subtaskState : taskState.getStates()) {
                for (KeyedStateHandle keyedStateHandle : subtaskState.getManagedKeyedState()) {
                    // test we are once registered with the current registry
                    verify(keyedStateHandle, times(1)).registerSharedStates(firstInstance, completedCheckpoint.getCheckpointID());
                    IncrementalRemoteKeyedStateHandle incrementalKeyedStateHandle = (IncrementalRemoteKeyedStateHandle) keyedStateHandle;
                    // remember this checkpoint's shared handles for the discard checks further down
                    sharedHandlesByCheckpoint.get(cp).putAll(incrementalKeyedStateHandle.getSharedState());
                    for (StreamStateHandle streamStateHandle : incrementalKeyedStateHandle.getSharedState().values()) {
                        // no shared handle may be a placeholder, and none may have been discarded yet
                        assertTrue(!(streamStateHandle instanceof PlaceholderStreamStateHandle));
                        verify(streamStateHandle, never()).discardState();
                        ++sharedHandleCount;
                    }
                    // private state and the meta handle must be untouched as well
                    for (StreamStateHandle streamStateHandle : incrementalKeyedStateHandle.getPrivateState().values()) {
                        verify(streamStateHandle, never()).discardState();
                    }
                    verify(incrementalKeyedStateHandle.getMetaStateHandle(), never()).discardState();
                }
                verify(subtaskState, never()).discardState();
            }
        }
        ++cp;
    }
    // 2 (parallelism) x (1 (CP0) + 2 (CP1) + 2 (CP2)) = 10
    assertEquals(10, sharedHandleCount);
    // discard CP0
    store.removeOldestCheckpoint();
    // after removing the oldest checkpoint, none of the recorded shared handles (of any
    // checkpoint) is expected to have been discarded
    for (Map<StateHandleID, StreamStateHandle> cpList : sharedHandlesByCheckpoint) {
        for (StreamStateHandle streamStateHandle : cpList.values()) {
            verify(streamStateHandle, never()).discardState();
        }
    }
    // shutdown the store
    store.shutdown(JobStatus.SUSPENDED, new CheckpointsCleaner());
    // restore the store
    Set<ExecutionJobVertex> tasks = new HashSet<>();
    tasks.add(jobVertex1);
    assertEquals(JobStatus.SUSPENDED, store.getShutdownStatus().orElse(null));
    // the second registry and store are seeded with whatever the first store still holds
    SharedStateRegistry secondInstance = SharedStateRegistry.DEFAULT_FACTORY.create(org.apache.flink.util.concurrent.Executors.directExecutor(), store.getAllCheckpoints());
    final EmbeddedCompletedCheckpointStore secondStore = new EmbeddedCompletedCheckpointStore(10, store.getAllCheckpoints(), secondInstance);
    // reuses the same builder, so graph and timer are shared with the first coordinator
    final CheckpointCoordinator secondCoordinator = coordinatorBuilder.setCompletedCheckpointStore(secondStore).build();
    assertTrue(secondCoordinator.restoreLatestCheckpointedStateToAll(tasks, false));
    // validate that all shared states are registered again after the recovery.
    cp = 0;
    for (CompletedCheckpoint completedCheckpoint : completedCheckpoints) {
        for (OperatorState taskState : completedCheckpoint.getOperatorStates().values()) {
            for (OperatorSubtaskState subtaskState : taskState.getStates()) {
                for (KeyedStateHandle keyedStateHandle : subtaskState.getManagedKeyedState()) {
                    VerificationMode verificationMode;
                    // handles of the later checkpoints must be registered exactly once with the new
                    // registry; those of the first listed checkpoint (removed from the first store
                    // above) must never be registered with it
                    if (cp > 0) {
                        verificationMode = times(1);
                    } else {
                        verificationMode = never();
                    }
                    // check that all are registered with the new registry
                    verify(keyedStateHandle, verificationMode).registerSharedStates(secondInstance, completedCheckpoint.getCheckpointID());
                }
            }
        }
        ++cp;
    }
    // discard CP1
    secondStore.removeOldestCheckpoint();
    // We expect that all shared state from CP0 is no longer referenced and has been discarded.
    // CP2 is still live and also references the state from CP1, so we expect the latter not to
    // be discarded.
    // NOTE(review): verifyDiscard is defined elsewhere; presumably it applies the mode returned
    // for each checkpoint index to discardState() on that checkpoint's shared handles - confirm
    verifyDiscard(sharedHandlesByCheckpoint, cpId -> cpId == 0 ? times(1) : never());
    // discard CP2
    secondStore.removeOldestCheckpoint();
    // still expect shared state not to be discarded because it may be used in later checkpoints;
    // only index 1 is constrained here (never), atLeast(0) accepts any number of discards
    verifyDiscard(sharedHandlesByCheckpoint, cpId -> cpId == 1 ? never() : atLeast(0));
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ArrayList(java.util.ArrayList) IncrementalRemoteKeyedStateHandle(org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) SharedStateRegistry(org.apache.flink.runtime.state.SharedStateRegistry) PlaceholderStreamStateHandle(org.apache.flink.runtime.state.PlaceholderStreamStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) TestingStreamStateHandle(org.apache.flink.runtime.state.TestingStreamStateHandle) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) IncrementalRemoteKeyedStateHandle(org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle) HashSet(java.util.HashSet) PlaceholderStreamStateHandle(org.apache.flink.runtime.state.PlaceholderStreamStateHandle) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) VerificationMode(org.mockito.verification.VerificationMode) StateHandleID(org.apache.flink.runtime.state.StateHandleID) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Map(java.util.Map) HashMap(java.util.HashMap) Collections.singletonMap(java.util.Collections.singletonMap) Test(org.junit.Test)

Example 18 with SharedStateRegistry

use of org.apache.flink.runtime.state.SharedStateRegistry in project flink by apache.

the class CompletedCheckpointStoreTest method testAcquireLatestCompletedCheckpointId.

/**
 * Checks that the store reports 0 as latest checkpoint id right after creation and, after each
 * added checkpoint, the id of the checkpoint that was added last.
 */
@Test
public void testAcquireLatestCompletedCheckpointId() throws Exception {
    final SharedStateRegistry registry = new SharedStateRegistryImpl();
    final CompletedCheckpointStore store = createRecoveredCompletedCheckpointStore(1);

    // nothing has been added yet
    assertEquals(0, store.getLatestCheckpointId());

    // add checkpoints with increasing, non-consecutive ids and check the reported id each time
    for (int checkpointId : new int[] {2, 4}) {
        store.addCheckpointAndSubsumeOldestOne(createCheckpoint(checkpointId, registry), new CheckpointsCleaner(), () -> {
        });
        assertEquals(checkpointId, store.getLatestCheckpointId());
    }
}
Also used : SharedStateRegistryImpl(org.apache.flink.runtime.state.SharedStateRegistryImpl) SharedStateRegistry(org.apache.flink.runtime.state.SharedStateRegistry) Test(org.junit.Test)

Example 19 with SharedStateRegistry

use of org.apache.flink.runtime.state.SharedStateRegistry in project flink by apache.

the class CompletedCheckpointStoreTest method testAddCheckpointMoreThanMaxRetained.

/**
 * Adds four checkpoints to a store created with a retention of one and checks that, after every
 * addition beyond the first, the previously added checkpoint has been discarded while exactly
 * one checkpoint stays retained.
 */
@Test
public void testAddCheckpointMoreThanMaxRetained() throws Exception {
    final SharedStateRegistry registry = new SharedStateRegistryImpl();
    final CompletedCheckpointStore store = createRecoveredCompletedCheckpointStore(1);

    // create all checkpoints up front, ids 0..3
    final TestCompletedCheckpoint[] expected = new TestCompletedCheckpoint[4];
    for (int id = 0; id < expected.length; id++) {
        expected[id] = createCheckpoint(id, registry);
    }

    for (int i = 0; i < expected.length; i++) {
        store.addCheckpointAndSubsumeOldestOne(expected[i], new CheckpointsCleaner(), () -> {
        });
        if (i > 0) {
            // The ZooKeeper implementation discards asynchronously, so wait for the predecessor
            final TestCompletedCheckpoint subsumed = expected[i - 1];
            subsumed.awaitDiscard();
            assertTrue(subsumed.isDiscarded());
        }
        assertEquals(1, store.getNumberOfRetainedCheckpoints());
    }
}
Also used : SharedStateRegistryImpl(org.apache.flink.runtime.state.SharedStateRegistryImpl) SharedStateRegistry(org.apache.flink.runtime.state.SharedStateRegistry) Test(org.junit.Test)

Example 20 with SharedStateRegistry

use of org.apache.flink.runtime.state.SharedStateRegistry in project flink by apache.

the class CompletedCheckpointStoreTest method testAddAndGetLatestCheckpoint.

/**
 * Verifies that a freshly created store is empty and that, after each added checkpoint, the
 * retained count grows by one and the latest checkpoint is the one just added.
 */
@Test
public void testAddAndGetLatestCheckpoint() throws Exception {
    final SharedStateRegistry registry = new SharedStateRegistryImpl();
    final CompletedCheckpointStore store = createRecoveredCompletedCheckpointStore(4);

    // a new store holds nothing
    assertEquals(0, store.getNumberOfRetainedCheckpoints());
    assertEquals(0, store.getAllCheckpoints().size());

    final TestCompletedCheckpoint first = createCheckpoint(0, registry);
    final TestCompletedCheckpoint second = createCheckpoint(1, registry);
    final TestCompletedCheckpoint[] expected = new TestCompletedCheckpoint[] { first, second };

    // add one checkpoint at a time; each must become the latest and bump the retained count
    for (int i = 0; i < expected.length; i++) {
        store.addCheckpointAndSubsumeOldestOne(expected[i], new CheckpointsCleaner(), () -> {
        });
        assertEquals(i + 1, store.getNumberOfRetainedCheckpoints());
        verifyCheckpoint(expected[i], store.getLatestCheckpoint());
    }
}
Also used : SharedStateRegistryImpl(org.apache.flink.runtime.state.SharedStateRegistryImpl) SharedStateRegistry(org.apache.flink.runtime.state.SharedStateRegistry) Test(org.junit.Test)

Aggregations

SharedStateRegistry (org.apache.flink.runtime.state.SharedStateRegistry)22 SharedStateRegistryImpl (org.apache.flink.runtime.state.SharedStateRegistryImpl)19 Test (org.junit.Test)19 ArrayList (java.util.ArrayList)5 HashMap (java.util.HashMap)5 KeyedStateHandle (org.apache.flink.runtime.state.KeyedStateHandle)5 JobID (org.apache.flink.api.common.JobID)3 Configuration (org.apache.flink.configuration.Configuration)3 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)3 IncrementalRemoteKeyedStateHandle (org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle)3 StateHandleID (org.apache.flink.runtime.state.StateHandleID)3 StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle)3 TestCompletedCheckpointStorageLocation (org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation)3 HashSet (java.util.HashSet)2 Map (java.util.Map)2 CompletableFuture (java.util.concurrent.CompletableFuture)2 ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor)2 CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder)2 CheckpointRequestDeciderTest.regularCheckpoint (org.apache.flink.runtime.checkpoint.CheckpointRequestDeciderTest.regularCheckpoint)2 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)2