Search in sources :

Example 36 with StreamStateHandle

use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.

the class AggregatingAlignedProcessingTimeWindowOperatorTest method checkpointRestoreWithPendingWindowTumbling.

@Test
public void checkpointRestoreWithPendingWindowTumbling() {
    try {
        final int windowSize = 200;
        // tumbling window that triggers every 50 milliseconds
        AggregatingProcessingTimeWindowOperator<Integer, Tuple2<Integer, Integer>> op = new AggregatingProcessingTimeWindowOperator<>(sumFunction, fieldOneSelector, IntSerializer.INSTANCE, tupleSerializer, windowSize, windowSize);
        OneInputStreamOperatorTestHarness<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> testHarness = new OneInputStreamOperatorTestHarness<>(op);
        testHarness.setProcessingTime(0);
        testHarness.setup();
        testHarness.open();
        // inject some elements
        final int numElementsFirst = 700;
        final int numElements = 1000;
        for (int i = 0; i < numElementsFirst; i++) {
            StreamRecord<Tuple2<Integer, Integer>> next = new StreamRecord<>(new Tuple2<>(i, i));
            testHarness.processElement(next);
        }
        // draw a snapshot
        List<Tuple2<Integer, Integer>> resultAtSnapshot = extractFromStreamRecords(testHarness.getOutput());
        int beforeSnapShot = resultAtSnapshot.size();
        StreamStateHandle state = testHarness.snapshotLegacy(1L, System.currentTimeMillis());
        int afterSnapShot = testHarness.getOutput().size();
        assertEquals("operator performed computation during snapshot", beforeSnapShot, afterSnapShot);
        assertTrue(resultAtSnapshot.size() <= numElementsFirst);
        // inject some random elements, which should not show up in the state
        for (int i = numElementsFirst; i < numElements; i++) {
            StreamRecord<Tuple2<Integer, Integer>> next = new StreamRecord<>(new Tuple2<>(i, i));
            testHarness.processElement(next);
        }
        testHarness.close();
        op.dispose();
        // re-create the operator and restore the state
        op = new AggregatingProcessingTimeWindowOperator<>(sumFunction, fieldOneSelector, IntSerializer.INSTANCE, tupleSerializer, windowSize, windowSize);
        testHarness = new OneInputStreamOperatorTestHarness<>(op);
        testHarness.setup();
        testHarness.restore(state);
        testHarness.open();
        // inject the remaining elements
        for (int i = numElementsFirst; i < numElements; i++) {
            StreamRecord<Tuple2<Integer, Integer>> next = new StreamRecord<>(new Tuple2<>(i, i));
            testHarness.processElement(next);
        }
        testHarness.setProcessingTime(200);
        // get and verify the result
        List<Tuple2<Integer, Integer>> finalResult = new ArrayList<>(resultAtSnapshot);
        List<Tuple2<Integer, Integer>> partialFinalResult = extractFromStreamRecords(testHarness.getOutput());
        finalResult.addAll(partialFinalResult);
        assertEquals(numElements, finalResult.size());
        Collections.sort(finalResult, tupleComparator);
        for (int i = 0; i < numElements; i++) {
            assertEquals(i, finalResult.get(i).f0.intValue());
            assertEquals(i, finalResult.get(i).f1.intValue());
        }
        testHarness.close();
        op.dispose();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) ArrayList(java.util.ArrayList) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Example 37 with StreamStateHandle

use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.

the class AggregatingAlignedProcessingTimeWindowOperatorTest method checkpointRestoreWithPendingWindowSliding.

@Test
public void checkpointRestoreWithPendingWindowSliding() {
    try {
        final int factor = 4;
        final int windowSlide = 50;
        final int windowSize = factor * windowSlide;
        // sliding window (200 msecs) every 50 msecs
        AggregatingProcessingTimeWindowOperator<Integer, Tuple2<Integer, Integer>> op = new AggregatingProcessingTimeWindowOperator<>(sumFunction, fieldOneSelector, IntSerializer.INSTANCE, tupleSerializer, windowSize, windowSlide);
        OneInputStreamOperatorTestHarness<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> testHarness = new OneInputStreamOperatorTestHarness<>(op);
        testHarness.setProcessingTime(0);
        testHarness.setup();
        testHarness.open();
        // inject some elements
        final int numElements = 1000;
        final int numElementsFirst = 700;
        for (int i = 0; i < numElementsFirst; i++) {
            StreamRecord<Tuple2<Integer, Integer>> next = new StreamRecord<>(new Tuple2<>(i, i));
            testHarness.processElement(next);
        }
        // draw a snapshot
        List<Tuple2<Integer, Integer>> resultAtSnapshot = extractFromStreamRecords(testHarness.getOutput());
        int beforeSnapShot = resultAtSnapshot.size();
        StreamStateHandle state = testHarness.snapshotLegacy(1L, System.currentTimeMillis());
        int afterSnapShot = testHarness.getOutput().size();
        assertEquals("operator performed computation during snapshot", beforeSnapShot, afterSnapShot);
        assertTrue(resultAtSnapshot.size() <= factor * numElementsFirst);
        // inject the remaining elements - these should not influence the snapshot
        for (int i = numElementsFirst; i < numElements; i++) {
            StreamRecord<Tuple2<Integer, Integer>> next = new StreamRecord<>(new Tuple2<>(i, i));
            testHarness.processElement(next);
        }
        testHarness.close();
        op.dispose();
        // re-create the operator and restore the state
        op = new AggregatingProcessingTimeWindowOperator<>(sumFunction, fieldOneSelector, IntSerializer.INSTANCE, tupleSerializer, windowSize, windowSlide);
        testHarness = new OneInputStreamOperatorTestHarness<>(op);
        testHarness.setup();
        testHarness.restore(state);
        testHarness.open();
        // inject again the remaining elements
        for (int i = numElementsFirst; i < numElements; i++) {
            StreamRecord<Tuple2<Integer, Integer>> next = new StreamRecord<>(new Tuple2<>(i, i));
            testHarness.processElement(next);
        }
        testHarness.setProcessingTime(50);
        testHarness.setProcessingTime(100);
        testHarness.setProcessingTime(150);
        testHarness.setProcessingTime(200);
        testHarness.setProcessingTime(250);
        testHarness.setProcessingTime(300);
        testHarness.setProcessingTime(350);
        testHarness.setProcessingTime(400);
        // get and verify the result
        List<Tuple2<Integer, Integer>> finalResult = new ArrayList<>(resultAtSnapshot);
        List<Tuple2<Integer, Integer>> partialFinalResult = extractFromStreamRecords(testHarness.getOutput());
        finalResult.addAll(partialFinalResult);
        assertEquals(numElements * factor, finalResult.size());
        Collections.sort(finalResult, tupleComparator);
        for (int i = 0; i < factor * numElements; i++) {
            assertEquals(i / factor, finalResult.get(i).f0.intValue());
            assertEquals(i / factor, finalResult.get(i).f1.intValue());
        }
        testHarness.close();
        op.dispose();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) ArrayList(java.util.ArrayList) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Example 38 with StreamStateHandle

use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.

the class SavepointITCase method testTriggerSavepointAndResumeWithFileBasedCheckpoints.

/**
	 * Triggers a savepoint for a job that uses the FsStateBackend. We expect
	 * that all checkpoint files are written to a new savepoint directory.
	 *
	 * <ol>
	 * <li>Submit job, wait for some progress</li>
	 * <li>Trigger savepoint and verify that savepoint has been created</li>
	 * <li>Shut down the cluster, re-submit the job from the savepoint,
	 * verify that the initial state has been reset, and
	 * all tasks are running again</li>
	 * <li>Cancel job, dispose the savepoint, and verify that everything
	 * has been cleaned up</li>
	 * </ol>
	 */
@Test
public void testTriggerSavepointAndResumeWithFileBasedCheckpoints() throws Exception {
    // Config
    final int numTaskManagers = 2;
    final int numSlotsPerTaskManager = 2;
    final int parallelism = numTaskManagers * numSlotsPerTaskManager;
    final Deadline deadline = new FiniteDuration(5, TimeUnit.MINUTES).fromNow();
    final File testRoot = folder.newFolder();
    TestingCluster flink = null;
    try {
        // Create a test actor system
        ActorSystem testActorSystem = AkkaUtils.createDefaultActorSystem();
        // Flink configuration
        final Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTaskManagers);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlotsPerTaskManager);
        final File checkpointDir = new File(testRoot, "checkpoints");
        final File savepointRootDir = new File(testRoot, "savepoints");
        if (!checkpointDir.mkdir() || !savepointRootDir.mkdirs()) {
            fail("Test setup failed: failed to create temporary directories.");
        }
        // Use file based checkpoints
        config.setString(CoreOptions.STATE_BACKEND, "filesystem");
        config.setString(FsStateBackendFactory.CHECKPOINT_DIRECTORY_URI_CONF_KEY, checkpointDir.toURI().toString());
        config.setString(FsStateBackendFactory.MEMORY_THRESHOLD_CONF_KEY, "0");
        config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointRootDir.toURI().toString());
        // Start Flink
        flink = new TestingCluster(config);
        flink.start(true);
        // Submit the job
        final JobGraph jobGraph = createJobGraph(parallelism, 0, 1000);
        final JobID jobId = jobGraph.getJobID();
        // Reset the static test job helpers
        StatefulCounter.resetForTest(parallelism);
        // Retrieve the job manager
        ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
        LOG.info("Submitting job " + jobGraph.getJobID() + " in detached mode.");
        flink.submitJobDetached(jobGraph);
        LOG.info("Waiting for some progress.");
        // wait for the JobManager to be ready
        Future<Object> allRunning = jobManager.ask(new WaitForAllVerticesToBeRunning(jobId), deadline.timeLeft());
        Await.ready(allRunning, deadline.timeLeft());
        // wait for the Tasks to be ready
        StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        LOG.info("Triggering a savepoint.");
        Future<Object> savepointPathFuture = jobManager.ask(new TriggerSavepoint(jobId, Option.<String>empty()), deadline.timeLeft());
        final String savepointPath = ((TriggerSavepointSuccess) Await.result(savepointPathFuture, deadline.timeLeft())).savepointPath();
        LOG.info("Retrieved savepoint path: " + savepointPath + ".");
        // Retrieve the savepoint from the testing job manager
        LOG.info("Requesting the savepoint.");
        Future<Object> savepointFuture = jobManager.ask(new RequestSavepoint(savepointPath), deadline.timeLeft());
        SavepointV1 savepoint = (SavepointV1) ((ResponseSavepoint) Await.result(savepointFuture, deadline.timeLeft())).savepoint();
        LOG.info("Retrieved savepoint: " + savepointPath + ".");
        // Shut down the Flink cluster (thereby canceling the job)
        LOG.info("Shutting down Flink cluster.");
        flink.shutdown();
        flink.awaitTermination();
        // - Verification START -------------------------------------------
        // Only one savepoint should exist
        File[] files = savepointRootDir.listFiles();
        if (files != null) {
            assertEquals("Savepoint not created in expected directory", 1, files.length);
            assertTrue("Savepoint did not create self-contained directory", files[0].isDirectory());
            File savepointDir = files[0];
            File[] savepointFiles = savepointDir.listFiles();
            assertNotNull(savepointFiles);
            // Expect one metadata file and one checkpoint file per stateful
            // parallel subtask
            String errMsg = "Did not write expected number of savepoint/checkpoint files to directory: " + Arrays.toString(savepointFiles);
            assertEquals(errMsg, 1 + parallelism, savepointFiles.length);
        } else {
            fail("Savepoint not created in expected directory");
        }
        // We currently have the following directory layout: checkpointDir/jobId/chk-ID
        File jobCheckpoints = new File(checkpointDir, jobId.toString());
        if (jobCheckpoints.exists()) {
            files = jobCheckpoints.listFiles();
            assertNotNull("Checkpoint directory empty", files);
            assertEquals("Checkpoints directory not clean: " + Arrays.toString(files), 0, files.length);
        }
        // - Verification END ---------------------------------------------
        // Restart the cluster
        LOG.info("Restarting Flink cluster.");
        flink.start();
        // Retrieve the job manager
        LOG.info("Retrieving JobManager.");
        jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
        LOG.info("JobManager: " + jobManager + ".");
        // Reset static test helpers
        StatefulCounter.resetForTest(parallelism);
        // Gather all task deployment descriptors
        final Throwable[] error = new Throwable[1];
        final TestingCluster finalFlink = flink;
        final Multimap<JobVertexID, TaskDeploymentDescriptor> tdds = HashMultimap.create();
        new JavaTestKit(testActorSystem) {

            {
                new Within(deadline.timeLeft()) {

                    @Override
                    protected void run() {
                        try {
                            // Register to all submit task messages for job
                            for (ActorRef taskManager : finalFlink.getTaskManagersAsJava()) {
                                taskManager.tell(new TestingTaskManagerMessages.RegisterSubmitTaskListener(jobId), getTestActor());
                            }
                            // Set the savepoint path
                            jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
                            LOG.info("Resubmitting job " + jobGraph.getJobID() + " with " + "savepoint path " + savepointPath + " in detached mode.");
                            // Submit the job
                            finalFlink.submitJobDetached(jobGraph);
                            int numTasks = 0;
                            for (JobVertex jobVertex : jobGraph.getVertices()) {
                                numTasks += jobVertex.getParallelism();
                            }
                            // Gather the task deployment descriptors
                            LOG.info("Gathering " + numTasks + " submitted " + "TaskDeploymentDescriptor instances.");
                            for (int i = 0; i < numTasks; i++) {
                                ResponseSubmitTaskListener resp = (ResponseSubmitTaskListener) expectMsgAnyClassOf(getRemainingTime(), ResponseSubmitTaskListener.class);
                                TaskDeploymentDescriptor tdd = resp.tdd();
                                LOG.info("Received: " + tdd.toString() + ".");
                                TaskInformation taskInformation = tdd.getSerializedTaskInformation().deserializeValue(getClass().getClassLoader());
                                tdds.put(taskInformation.getJobVertexId(), tdd);
                            }
                        } catch (Throwable t) {
                            error[0] = t;
                        }
                    }
                };
            }
        };
        // - Verification START -------------------------------------------
        String errMsg = "Error during gathering of TaskDeploymentDescriptors";
        assertNull(errMsg, error[0]);
        // have a matching task deployment descriptor.
        for (TaskState taskState : savepoint.getTaskStates()) {
            Collection<TaskDeploymentDescriptor> taskTdds = tdds.get(taskState.getJobVertexID());
            errMsg = "Missing task for savepoint state for operator " + taskState.getJobVertexID() + ".";
            assertTrue(errMsg, taskTdds.size() > 0);
            assertEquals(taskState.getNumberCollectedStates(), taskTdds.size());
            for (TaskDeploymentDescriptor tdd : taskTdds) {
                SubtaskState subtaskState = taskState.getState(tdd.getSubtaskIndex());
                assertNotNull(subtaskState);
                errMsg = "Initial operator state mismatch.";
                assertEquals(errMsg, subtaskState.getLegacyOperatorState(), tdd.getTaskStateHandles().getLegacyOperatorState());
            }
        }
        // Await state is restored
        StatefulCounter.getRestoreLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        // Await some progress after restore
        StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        // - Verification END ---------------------------------------------
        LOG.info("Cancelling job " + jobId + ".");
        jobManager.tell(new CancelJob(jobId));
        LOG.info("Disposing savepoint " + savepointPath + ".");
        Future<Object> disposeFuture = jobManager.ask(new DisposeSavepoint(savepointPath), deadline.timeLeft());
        errMsg = "Failed to dispose savepoint " + savepointPath + ".";
        Object resp = Await.result(disposeFuture, deadline.timeLeft());
        assertTrue(errMsg, resp.getClass() == getDisposeSavepointSuccess().getClass());
        // - Verification START -------------------------------------------
        // The checkpoint files
        List<File> checkpointFiles = new ArrayList<>();
        for (TaskState stateForTaskGroup : savepoint.getTaskStates()) {
            for (SubtaskState subtaskState : stateForTaskGroup.getStates()) {
                ChainedStateHandle<StreamStateHandle> streamTaskState = subtaskState.getLegacyOperatorState();
                for (int i = 0; i < streamTaskState.getLength(); i++) {
                    if (streamTaskState.get(i) != null) {
                        FileStateHandle fileStateHandle = (FileStateHandle) streamTaskState.get(i);
                        checkpointFiles.add(new File(fileStateHandle.getFilePath().toUri()));
                    }
                }
            }
        }
        // The checkpoint files of the savepoint should have been discarded
        for (File f : checkpointFiles) {
            errMsg = "Checkpoint file " + f + " not cleaned up properly.";
            assertFalse(errMsg, f.exists());
        }
        if (checkpointFiles.size() > 0) {
            File parent = checkpointFiles.get(0).getParentFile();
            errMsg = "Checkpoint parent directory " + parent + " not cleaned up properly.";
            assertFalse(errMsg, parent.exists());
        }
        // All savepoints should have been cleaned up
        errMsg = "Savepoints directory not cleaned up properly: " + Arrays.toString(savepointRootDir.listFiles()) + ".";
        assertEquals(errMsg, 0, savepointRootDir.listFiles().length);
    // - Verification END ---------------------------------------------
    } finally {
        if (flink != null) {
            flink.shutdown();
        }
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) RequestSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestSavepoint) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ArrayList(java.util.ArrayList) ResponseSubmitTaskListener(org.apache.flink.runtime.testingUtils.TestingTaskManagerMessages.ResponseSubmitTaskListener) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) SavepointV1(org.apache.flink.runtime.checkpoint.savepoint.SavepointV1) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) CancelJob(org.apache.flink.runtime.messages.JobManagerMessages.CancelJob) TestingTaskManagerMessages(org.apache.flink.runtime.testingUtils.TestingTaskManagerMessages) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) WaitForAllVerticesToBeRunning(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) FileStateHandle(org.apache.flink.runtime.state.filesystem.FileStateHandle) TriggerSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint) ResponseSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ResponseSavepoint) RequestSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestSavepoint) DisposeSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.DisposeSavepoint) TriggerSavepointSuccess(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepointSuccess) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) DisposeSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.DisposeSavepoint) SubtaskState(org.apache.flink.runtime.checkpoint.SubtaskState) TriggerSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint) File(java.io.File) TaskState(org.apache.flink.runtime.checkpoint.TaskState) JobID(org.apache.flink.api.common.JobID) JavaTestKit(akka.testkit.JavaTestKit) Test(org.junit.Test)

Example 39 with StreamStateHandle

use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.

the class FsCheckpointStateOutputStreamTest method runTest.

private void runTest(int numBytes, int bufferSize, int threshold, boolean expectFile) throws Exception {
    FsCheckpointStreamFactory.CheckpointStateOutputStream stream = new FsCheckpointStreamFactory.FsCheckpointStateOutputStream(TEMP_DIR_PATH, FileSystem.getLocalFileSystem(), bufferSize, threshold);
    Random rnd = new Random();
    byte[] original = new byte[numBytes];
    byte[] bytes = new byte[original.length];
    rnd.nextBytes(original);
    System.arraycopy(original, 0, bytes, 0, original.length);
    // the test writes a mixture of writing individual bytes and byte arrays
    int pos = 0;
    while (pos < bytes.length) {
        boolean single = rnd.nextBoolean();
        if (single) {
            stream.write(bytes[pos++]);
        } else {
            int num = rnd.nextInt(Math.min(10, bytes.length - pos));
            stream.write(bytes, pos, num);
            pos += num;
        }
    }
    StreamStateHandle handle = stream.closeAndGetHandle();
    if (expectFile) {
        assertTrue(handle instanceof FileStateHandle);
    } else {
        assertTrue(handle instanceof ByteStreamStateHandle);
    }
    // make sure the writing process did not alter the original byte array
    assertArrayEquals(original, bytes);
    try (InputStream inStream = handle.openInputStream()) {
        byte[] validation = new byte[bytes.length];
        DataInputStream dataInputStream = new DataInputStream(inStream);
        dataInputStream.readFully(validation);
        assertArrayEquals(bytes, validation);
    }
    handle.discardState();
}
Also used : FsCheckpointStateOutputStream(org.apache.flink.runtime.state.filesystem.FsCheckpointStreamFactory.FsCheckpointStateOutputStream) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) Random(java.util.Random) DataInputStream(java.io.DataInputStream) InputStream(java.io.InputStream) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) DataInputStream(java.io.DataInputStream)

Example 40 with StreamStateHandle

use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.

the class HeapKeyedStateBackend method snapshot.

@Override
@SuppressWarnings("unchecked")
public RunnableFuture<KeyGroupsStateHandle> snapshot(final long checkpointId, final long timestamp, final CheckpointStreamFactory streamFactory, CheckpointOptions checkpointOptions) throws Exception {
    if (!hasRegisteredState()) {
        return DoneFuture.nullValue();
    }
    long syncStartTime = System.currentTimeMillis();
    Preconditions.checkState(stateTables.size() <= Short.MAX_VALUE, "Too many KV-States: " + stateTables.size() + ". Currently at most " + Short.MAX_VALUE + " states are supported");
    List<KeyedBackendSerializationProxy.StateMetaInfo<?, ?>> metaInfoProxyList = new ArrayList<>(stateTables.size());
    final Map<String, Integer> kVStateToId = new HashMap<>(stateTables.size());
    final Map<StateTable<K, ?, ?>, StateTableSnapshot> cowStateStableSnapshots = new HashedMap(stateTables.size());
    for (Map.Entry<String, StateTable<K, ?, ?>> kvState : stateTables.entrySet()) {
        RegisteredBackendStateMetaInfo<?, ?> metaInfo = kvState.getValue().getMetaInfo();
        KeyedBackendSerializationProxy.StateMetaInfo<?, ?> metaInfoProxy = new KeyedBackendSerializationProxy.StateMetaInfo(metaInfo.getStateType(), metaInfo.getName(), metaInfo.getNamespaceSerializer(), metaInfo.getStateSerializer());
        metaInfoProxyList.add(metaInfoProxy);
        kVStateToId.put(kvState.getKey(), kVStateToId.size());
        StateTable<K, ?, ?> stateTable = kvState.getValue();
        if (null != stateTable) {
            cowStateStableSnapshots.put(stateTable, stateTable.createSnapshot());
        }
    }
    final KeyedBackendSerializationProxy serializationProxy = new KeyedBackendSerializationProxy(keySerializer, metaInfoProxyList);
    //--------------------------------------------------- this becomes the end of sync part
    // implementation of the async IO operation, based on FutureTask
    final AbstractAsyncIOCallable<KeyGroupsStateHandle, CheckpointStreamFactory.CheckpointStateOutputStream> ioCallable = new AbstractAsyncIOCallable<KeyGroupsStateHandle, CheckpointStreamFactory.CheckpointStateOutputStream>() {

        AtomicBoolean open = new AtomicBoolean(false);

        @Override
        public CheckpointStreamFactory.CheckpointStateOutputStream openIOHandle() throws Exception {
            if (open.compareAndSet(false, true)) {
                CheckpointStreamFactory.CheckpointStateOutputStream stream = streamFactory.createCheckpointStateOutputStream(checkpointId, timestamp);
                try {
                    cancelStreamRegistry.registerClosable(stream);
                    return stream;
                } catch (Exception ex) {
                    open.set(false);
                    throw ex;
                }
            } else {
                throw new IOException("Operation already opened.");
            }
        }

        @Override
        public KeyGroupsStateHandle performOperation() throws Exception {
            long asyncStartTime = System.currentTimeMillis();
            CheckpointStreamFactory.CheckpointStateOutputStream stream = getIoHandle();
            DataOutputViewStreamWrapper outView = new DataOutputViewStreamWrapper(stream);
            serializationProxy.write(outView);
            long[] keyGroupRangeOffsets = new long[keyGroupRange.getNumberOfKeyGroups()];
            for (int keyGroupPos = 0; keyGroupPos < keyGroupRange.getNumberOfKeyGroups(); ++keyGroupPos) {
                int keyGroupId = keyGroupRange.getKeyGroupId(keyGroupPos);
                keyGroupRangeOffsets[keyGroupPos] = stream.getPos();
                outView.writeInt(keyGroupId);
                for (Map.Entry<String, StateTable<K, ?, ?>> kvState : stateTables.entrySet()) {
                    outView.writeShort(kVStateToId.get(kvState.getKey()));
                    cowStateStableSnapshots.get(kvState.getValue()).writeMappingsInKeyGroup(outView, keyGroupId);
                }
            }
            if (open.compareAndSet(true, false)) {
                StreamStateHandle streamStateHandle = stream.closeAndGetHandle();
                KeyGroupRangeOffsets offsets = new KeyGroupRangeOffsets(keyGroupRange, keyGroupRangeOffsets);
                final KeyGroupsStateHandle keyGroupsStateHandle = new KeyGroupsStateHandle(offsets, streamStateHandle);
                if (asynchronousSnapshots) {
                    LOG.info("Heap backend snapshot ({}, asynchronous part) in thread {} took {} ms.", streamFactory, Thread.currentThread(), (System.currentTimeMillis() - asyncStartTime));
                }
                return keyGroupsStateHandle;
            } else {
                throw new IOException("Checkpoint stream already closed.");
            }
        }

        @Override
        public void done(boolean canceled) {
            if (open.compareAndSet(true, false)) {
                CheckpointStreamFactory.CheckpointStateOutputStream stream = getIoHandle();
                if (null != stream) {
                    cancelStreamRegistry.unregisterClosable(stream);
                    IOUtils.closeQuietly(stream);
                }
            }
            for (StateTableSnapshot snapshot : cowStateStableSnapshots.values()) {
                snapshot.release();
            }
        }
    };
    AsyncStoppableTaskWithCallback<KeyGroupsStateHandle> task = AsyncStoppableTaskWithCallback.from(ioCallable);
    if (!asynchronousSnapshots) {
        task.run();
    }
    LOG.info("Heap backend snapshot (" + streamFactory + ", synchronous part) in thread " + Thread.currentThread() + " took " + (System.currentTimeMillis() - syncStartTime) + " ms.");
    return task;
}
Also used : RegisteredBackendStateMetaInfo(org.apache.flink.runtime.state.RegisteredBackendStateMetaInfo) HashMap(java.util.HashMap) KeyGroupRangeOffsets(org.apache.flink.runtime.state.KeyGroupRangeOffsets) ArrayList(java.util.ArrayList) KeyedBackendSerializationProxy(org.apache.flink.runtime.state.KeyedBackendSerializationProxy) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) CheckpointStreamFactory(org.apache.flink.runtime.state.CheckpointStreamFactory) IOException(java.io.IOException) AbstractAsyncIOCallable(org.apache.flink.runtime.io.async.AbstractAsyncIOCallable) IOException(java.io.IOException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) DataOutputViewStreamWrapper(org.apache.flink.core.memory.DataOutputViewStreamWrapper) HashedMap(org.apache.commons.collections.map.HashedMap) Map(java.util.Map) HashedMap(org.apache.commons.collections.map.HashedMap) HashMap(java.util.HashMap)

Aggregations

StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle)42 ArrayList (java.util.ArrayList)20 OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle)18 Test (org.junit.Test)18 KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle)17 HashMap (java.util.HashMap)14 ByteStreamStateHandle (org.apache.flink.runtime.state.memory.ByteStreamStateHandle)14 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)12 JobID (org.apache.flink.api.common.JobID)11 PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest)8 IOException (java.io.IOException)7 Configuration (org.apache.flink.configuration.Configuration)7 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)7 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)7 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)7 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)7 SubtaskState (org.apache.flink.runtime.checkpoint.SubtaskState)6 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)6 KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)6 OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness)6