Example 16 with StreamStateHandle

Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.

From the class AccumulatingAlignedProcessingTimeWindowOperatorTest, the method checkpointRestoreWithPendingWindowSlidingWithProcessFunction:

@Test
public void checkpointRestoreWithPendingWindowSlidingWithProcessFunction() {
    try {
        final int factor = 4;
        final int windowSlide = 50;
        final int windowSize = factor * windowSlide;
        // sliding window (200 msecs) every 50 msecs
        AccumulatingProcessingTimeWindowOperator<Integer, Integer, Integer> op = new AccumulatingProcessingTimeWindowOperator<>(validatingIdentityProcessFunction, identitySelector, IntSerializer.INSTANCE, IntSerializer.INSTANCE, windowSize, windowSlide);
        OneInputStreamOperatorTestHarness<Integer, Integer> testHarness = new OneInputStreamOperatorTestHarness<>(op);
        testHarness.setProcessingTime(0);
        testHarness.setup();
        testHarness.open();
        // inject some elements
        final int numElements = 1000;
        final int numElementsFirst = 700;
        for (int i = 0; i < numElementsFirst; i++) {
            testHarness.processElement(new StreamRecord<>(i));
        }
        // draw a snapshot
        List<Integer> resultAtSnapshot = extractFromStreamRecords(testHarness.getOutput());
        int beforeSnapShot = testHarness.getOutput().size();
        StreamStateHandle state = testHarness.snapshotLegacy(1L, System.currentTimeMillis());
        int afterSnapShot = testHarness.getOutput().size();
        assertEquals("operator performed computation during snapshot", beforeSnapShot, afterSnapShot);
        assertTrue(resultAtSnapshot.size() <= factor * numElementsFirst);
        // inject the remaining elements - these should not influence the snapshot
        for (int i = numElementsFirst; i < numElements; i++) {
            testHarness.processElement(new StreamRecord<>(i));
        }
        testHarness.close();
        // re-create the operator and restore the state
        op = new AccumulatingProcessingTimeWindowOperator<>(validatingIdentityProcessFunction, identitySelector, IntSerializer.INSTANCE, IntSerializer.INSTANCE, windowSize, windowSlide);
        testHarness = new OneInputStreamOperatorTestHarness<>(op);
        testHarness.setup();
        testHarness.restore(state);
        testHarness.open();
        // inject again the remaining elements
        for (int i = numElementsFirst; i < numElements; i++) {
            testHarness.processElement(new StreamRecord<>(i));
        }
        testHarness.setProcessingTime(50);
        testHarness.setProcessingTime(100);
        testHarness.setProcessingTime(150);
        testHarness.setProcessingTime(200);
        testHarness.setProcessingTime(250);
        testHarness.setProcessingTime(300);
        testHarness.setProcessingTime(350);
        // get and verify the result
        List<Integer> finalResult = new ArrayList<>(resultAtSnapshot);
        List<Integer> finalPartialResult = extractFromStreamRecords(testHarness.getOutput());
        finalResult.addAll(finalPartialResult);
        assertEquals(factor * numElements, finalResult.size());
        Collections.sort(finalResult);
        for (int i = 0; i < factor * numElements; i++) {
            assertEquals(i / factor, finalResult.get(i).intValue());
        }
        testHarness.close();
        op.dispose();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ArrayList(java.util.ArrayList) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)
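
A note on the numbers this test asserts: the window size is factor * windowSlide (200 ms windows sliding every 50 ms), so every element falls into factor = 4 overlapping panes and the accumulating operator emits it once per pane. That is why the final result contains factor * numElements entries and, after sorting, position i holds i / factor. The small, framework-free sketch below (our own helper, not part of Flink) enumerates the window starts that cover a given instant.

import java.util.ArrayList;
import java.util.List;

// Hypothetical helper, not a Flink API: lists the start timestamps of the
// sliding windows that contain a given instant, assuming aligned panes.
public class SlidingWindowArithmetic {

    static List<Long> windowStartsCovering(long time, long windowSize, long windowSlide) {
        List<Long> starts = new ArrayList<>();
        long lastStart = Math.floorDiv(time, windowSlide) * windowSlide;
        // Walk backwards over all window starts s with s <= time < s + windowSize.
        for (long s = lastStart; s > time - windowSize; s -= windowSlide) {
            starts.add(s);
        }
        return starts;
    }

    public static void main(String[] args) {
        // 200 ms windows sliding every 50 ms: every instant is covered by
        // 4 panes, which is the `factor` the test above multiplies by.
        System.out.println(windowStartsCovering(125, 200, 50)); // [100, 50, 0, -50]
    }
}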

Example 17 with StreamStateHandle

Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.

From the class SavepointV0Serializer, the method convertOperatorAndFunctionState:

/**
	 * This is public so that we can use it when restoring a legacy snapshot
	 * in {@code AbstractStreamOperatorTestHarness}.
	 */
public static StreamStateHandle convertOperatorAndFunctionState(StreamTaskState streamTaskState) throws Exception {
    List<StreamStateHandle> mergeStateHandles = new ArrayList<>(4);
    StateHandle<Serializable> functionState = streamTaskState.getFunctionState();
    StateHandle<?> operatorState = streamTaskState.getOperatorState();
    if (null != functionState) {
        mergeStateHandles.add(SIGNAL_1);
        mergeStateHandles.add(convertStateHandle(functionState));
    } else {
        mergeStateHandles.add(SIGNAL_0);
    }
    if (null != operatorState) {
        mergeStateHandles.add(convertStateHandle(operatorState));
    }
    return new MigrationStreamStateHandle(new MultiStreamStateHandle(mergeStateHandles));
}
Also used : MigrationStreamStateHandle(org.apache.flink.migration.state.MigrationStreamStateHandle) Serializable(java.io.Serializable) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) MultiStreamStateHandle(org.apache.flink.runtime.state.MultiStreamStateHandle) ArrayList(java.util.ArrayList)
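
The SIGNAL_0 / SIGNAL_1 handles written above act as a presence marker in front of the optional function state, so whoever later reads the merged stream knows whether a function-state section precedes the operator state. As a rough illustration of that presence-marker pattern, here is a self-contained sketch using plain Java streams; the names and the length-prefixed framing are ours, not what Flink's migration code actually writes.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Hypothetical illustration of the presence-marker idea: a single byte
// announces whether an optional payload section follows in the stream.
public class PresenceMarkerDemo {
    private static final int ABSENT = 0;
    private static final int PRESENT = 1;

    static byte[] write(byte[] optionalPayload) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bos);
        if (optionalPayload != null) {
            out.writeByte(PRESENT);
            out.writeInt(optionalPayload.length);
            out.write(optionalPayload);
        } else {
            out.writeByte(ABSENT);
        }
        out.flush();
        return bos.toByteArray();
    }

    static byte[] read(byte[] merged) throws IOException {
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(merged));
        if (in.readByte() == ABSENT) {
            return null; // no optional section was written
        }
        byte[] payload = new byte[in.readInt()];
        in.readFully(payload);
        return payload;
    }

    public static void main(String[] args) throws IOException {
        System.out.println(java.util.Arrays.toString(read(write(new byte[] {7, 8})))); // [7, 8]
        System.out.println(read(write(null))); // null
    }
}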

Example 18 with StreamStateHandle

Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.

From the class SavepointV0Serializer, the method convertKeyedBackendState:

/**
	 * This is public so that we can use it when restoring a legacy snapshot
	 * in {@code AbstractStreamOperatorTestHarness}.
	 */
public static KeyGroupsStateHandle convertKeyedBackendState(HashMap<String, KvStateSnapshot<?, ?, ?, ?>> oldKeyedState, int parallelInstanceIdx, long checkpointID) throws Exception {
    if (null != oldKeyedState) {
        CheckpointStreamFactory checkpointStreamFactory = new MemCheckpointStreamFactory(MAX_SIZE);
        CheckpointStreamFactory.CheckpointStateOutputStream keyedStateOut = checkpointStreamFactory.createCheckpointStateOutputStream(checkpointID, 0L);
        try {
            final long offset = keyedStateOut.getPos();
            InstantiationUtil.serializeObject(keyedStateOut, oldKeyedState);
            StreamStateHandle streamStateHandle = keyedStateOut.closeAndGetHandle();
            // makes IOUtils.closeQuietly(...) ignore this
            keyedStateOut = null;
            if (null != streamStateHandle) {
                KeyGroupRangeOffsets keyGroupRangeOffsets = new KeyGroupRangeOffsets(parallelInstanceIdx, parallelInstanceIdx, new long[] { offset });
                return new MigrationKeyGroupStateHandle(keyGroupRangeOffsets, streamStateHandle);
            }
        } finally {
            IOUtils.closeQuietly(keyedStateOut);
        }
    }
    return null;
}
Also used : MemCheckpointStreamFactory(org.apache.flink.runtime.state.memory.MemCheckpointStreamFactory) MigrationKeyGroupStateHandle(org.apache.flink.migration.state.MigrationKeyGroupStateHandle) MigrationStreamStateHandle(org.apache.flink.migration.state.MigrationStreamStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) MultiStreamStateHandle(org.apache.flink.runtime.state.MultiStreamStateHandle) CheckpointStreamFactory(org.apache.flink.runtime.state.CheckpointStreamFactory) KeyGroupRangeOffsets(org.apache.flink.runtime.state.KeyGroupRangeOffsets)
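
The detail worth noting here is the offset bookkeeping: getPos() is read before the keyed state is serialized, and that offset is stored in the KeyGroupRangeOffsets so a restore can seek straight to the section of the handle's stream that belongs to this key group. A minimal sketch of the same idea with plain Java streams (hypothetical helper, not a Flink API):

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Hypothetical sketch: write several sections into one stream and remember
// the byte offset of each, as the key-group offsets do for keyed state.
public class OffsetIndexedWriter {
    private final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    private final DataOutputStream out = new DataOutputStream(buffer);

    /** Writes one section and returns the offset at which it starts. */
    long writeSection(byte[] section) throws IOException {
        long offset = out.size(); // position before the section is written
        out.write(section);
        return offset;
    }

    byte[] toByteArray() throws IOException {
        out.flush();
        return buffer.toByteArray();
    }

    public static void main(String[] args) throws IOException {
        OffsetIndexedWriter writer = new OffsetIndexedWriter();
        long offsetA = writer.writeSection(new byte[] {1, 2, 3});
        long offsetB = writer.writeSection(new byte[] {4, 5});
        System.out.println(offsetA + ", " + offsetB); // 0, 3
    }
}

On restore, a reader would seek to the recorded offset in the handle's stream and deserialize only that section.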

Example 19 with StreamStateHandle

Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.

From the class SavepointV0Serializer, the method convertSubtaskState:

private org.apache.flink.runtime.checkpoint.SubtaskState convertSubtaskState(SubtaskState subtaskState, int parallelInstanceIdx, ClassLoader userClassLoader, long checkpointID) throws Exception {
    SerializedValue<StateHandle<?>> serializedValue = subtaskState.getState();
    StreamTaskStateList stateList = (StreamTaskStateList) serializedValue.deserializeValue(userClassLoader);
    StreamTaskState[] streamTaskStates = stateList.getState(userClassLoader);
    List<StreamStateHandle> newChainStateList = Arrays.asList(new StreamStateHandle[streamTaskStates.length]);
    KeyGroupsStateHandle newKeyedState = null;
    for (int chainIdx = 0; chainIdx < streamTaskStates.length; ++chainIdx) {
        StreamTaskState streamTaskState = streamTaskStates[chainIdx];
        if (streamTaskState == null) {
            continue;
        }
        newChainStateList.set(chainIdx, convertOperatorAndFunctionState(streamTaskState));
        HashMap<String, KvStateSnapshot<?, ?, ?, ?>> oldKeyedState = streamTaskState.getKvStates();
        if (null != oldKeyedState) {
            Preconditions.checkState(null == newKeyedState, "Found more than one keyed state in chain");
            newKeyedState = convertKeyedBackendState(oldKeyedState, parallelInstanceIdx, checkpointID);
        }
    }
    ChainedStateHandle<StreamStateHandle> newChainedState = new ChainedStateHandle<>(newChainStateList);
    ChainedStateHandle<OperatorStateHandle> nopChain = new ChainedStateHandle<>(Arrays.asList(new OperatorStateHandle[newChainedState.getLength()]));
    return new org.apache.flink.runtime.checkpoint.SubtaskState(newChainedState, nopChain, nopChain, newKeyedState, null);
}
Also used : KvStateSnapshot(org.apache.flink.migration.runtime.state.KvStateSnapshot) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) ChainedStateHandle(org.apache.flink.runtime.state.ChainedStateHandle) MigrationStreamStateHandle(org.apache.flink.migration.state.MigrationStreamStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) MultiStreamStateHandle(org.apache.flink.runtime.state.MultiStreamStateHandle) SubtaskState(org.apache.flink.migration.runtime.checkpoint.SubtaskState) StreamTaskStateList(org.apache.flink.migration.streaming.runtime.tasks.StreamTaskStateList) MigrationKeyGroupStateHandle(org.apache.flink.migration.state.MigrationKeyGroupStateHandle) FileStateHandle(org.apache.flink.runtime.state.filesystem.FileStateHandle) AbstractFileStateHandle(org.apache.flink.migration.runtime.state.filesystem.AbstractFileStateHandle) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) SerializedStateHandle(org.apache.flink.migration.runtime.state.memory.SerializedStateHandle) StateHandle(org.apache.flink.migration.runtime.state.StateHandle) StreamTaskState(org.apache.flink.migration.streaming.runtime.tasks.StreamTaskState)
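
Two idioms in this method deserve a comment: Arrays.asList(new StreamStateHandle[n]) produces a fixed-size, null-filled list whose slots are then filled by chain index, so stateless operators keep a null entry at their position, and Preconditions.checkState enforces that at most one operator in the chain carries keyed state. A stripped-down sketch of the chain-alignment pattern (hypothetical types, not Flink classes):

import java.util.Arrays;
import java.util.List;

// Hypothetical sketch of converting a chain of per-operator states while
// keeping the chain index alignment: stateless operators stay null.
public class ChainAlignmentDemo {
    static List<String> convertChain(String[] legacyStates) {
        // Fixed-size list, pre-filled with nulls, one slot per chained operator.
        List<String> converted = Arrays.asList(new String[legacyStates.length]);
        for (int chainIdx = 0; chainIdx < legacyStates.length; chainIdx++) {
            if (legacyStates[chainIdx] == null) {
                continue; // stateless operator: leave the slot null
            }
            converted.set(chainIdx, "converted:" + legacyStates[chainIdx]);
        }
        return converted;
    }

    public static void main(String[] args) {
        System.out.println(convertChain(new String[] {"map", null, "window"}));
        // [converted:map, null, converted:window]
    }
}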

Example 20 with StreamStateHandle

Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.

From the class CheckpointCoordinatorTest, the method testRestoreLatestCheckpointedStateWithChangingParallelism:

/**
	 * Tests checkpoint restoration when the parallelism of a job vertex with
	 * partitioned state changes.
	 *
	 * @throws Exception
	 */
private void testRestoreLatestCheckpointedStateWithChangingParallelism(boolean scaleOut) throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    final JobVertexID jobVertexID1 = new JobVertexID();
    final JobVertexID jobVertexID2 = new JobVertexID();
    int parallelism1 = 3;
    int parallelism2 = scaleOut ? 2 : 13;
    int maxParallelism1 = 42;
    int maxParallelism2 = 13;
    int newParallelism2 = scaleOut ? 13 : 2;
    final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);
    List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
    allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
    allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
    ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);
    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), arrayExecutionVertices, arrayExecutionVertices, arrayExecutionVertices, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
    // trigger the checkpoint
    coord.triggerCheckpoint(timestamp, false);
    assertTrue(coord.getPendingCheckpoints().keySet().size() == 1);
    long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);
    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
    //vertex 1
    for (int index = 0; index < jobVertex1.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> valueSizeTuple = generateStateForVertex(jobVertexID1, index);
        ChainedStateHandle<OperatorStateHandle> opStateBackend = generateChainedPartitionableStateHandle(jobVertexID1, index, 2, 8, false);
        KeyGroupsStateHandle keyedStateBackend = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
        KeyGroupsStateHandle keyedStateRaw = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), true);
        SubtaskState checkpointStateHandles = new SubtaskState(valueSizeTuple, opStateBackend, null, keyedStateBackend, keyedStateRaw);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    //vertex 2
    final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesBackend = new ArrayList<>(jobVertex2.getParallelism());
    final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesRaw = new ArrayList<>(jobVertex2.getParallelism());
    for (int index = 0; index < jobVertex2.getParallelism(); index++) {
        KeyGroupsStateHandle keyedStateBackend = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
        KeyGroupsStateHandle keyedStateRaw = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), true);
        ChainedStateHandle<OperatorStateHandle> opStateBackend = generateChainedPartitionableStateHandle(jobVertexID2, index, 2, 8, false);
        ChainedStateHandle<OperatorStateHandle> opStateRaw = generateChainedPartitionableStateHandle(jobVertexID2, index, 2, 8, true);
        expectedOpStatesBackend.add(opStateBackend);
        expectedOpStatesRaw.add(opStateRaw);
        SubtaskState checkpointStateHandles = new SubtaskState(new ChainedStateHandle<>(Collections.<StreamStateHandle>singletonList(null)), opStateBackend, opStateRaw, keyedStateBackend, keyedStateRaw);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
    assertEquals(1, completedCheckpoints.size());
    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    List<KeyGroupRange> newKeyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, newParallelism2);
    final ExecutionJobVertex newJobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    // rescale vertex 2
    final ExecutionJobVertex newJobVertex2 = mockExecutionJobVertex(jobVertexID2, newParallelism2, maxParallelism2);
    tasks.put(jobVertexID1, newJobVertex1);
    tasks.put(jobVertexID2, newJobVertex2);
    coord.restoreLatestCheckpointedState(tasks, true, false);
    // verify the restored state
    verifyStateRestore(jobVertexID1, newJobVertex1, keyGroupPartitions1);
    List<List<Collection<OperatorStateHandle>>> actualOpStatesBackend = new ArrayList<>(newJobVertex2.getParallelism());
    List<List<Collection<OperatorStateHandle>>> actualOpStatesRaw = new ArrayList<>(newJobVertex2.getParallelism());
    for (int i = 0; i < newJobVertex2.getParallelism(); i++) {
        KeyGroupsStateHandle originalKeyedStateBackend = generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), false);
        KeyGroupsStateHandle originalKeyedStateRaw = generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), true);
        TaskStateHandles taskStateHandles = newJobVertex2.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskStateHandles();
        ChainedStateHandle<StreamStateHandle> operatorState = taskStateHandles.getLegacyOperatorState();
        List<Collection<OperatorStateHandle>> opStateBackend = taskStateHandles.getManagedOperatorState();
        List<Collection<OperatorStateHandle>> opStateRaw = taskStateHandles.getRawOperatorState();
        Collection<KeyGroupsStateHandle> keyGroupStateBackend = taskStateHandles.getManagedKeyedState();
        Collection<KeyGroupsStateHandle> keyGroupStateRaw = taskStateHandles.getRawKeyedState();
        actualOpStatesBackend.add(opStateBackend);
        actualOpStatesRaw.add(opStateRaw);
        assertNull(operatorState);
        compareKeyedState(Collections.singletonList(originalKeyedStateBackend), keyGroupStateBackend);
        compareKeyedState(Collections.singletonList(originalKeyedStateRaw), keyGroupStateRaw);
    }
    comparePartitionableState(expectedOpStatesBackend, actualOpStatesBackend);
    comparePartitionableState(expectedOpStatesRaw, actualOpStatesRaw);
}
Also used : HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ArrayList(java.util.ArrayList) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) ChainedStateHandle(org.apache.flink.runtime.state.ChainedStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) List(java.util.List) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) TaskStateHandles(org.apache.flink.runtime.state.TaskStateHandles) Collection(java.util.Collection) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) JobID(org.apache.flink.api.common.JobID)
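
What makes the rescaling in this test work is that the maxParallelism key groups are split into contiguous, nearly equal ranges per subtask, and restore re-maps those ranges to the new parallelism. The sketch below reproduces the general shape of such a partitioning; the exact boundary arithmetic of StateAssignmentOperation.createKeyGroupPartitions may differ, so treat this as an assumption-laden illustration rather than the real implementation.

import java.util.ArrayList;
import java.util.List;

// Hedged sketch: split `maxParallelism` key groups into `parallelism`
// contiguous, nearly equal ranges. Not Flink's actual assignment code.
public class KeyGroupPartitionDemo {
    static List<int[]> createKeyGroupPartitions(int maxParallelism, int parallelism) {
        List<int[]> ranges = new ArrayList<>(parallelism);
        for (int i = 0; i < parallelism; i++) {
            int start = i * maxParallelism / parallelism;
            int end = (i + 1) * maxParallelism / parallelism - 1;
            ranges.add(new int[] {start, end}); // inclusive range of key groups
        }
        return ranges;
    }

    public static void main(String[] args) {
        // maxParallelism2 = 13: with parallelism 13 each subtask owns one key
        // group; after rescaling to 2, the same 13 groups collapse into two
        // ranges, which is what the restore in the test has to re-map.
        for (int[] r : createKeyGroupPartitions(13, 2)) {
            System.out.println(r[0] + ".." + r[1]); // 0..5 and 6..12
        }
    }
}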

Aggregations

StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle): 42
ArrayList (java.util.ArrayList): 20
OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle): 18
Test (org.junit.Test): 18
KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle): 17
HashMap (java.util.HashMap): 14
ByteStreamStateHandle (org.apache.flink.runtime.state.memory.ByteStreamStateHandle): 14
JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 12
JobID (org.apache.flink.api.common.JobID): 11
PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest): 8
IOException (java.io.IOException): 7
Configuration (org.apache.flink.configuration.Configuration): 7
ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex): 7
ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex): 7
AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint): 7
KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange): 7
SubtaskState (org.apache.flink.runtime.checkpoint.SubtaskState): 6
DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint): 6
KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness): 6
OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness): 6