Search in sources :

Example 46 with StreamStateHandle

use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.

the class CheckpointCoordinatorTest method testRestoreLatestCheckpointFailureWhenMaxParallelismChanges.

/**
 * Tests that restoring the latest checkpoint fails if the max parallelism of the job vertices
 * has changed since the checkpoint was taken.
 *
 * <p>A checkpoint is completed for two job vertices (max parallelism 42 and 13). The restore is
 * then attempted against vertices with the same IDs and parallelism, but with max parallelism
 * 20 and 42, and is expected to be rejected with an {@link IllegalStateException}.
 *
 * @throws Exception if the test setup fails unexpectedly
 */
@Test
public void testRestoreLatestCheckpointFailureWhenMaxParallelismChanges() throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    final JobVertexID jobVertexID1 = new JobVertexID();
    final JobVertexID jobVertexID2 = new JobVertexID();
    int parallelism1 = 3;
    int parallelism2 = 2;
    int maxParallelism1 = 42;
    int maxParallelism2 = 13;
    final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);
    List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
    allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
    allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
    ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);
    // set up the coordinator; the same vertex array is passed for all three vertex arguments
    CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), arrayExecutionVertices, arrayExecutionVertices, arrayExecutionVertices, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
    // trigger the checkpoint
    coord.triggerCheckpoint(timestamp, false);
    // assertEquals (rather than assertTrue on ==) reports the actual size on failure
    assertEquals(1, coord.getPendingCheckpoints().keySet().size());
    long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
    // acknowledge the checkpoint for every subtask of the first vertex
    for (int index = 0; index < jobVertex1.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> vertexState = generateStateForVertex(jobVertexID1, index);
        KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(vertexState, null, null, keyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    // acknowledge the checkpoint for every subtask of the second vertex
    for (int index = 0; index < jobVertex2.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> vertexState = generateStateForVertex(jobVertexID2, index);
        KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(vertexState, null, null, keyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    // with every subtask acknowledged, exactly one completed checkpoint is expected
    List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
    assertEquals(1, completedCheckpoints.size());
    // rebuild the vertices with the same IDs and parallelism but a different max parallelism
    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    int newMaxParallelism1 = 20;
    int newMaxParallelism2 = 42;
    final ExecutionJobVertex newJobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, newMaxParallelism1);
    final ExecutionJobVertex newJobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, newMaxParallelism2);
    tasks.put(jobVertexID1, newJobVertex1);
    tasks.put(jobVertexID2, newJobVertex2);
    // Only the restore call may raise the IllegalStateException; an exception of that type
    // thrown by the setup above must fail the test instead of silently satisfying it.
    try {
        coord.restoreLatestCheckpointedState(tasks, true, false);
        fail("The restoration should have failed because the max parallelism changed.");
    } catch (IllegalStateException expected) {
        // expected: the restore was rejected
    }
}
Also used : HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ArrayList(java.util.ArrayList) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 47 with StreamStateHandle

use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.

the class CheckpointCoordinatorTest method testRestoreLatestCheckpointFailureWhenParallelismChanges.

/**
 * Tests that restoring the latest checkpoint fails if the parallelism of a job vertex with
 * non-partitioned state has changed since the checkpoint was taken.
 *
 * <p>A checkpoint is completed for two job vertices (parallelism 3 and 2). The restore is then
 * attempted against vertices with the same IDs and max parallelism, but with parallelism 4 and
 * 3, and is expected to be rejected with an {@link IllegalStateException}.
 *
 * @throws Exception if the test setup fails unexpectedly
 */
@Test
public void testRestoreLatestCheckpointFailureWhenParallelismChanges() throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    final JobVertexID jobVertexID1 = new JobVertexID();
    final JobVertexID jobVertexID2 = new JobVertexID();
    int parallelism1 = 3;
    int parallelism2 = 2;
    int maxParallelism1 = 42;
    int maxParallelism2 = 13;
    final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);
    List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
    allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
    allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
    ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);
    // set up the coordinator; the same vertex array is passed for all three vertex arguments
    CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), arrayExecutionVertices, arrayExecutionVertices, arrayExecutionVertices, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
    // trigger the checkpoint
    coord.triggerCheckpoint(timestamp, false);
    // assertEquals (rather than assertTrue on ==) reports the actual size on failure
    assertEquals(1, coord.getPendingCheckpoints().keySet().size());
    long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
    // acknowledge the checkpoint for every subtask of the first vertex
    for (int index = 0; index < jobVertex1.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> vertexState = generateStateForVertex(jobVertexID1, index);
        KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(vertexState, null, null, keyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    // acknowledge the checkpoint for every subtask of the second vertex
    for (int index = 0; index < jobVertex2.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> vertexState = generateStateForVertex(jobVertexID2, index);
        KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(vertexState, null, null, keyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    // with every subtask acknowledged, exactly one completed checkpoint is expected
    List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
    assertEquals(1, completedCheckpoints.size());
    // rebuild the vertices with the same IDs and max parallelism but a different parallelism
    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    int newParallelism1 = 4;
    int newParallelism2 = 3;
    final ExecutionJobVertex newJobVertex1 = mockExecutionJobVertex(jobVertexID1, newParallelism1, maxParallelism1);
    final ExecutionJobVertex newJobVertex2 = mockExecutionJobVertex(jobVertexID2, newParallelism2, maxParallelism2);
    tasks.put(jobVertexID1, newJobVertex1);
    tasks.put(jobVertexID2, newJobVertex2);
    // Only the restore call may raise the IllegalStateException; an exception of that type
    // thrown by the setup above must fail the test instead of silently satisfying it.
    try {
        coord.restoreLatestCheckpointedState(tasks, true, false);
        fail("The restoration should have failed because the parallelism of an vertex with " + "non-partitioned state changed.");
    } catch (IllegalStateException expected) {
        // expected: the restore was rejected
    }
}
Also used : HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ArrayList(java.util.ArrayList) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 48 with StreamStateHandle

use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.

the class AbstractStreamOperator method restoreStreamCheckpointed.

/**
 * Restores the legacy operator state carried by the given handles, if there is any.
 *
 * <p>Does nothing when the handles hold no legacy operator state. Otherwise the state stream is
 * opened, registered with the containing task's cancelables while
 * {@code restoreState} runs, and unregistered and closed afterwards.
 *
 * @param stateHandles handles from which the legacy operator state is read
 * @throws Exception if legacy state is present but this operator is not a
 *     {@code CheckpointedRestoringOperator}, or if opening, restoring from, or closing the
 *     stream fails
 */
@Deprecated
private void restoreStreamCheckpointed(OperatorStateHandles stateHandles) throws Exception {
    final StreamStateHandle legacyState = stateHandles.getLegacyOperatorState();
    if (legacyState == null) {
        // no legacy state was checkpointed, nothing to restore
        return;
    }
    if (!(this instanceof CheckpointedRestoringOperator)) {
        throw new Exception("Found legacy operator state for operator that does not implement StreamCheckpointedOperator.");
    }
    final CheckpointedRestoringOperator restoringOperator = (CheckpointedRestoringOperator) this;
    LOG.debug("Restore state of task {} in chain ({}).", stateHandles.getOperatorChainIndex(), getContainingTask().getName());
    final FSDataInputStream stateStream = legacyState.openInputStream();
    try {
        // registered with the task's cancelables for the duration of the restore
        getContainingTask().getCancelables().registerClosable(stateStream);
        restoringOperator.restoreState(stateStream);
    } finally {
        getContainingTask().getCancelables().unregisterClosable(stateStream);
        stateStream.close();
    }
}
Also used : StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream) ConcurrentModificationException(java.util.ConcurrentModificationException) IOException(java.io.IOException)

Example 49 with StreamStateHandle

use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.

the class GenericWriteAheadSink method notifyOfCompletedCheckpoint.

/**
 * Processes all pending checkpoints whose id is not greater than the completed checkpoint id.
 *
 * <p>For each such checkpoint that the committer does not yet report as committed, the stored
 * values are read back from the state handle and passed to {@code sendValues}. On success the
 * checkpoint is committed, its state discarded, and the entry removed. A checkpoint that is
 * already committed only has its state discarded and its entry removed. If any step throws,
 * the error is logged and processing of the remaining pending checkpoints stops.
 *
 * @param checkpointId id of the checkpoint that completed
 * @throws Exception if the superclass notification fails
 */
@Override
public void notifyOfCompletedCheckpoint(long checkpointId) throws Exception {
    super.notifyOfCompletedCheckpoint(checkpointId);
    synchronized (pendingCheckpoints) {
        final Iterator<PendingCheckpoint> pendingIterator = pendingCheckpoints.iterator();
        while (pendingIterator.hasNext()) {
            final PendingCheckpoint pending = pendingIterator.next();
            final long pendingId = pending.checkpointId;
            final int subtask = pending.subtaskId;
            final long pendingTimestamp = pending.timestamp;
            final StreamStateHandle handle = pending.stateHandle;
            if (pendingId > checkpointId) {
                // belongs to a checkpoint newer than the one being notified; leave it pending
                continue;
            }
            try {
                if (committer.isCheckpointCommitted(subtask, pendingId)) {
                    // already committed: only the leftover state has to be cleaned up
                    handle.discardState();
                    pendingIterator.remove();
                } else {
                    try (FSDataInputStream in = handle.openInputStream()) {
                        final boolean sent = sendValues(new ReusingMutableToRegularIteratorWrapper<>(new InputViewIterator<>(new DataInputViewStreamWrapper(in), serializer), serializer), pendingTimestamp);
                        if (sent) {
                            // the values went out: record the commit, drop the state and
                            // forget the entry; otherwise the entry stays and is retried
                            // on the next checkpoint notification
                            committer.commitCheckpoint(subtask, pendingId);
                            handle.discardState();
                            pendingIterator.remove();
                        }
                    }
                }
            } catch (Exception e) {
                // stop here so that a later checkpoint is not committed before this one
                LOG.error("Could not commit checkpoint.", e);
                break;
            }
        }
    }
}
Also used : StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream) DataInputViewStreamWrapper(org.apache.flink.core.memory.DataInputViewStreamWrapper) IOException(java.io.IOException) InputViewIterator(org.apache.flink.runtime.io.disk.InputViewIterator)

Example 50 with StreamStateHandle

use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.

the class StreamTaskTest method testFailingCheckpointStreamOperator.

/**
 * Checks the clean-up performed when one operator in the chain fails while snapshotting.
 *
 * <p>Three operators are mocked; the third one throws from {@code snapshotState}. Triggering
 * the checkpoint must surface that exception as the cause of the thrown exception, the two
 * snapshot results that were returned must be cancelled, and all three legacy state handles
 * must be discarded.
 */
@Test
public void testFailingCheckpointStreamOperator() throws Exception {
    final long testCheckpointId = 42L;
    final long testTimestamp = 1L;

    // task info and environment handed to the stream task
    TaskInfo taskInfo = mock(TaskInfo.class);
    when(taskInfo.getTaskNameWithSubtasks()).thenReturn("foobar");
    when(taskInfo.getIndexOfThisSubtask()).thenReturn(0);
    Environment environment = mock(Environment.class);
    when(environment.getTaskInfo()).thenReturn(taskInfo);

    // the task under test: a mock that calls through to the real methods
    StreamTask<?, AbstractStreamOperator<?>> task = mock(StreamTask.class, Mockito.CALLS_REAL_METHODS);
    task.setEnvironment(environment);

    // three operators that additionally implement StreamCheckpointedOperator
    StreamOperator<?> firstOperator = mock(StreamOperator.class, withSettings().extraInterfaces(StreamCheckpointedOperator.class));
    StreamOperator<?> secondOperator = mock(StreamOperator.class, withSettings().extraInterfaces(StreamCheckpointedOperator.class));
    StreamOperator<?> failingOperator = mock(StreamOperator.class, withSettings().extraInterfaces(StreamCheckpointedOperator.class));

    // the first two operators return a snapshot result, the third one throws
    OperatorSnapshotResult firstSnapshot = mock(OperatorSnapshotResult.class);
    OperatorSnapshotResult secondSnapshot = mock(OperatorSnapshotResult.class);
    final Exception expectedFailure = new Exception("Test exception");
    when(firstOperator.snapshotState(anyLong(), anyLong(), any(CheckpointOptions.class))).thenReturn(firstSnapshot);
    when(secondOperator.snapshotState(anyLong(), anyLong(), any(CheckpointOptions.class))).thenReturn(secondSnapshot);
    when(failingOperator.snapshotState(anyLong(), anyLong(), any(CheckpointOptions.class))).thenThrow(expectedFailure);

    // every operator returns a legacy state handle
    StreamStateHandle firstLegacyHandle = mock(StreamStateHandle.class);
    StreamStateHandle secondLegacyHandle = mock(StreamStateHandle.class);
    StreamStateHandle thirdLegacyHandle = mock(StreamStateHandle.class);
    when(firstOperator.snapshotLegacyOperatorState(anyLong(), anyLong(), any(CheckpointOptions.class))).thenReturn(firstLegacyHandle);
    when(secondOperator.snapshotLegacyOperatorState(anyLong(), anyLong(), any(CheckpointOptions.class))).thenReturn(secondLegacyHandle);
    when(failingOperator.snapshotLegacyOperatorState(anyLong(), anyLong(), any(CheckpointOptions.class))).thenReturn(thirdLegacyHandle);

    // wire the operators into a mocked chain and inject the task's internal state
    StreamOperator<?>[] chainedOperators = { firstOperator, secondOperator, failingOperator };
    OperatorChain<Void, AbstractStreamOperator<Void>> chain = mock(OperatorChain.class);
    when(chain.getAllOperators()).thenReturn(chainedOperators);
    Whitebox.setInternalState(task, "isRunning", true);
    Whitebox.setInternalState(task, "lock", new Object());
    Whitebox.setInternalState(task, "operatorChain", chain);
    Whitebox.setInternalState(task, "cancelables", new CloseableRegistry());
    Whitebox.setInternalState(task, "configuration", new StreamConfig(new Configuration()));

    CheckpointMetaData metaData = new CheckpointMetaData(testCheckpointId, testTimestamp);
    try {
        task.triggerCheckpoint(metaData, CheckpointOptions.forFullCheckpoint());
        fail("Expected test exception here.");
    } catch (Exception e) {
        // the operator's exception must arrive as the cause
        assertEquals(expectedFailure, e.getCause());
    }

    // the returned snapshot results are cancelled and all legacy handles discarded
    verify(firstSnapshot).cancel();
    verify(secondSnapshot).cancel();
    verify(firstLegacyHandle).discardState();
    verify(secondLegacyHandle).discardState();
    verify(thirdLegacyHandle).discardState();
}
Also used : Configuration(org.apache.flink.configuration.Configuration) OperatorSnapshotResult(org.apache.flink.streaming.api.operators.OperatorSnapshotResult) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) StreamCheckpointedOperator(org.apache.flink.streaming.api.operators.StreamCheckpointedOperator) CloseableRegistry(org.apache.flink.core.fs.CloseableRegistry) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) TaskInfo(org.apache.flink.api.common.TaskInfo) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) Environment(org.apache.flink.runtime.execution.Environment) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) Test(org.junit.Test) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest)

Aggregations

StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle)84 ByteStreamStateHandle (org.apache.flink.runtime.state.memory.ByteStreamStateHandle)36 HashMap (java.util.HashMap)32 ArrayList (java.util.ArrayList)30 Test (org.junit.Test)30 OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle)19 IOException (java.io.IOException)18 StateHandleID (org.apache.flink.runtime.state.StateHandleID)18 KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle)17 OperatorStreamStateHandle (org.apache.flink.runtime.state.OperatorStreamStateHandle)17 Map (java.util.Map)16 List (java.util.List)13 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)13 JobID (org.apache.flink.api.common.JobID)11 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)11 IncrementalRemoteKeyedStateHandle (org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle)11 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)10 CheckpointStateOutputStream (org.apache.flink.runtime.state.CheckpointStateOutputStream)10 Path (org.apache.flink.core.fs.Path)9 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)9