Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
From the class CheckpointCoordinatorTest, method testRestoreLatestCheckpointFailureWhenParallelismChanges.
/**
* Tests that the checkpoint restoration fails if the parallelism of a job vertex with
* non-partitioned state has changed.
*
* @throws Exception
*/
@Test(expected = IllegalStateException.class)
public void testRestoreLatestCheckpointFailureWhenParallelismChanges() throws Exception {
final JobID jid = new JobID();
final long timestamp = System.currentTimeMillis();
final JobVertexID jobVertexID1 = new JobVertexID();
final JobVertexID jobVertexID2 = new JobVertexID();
int parallelism1 = 3;
int parallelism2 = 2;
int maxParallelism1 = 42;
int maxParallelism2 = 13;
final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);
List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);
// set up the coordinator and validate the initial state
CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), arrayExecutionVertices, arrayExecutionVertices, arrayExecutionVertices, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
// trigger the checkpoint
coord.triggerCheckpoint(timestamp, false);
assertTrue(coord.getPendingCheckpoints().keySet().size() == 1);
long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);
List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
for (int index = 0; index < jobVertex1.getParallelism(); index++) {
ChainedStateHandle<StreamStateHandle> valueSizeTuple = generateStateForVertex(jobVertexID1, index);
KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
SubtaskState checkpointStateHandles = new SubtaskState(valueSizeTuple, null, null, keyGroupState, null);
AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
}
for (int index = 0; index < jobVertex2.getParallelism(); index++) {
ChainedStateHandle<StreamStateHandle> state = generateStateForVertex(jobVertexID2, index);
KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
SubtaskState checkpointStateHandles = new SubtaskState(state, null, null, keyGroupState, null);
AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
}
List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
assertEquals(1, completedCheckpoints.size());
Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
int newParallelism1 = 4;
int newParallelism2 = 3;
final ExecutionJobVertex newJobVertex1 = mockExecutionJobVertex(jobVertexID1, newParallelism1, maxParallelism1);
final ExecutionJobVertex newJobVertex2 = mockExecutionJobVertex(jobVertexID2, newParallelism2, maxParallelism2);
tasks.put(jobVertexID1, newJobVertex1);
tasks.put(jobVertexID2, newJobVertex2);
coord.restoreLatestCheckpointedState(tasks, true, false);
fail("The restoration should have failed because the parallelism of an vertex with " + "non-partitioned state changed.");
}
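The IllegalStateException expected above stems from the fact that non-partitioned state (the ChainedStateHandle<StreamStateHandle> recorded per subtask) cannot be redistributed across a different number of subtasks. The following is a minimal, self-contained sketch of that invariant; the class and method names are hypothetical, and this is not the actual CheckpointCoordinator logic.

// Hypothetical sketch: legacy (non-partitioned) operator state is recorded per subtask
// index, so restoring it requires the vertex parallelism to be unchanged.
import java.util.Arrays;
import java.util.List;

public class NonPartitionedStateRestoreSketch {

    static void checkParallelismUnchanged(List<byte[]> subtaskStates, int newParallelism) {
        if (subtaskStates.size() != newParallelism) {
            throw new IllegalStateException("Cannot restore non-partitioned state taken with parallelism "
                    + subtaskStates.size() + " into a vertex with parallelism " + newParallelism);
        }
    }

    public static void main(String[] args) {
        // State from 3 subtasks cannot be mapped onto a vertex rescaled to 4 subtasks.
        List<byte[]> oldSubtaskStates = Arrays.asList(new byte[8], new byte[8], new byte[8]);
        checkParallelismUnchanged(oldSubtaskStates, 4); // throws, mirroring the failure the test expects
    }
}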
Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
From the class AbstractStreamOperator, method restoreStreamCheckpointed.
@Deprecated
private void restoreStreamCheckpointed(OperatorStateHandles stateHandles) throws Exception {
StreamStateHandle state = stateHandles.getLegacyOperatorState();
if (null != state) {
if (this instanceof CheckpointedRestoringOperator) {
LOG.debug("Restore state of task {} in chain ({}).", stateHandles.getOperatorChainIndex(), getContainingTask().getName());
FSDataInputStream is = state.openInputStream();
try {
getContainingTask().getCancelables().registerClosable(is);
((CheckpointedRestoringOperator) this).restoreState(is);
} finally {
getContainingTask().getCancelables().unregisterClosable(is);
is.close();
}
} else {
throw new Exception("Found legacy operator state for operator that does not implement StreamCheckpointedOperator.");
}
}
}
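The method above follows a register/restore/unregister-and-close pattern around the stream obtained from the StreamStateHandle, so that a concurrent cancel can close the registered stream and unblock a hanging restore. Below is a self-contained sketch of that pattern with hypothetical stand-ins for CloseableRegistry and CheckpointedRestoringOperator; it is an illustration, not the Flink implementation.

// Sketch of the open/register/restore/unregister/close pattern, with hypothetical
// CancelableRegistry and RestoringOperator types.
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.Set;

public class LegacyRestoreSketch {

    /** Hypothetical stand-in for Flink's CloseableRegistry. */
    static class CancelableRegistry {
        private final Set<InputStream> open = new HashSet<>();
        void register(InputStream in) { open.add(in); }
        void unregister(InputStream in) { open.remove(in); }
    }

    /** Hypothetical stand-in for an operator that restores from a raw stream. */
    interface RestoringOperator {
        void restoreState(InputStream in) throws IOException;
    }

    static void restore(InputStream state, RestoringOperator operator, CancelableRegistry cancelables)
            throws IOException {
        // Register first so a concurrent cancel can close the stream and unblock the restore.
        cancelables.register(state);
        try {
            operator.restoreState(state);
        } finally {
            cancelables.unregister(state);
            state.close();
        }
    }

    public static void main(String[] args) throws IOException {
        byte[] snapshot = {0, 0, 0, 42};
        restore(new ByteArrayInputStream(snapshot),
                in -> System.out.println("restored counter = " + new DataInputStream(in).readInt()),
                new CancelableRegistry());
    }
}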
Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
From the class GenericWriteAheadSink, method notifyOfCompletedCheckpoint.
@Override
public void notifyOfCompletedCheckpoint(long checkpointId) throws Exception {
super.notifyOfCompletedCheckpoint(checkpointId);
synchronized (pendingCheckpoints) {
Iterator<PendingCheckpoint> pendingCheckpointIt = pendingCheckpoints.iterator();
while (pendingCheckpointIt.hasNext()) {
PendingCheckpoint pendingCheckpoint = pendingCheckpointIt.next();
long pastCheckpointId = pendingCheckpoint.checkpointId;
int subtaskId = pendingCheckpoint.subtaskId;
long timestamp = pendingCheckpoint.timestamp;
StreamStateHandle streamHandle = pendingCheckpoint.stateHandle;
if (pastCheckpointId <= checkpointId) {
try {
if (!committer.isCheckpointCommitted(subtaskId, pastCheckpointId)) {
try (FSDataInputStream in = streamHandle.openInputStream()) {
boolean success = sendValues(new ReusingMutableToRegularIteratorWrapper<>(new InputViewIterator<>(new DataInputViewStreamWrapper(in), serializer), serializer), timestamp);
if (success) {
// in case the checkpoint was successfully committed,
// discard its state from the backend and mark it for removal
// in case it failed, we retry on the next checkpoint
committer.commitCheckpoint(subtaskId, pastCheckpointId);
streamHandle.discardState();
pendingCheckpointIt.remove();
}
}
} else {
streamHandle.discardState();
pendingCheckpointIt.remove();
}
} catch (Exception e) {
// we have to break here to prevent a new (later) checkpoint
// from being committed before this one
LOG.error("Could not commit checkpoint.", e);
break;
}
}
}
}
}
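Note that the loop depends on pendingCheckpoints being iterated in checkpoint order: breaking out on the first failed commit guarantees that a later checkpoint is never committed before an earlier one. The following stand-alone sketch captures that ordering rule; the names are hypothetical, and the real sink replays buffered records from the StreamStateHandle via sendValues() rather than just printing.

// Sketch of the "commit in order, stop on first failure" pattern used by the sink.
import java.util.ArrayDeque;
import java.util.Iterator;

public class OrderedCommitSketch {

    static final class PendingCommit {
        final long checkpointId;
        PendingCommit(long checkpointId) { this.checkpointId = checkpointId; }
    }

    /** Pending commits, kept in the order the checkpoints were taken. */
    private final ArrayDeque<PendingCommit> pending = new ArrayDeque<>();

    void add(long checkpointId) { pending.add(new PendingCommit(checkpointId)); }

    /** Try to commit everything up to (and including) the notified checkpoint id. */
    void notifyCompleted(long completedCheckpointId) {
        Iterator<PendingCommit> it = pending.iterator();
        while (it.hasNext()) {
            PendingCommit commit = it.next();
            if (commit.checkpointId > completedCheckpointId) {
                break; // not yet completed, keep it pending
            }
            if (!tryCommit(commit.checkpointId)) {
                // Stop here: committing a later checkpoint before this one would
                // break the ordering guarantee, so retry on the next notification.
                break;
            }
            it.remove();
        }
    }

    private boolean tryCommit(long checkpointId) {
        System.out.println("committing checkpoint " + checkpointId);
        return true; // a real implementation would replay the buffered records here
    }

    public static void main(String[] args) {
        OrderedCommitSketch sink = new OrderedCommitSketch();
        sink.add(1); sink.add(2); sink.add(3);
        sink.notifyCompleted(2); // commits 1 and 2, leaves 3 pending
    }
}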
Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
From the class StreamTaskTest, method testFailingCheckpointStreamOperator.
@Test
public void testFailingCheckpointStreamOperator() throws Exception {
final long checkpointId = 42L;
final long timestamp = 1L;
TaskInfo mockTaskInfo = mock(TaskInfo.class);
when(mockTaskInfo.getTaskNameWithSubtasks()).thenReturn("foobar");
when(mockTaskInfo.getIndexOfThisSubtask()).thenReturn(0);
Environment mockEnvironment = mock(Environment.class);
when(mockEnvironment.getTaskInfo()).thenReturn(mockTaskInfo);
StreamTask<?, AbstractStreamOperator<?>> streamTask = mock(StreamTask.class, Mockito.CALLS_REAL_METHODS);
CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, timestamp);
streamTask.setEnvironment(mockEnvironment);
// mock the operators
StreamOperator<?> streamOperator1 = mock(StreamOperator.class, withSettings().extraInterfaces(StreamCheckpointedOperator.class));
StreamOperator<?> streamOperator2 = mock(StreamOperator.class, withSettings().extraInterfaces(StreamCheckpointedOperator.class));
StreamOperator<?> streamOperator3 = mock(StreamOperator.class, withSettings().extraInterfaces(StreamCheckpointedOperator.class));
// mock the returned snapshots
OperatorSnapshotResult operatorSnapshotResult1 = mock(OperatorSnapshotResult.class);
OperatorSnapshotResult operatorSnapshotResult2 = mock(OperatorSnapshotResult.class);
final Exception testException = new Exception("Test exception");
when(streamOperator1.snapshotState(anyLong(), anyLong(), any(CheckpointOptions.class))).thenReturn(operatorSnapshotResult1);
when(streamOperator2.snapshotState(anyLong(), anyLong(), any(CheckpointOptions.class))).thenReturn(operatorSnapshotResult2);
when(streamOperator3.snapshotState(anyLong(), anyLong(), any(CheckpointOptions.class))).thenThrow(testException);
// mock the returned legacy snapshots
StreamStateHandle streamStateHandle1 = mock(StreamStateHandle.class);
StreamStateHandle streamStateHandle2 = mock(StreamStateHandle.class);
StreamStateHandle streamStateHandle3 = mock(StreamStateHandle.class);
when(streamOperator1.snapshotLegacyOperatorState(anyLong(), anyLong(), any(CheckpointOptions.class))).thenReturn(streamStateHandle1);
when(streamOperator2.snapshotLegacyOperatorState(anyLong(), anyLong(), any(CheckpointOptions.class))).thenReturn(streamStateHandle2);
when(streamOperator3.snapshotLegacyOperatorState(anyLong(), anyLong(), any(CheckpointOptions.class))).thenReturn(streamStateHandle3);
// set up the task
StreamOperator<?>[] streamOperators = { streamOperator1, streamOperator2, streamOperator3 };
OperatorChain<Void, AbstractStreamOperator<Void>> operatorChain = mock(OperatorChain.class);
when(operatorChain.getAllOperators()).thenReturn(streamOperators);
Whitebox.setInternalState(streamTask, "isRunning", true);
Whitebox.setInternalState(streamTask, "lock", new Object());
Whitebox.setInternalState(streamTask, "operatorChain", operatorChain);
Whitebox.setInternalState(streamTask, "cancelables", new CloseableRegistry());
Whitebox.setInternalState(streamTask, "configuration", new StreamConfig(new Configuration()));
try {
streamTask.triggerCheckpoint(checkpointMetaData, CheckpointOptions.forFullCheckpoint());
fail("Expected test exception here.");
} catch (Exception e) {
assertEquals(testException, e.getCause());
}
verify(operatorSnapshotResult1).cancel();
verify(operatorSnapshotResult2).cancel();
verify(streamStateHandle1).discardState();
verify(streamStateHandle2).discardState();
verify(streamStateHandle3).discardState();
}
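What the verifications at the end assert is a rollback pattern: when snapshotting one operator in the chain throws, every snapshot result and legacy state handle produced before the failure must be cancelled or discarded. Below is a compact sketch of that pattern using hypothetical Snapshot and Operator interfaces, not the real StreamTask code.

// Sketch: if one operator in a chain fails to snapshot, roll back everything
// produced so far so that no partial checkpoint state leaks.
import java.util.ArrayList;
import java.util.List;

public class SnapshotCleanupSketch {

    interface Snapshot { void cancel(); }

    interface Operator { Snapshot snapshot(long checkpointId) throws Exception; }

    static List<Snapshot> snapshotChain(List<Operator> chain, long checkpointId) throws Exception {
        List<Snapshot> produced = new ArrayList<>();
        try {
            for (Operator op : chain) {
                produced.add(op.snapshot(checkpointId));
            }
            return produced;
        } catch (Exception e) {
            // One operator failed: cancel every snapshot collected before it.
            for (Snapshot s : produced) {
                s.cancel();
            }
            throw e;
        }
    }
}

With three operators where the third one throws, the two snapshots collected before the failure are cancelled and the exception is rethrown, which is the behavior the mocks in the test verify.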
Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
From the class StreamTaskTest, method testAsyncCheckpointingConcurrentCloseAfterAcknowledge.
/**
* FLINK-5667
*
* Tests that a concurrent cancel operation does not discard the state handles of an
* acknowledged checkpoint. The situation can only happen if the cancel call is executed
* after Environment.acknowledgeCheckpoint() and before the
* CloseableRegistry.unregisterClosable() call.
*/
@Test
public void testAsyncCheckpointingConcurrentCloseAfterAcknowledge() throws Exception {
final long checkpointId = 42L;
final long timestamp = 1L;
final OneShotLatch acknowledgeCheckpointLatch = new OneShotLatch();
final OneShotLatch completeAcknowledge = new OneShotLatch();
TaskInfo mockTaskInfo = mock(TaskInfo.class);
when(mockTaskInfo.getTaskNameWithSubtasks()).thenReturn("foobar");
when(mockTaskInfo.getIndexOfThisSubtask()).thenReturn(0);
Environment mockEnvironment = mock(Environment.class);
when(mockEnvironment.getTaskInfo()).thenReturn(mockTaskInfo);
doAnswer(new Answer() {
@Override
public Object answer(InvocationOnMock invocation) throws Throwable {
acknowledgeCheckpointLatch.trigger();
// block here so that we can issue the concurrent cancel call
completeAcknowledge.await();
return null;
}
}).when(mockEnvironment).acknowledgeCheckpoint(anyLong(), any(CheckpointMetrics.class), any(SubtaskState.class));
StreamTask<?, AbstractStreamOperator<?>> streamTask = mock(StreamTask.class, Mockito.CALLS_REAL_METHODS);
CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, timestamp);
streamTask.setEnvironment(mockEnvironment);
StreamOperator<?> streamOperator = mock(StreamOperator.class, withSettings().extraInterfaces(StreamCheckpointedOperator.class));
KeyGroupsStateHandle managedKeyedStateHandle = mock(KeyGroupsStateHandle.class);
KeyGroupsStateHandle rawKeyedStateHandle = mock(KeyGroupsStateHandle.class);
OperatorStateHandle managedOperatorStateHandle = mock(OperatorStateHandle.class);
OperatorStateHandle rawOperatorStateHandle = mock(OperatorStateHandle.class);
OperatorSnapshotResult operatorSnapshotResult = new OperatorSnapshotResult(new DoneFuture<>(managedKeyedStateHandle), new DoneFuture<>(rawKeyedStateHandle), new DoneFuture<>(managedOperatorStateHandle), new DoneFuture<>(rawOperatorStateHandle));
when(streamOperator.snapshotState(anyLong(), anyLong(), any(CheckpointOptions.class))).thenReturn(operatorSnapshotResult);
StreamOperator<?>[] streamOperators = { streamOperator };
OperatorChain<Void, AbstractStreamOperator<Void>> operatorChain = mock(OperatorChain.class);
when(operatorChain.getAllOperators()).thenReturn(streamOperators);
StreamStateHandle streamStateHandle = mock(StreamStateHandle.class);
CheckpointStreamFactory.CheckpointStateOutputStream outStream = mock(CheckpointStreamFactory.CheckpointStateOutputStream.class);
when(outStream.closeAndGetHandle()).thenReturn(streamStateHandle);
CheckpointStreamFactory mockStreamFactory = mock(CheckpointStreamFactory.class);
when(mockStreamFactory.createCheckpointStateOutputStream(anyLong(), anyLong())).thenReturn(outStream);
AbstractStateBackend mockStateBackend = mock(AbstractStateBackend.class);
when(mockStateBackend.createStreamFactory(any(JobID.class), anyString())).thenReturn(mockStreamFactory);
Whitebox.setInternalState(streamTask, "isRunning", true);
Whitebox.setInternalState(streamTask, "lock", new Object());
Whitebox.setInternalState(streamTask, "operatorChain", operatorChain);
Whitebox.setInternalState(streamTask, "cancelables", new CloseableRegistry());
Whitebox.setInternalState(streamTask, "asyncOperationsThreadPool", Executors.newFixedThreadPool(1));
Whitebox.setInternalState(streamTask, "configuration", new StreamConfig(new Configuration()));
Whitebox.setInternalState(streamTask, "stateBackend", mockStateBackend);
streamTask.triggerCheckpoint(checkpointMetaData, CheckpointOptions.forFullCheckpoint());
acknowledgeCheckpointLatch.await();
ArgumentCaptor<SubtaskState> subtaskStateCaptor = ArgumentCaptor.forClass(SubtaskState.class);
// check that the checkpoint has been completed
verify(mockEnvironment).acknowledgeCheckpoint(eq(checkpointId), any(CheckpointMetrics.class), subtaskStateCaptor.capture());
SubtaskState subtaskState = subtaskStateCaptor.getValue();
// check that the subtask state contains the expected state handles
assertEquals(managedKeyedStateHandle, subtaskState.getManagedKeyedState());
assertEquals(rawKeyedStateHandle, subtaskState.getRawKeyedState());
assertEquals(new ChainedStateHandle<>(Collections.singletonList(managedOperatorStateHandle)), subtaskState.getManagedOperatorState());
assertEquals(new ChainedStateHandle<>(Collections.singletonList(rawOperatorStateHandle)), subtaskState.getRawOperatorState());
// check that the state handles have not been discarded
verify(managedKeyedStateHandle, never()).discardState();
verify(rawKeyedStateHandle, never()).discardState();
verify(managedOperatorStateHandle, never()).discardState();
verify(rawOperatorStateHandle, never()).discardState();
streamTask.cancel();
completeAcknowledge.trigger();
// canceling the stream task after it has acknowledged the checkpoint should not discard
// the state handles
verify(managedKeyedStateHandle, never()).discardState();
verify(rawKeyedStateHandle, never()).discardState();
verify(managedOperatorStateHandle, never()).discardState();
verify(rawOperatorStateHandle, never()).discardState();
}
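The guarantee being tested is an ownership hand-off: once Environment.acknowledgeCheckpoint() has run, the checkpoint coordinator owns the state handles, so a concurrent cancel must leave them alone. The sketch below models that hand-off with an AtomicBoolean; it illustrates the required ordering only and is not the actual FLINK-5667 fix in StreamTask.

// Sketch: a cancel that races with checkpoint acknowledgement may only discard the
// state while the task still owns it. All names here are hypothetical.
import java.util.concurrent.atomic.AtomicBoolean;

public class AsyncCheckpointSketch {

    private final AtomicBoolean ownedByTask = new AtomicBoolean(true);

    /** Called by the async checkpointing thread after acknowledging the checkpoint. */
    void markAcknowledged() {
        // After this flips, ownership has moved to the checkpoint coordinator.
        ownedByTask.set(false);
    }

    /** Called by a concurrent cancel; only discards while the task still owns the state. */
    void cancel(Runnable discardState) {
        if (ownedByTask.compareAndSet(true, false)) {
            discardState.run();
        }
        // else: already acknowledged, the coordinator is now responsible for the state
    }

    public static void main(String[] args) {
        AsyncCheckpointSketch sketch = new AsyncCheckpointSketch();
        sketch.markAcknowledged();
        sketch.cancel(() -> { throw new AssertionError("must not discard after acknowledge"); });
        System.out.println("cancel after acknowledge left the state intact");
    }
}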