use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
the class CheckpointStateRestoreTest method testSetState.
/**
 * Tests that on restore the task state is reset for each stateful task.
 *
 * <p>The test completes one checkpoint in which the three subtasks of the stateful vertex
 * acknowledge with keyed state and the two subtasks of the stateless vertex acknowledge without
 * any state. It then restores from that checkpoint and checks what each execution attempt
 * received.
 *
 * @throws Exception if building the graph, acknowledging or restoring fails unexpectedly; the
 *     exception is propagated so the test runner reports it together with its full stack trace
 */
@Test
public void testSetState() throws Exception {
    KeyGroupRange keyGroupRange = KeyGroupRange.of(0, 0);
    List<SerializableObject> testStates = Collections.singletonList(new SerializableObject());
    final KeyedStateHandle serializedKeyGroupStates =
            CheckpointCoordinatorTestingUtils.generateKeyGroupState(keyGroupRange, testStates);

    final JobVertexID statefulId = new JobVertexID();
    final JobVertexID statelessId = new JobVertexID();

    ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(statefulId, 3, 256)
                    .addJobVertex(statelessId, 2, 256)
                    .build();

    ExecutionJobVertex stateful = graph.getJobVertex(statefulId);
    ExecutionJobVertex stateless = graph.getJobVertex(statelessId);

    ExecutionVertex stateful1 = stateful.getTaskVertices()[0];
    ExecutionVertex stateful2 = stateful.getTaskVertices()[1];
    ExecutionVertex stateful3 = stateful.getTaskVertices()[2];
    ExecutionVertex stateless1 = stateless.getTaskVertices()[0];
    ExecutionVertex stateless2 = stateless.getTaskVertices()[1];

    Execution statefulExec1 = stateful1.getCurrentExecutionAttempt();
    Execution statefulExec2 = stateful2.getCurrentExecutionAttempt();
    Execution statefulExec3 = stateful3.getCurrentExecutionAttempt();
    Execution statelessExec1 = stateless1.getCurrentExecutionAttempt();
    Execution statelessExec2 = stateless2.getCurrentExecutionAttempt();

    ManuallyTriggeredScheduledExecutor manuallyTriggeredScheduledExecutor =
            new ManuallyTriggeredScheduledExecutor();

    CheckpointCoordinator coord =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(graph)
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();

    // create ourselves a checkpoint with state
    coord.triggerCheckpoint(false);
    // the timer is manually driven, so the queued work has to be run explicitly
    manuallyTriggeredScheduledExecutor.triggerAll();

    PendingCheckpoint pending = coord.getPendingCheckpoints().values().iterator().next();
    final long checkpointId = pending.getCheckpointId();

    final TaskStateSnapshot subtaskStates = new TaskStateSnapshot();
    subtaskStates.putSubtaskStateByOperatorID(
            OperatorID.fromJobVertexID(statefulId),
            OperatorSubtaskState.builder()
                    .setManagedKeyedState(serializedKeyGroupStates)
                    .build());

    // the stateful subtasks acknowledge with the state snapshot ...
    coord.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(
                    graph.getJobID(),
                    statefulExec1.getAttemptId(),
                    checkpointId,
                    new CheckpointMetrics(),
                    subtaskStates),
            TASK_MANAGER_LOCATION_INFO);
    coord.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(
                    graph.getJobID(),
                    statefulExec2.getAttemptId(),
                    checkpointId,
                    new CheckpointMetrics(),
                    subtaskStates),
            TASK_MANAGER_LOCATION_INFO);
    coord.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(
                    graph.getJobID(),
                    statefulExec3.getAttemptId(),
                    checkpointId,
                    new CheckpointMetrics(),
                    subtaskStates),
            TASK_MANAGER_LOCATION_INFO);
    // ... while the stateless subtasks acknowledge without any state
    coord.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(
                    graph.getJobID(), statelessExec1.getAttemptId(), checkpointId),
            TASK_MANAGER_LOCATION_INFO);
    coord.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(
                    graph.getJobID(), statelessExec2.getAttemptId(), checkpointId),
            TASK_MANAGER_LOCATION_INFO);

    assertEquals(1, coord.getNumberOfRetainedSuccessfulCheckpoints());
    assertEquals(0, coord.getNumberOfPendingCheckpoints());

    // let the coordinator inject the state
    assertTrue(
            coord.restoreLatestCheckpointedStateToAll(
                    new HashSet<>(Arrays.asList(stateful, stateless)), false));

    // verify that each stateful vertex got the state
    assertEquals(subtaskStates, statefulExec1.getTaskRestore().getTaskStateSnapshot());
    assertEquals(subtaskStates, statefulExec2.getTaskRestore().getTaskStateSnapshot());
    assertEquals(subtaskStates, statefulExec3.getTaskRestore().getTaskStateSnapshot());

    // the stateless executions must not have been handed anything to restore
    assertNull(statelessExec1.getTaskRestore());
    assertNull(statelessExec2.getTaskRestore());
}
use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
the class MemoryExecutionGraphInfoStoreTest method testExecutionGraphExpiration.
/**
 * Verifies that an execution graph whose expiration time has elapsed is dropped from the
 * execution graph store.
 */
@Test
public void testExecutionGraphExpiration() throws Exception {
    final Time ttl = Time.milliseconds(1L);
    final ManuallyTriggeredScheduledExecutor cleanupExecutor =
            new ManuallyTriggeredScheduledExecutor();
    final ManualTicker ticker = new ManualTicker();

    try (final MemoryExecutionGraphInfoStore store =
            new MemoryExecutionGraphInfoStore(ttl, Integer.MAX_VALUE, cleanupExecutor, ticker)) {

        final ExecutionGraphInfo finishedJob =
                new ExecutionGraphInfo(
                        new ArchivedExecutionGraphBuilder().setState(JobStatus.FINISHED).build());
        store.put(finishedJob);

        // exactly one execution graph is expected in the store at this point
        final int sizeAfterPut = store.size();
        assertThat(sizeAfterPut, Matchers.equalTo(1));

        // move the manual clock forward by the full expiration time
        ticker.advanceTime(ttl.toMilliseconds(), TimeUnit.MILLISECONDS);

        // running the scheduled tasks should trigger the cleanup after expiration
        cleanupExecutor.triggerScheduledTasks();

        final int sizeAfterCleanup = store.size();
        assertThat(sizeAfterCleanup, Matchers.equalTo(0));

        // looking the expired entry up must not yield anything
        assertThat(store.get(finishedJob.getJobId()), Matchers.nullValue());

        // the store must still be empty after the lookup
        final int sizeAfterLookup = store.size();
        assertThat(sizeAfterLookup, Matchers.equalTo(0));
    }
}
use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
the class CheckpointCoordinatorTest method testNotifyCheckpointAbortionInOperatorCoordinator.
/**
 * Tests that the operator coordinator is notified when an older pending checkpoint is aborted
 * because a newer checkpoint completed.
 *
 * <p>Two checkpoints are triggered back to back and only the second one is acknowledged. The
 * expected checkpoint ids are the ones actually handed out by the coordinator rather than
 * hard-coded numbers, so the assertions do not depend on where the id counter starts.
 *
 * @throws Exception if setting up the graph or driving the coordinator fails unexpectedly
 */
@Test
public void testNotifyCheckpointAbortionInOperatorCoordinator() throws Exception {
    JobVertexID jobVertexID = new JobVertexID();
    ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(jobVertexID)
                    .build();
    ExecutionVertex executionVertex = graph.getJobVertex(jobVertexID).getTaskVertices()[0];
    ExecutionAttemptID attemptID = executionVertex.getCurrentExecutionAttempt().getAttemptId();

    // mock coordinator context that immediately completes its checkpoint future with an empty
    // payload and exposes the aborted / completed checkpoint ids it was told about
    CheckpointCoordinatorTestingUtils.MockOperatorCoordinatorCheckpointContext context =
            new CheckpointCoordinatorTestingUtils
                            .MockOperatorCheckpointCoordinatorContextBuilder()
                    .setOperatorID(new OperatorID())
                    .setOnCallingCheckpointCoordinator(
                            (ignored, future) -> future.complete(new byte[0]))
                    .build();

    // set up the coordinator and validate the initial state
    // NOTE(review): manuallyTriggeredScheduledExecutor is not declared in this method;
    // presumably it is a field of the enclosing test class - confirm.
    CheckpointCoordinator checkpointCoordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(graph)
                    .setCheckpointCoordinatorConfiguration(
                            CheckpointCoordinatorConfiguration.builder()
                                    .setMaxConcurrentCheckpoints(Integer.MAX_VALUE)
                                    .build())
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .setCoordinatorsToCheckpoint(Collections.singleton(context))
                    .build();
    try {
        // Trigger checkpoint 1.
        checkpointCoordinator.triggerCheckpoint(false);
        manuallyTriggeredScheduledExecutor.triggerAll();
        long checkpointId1 =
                Collections.max(checkpointCoordinator.getPendingCheckpoints().keySet());

        // Trigger checkpoint 2.
        checkpointCoordinator.triggerCheckpoint(false);
        manuallyTriggeredScheduledExecutor.triggerAll();
        long checkpointId2 =
                Collections.max(checkpointCoordinator.getPendingCheckpoints().keySet());

        // Acknowledge checkpoint 2. This should abort checkpoint 1.
        AcknowledgeCheckpoint acknowledgeCheckpoint2 =
                new AcknowledgeCheckpoint(
                        graph.getJobID(),
                        attemptID,
                        checkpointId2,
                        new CheckpointMetrics(),
                        null);
        checkpointCoordinator.receiveAcknowledgeMessage(acknowledgeCheckpoint2, "");

        // OperatorCoordinator should have been notified of the abortion of checkpoint 1
        // and of the completion of checkpoint 2.
        assertEquals(Collections.singletonList(checkpointId1), context.getAbortedCheckpoints());
        assertEquals(
                Collections.singletonList(checkpointId2), context.getCompletedCheckpoints());
    } finally {
        checkpointCoordinator.shutdown();
    }
}
use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
the class CheckpointCoordinatorFailureTest method testStoringFailureHandling.
/**
 * Drives a single checkpoint to the point where it is acknowledged while the completed
 * checkpoint store is a {@code FailingCompletedCheckpointStore} built from the given failure,
 * then checks how the failure surfaced and how often the cleanup hook was invoked.
 *
 * @param failure the exception handed to the failing completed checkpoint store
 * @param expectedCleanupCalls the expected number of {@code cleanCheckpointOnFailedStoring}
 *     invocations on the checkpoints cleaner
 * @throws Exception if setting up the graph or the coordinator fails unexpectedly
 */
private void testStoringFailureHandling(Exception failure, int expectedCleanupCalls)
        throws Exception {
    final JobVertexID jobVertexID1 = new JobVertexID();

    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(jobVertexID1)
                    .build();

    final ExecutionVertex firstSubtask =
            executionGraph.getJobVertex(jobVertexID1).getTaskVertices()[0];
    final ExecutionAttemptID currentAttempt =
            firstSubtask.getCurrentExecutionAttempt().getAttemptId();

    final StandaloneCheckpointIDCounter idCounter = new StandaloneCheckpointIDCounter();
    final ManuallyTriggeredScheduledExecutor timer = new ManuallyTriggeredScheduledExecutor();
    final CompletedCheckpointStore failingStore = new FailingCompletedCheckpointStore(failure);

    // counts the cleanup invocations and then delegates to the regular cleaner behaviour
    final AtomicInteger observedCleanupCalls = new AtomicInteger(0);
    final CheckpointsCleaner countingCleaner =
            new CheckpointsCleaner() {
                private static final long serialVersionUID = 2029876992397573325L;

                @Override
                public void cleanCheckpointOnFailedStoring(
                        CompletedCheckpoint completedCheckpoint, Executor executor) {
                    observedCleanupCalls.incrementAndGet();
                    super.cleanCheckpointOnFailedStoring(completedCheckpoint, executor);
                }
            };

    final CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setCheckpointIDCounter(idCounter)
                    .setCheckpointsCleaner(countingCleaner)
                    .setCompletedCheckpointStore(failingStore)
                    .setTimer(timer)
                    .build();

    coordinator.triggerCheckpoint(false);
    timer.triggerAll();

    try {
        // acknowledging the only subtask is expected to fail with a CheckpointException
        coordinator.receiveAcknowledgeMessage(
                new AcknowledgeCheckpoint(
                        executionGraph.getJobID(), currentAttempt, idCounter.getLast()),
                "unknown location");
        fail("CheckpointException should have been thrown.");
    } catch (CheckpointException expected) {
        assertThat(
                expected.getCheckpointFailureReason(),
                is(CheckpointFailureReason.FINALIZE_CHECKPOINT_FAILURE));
    }

    final int actualCleanupCalls = observedCleanupCalls.get();
    assertThat(actualCleanupCalls, is(expectedCleanupCalls));
}
use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
the class CheckpointCoordinatorFailureTest method testFailingCompletedCheckpointStoreAdd.
/**
 * Tests that a failure while storing a completed checkpoint in the completed checkpoint store
 * properly fails the originating pending checkpoint and cleans up the completed checkpoint.
 */
@Test
public void testFailingCompletedCheckpointStoreAdd() throws Exception {
    JobVertexID vertexId = new JobVertexID();
    final ManuallyTriggeredScheduledExecutor timer = new ManuallyTriggeredScheduledExecutor();

    ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId)
                    .build();
    ExecutionVertex subtask = graph.getJobVertex(vertexId).getTaskVertices()[0];

    // set up the coordinator and validate the initial state
    FailingCompletedCheckpointStore failingStore =
            new FailingCompletedCheckpointStore(
                    new Exception("The failing completed checkpoint store failed again... :-("));
    CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(graph)
                    .setCompletedCheckpointStore(failingStore)
                    .setTimer(timer)
                    .build();

    coordinator.triggerCheckpoint(false);
    timer.triggerAll();

    assertEquals(1, coordinator.getNumberOfPendingCheckpoints());

    PendingCheckpoint pending = coordinator.getPendingCheckpoints().values().iterator().next();
    assertFalse(pending.isDisposed());

    final long pendingId = coordinator.getPendingCheckpoints().keySet().iterator().next();

    // one mocked handle per kind of state, so that each discard can be verified individually
    KeyedStateHandle managedKeyed = mock(KeyedStateHandle.class);
    KeyedStateHandle rawKeyed = mock(KeyedStateHandle.class);
    OperatorStateHandle managedOperator = mock(OperatorStreamStateHandle.class);
    OperatorStateHandle rawOperator = mock(OperatorStreamStateHandle.class);
    InputChannelStateHandle inputChannelState =
            new InputChannelStateHandle(
                    new InputChannelInfo(0, 1),
                    mock(StreamStateHandle.class),
                    Collections.singletonList(1L));
    ResultSubpartitionStateHandle resultSubpartitionState =
            new ResultSubpartitionStateHandle(
                    new ResultSubpartitionInfo(0, 1),
                    mock(StreamStateHandle.class),
                    Collections.singletonList(1L));

    OperatorSubtaskState plainOperatorState =
            OperatorSubtaskState.builder()
                    .setManagedOperatorState(managedOperator)
                    .setRawOperatorState(rawOperator)
                    .setManagedKeyedState(managedKeyed)
                    .setRawKeyedState(rawKeyed)
                    .setInputChannelState(StateObjectCollection.singleton(inputChannelState))
                    .setResultSubpartitionState(
                            StateObjectCollection.singleton(resultSubpartitionState))
                    .build();
    final OperatorSubtaskState spiedOperatorState = spy(plainOperatorState);

    TaskStateSnapshot snapshot = spy(new TaskStateSnapshot());
    snapshot.putSubtaskStateByOperatorID(new OperatorID(), spiedOperatorState);

    // stub the lookup for the vertex' operator id to return the spied operator state
    when(snapshot.getSubtaskStateByOperatorID(
                    OperatorID.fromJobVertexID(subtask.getJobvertexId())))
            .thenReturn(spiedOperatorState);

    AcknowledgeCheckpoint ack =
            new AcknowledgeCheckpoint(
                    graph.getJobID(),
                    subtask.getCurrentExecutionAttempt().getAttemptId(),
                    pendingId,
                    new CheckpointMetrics(),
                    snapshot);

    try {
        coordinator.receiveAcknowledgeMessage(ack, "Unknown location");
        fail(
                "Expected a checkpoint exception because the completed checkpoint store could not "
                        + "store the completed checkpoint.");
    } catch (CheckpointException ignored) {
        // ignore because we expected this exception
    }

    // make sure that the pending checkpoint has been discarded after we could not complete it
    assertTrue(pending.isDisposed());

    // make sure that the subtask state has been discarded after we could not complete it.
    verify(spiedOperatorState).discardState();
    verify(spiedOperatorState.getManagedOperatorState().iterator().next()).discardState();
    verify(spiedOperatorState.getRawOperatorState().iterator().next()).discardState();
    verify(spiedOperatorState.getManagedKeyedState().iterator().next()).discardState();
    verify(spiedOperatorState.getRawKeyedState().iterator().next()).discardState();
    verify(spiedOperatorState.getInputChannelState().iterator().next().getDelegate())
            .discardState();
    verify(spiedOperatorState.getResultSubpartitionState().iterator().next().getDelegate())
            .discardState();
}
Aggregations