use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
the class CheckpointCoordinatorTest method testExternallyInducedSourceWithOperatorCoordinator.
/**
 * Tests that a checkpoint still behaves correctly when the task checkpoint is triggered by the
 * master hooks and finishes before the master checkpoint. Also makes sure that the operator
 * coordinators are checkpointed before the task checkpoint starts.
 */
@Test
public void testExternallyInducedSourceWithOperatorCoordinator() throws Exception {
    JobVertexID jobVertexID1 = new JobVertexID();
    JobVertexID jobVertexID2 = new JobVertexID();

    CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway gateway =
            new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
    ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(jobVertexID1)
                    .addJobVertex(jobVertexID2)
                    .setTaskManagerGateway(gateway)
                    .build();

    ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
    ExecutionVertex vertex2 = graph.getJobVertex(jobVertexID2).getTaskVertices()[0];
    ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();
    ExecutionAttemptID attemptID2 = vertex2.getCurrentExecutionAttempt().getAttemptId();
    OperatorID opID1 = vertex1.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
    OperatorID opID2 = vertex2.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();

    // One snapshot per task, each holding the state of that task's own operator.
    TaskStateSnapshot taskOperatorSubtaskStates1 = new TaskStateSnapshot();
    TaskStateSnapshot taskOperatorSubtaskStates2 = new TaskStateSnapshot();
    OperatorSubtaskState subtaskState1 = OperatorSubtaskState.builder().build();
    OperatorSubtaskState subtaskState2 = OperatorSubtaskState.builder().build();
    taskOperatorSubtaskStates1.putSubtaskStateByOperatorID(opID1, subtaskState1);
    // Fixed copy-paste slip: the state of opID2 belongs to the second task's snapshot, which
    // is the one acknowledged for attemptID2 below (it was previously left empty).
    taskOperatorSubtaskStates2.putSubtaskStateByOperatorID(opID2, subtaskState2);

    // Create a mock OperatorCoordinatorCheckpointContext which completes the checkpoint
    // immediately and records that it has been called.
    AtomicBoolean coordCheckpointDone = new AtomicBoolean(false);
    OperatorCoordinatorCheckpointContext coordinatorCheckpointContext =
            new CheckpointCoordinatorTestingUtils
                            .MockOperatorCheckpointCoordinatorContextBuilder()
                    .setOnCallingCheckpointCoordinator(
                            (checkpointId, result) -> {
                                coordCheckpointDone.set(true);
                                result.complete(new byte[0]);
                            })
                    .setOperatorID(opID1)
                    .build();

    // set up the coordinator and validate the initial state
    CheckpointCoordinator checkpointCoordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(graph)
                    .setCheckpointCoordinatorConfiguration(
                            CheckpointCoordinatorConfiguration.builder()
                                    .setMaxConcurrentCheckpoints(Integer.MAX_VALUE)
                                    .build())
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .setCoordinatorsToCheckpoint(
                            Collections.singleton(coordinatorCheckpointContext))
                    .build();

    // Holds the checkpoint id observed by the master hook so it can be verified afterwards.
    AtomicReference<Long> checkpointIdRef = new AtomicReference<>();

    // Add a master hook which triggers and acks the task checkpoint immediately.
    // In this case the task checkpoints would complete before the job master checkpoint
    // completes.
    checkpointCoordinator.addMasterHook(
            new MasterTriggerRestoreHook<Integer>() {
                @Override
                public String getIdentifier() {
                    return "anything";
                }

                @Override
                @Nullable
                public CompletableFuture<Integer> triggerCheckpoint(
                        long checkpointId, long timestamp, Executor executor)
                        throws Exception {
                    // The coordinator must have been checkpointed before the hook runs.
                    assertTrue(
                            "The coordinator checkpoint should have finished.",
                            coordCheckpointDone.get());
                    // Acknowledge the checkpoint in the master hooks so the task snapshots
                    // complete before the master state snapshot completes.
                    checkpointIdRef.set(checkpointId);
                    AcknowledgeCheckpoint acknowledgeCheckpoint1 =
                            new AcknowledgeCheckpoint(
                                    graph.getJobID(),
                                    attemptID1,
                                    checkpointId,
                                    new CheckpointMetrics(),
                                    taskOperatorSubtaskStates1);
                    AcknowledgeCheckpoint acknowledgeCheckpoint2 =
                            new AcknowledgeCheckpoint(
                                    graph.getJobID(),
                                    attemptID2,
                                    checkpointId,
                                    new CheckpointMetrics(),
                                    taskOperatorSubtaskStates2);
                    checkpointCoordinator.receiveAcknowledgeMessage(
                            acknowledgeCheckpoint1, TASK_MANAGER_LOCATION_INFO);
                    checkpointCoordinator.receiveAcknowledgeMessage(
                            acknowledgeCheckpoint2, TASK_MANAGER_LOCATION_INFO);
                    // This hook does not hand back a state future.
                    return null;
                }

                @Override
                public void restoreCheckpoint(long checkpointId, Integer checkpointData)
                        throws Exception {}

                @Override
                public SimpleVersionedSerializer<Integer> createCheckpointDataSerializer() {
                    // Trivial serializer with fixed results.
                    return new SimpleVersionedSerializer<Integer>() {
                        @Override
                        public int getVersion() {
                            return 0;
                        }

                        @Override
                        public byte[] serialize(Integer obj) throws IOException {
                            return new byte[0];
                        }

                        @Override
                        public Integer deserialize(int version, byte[] serialized)
                                throws IOException {
                            return 1;
                        }
                    };
                }
            });

    // Verify initial state.
    assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
    assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
    assertEquals(0, manuallyTriggeredScheduledExecutor.getActiveScheduledTasks().size());

    // trigger the first checkpoint. this should succeed
    final CompletableFuture<CompletedCheckpoint> checkpointFuture =
            checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    FutureUtils.throwIfCompletedExceptionally(checkpointFuture);

    // now we should have a completed checkpoint
    assertEquals(1, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
    assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());

    // the canceler should be removed now
    assertEquals(0, manuallyTriggeredScheduledExecutor.getActiveScheduledTasks().size());

    // validate that the checkpoint recorded as triggered for each task carries the id that
    // the master hook observed
    long checkpointId = checkpointIdRef.get();
    for (ExecutionVertex vertex : Arrays.asList(vertex1, vertex2)) {
        ExecutionAttemptID attemptId = vertex.getCurrentExecutionAttempt().getAttemptId();
        assertEquals(checkpointId, gateway.getOnlyTriggeredCheckpoint(attemptId).checkpointId);
    }

    // the completed checkpoint belongs to this job and has two operator states
    CompletedCheckpoint success = checkpointCoordinator.getSuccessfulCheckpoints().get(0);
    assertEquals(graph.getJobID(), success.getJobId());
    assertEquals(2, success.getOperatorStates().size());

    checkpointCoordinator.shutdown();
}
use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
the class CheckpointCoordinatorTriggeringTest method testTriggerCheckpointInitializationFailed.
/**
 * Triggers a periodic checkpoint against a coordinator built with an {@code
 * UnstableCheckpointIDCounter} and expects the first attempt to fail with {@code
 * TRIGGER_CHECKPOINT_FAILURE}, while a second attempt must not complete exceptionally.
 */
@Test
public void testTriggerCheckpointInitializationFailed() throws Exception {
    // Coordinator under test, driven by the manually triggered executor.
    CheckpointCoordinator checkpointCoordinator =
            new CheckpointCoordinatorBuilder()
                    .setCheckpointIDCounter(
                            new UnstableCheckpointIDCounter(counterValue -> counterValue == 0))
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();
    checkpointCoordinator.startCheckpointScheduler();

    // First attempt: it is in flight until the executor is triggered, nothing is queued.
    final CompletableFuture<CompletedCheckpoint> firstAttempt =
            triggerPeriodicCheckpoint(checkpointCoordinator);
    assertTrue(checkpointCoordinator.isTriggering());
    assertEquals(0, checkpointCoordinator.getTriggerRequestQueue().size());

    manuallyTriggeredScheduledExecutor.triggerAll();

    // Capture the failure of the first attempt, then inspect it.
    ExecutionException firstFailure = null;
    try {
        firstAttempt.get();
    } catch (ExecutionException e) {
        firstFailure = e;
    }
    if (firstFailure == null) {
        fail("This checkpoint should fail through UnstableCheckpointIDCounter");
    }
    final Optional<CheckpointException> rootCheckpointException =
            ExceptionUtils.findThrowable(firstFailure, CheckpointException.class);
    assertTrue(rootCheckpointException.isPresent());
    assertEquals(
            CheckpointFailureReason.TRIGGER_CHECKPOINT_FAILURE,
            rootCheckpointException.get().getCheckpointFailureReason());

    // After the failure the coordinator is idle again.
    assertFalse(checkpointCoordinator.isTriggering());
    assertEquals(0, checkpointCoordinator.getTriggerRequestQueue().size());

    // Second attempt: must not be completed exceptionally.
    final CompletableFuture<CompletedCheckpoint> secondAttempt =
            triggerPeriodicCheckpoint(checkpointCoordinator);
    assertTrue(checkpointCoordinator.isTriggering());

    manuallyTriggeredScheduledExecutor.triggerAll();

    assertFalse(secondAttempt.isCompletedExceptionally());
    assertFalse(checkpointCoordinator.isTriggering());
    assertEquals(0, checkpointCoordinator.getTriggerRequestQueue().size());
}
use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
the class CheckpointCoordinatorTriggeringTest method testTriggerCheckpointRequestQueuedWithFailure.
/**
 * Issues three non-periodic trigger requests back to back, so that two of them are queued
 * behind the first. The first one fails through the {@code UnstableCheckpointIDCounter}; the
 * two queued ones must not complete exceptionally and must both reach the task.
 */
@Test
public void testTriggerCheckpointRequestQueuedWithFailure() throws Exception {
    final JobVertexID vertexId = new JobVertexID();

    // Gateway that records which checkpoints were triggered per execution attempt.
    final CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway
            recordingGateway =
                    new CheckpointCoordinatorTestingUtils
                            .CheckpointRecorderTaskManagerGateway();
    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId)
                    .setTaskManagerGateway(recordingGateway)
                    .build();
    final ExecutionVertex onlyTask = executionGraph.getJobVertex(vertexId).getTaskVertices()[0];
    final ExecutionAttemptID onlyAttempt = onlyTask.getCurrentExecutionAttempt().getAttemptId();

    // Coordinator under test, driven by the manually triggered executor.
    final CheckpointCoordinator checkpointCoordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setCheckpointIDCounter(
                            new UnstableCheckpointIDCounter(counterValue -> counterValue == 0))
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();
    checkpointCoordinator.startCheckpointScheduler();

    // First (non-periodic) request: it is being triggered, nothing is queued yet.
    final CompletableFuture<CompletedCheckpoint> firstRequest =
            triggerNonPeriodicCheckpoint(checkpointCoordinator);
    assertTrue(checkpointCoordinator.isTriggering());
    assertEquals(0, checkpointCoordinator.getTriggerRequestQueue().size());

    // Two more requests while the first one is still in progress; both end up queued.
    final CompletableFuture<CompletedCheckpoint> secondRequest =
            triggerNonPeriodicCheckpoint(checkpointCoordinator);
    final CompletableFuture<CompletedCheckpoint> thirdRequest =
            triggerNonPeriodicCheckpoint(checkpointCoordinator);
    assertTrue(checkpointCoordinator.isTriggering());
    assertEquals(2, checkpointCoordinator.getTriggerRequestQueue().size());

    manuallyTriggeredScheduledExecutor.triggerAll();

    // The first request fails by design through UnstableCheckpointIDCounter; the queued
    // requests are not failed along with it.
    assertTrue(firstRequest.isCompletedExceptionally());
    assertFalse(secondRequest.isCompletedExceptionally());
    assertFalse(thirdRequest.isCompletedExceptionally());

    // The queue has been drained and exactly two checkpoints were recorded for the task.
    assertFalse(checkpointCoordinator.isTriggering());
    assertEquals(0, checkpointCoordinator.getTriggerRequestQueue().size());
    assertEquals(2, recordingGateway.getTriggeredCheckpoints(onlyAttempt).size());
}
use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
the class HeartbeatManagerTest method testHeartbeatMonitorUpdate.
/**
 * Verifies that receiving a heartbeat from a monitored target updates its heartbeat monitor:
 * afterwards two tasks have been scheduled in total and the first one is cancelled.
 */
@Test
public void testHeartbeatMonitorUpdate() {
    final long timeoutMillis = 1000L;
    final ResourceID localId = new ResourceID("foobar");
    final ResourceID remoteId = new ResourceID("barfoo");
    final Object payload = new Object();

    // Listener whose payload retrieval always completes with the same object.
    HeartbeatListener<Object, Object> listener =
            new TestingHeartbeatListenerBuilder<>()
                    .setRetrievePayloadFunction(
                            ignored -> CompletableFuture.completedFuture(payload))
                    .createNewTestingHeartbeatListener();

    // Executor that only records the scheduled tasks so they can be inspected below.
    final ManuallyTriggeredScheduledExecutor recordingExecutor =
            new ManuallyTriggeredScheduledExecutor();

    HeartbeatManagerImpl<Object, Object> manager =
            new HeartbeatManagerImpl<>(
                    timeoutMillis,
                    FAILED_RPC_THRESHOLD,
                    localId,
                    listener,
                    recordingExecutor,
                    LOG);

    manager.monitorTarget(
            remoteId, new TestingHeartbeatTargetBuilder<>().createTestingHeartbeatTarget());
    manager.receiveHeartbeat(remoteId, payload);

    final List<ScheduledFuture<?>> scheduled = recordingExecutor.getAllScheduledTasks();
    assertThat(scheduled, hasSize(2));

    // the heartbeat update should have cancelled the first scheduled future
    final ScheduledFuture<?> initialTimeout = scheduled.get(0);
    assertTrue(initialTimeout.isCancelled());
}
use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
the class DeclarativeSlotManagerTest method testSlotReportWithConflictingJobIdDuringSlotAllocation.
/**
 * Verifies that when a slot report marks the slot of a pending allocation as already
 * allocated, the slot manager issues a new slot request for the remaining free slot.
 */
@Test
public void testSlotReportWithConflictingJobIdDuringSlotAllocation() throws Exception {
    final ResourceRequirements singleSlotRequirements =
            createResourceRequirementsForSingleSlot();

    // Records the slot ids the task executor is asked for; the requests never complete.
    final ArrayBlockingQueue<SlotID> requestedSlotIds = new ArrayBlockingQueue<>(2);
    final TestingTaskExecutorGateway taskExecutorGateway =
            new TestingTaskExecutorGatewayBuilder()
                    .setRequestSlotFunction(
                            FunctionUtils.uncheckedFunction(
                                    slotRequest -> {
                                        requestedSlotIds.put(slotRequest.f0);
                                        return new CompletableFuture<>();
                                    }))
                    .createTestingTaskExecutorGateway();

    final TaskExecutorConnection connection =
            createTaskExecutorConnection(taskExecutorGateway);
    final ResourceID taskExecutorId = connection.getResourceID();

    // The task executor initially reports two free slots.
    final SlotID slotId1 = new SlotID(taskExecutorId, 0);
    final SlotID slotId2 = new SlotID(taskExecutorId, 1);
    final SlotReport initialReport =
            new SlotReport(
                    Arrays.asList(
                            createFreeSlotStatus(slotId1), createFreeSlotStatus(slotId2)));

    final ScheduledExecutor executor = new ManuallyTriggeredScheduledExecutor();

    try (final DeclarativeSlotManager slotManager =
            createDeclarativeSlotManagerBuilder().setScheduledExecutor(executor).build()) {
        slotManager.start(
                ResourceManagerId.generate(),
                executor,
                new TestingResourceActionsBuilder().build());
        slotManager.registerTaskManager(
                connection, initialReport, ResourceProfile.ANY, ResourceProfile.ANY);
        slotManager.processResourceRequirements(singleSlotRequirements);

        // Whichever slot was requested first, the other one is the slot that stays free.
        final SlotID firstRequested = requestedSlotIds.take();
        final SlotID remainingFree;
        if (firstRequested.equals(slotId1)) {
            remainingFree = slotId2;
        } else {
            remainingFree = slotId1;
        }

        // Report the first requested slot as allocated and the other one as free.
        final SlotReport conflictingReport =
                new SlotReport(
                        Arrays.asList(
                                createAllocatedSlotStatus(firstRequested),
                                createFreeSlotStatus(remainingFree)));
        slotManager.reportSlotStatus(connection.getInstanceID(), conflictingReport);

        // The follow-up request must target the slot that is still free.
        final SlotID secondRequested = requestedSlotIds.take();
        assertEquals(remainingFree, secondRequested);
    }
}
Aggregations