use of org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation in project flink by apache.
the class JobMasterTest method testTaskManagerRegistrationTriggersHeartbeating.
@Test
public void testTaskManagerRegistrationTriggersHeartbeating() throws Exception {
final CompletableFuture<ResourceID> heartbeatResourceIdFuture = new CompletableFuture<>();
final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder().setHeartbeatJobManagerFunction((taskManagerId, ignored) -> {
heartbeatResourceIdFuture.complete(taskManagerId);
return FutureUtils.completedVoidFuture();
}).createTestingTaskExecutorGateway();
rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);
final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).withResourceId(jmResourceId).withConfiguration(configuration).withHighAvailabilityServices(haServices).withHeartbeatServices(new HeartbeatServices(1L, 10000L)).createJobMaster();
jobMaster.start();
try {
final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
// register task manager will trigger monitor heartbeat target, schedule heartbeat
// request at interval time
CompletableFuture<RegistrationResponse> registrationResponse = jobMasterGateway.registerTaskManager(jobGraph.getJobID(), TaskManagerRegistrationInformation.create(taskExecutorGateway.getAddress(), unresolvedTaskManagerLocation, TestingUtils.zeroUUID()), testingTimeout);
// wait for the completion of the registration
registrationResponse.get();
assertThat(heartbeatResourceIdFuture.join(), anyOf(nullValue(), equalTo(jmResourceId)));
} finally {
RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
}
}
use of org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation in project flink by apache.
the class JobMasterTest method testReleasingTaskExecutorIfNoMoreSlotsRegistered.
/**
* Tests that the TaskExecutor is released if all of its slots have been freed.
*/
@Test
public void testReleasingTaskExecutorIfNoMoreSlotsRegistered() throws Exception {
final JobGraph jobGraph = createSingleVertexJobWithRestartStrategy();
final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).withConfiguration(configuration).withHighAvailabilityServices(haServices).withHeartbeatServices(heartbeatServices).createJobMaster();
final CompletableFuture<JobID> disconnectTaskExecutorFuture = new CompletableFuture<>();
final CompletableFuture<AllocationID> freedSlotFuture = new CompletableFuture<>();
final TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder().setFreeSlotFunction((allocationID, throwable) -> {
freedSlotFuture.complete(allocationID);
return CompletableFuture.completedFuture(Acknowledge.get());
}).setDisconnectJobManagerConsumer((jobID, throwable) -> disconnectTaskExecutorFuture.complete(jobID)).createTestingTaskExecutorGateway();
final LocalUnresolvedTaskManagerLocation taskManagerLocation = new LocalUnresolvedTaskManagerLocation();
try {
jobMaster.start();
final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
final Collection<SlotOffer> slotOffers = registerSlotsAtJobMaster(1, jobMasterGateway, jobGraph.getJobID(), testingTaskExecutorGateway, taskManagerLocation);
// check that we accepted the offered slot
assertThat(slotOffers, hasSize(1));
final AllocationID allocationId = slotOffers.iterator().next().getAllocationId();
// now fail the allocation and check that we close the connection to the TaskExecutor
jobMasterGateway.failSlot(taskManagerLocation.getResourceID(), allocationId, new FlinkException("Fail allocation test exception"));
// we should free the slot and then disconnect from the TaskExecutor because we use no
// longer slots from it
assertThat(freedSlotFuture.get(), equalTo(allocationId));
assertThat(disconnectTaskExecutorFuture.get(), equalTo(jobGraph.getJobID()));
} finally {
RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
}
}
use of org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation in project flink by apache.
the class JobMasterExecutionDeploymentReconciliationTest method testExecutionDeploymentReconciliationForPendingExecution.
/**
* Tests that the job master does not issue a cancel call if the heartbeat reports an execution
* for which the deployment was not yet acknowledged.
*/
@Test
public void testExecutionDeploymentReconciliationForPendingExecution() throws Exception {
TestingExecutionDeploymentTrackerWrapper deploymentTrackerWrapper = new TestingExecutionDeploymentTrackerWrapper();
final JobGraph jobGraph = JobGraphTestUtils.singleNoOpJobGraph();
JobMaster jobMaster = createAndStartJobMaster(deploymentTrackerWrapper, jobGraph);
JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
RPC_SERVICE_RESOURCE.getTestingRpcService().registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
final CompletableFuture<ExecutionAttemptID> taskSubmissionFuture = new CompletableFuture<>();
final CompletableFuture<ExecutionAttemptID> taskCancellationFuture = new CompletableFuture<>();
final CompletableFuture<Acknowledge> taskSubmissionAcknowledgeFuture = new CompletableFuture<>();
TaskExecutorGateway taskExecutorGateway = createTaskExecutorGateway(taskCancellationFuture, taskSubmissionFuture, taskSubmissionAcknowledgeFuture);
LocalUnresolvedTaskManagerLocation localUnresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
registerTaskExecutorAndOfferSlots(jobMasterGateway, jobGraph.getJobID(), taskExecutorGateway, localUnresolvedTaskManagerLocation);
ExecutionAttemptID pendingExecutionId = taskSubmissionFuture.get();
// the execution has not been acknowledged yet by the TaskExecutor, but we already allow the
// ID to be in the heartbeat payload
jobMasterGateway.heartbeatFromTaskManager(localUnresolvedTaskManagerLocation.getResourceID(), new TaskExecutorToJobManagerHeartbeatPayload(new AccumulatorReport(Collections.emptyList()), new ExecutionDeploymentReport(Collections.singleton(pendingExecutionId))));
taskSubmissionAcknowledgeFuture.complete(Acknowledge.get());
deploymentTrackerWrapper.getTaskDeploymentFuture().get();
assertFalse(taskCancellationFuture.isDone());
}
use of org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation in project flink by apache.
the class JobMasterTestUtils method registerTaskExecutorAndOfferSlots.
public static void registerTaskExecutorAndOfferSlots(TestingRpcService rpcService, JobMasterGateway jobMasterGateway, JobID jobId, int numSlots, TaskExecutorGateway taskExecutorGateway, Time testingTimeout) throws ExecutionException, InterruptedException {
final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);
jobMasterGateway.registerTaskManager(jobId, TaskManagerRegistrationInformation.create(taskExecutorGateway.getAddress(), unresolvedTaskManagerLocation, TestingUtils.zeroUUID()), testingTimeout).get();
Collection<SlotOffer> slotOffers = IntStream.range(0, numSlots).mapToObj(index -> new SlotOffer(new AllocationID(), index, ResourceProfile.ANY)).collect(Collectors.toList());
jobMasterGateway.offerSlots(unresolvedTaskManagerLocation.getResourceID(), slotOffers, testingTimeout).get();
}
use of org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation in project flink by apache.
the class TaskExecutorTest method testReconnectionAttemptIfExplicitlyDisconnected.
/**
* Tests that the TaskExecutor tries to reconnect to a ResourceManager from which it was
* explicitly disconnected.
*/
@Test
public void testReconnectionAttemptIfExplicitlyDisconnected() throws Exception {
final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1);
final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
final TaskExecutor taskExecutor = createTaskExecutor(new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation).build());
taskExecutor.start();
try {
final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
final ClusterInformation clusterInformation = new ClusterInformation("foobar", 1234);
final CompletableFuture<RegistrationResponse> registrationResponseFuture = CompletableFuture.completedFuture(new TaskExecutorRegistrationSuccess(new InstanceID(), ResourceID.generate(), clusterInformation));
final BlockingQueue<ResourceID> registrationQueue = new ArrayBlockingQueue<>(1);
testingResourceManagerGateway.setRegisterTaskExecutorFunction(taskExecutorRegistration -> {
registrationQueue.offer(taskExecutorRegistration.getResourceId());
return registrationResponseFuture;
});
rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());
final ResourceID firstRegistrationAttempt = registrationQueue.take();
assertThat(firstRegistrationAttempt, equalTo(unresolvedTaskManagerLocation.getResourceID()));
final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);
assertThat(registrationQueue, is(empty()));
taskExecutorGateway.disconnectResourceManager(new FlinkException("Test exception"));
final ResourceID secondRegistrationAttempt = registrationQueue.take();
assertThat(secondRegistrationAttempt, equalTo(unresolvedTaskManagerLocation.getResourceID()));
} finally {
RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
}
}
Aggregations