Use of org.apache.flink.runtime.resourcemanager.messages.taskexecutor.TMSlotRequestRejected in the Apache Flink project.
Class SlotManager, method sendSlotRequest.
/**
 * Sends the given slot request to the TaskExecutor that owns {@code freeSlot} and handles the
 * reply asynchronously on the executor returned by {@code rmServices.getMainThreadExecutor()}.
 *
 * <p>If the request future fails or the TaskExecutor rejects the request, the failure is
 * forwarded to {@code handleSlotRequestFailedAtTaskManager}. A reply whose instance ID does not
 * match the currently registered TaskExecutor for the replying resource ID is discarded.
 *
 * @param freeSlot the free slot the request is sent to
 * @param slotRequest the slot request to be fulfilled by {@code freeSlot}
 */
private void sendSlotRequest(final ResourceSlot freeSlot, final SlotRequest slotRequest) {
    final AllocationID allocationID = slotRequest.getAllocationId();
    final TaskExecutorRegistration registration = freeSlot.getTaskExecutorRegistration();
    final Future<TMSlotRequestReply> slotRequestReplyFuture = registration.getTaskExecutorGateway().requestSlot(
        freeSlot.getSlotId(),
        slotRequest.getJobId(),
        allocationID,
        // TODO: set proper JM address
        "foobar",
        rmServices.getLeaderID(),
        timeout);

    slotRequestReplyFuture.handleAsync(new BiFunction<TMSlotRequestReply, Throwable, Void>() {
        @Override
        public Void apply(TMSlotRequestReply slotRequestReply, Throwable throwable) {
            // A failed request carries no reply, so the failure must be handled before the
            // reply is dereferenced; otherwise the callback itself fails with a
            // NullPointerException and the failed request is never processed.
            if (throwable != null || slotRequestReply == null) {
                LOG.debug("Slot request for slot {} failed.", freeSlot.getSlotId(), throwable);
                handleSlotRequestFailedAtTaskManager(slotRequest, freeSlot.getSlotId());
                return null;
            }

            TaskExecutorRegistration current = taskManagers.get(slotRequestReply.getResourceID());
            if (current != null && current.getInstanceID().equals(slotRequestReply.getInstanceID())) {
                if (slotRequestReply instanceof TMSlotRequestRejected) {
                    handleSlotRequestFailedAtTaskManager(slotRequest, freeSlot.getSlotId());
                } else {
                    LOG.debug("Successfully registered slot {} ", freeSlot.getSlotId());
                }
            } else {
                LOG.debug("Discarding message from obsolete TaskExecutor with InstanceID {}", slotRequestReply.getInstanceID());
            }
            return null;
        }
    }, rmServices.getMainThreadExecutor());
}
Use of org.apache.flink.runtime.resourcemanager.messages.taskexecutor.TMSlotRequestRejected in the Apache Flink project.
Class TaskExecutorTest, method testRejectAllocationRequestsForOutOfSyncSlots.
/**
 * Tests that allocation requests are rejected for a slot which the TaskExecutor has reported
 * as free while that report has not yet been confirmed by the ResourceManager.
 *
 * <p>This is essential for the correctness of the state of the ResourceManager.
 */
@Ignore
@Test
public void testRejectAllocationRequestsForOutOfSyncSlots() throws Exception {
    final ResourceID tmResourceId = ResourceID.generate();
    final String rmAddress = "/resource/manager/address/one";
    final UUID rmLeaderId = UUID.randomUUID();
    final JobID job = new JobID();
    final String jmAddress = "foobar";
    final TestingSerialRpcService rpcService = new TestingSerialRpcService();

    try {
        // make the mocked ResourceManager gateway reachable under its address
        ResourceManagerGateway rmGateway = mock(ResourceManagerGateway.class);
        rpcService.registerGateway(rmAddress, rmGateway);

        TestingLeaderRetrievalService leaderRetrievalService = new TestingLeaderRetrievalService();
        TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServices();
        highAvailabilityServices.setResourceManagerLeaderRetriever(leaderRetrievalService);

        TaskManagerConfiguration tmConfiguration = mock(TaskManagerConfiguration.class);
        when(tmConfiguration.getNumberSlots()).thenReturn(1);

        TaskManagerLocation tmLocation = mock(TaskManagerLocation.class);
        when(tmLocation.getResourceID()).thenReturn(tmResourceId);

        final TestingFatalErrorHandler fatalErrorHandler = new TestingFatalErrorHandler();

        TaskExecutor taskExecutor = new TaskExecutor(
            tmConfiguration,
            tmLocation,
            rpcService,
            mock(MemoryManager.class),
            mock(IOManager.class),
            mock(NetworkEnvironment.class),
            highAvailabilityServices,
            mock(HeartbeatServices.class, RETURNS_MOCKS),
            mock(MetricRegistry.class),
            mock(TaskManagerMetricGroup.class),
            mock(BroadcastVariableManager.class),
            mock(FileCache.class),
            mock(TaskSlotTable.class),
            mock(JobManagerTable.class),
            mock(JobLeaderService.class),
            fatalErrorHandler);
        taskExecutor.start();
        String tmAddress = taskExecutor.getAddress();

        // without a leader there must not be a ResourceManager connection
        assertNull(taskExecutor.getResourceManagerConnection());

        // announcing a leader must trigger a registration at the ResourceManager
        leaderRetrievalService.notifyListener(rmAddress, rmLeaderId);
        verify(rmGateway).registerTaskExecutor(
            eq(rmLeaderId),
            eq(tmAddress),
            eq(tmResourceId),
            any(SlotReport.class),
            any(Time.class));
        assertNotNull(taskExecutor.getResourceManagerConnection());

        // a request for slot 0 is expected to be accepted
        final SlotID firstSlot = new SlotID(tmResourceId, 0);
        TMSlotRequestReply acceptedReply =
            taskExecutor.requestSlot(firstSlot, job, new AllocationID(), jmAddress, rmLeaderId);
        assertTrue(acceptedReply instanceof TMSlotRequestRegistered);

        // TODO: Figure out the concrete allocation behaviour between RM and TM. Maybe we don't need the SlotID...

        // slots which are blacklisted due to pending confirmation of the RM must be rejected
        final SlotID outOfSyncSlot = new SlotID(tmResourceId, 1);
        TMSlotRequestReply rejectedReply =
            taskExecutor.requestSlot(outOfSyncSlot, job, new AllocationID(), jmAddress, rmLeaderId);
        assertTrue(rejectedReply instanceof TMSlotRequestRejected);

        // re-register
        verify(rmGateway).registerTaskExecutor(
            eq(rmLeaderId),
            eq(tmAddress),
            eq(tmResourceId),
            any(SlotReport.class),
            any(Time.class));
        leaderRetrievalService.notifyListener(rmAddress, rmLeaderId);

        // now the same request should succeed because the slot status has been synced
        TMSlotRequestReply replyAfterSync =
            taskExecutor.requestSlot(outOfSyncSlot, job, new AllocationID(), jmAddress, rmLeaderId);
        assertTrue(replyAfterSync instanceof TMSlotRequestRegistered);

        // surface any error reported concurrently to the fatal error handler
        fatalErrorHandler.rethrowError();
    } finally {
        rpcService.stopService();
    }
}
Aggregations