use of org.apache.flink.runtime.taskmanager.TaskManagerLocation in project flink by apache.
the class InstanceTest method testCancelAllSlots.
@Test
public void testCancelAllSlots() {
try {
ResourceID resourceID = ResourceID.generate();
HardwareDescription hardwareDescription = new HardwareDescription(4, 2L * 1024 * 1024 * 1024, 1024 * 1024 * 1024, 512 * 1024 * 1024);
InetAddress address = InetAddress.getByName("127.0.0.1");
TaskManagerLocation connection = new TaskManagerLocation(resourceID, address, 10001);
Instance instance = new Instance(new ActorTaskManagerGateway(DummyActorGateway.INSTANCE), connection, new InstanceID(), hardwareDescription, 3);
assertEquals(3, instance.getNumberOfAvailableSlots());
SimpleSlot slot1 = instance.allocateSimpleSlot(new JobID());
SimpleSlot slot2 = instance.allocateSimpleSlot(new JobID());
SimpleSlot slot3 = instance.allocateSimpleSlot(new JobID());
instance.cancelAndReleaseAllSlots();
assertEquals(3, instance.getNumberOfAvailableSlots());
assertTrue(slot1.isCanceled());
assertTrue(slot2.isCanceled());
assertTrue(slot3.isCanceled());
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.runtime.taskmanager.TaskManagerLocation in project flink by apache.
the class SimpleSlotTest method getSlot.
public static SimpleSlot getSlot() throws Exception {
ResourceID resourceID = ResourceID.generate();
HardwareDescription hardwareDescription = new HardwareDescription(4, 2L * 1024 * 1024 * 1024, 1024 * 1024 * 1024, 512 * 1024 * 1024);
InetAddress address = InetAddress.getByName("127.0.0.1");
TaskManagerLocation connection = new TaskManagerLocation(resourceID, address, 10001);
Instance instance = new Instance(new ActorTaskManagerGateway(DummyActorGateway.INSTANCE), connection, new InstanceID(), hardwareDescription, 1);
return instance.allocateSimpleSlot(new JobID());
}
use of org.apache.flink.runtime.taskmanager.TaskManagerLocation in project flink by apache.
the class TaskExecutorITCase method testSlotAllocation.
@Test
public void testSlotAllocation() throws Exception {
TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
TestingHighAvailabilityServices testingHAServices = new TestingHighAvailabilityServices();
final Configuration configuration = new Configuration();
final ScheduledExecutorService scheduledExecutorService = new ScheduledThreadPoolExecutor(1);
final ResourceID taskManagerResourceId = new ResourceID("foobar");
final UUID rmLeaderId = UUID.randomUUID();
final TestingLeaderElectionService rmLeaderElectionService = new TestingLeaderElectionService();
final TestingLeaderRetrievalService rmLeaderRetrievalService = new TestingLeaderRetrievalService();
final String rmAddress = "rm";
final String jmAddress = "jm";
final UUID jmLeaderId = UUID.randomUUID();
final JobID jobId = new JobID();
final ResourceProfile resourceProfile = new ResourceProfile(1.0, 1);
testingHAServices.setResourceManagerLeaderElectionService(rmLeaderElectionService);
testingHAServices.setResourceManagerLeaderRetriever(rmLeaderRetrievalService);
testingHAServices.setJobMasterLeaderRetriever(jobId, new TestingLeaderRetrievalService(jmAddress, jmLeaderId));
TestingSerialRpcService rpcService = new TestingSerialRpcService();
ResourceManagerConfiguration resourceManagerConfiguration = new ResourceManagerConfiguration(Time.milliseconds(500L), Time.milliseconds(500L), Time.minutes(5L));
SlotManagerFactory slotManagerFactory = new DefaultSlotManager.Factory();
JobLeaderIdService jobLeaderIdService = new JobLeaderIdService(testingHAServices, rpcService.getScheduledExecutor(), resourceManagerConfiguration.getJobTimeout());
MetricRegistry metricRegistry = mock(MetricRegistry.class);
HeartbeatServices heartbeatServices = mock(HeartbeatServices.class, RETURNS_MOCKS);
final TaskManagerConfiguration taskManagerConfiguration = TaskManagerConfiguration.fromConfiguration(configuration);
final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(taskManagerResourceId, InetAddress.getLocalHost(), 1234);
final MemoryManager memoryManager = mock(MemoryManager.class);
final IOManager ioManager = mock(IOManager.class);
final NetworkEnvironment networkEnvironment = mock(NetworkEnvironment.class);
final TaskManagerMetricGroup taskManagerMetricGroup = mock(TaskManagerMetricGroup.class);
final BroadcastVariableManager broadcastVariableManager = mock(BroadcastVariableManager.class);
final FileCache fileCache = mock(FileCache.class);
final TaskSlotTable taskSlotTable = new TaskSlotTable(Arrays.asList(resourceProfile), new TimerService<AllocationID>(scheduledExecutorService, 100L));
final JobManagerTable jobManagerTable = new JobManagerTable();
final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation);
ResourceManager<ResourceID> resourceManager = new StandaloneResourceManager(rpcService, resourceManagerConfiguration, testingHAServices, slotManagerFactory, metricRegistry, jobLeaderIdService, testingFatalErrorHandler);
TaskExecutor taskExecutor = new TaskExecutor(taskManagerConfiguration, taskManagerLocation, rpcService, memoryManager, ioManager, networkEnvironment, testingHAServices, heartbeatServices, metricRegistry, taskManagerMetricGroup, broadcastVariableManager, fileCache, taskSlotTable, jobManagerTable, jobLeaderService, testingFatalErrorHandler);
JobMasterGateway jmGateway = mock(JobMasterGateway.class);
when(jmGateway.registerTaskManager(any(String.class), any(TaskManagerLocation.class), eq(jmLeaderId), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new JMTMRegistrationSuccess(taskManagerResourceId, 1234)));
when(jmGateway.getHostname()).thenReturn(jmAddress);
rpcService.registerGateway(rmAddress, resourceManager.getSelf());
rpcService.registerGateway(jmAddress, jmGateway);
final AllocationID allocationId = new AllocationID();
final SlotRequest slotRequest = new SlotRequest(jobId, allocationId, resourceProfile);
final SlotOffer slotOffer = new SlotOffer(allocationId, 0, resourceProfile);
try {
resourceManager.start();
taskExecutor.start();
// notify the RM that it is the leader
rmLeaderElectionService.isLeader(rmLeaderId);
// notify the TM about the new RM leader
rmLeaderRetrievalService.notifyListener(rmAddress, rmLeaderId);
Future<RegistrationResponse> registrationResponseFuture = resourceManager.registerJobManager(rmLeaderId, jmLeaderId, jmAddress, jobId);
RegistrationResponse registrationResponse = registrationResponseFuture.get();
assertTrue(registrationResponse instanceof JobMasterRegistrationSuccess);
resourceManager.requestSlot(jmLeaderId, rmLeaderId, slotRequest);
verify(jmGateway).offerSlots(eq(taskManagerResourceId), (Iterable<SlotOffer>) argThat(Matchers.contains(slotOffer)), eq(jmLeaderId), any(Time.class));
} finally {
if (testingFatalErrorHandler.hasExceptionOccurred()) {
testingFatalErrorHandler.rethrowError();
}
}
}
use of org.apache.flink.runtime.taskmanager.TaskManagerLocation in project flink by apache.
the class TaskExecutorTest method testHeartbeatTimeoutWithJobManager.
@Test
public void testHeartbeatTimeoutWithJobManager() throws Exception {
final JobID jobId = new JobID();
final Configuration configuration = new Configuration();
final TaskManagerConfiguration tmConfig = TaskManagerConfiguration.fromConfiguration(configuration);
final ResourceID tmResourceId = new ResourceID("tm");
final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(tmResourceId, InetAddress.getLoopbackAddress(), 1234);
final TaskSlotTable taskSlotTable = new TaskSlotTable(Arrays.asList(mock(ResourceProfile.class)), mock(TimerService.class));
final TestingSerialRpcService rpc = new TestingSerialRpcService();
final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation);
final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
final TestingLeaderRetrievalService rmLeaderRetrievalService = new TestingLeaderRetrievalService();
final TestingLeaderRetrievalService jmLeaderRetrievalService = new TestingLeaderRetrievalService();
haServices.setJobMasterLeaderRetriever(jobId, jmLeaderRetrievalService);
haServices.setResourceManagerLeaderRetriever(rmLeaderRetrievalService);
final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
final long heartbeatTimeout = 10L;
HeartbeatServices heartbeatServices = mock(HeartbeatServices.class);
when(heartbeatServices.createHeartbeatManager(eq(taskManagerLocation.getResourceID()), any(HeartbeatListener.class), any(ScheduledExecutor.class), any(Logger.class))).thenAnswer(new Answer<HeartbeatManagerImpl<Void, Void>>() {
@Override
public HeartbeatManagerImpl<Void, Void> answer(InvocationOnMock invocation) throws Throwable {
return new HeartbeatManagerImpl<>(heartbeatTimeout, taskManagerLocation.getResourceID(), (HeartbeatListener<Void, Void>) invocation.getArguments()[1], (Executor) invocation.getArguments()[2], (ScheduledExecutor) invocation.getArguments()[2], (Logger) invocation.getArguments()[3]);
}
});
final String jobMasterAddress = "jm";
final UUID jmLeaderId = UUID.randomUUID();
final ResourceID jmResourceId = new ResourceID(jobMasterAddress);
final JobMasterGateway jobMasterGateway = mock(JobMasterGateway.class);
final int blobPort = 42;
when(jobMasterGateway.registerTaskManager(any(String.class), eq(taskManagerLocation), eq(jmLeaderId), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new JMTMRegistrationSuccess(jmResourceId, blobPort)));
when(jobMasterGateway.getAddress()).thenReturn(jobMasterAddress);
when(jobMasterGateway.getHostname()).thenReturn("localhost");
try {
final TaskExecutor taskManager = new TaskExecutor(tmConfig, taskManagerLocation, rpc, mock(MemoryManager.class), mock(IOManager.class), mock(NetworkEnvironment.class), haServices, heartbeatServices, mock(MetricRegistry.class), mock(TaskManagerMetricGroup.class), mock(BroadcastVariableManager.class), mock(FileCache.class), taskSlotTable, new JobManagerTable(), jobLeaderService, testingFatalErrorHandler);
taskManager.start();
rpc.registerGateway(jobMasterAddress, jobMasterGateway);
// we have to add the job after the TaskExecutor, because otherwise the service has not
// been properly started.
jobLeaderService.addJob(jobId, jobMasterAddress);
// now inform the task manager about the new job leader
jmLeaderRetrievalService.notifyListener(jobMasterAddress, jmLeaderId);
// register task manager success will trigger monitoring heartbeat target between tm and jm
verify(jobMasterGateway).registerTaskManager(eq(taskManager.getAddress()), eq(taskManagerLocation), eq(jmLeaderId), any(Time.class));
// the timeout should trigger disconnecting from the JobManager
verify(jobMasterGateway, timeout(heartbeatTimeout * 5)).disconnectTaskManager(eq(taskManagerLocation.getResourceID()), any(TimeoutException.class));
// check if a concurrent error occurred
testingFatalErrorHandler.rethrowError();
} finally {
rpc.stopService();
}
}
use of org.apache.flink.runtime.taskmanager.TaskManagerLocation in project flink by apache.
the class TaskExecutorTest method testRejectAllocationRequestsForOutOfSyncSlots.
/**
* Tests that all allocation requests for slots are ignored if the slot has been reported as
* free by the TaskExecutor but this report hasn't been confirmed by the ResourceManager.
*
* This is essential for the correctness of the state of the ResourceManager.
*/
@Ignore
@Test
public void testRejectAllocationRequestsForOutOfSyncSlots() throws Exception {
final ResourceID resourceID = ResourceID.generate();
final String address1 = "/resource/manager/address/one";
final UUID leaderId = UUID.randomUUID();
final JobID jobId = new JobID();
final String jobManagerAddress = "foobar";
final TestingSerialRpcService rpc = new TestingSerialRpcService();
try {
// register the mock resource manager gateways
ResourceManagerGateway rmGateway1 = mock(ResourceManagerGateway.class);
rpc.registerGateway(address1, rmGateway1);
TestingLeaderRetrievalService testLeaderService = new TestingLeaderRetrievalService();
TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
haServices.setResourceManagerLeaderRetriever(testLeaderService);
TaskManagerConfiguration taskManagerServicesConfiguration = mock(TaskManagerConfiguration.class);
when(taskManagerServicesConfiguration.getNumberSlots()).thenReturn(1);
TaskManagerLocation taskManagerLocation = mock(TaskManagerLocation.class);
when(taskManagerLocation.getResourceID()).thenReturn(resourceID);
final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
TaskExecutor taskManager = new TaskExecutor(taskManagerServicesConfiguration, taskManagerLocation, rpc, mock(MemoryManager.class), mock(IOManager.class), mock(NetworkEnvironment.class), haServices, mock(HeartbeatServices.class, RETURNS_MOCKS), mock(MetricRegistry.class), mock(TaskManagerMetricGroup.class), mock(BroadcastVariableManager.class), mock(FileCache.class), mock(TaskSlotTable.class), mock(JobManagerTable.class), mock(JobLeaderService.class), testingFatalErrorHandler);
taskManager.start();
String taskManagerAddress = taskManager.getAddress();
// no connection initially, since there is no leader
assertNull(taskManager.getResourceManagerConnection());
// define a leader and see that a registration happens
testLeaderService.notifyListener(address1, leaderId);
verify(rmGateway1).registerTaskExecutor(eq(leaderId), eq(taskManagerAddress), eq(resourceID), any(SlotReport.class), any(Time.class));
assertNotNull(taskManager.getResourceManagerConnection());
// test that allocating a slot works
final SlotID slotID = new SlotID(resourceID, 0);
TMSlotRequestReply tmSlotRequestReply = taskManager.requestSlot(slotID, jobId, new AllocationID(), jobManagerAddress, leaderId);
assertTrue(tmSlotRequestReply instanceof TMSlotRequestRegistered);
// TODO: Figure out the concrete allocation behaviour between RM and TM. Maybe we don't need the SlotID...
// test that we can't allocate slots which are blacklisted due to pending confirmation of the RM
final SlotID unconfirmedFreeSlotID = new SlotID(resourceID, 1);
TMSlotRequestReply tmSlotRequestReply2 = taskManager.requestSlot(unconfirmedFreeSlotID, jobId, new AllocationID(), jobManagerAddress, leaderId);
assertTrue(tmSlotRequestReply2 instanceof TMSlotRequestRejected);
// re-register
verify(rmGateway1).registerTaskExecutor(eq(leaderId), eq(taskManagerAddress), eq(resourceID), any(SlotReport.class), any(Time.class));
testLeaderService.notifyListener(address1, leaderId);
// now we should be successful because the slots status has been synced
// test that we can't allocate slots which are blacklisted due to pending confirmation of the RM
TMSlotRequestReply tmSlotRequestReply3 = taskManager.requestSlot(unconfirmedFreeSlotID, jobId, new AllocationID(), jobManagerAddress, leaderId);
assertTrue(tmSlotRequestReply3 instanceof TMSlotRequestRegistered);
// check if a concurrent error occurred
testingFatalErrorHandler.rethrowError();
} finally {
rpc.stopService();
}
}
Aggregations