Search in sources:

Example 6 with ResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.ResourceManagerGateway in project flink by apache.

The class SlotPoolTest, method setUp.

/**
 * Prepares the fixture used by the tests of this class: a {@code SlotPool} that has been
 * started and connected to a mocked {@code ResourceManagerGateway}.
 *
 * <p>The mocked gateway answers every {@code requestSlot} call with a mocked {@code Future}.
 *
 * @throws Exception if any step of the fixture construction fails
 */
@Before
public void setUp() throws Exception {
    rpcService = new TestingSerialRpcService();
    jobId = new JobID();
    slotPool = new SlotPool(rpcService, jobId);

    // The main thread is entered before the pool is started or otherwise touched.
    mainThreadValidatorUtil = new MainThreadValidatorUtil(slotPool);
    mainThreadValidatorUtil.enterMainThread();

    final UUID poolStartId = UUID.randomUUID();
    slotPool.start(poolStartId);

    // Stub the resource manager so that any slot request is answered with a mocked future.
    resourceManagerGateway = mock(ResourceManagerGateway.class);
    when(
        resourceManagerGateway.requestSlot(
            any(UUID.class),
            any(UUID.class),
            any(SlotRequest.class),
            any(Time.class)))
        .thenReturn(mock(Future.class));

    final UUID rmConnectionId = UUID.randomUUID();
    slotPool.connectToResourceManager(rmConnectionId, resourceManagerGateway);
}
Also used : TestingSerialRpcService(org.apache.flink.runtime.rpc.TestingSerialRpcService) MainThreadValidatorUtil(org.apache.flink.runtime.rpc.MainThreadValidatorUtil) Future(org.apache.flink.runtime.concurrent.Future) Time(org.apache.flink.api.common.time.Time) UUID(java.util.UUID) SlotRequest(org.apache.flink.runtime.resourcemanager.SlotRequest) JobID(org.apache.flink.api.common.JobID) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) Before(org.junit.Before)

Example 7 with ResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.ResourceManagerGateway in project flink by apache.

The class TaskExecutorTest, method testRejectAllocationRequestsForOutOfSyncSlots.

/**
 * Tests that allocation requests for a slot are rejected if the slot has been reported as
 * free by the TaskExecutor but this report hasn't been confirmed by the ResourceManager.
 *
 * <p>This is essential for the correctness of the state of the ResourceManager.
 *
 * <p>The test is currently disabled via {@code @Ignore}; see the TODO inside the method body
 * regarding the still undecided allocation protocol between ResourceManager and TaskManager.
 *
 * @throws Exception if anything in the test fails unexpectedly
 */
@Ignore
@Test
public void testRejectAllocationRequestsForOutOfSyncSlots() throws Exception {
    final ResourceID resourceID = ResourceID.generate();
    final String address1 = "/resource/manager/address/one";
    final UUID leaderId = UUID.randomUUID();
    final JobID jobId = new JobID();
    final String jobManagerAddress = "foobar";
    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    try {
        // register the mock resource manager gateway under the address announced later on
        ResourceManagerGateway rmGateway1 = mock(ResourceManagerGateway.class);
        rpc.registerGateway(address1, rmGateway1);
        // leader retrieval service that the test drives manually through notifyListener(...)
        TestingLeaderRetrievalService testLeaderService = new TestingLeaderRetrievalService();
        TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
        haServices.setResourceManagerLeaderRetriever(testLeaderService);
        // the mocked configuration reports a single slot
        TaskManagerConfiguration taskManagerServicesConfiguration = mock(TaskManagerConfiguration.class);
        when(taskManagerServicesConfiguration.getNumberSlots()).thenReturn(1);
        TaskManagerLocation taskManagerLocation = mock(TaskManagerLocation.class);
        when(taskManagerLocation.getResourceID()).thenReturn(resourceID);
        final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
        // all remaining TaskExecutor dependencies are plain mocks
        TaskExecutor taskManager = new TaskExecutor(taskManagerServicesConfiguration, taskManagerLocation, rpc, mock(MemoryManager.class), mock(IOManager.class), mock(NetworkEnvironment.class), haServices, mock(HeartbeatServices.class, RETURNS_MOCKS), mock(MetricRegistry.class), mock(TaskManagerMetricGroup.class), mock(BroadcastVariableManager.class), mock(FileCache.class), mock(TaskSlotTable.class), mock(JobManagerTable.class), mock(JobLeaderService.class), testingFatalErrorHandler);
        taskManager.start();
        String taskManagerAddress = taskManager.getAddress();
        // no connection initially, since there is no leader
        assertNull(taskManager.getResourceManagerConnection());
        // define a leader and see that a registration happens
        testLeaderService.notifyListener(address1, leaderId);
        verify(rmGateway1).registerTaskExecutor(eq(leaderId), eq(taskManagerAddress), eq(resourceID), any(SlotReport.class), any(Time.class));
        assertNotNull(taskManager.getResourceManagerConnection());
        // test that allocating a slot works
        final SlotID slotID = new SlotID(resourceID, 0);
        TMSlotRequestReply tmSlotRequestReply = taskManager.requestSlot(slotID, jobId, new AllocationID(), jobManagerAddress, leaderId);
        assertTrue(tmSlotRequestReply instanceof TMSlotRequestRegistered);
        // TODO: Figure out the concrete allocation behaviour between RM and TM. Maybe we don't need the SlotID...
        // test that we can't allocate slots which are blacklisted due to pending confirmation of the RM
        final SlotID unconfirmedFreeSlotID = new SlotID(resourceID, 1);
        TMSlotRequestReply tmSlotRequestReply2 = taskManager.requestSlot(unconfirmedFreeSlotID, jobId, new AllocationID(), jobManagerAddress, leaderId);
        assertTrue(tmSlotRequestReply2 instanceof TMSlotRequestRejected);
        // re-register
        // NOTE(review): this verify runs BEFORE the second notifyListener call below, so it
        // appears to re-check the first registration rather than a second one - confirm that
        // this ordering is intended before re-enabling the test.
        verify(rmGateway1).registerTaskExecutor(eq(leaderId), eq(taskManagerAddress), eq(resourceID), any(SlotReport.class), any(Time.class));
        testLeaderService.notifyListener(address1, leaderId);
        // now we should be successful because the slots status has been synced
        // the request for the previously rejected slot is expected to be accepted this time
        TMSlotRequestReply tmSlotRequestReply3 = taskManager.requestSlot(unconfirmedFreeSlotID, jobId, new AllocationID(), jobManagerAddress, leaderId);
        assertTrue(tmSlotRequestReply3 instanceof TMSlotRequestRegistered);
        // check if a concurrent error occurred
        testingFatalErrorHandler.rethrowError();
    } finally {
        // always stop the RPC service, even if an assertion above failed
        rpc.stopService();
    }
}
Also used : TMSlotRequestReply(org.apache.flink.runtime.resourcemanager.messages.taskexecutor.TMSlotRequestReply) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) Time(org.apache.flink.api.common.time.Time) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) TMSlotRequestRejected(org.apache.flink.runtime.resourcemanager.messages.taskexecutor.TMSlotRequestRejected) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) TestingSerialRpcService(org.apache.flink.runtime.rpc.TestingSerialRpcService) TMSlotRequestRegistered(org.apache.flink.runtime.resourcemanager.messages.taskexecutor.TMSlotRequestRegistered) UUID(java.util.UUID) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) FileCache(org.apache.flink.runtime.filecache.FileCache) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) JobID(org.apache.flink.api.common.JobID) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 8 with ResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.ResourceManagerGateway in project flink by apache.

The class TaskExecutorTest, method testImmediatelyRegistersIfLeaderIsKnown.

/**
 * Verifies that a {@code TaskExecutor} registers itself at the resource manager once it has
 * been started, when the resource manager address is already available through
 * {@code NonHaServices}.
 *
 * <p>The expected registration call carries the task manager's own address, its
 * {@code ResourceID} and the {@code SlotReport} returned by the mocked {@code TaskSlotTable}.
 *
 * @throws Exception if anything in the test fails unexpectedly
 */
@Test
public void testImmediatelyRegistersIfLeaderIsKnown() throws Exception {
    final ResourceID resourceID = ResourceID.generate();
    final String resourceManagerAddress = "/resource/manager/address/one";
    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    try {
        // register a mock resource manager gateway that answers every registration with success
        ResourceManagerGateway rmGateway = mock(ResourceManagerGateway.class);
        when(rmGateway.registerTaskExecutor(any(UUID.class), anyString(), any(ResourceID.class), any(SlotReport.class), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new RegistrationResponse.Success()));
        TaskManagerConfiguration taskManagerServicesConfiguration = mock(TaskManagerConfiguration.class);
        when(taskManagerServicesConfiguration.getNumberSlots()).thenReturn(1);
        rpc.registerGateway(resourceManagerAddress, rmGateway);
        TaskManagerLocation taskManagerLocation = mock(TaskManagerLocation.class);
        when(taskManagerLocation.getResourceID()).thenReturn(resourceID);
        // the resource manager address is handed to the HA services up front
        NonHaServices haServices = new NonHaServices(resourceManagerAddress);
        // the slot table hands out a fixed report so the verification below can match on it
        final TaskSlotTable taskSlotTable = mock(TaskSlotTable.class);
        final SlotReport slotReport = new SlotReport();
        when(taskSlotTable.createSlotReport(any(ResourceID.class))).thenReturn(slotReport);
        final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
        TaskExecutor taskManager = new TaskExecutor(taskManagerServicesConfiguration, taskManagerLocation, rpc, mock(MemoryManager.class), mock(IOManager.class), mock(NetworkEnvironment.class), haServices, mock(HeartbeatServices.class, RETURNS_MOCKS), mock(MetricRegistry.class), mock(TaskManagerMetricGroup.class), mock(BroadcastVariableManager.class), mock(FileCache.class), taskSlotTable, mock(JobManagerTable.class), mock(JobLeaderService.class), testingFatalErrorHandler);
        taskManager.start();
        String taskManagerAddress = taskManager.getAddress();
        // no further trigger is issued: the registration must already have happened
        verify(rmGateway).registerTaskExecutor(any(UUID.class), eq(taskManagerAddress), eq(resourceID), eq(slotReport), any(Time.class));
        // check if a concurrent error occurred
        testingFatalErrorHandler.rethrowError();
    } finally {
        // always stop the RPC service, even if an assertion above failed
        rpc.stopService();
    }
}
Also used : Time(org.apache.flink.api.common.time.Time) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) NonHaServices(org.apache.flink.runtime.highavailability.NonHaServices) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) TestingSerialRpcService(org.apache.flink.runtime.rpc.TestingSerialRpcService) UUID(java.util.UUID) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) FileCache(org.apache.flink.runtime.filecache.FileCache) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) Test(org.junit.Test)

Example 9 with ResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.ResourceManagerGateway in project flink by apache.

The class TaskExecutorTest, method testSlotAcceptance.

/**
 * Verifies how the {@code TaskExecutor} reacts to the job master's answer to a slot offer.
 *
 * <p>Two slots are allocated for the same job, but the mocked job master accepts only the
 * offer that belongs to the first allocation. The test expects that allocation to stay
 * active, the second slot to be free again, and the resource manager to be informed through
 * {@code notifySlotAvailable} that the slot with index 1 is available.
 *
 * @throws Exception if anything in the test fails unexpectedly
 */
@Test
public void testSlotAcceptance() throws Exception {
    final JobID job = new JobID();
    final TestingSerialRpcService rpcService = new TestingSerialRpcService();

    // Task manager side: configuration, location and the real slot/job bookkeeping services.
    final Configuration flinkConfig = new Configuration();
    final TaskManagerConfiguration tmConfig = TaskManagerConfiguration.fromConfiguration(flinkConfig);
    final ResourceID tmResourceId = new ResourceID("foobar");
    final TaskManagerLocation tmLocation =
        new TaskManagerLocation(tmResourceId, InetAddress.getLoopbackAddress(), 1234);
    final TestingHighAvailabilityServices highAvailability = new TestingHighAvailabilityServices();
    final TimerService<AllocationID> slotTimers = mock(TimerService.class);
    final TaskSlotTable slotTable =
        new TaskSlotTable(
            Arrays.asList(mock(ResourceProfile.class), mock(ResourceProfile.class)),
            slotTimers);
    final JobManagerTable jmTable = new JobManagerTable();
    final JobLeaderService leaderService = new JobLeaderService(tmLocation);
    final TestingFatalErrorHandler fatalErrors = new TestingFatalErrorHandler();

    // Leader information for the resource manager and the job manager.
    final String rmAddress = "rm";
    final UUID rmLeaderId = UUID.randomUUID();
    final String jmAddress = "jm";
    final UUID jmLeaderId = UUID.randomUUID();
    final LeaderRetrievalService rmLeaderRetriever =
        new TestingLeaderRetrievalService(rmAddress, rmLeaderId);
    final LeaderRetrievalService jmLeaderRetriever =
        new TestingLeaderRetrievalService(jmAddress, jmLeaderId);
    highAvailability.setResourceManagerLeaderRetriever(rmLeaderRetriever);
    highAvailability.setJobMasterLeaderRetriever(job, jmLeaderRetriever);

    // Mocked resource manager: registration of this task executor succeeds.
    final ResourceManagerGateway rmGateway = mock(ResourceManagerGateway.class);
    final InstanceID tmRegistrationId = new InstanceID();
    final RegistrationResponse rmRegistrationReply =
        new TaskExecutorRegistrationSuccess(tmRegistrationId, 1000L);
    when(
        rmGateway.registerTaskExecutor(
            eq(rmLeaderId),
            any(String.class),
            eq(tmResourceId),
            any(SlotReport.class),
            any(Time.class)))
        .thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(rmRegistrationReply));

    final ResourceID jmResourceId = new ResourceID(jmAddress);
    final int blobPort = 42;
    final AllocationID acceptedAllocation = new AllocationID();
    final AllocationID rejectedAllocation = new AllocationID();
    final SlotOffer acceptedOffer = new SlotOffer(acceptedAllocation, 0, ResourceProfile.UNKNOWN);

    // Mocked job master: registration succeeds and only the first offer is accepted.
    final JobMasterGateway jmGateway = mock(JobMasterGateway.class);
    final RegistrationResponse jmRegistrationReply =
        new JMTMRegistrationSuccess(jmResourceId, blobPort);
    when(
        jmGateway.registerTaskManager(
            any(String.class),
            eq(tmLocation),
            eq(jmLeaderId),
            any(Time.class)))
        .thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(jmRegistrationReply));
    when(jmGateway.getHostname()).thenReturn(jmAddress);
    final Iterable<SlotOffer> acceptedOffers = Collections.singleton(acceptedOffer);
    when(
        jmGateway.offerSlots(
            any(ResourceID.class),
            any(Iterable.class),
            eq(jmLeaderId),
            any(Time.class)))
        .thenReturn(FlinkCompletableFuture.completed(acceptedOffers));

    rpcService.registerGateway(rmAddress, rmGateway);
    rpcService.registerGateway(jmAddress, jmGateway);

    try {
        TaskExecutor taskExecutor =
            new TaskExecutor(
                tmConfig,
                tmLocation,
                rpcService,
                mock(MemoryManager.class),
                mock(IOManager.class),
                mock(NetworkEnvironment.class),
                highAvailability,
                mock(HeartbeatServices.class, RETURNS_MOCKS),
                mock(MetricRegistry.class),
                mock(TaskManagerMetricGroup.class),
                mock(BroadcastVariableManager.class),
                mock(FileCache.class),
                slotTable,
                jmTable,
                leaderService,
                fatalErrors);
        taskExecutor.start();

        slotTable.allocateSlot(0, job, acceptedAllocation, Time.milliseconds(10000L));
        slotTable.allocateSlot(1, job, rejectedAllocation, Time.milliseconds(10000L));

        // The job may only be added once the TaskExecutor is running; before that the
        // job leader service has not been properly started.
        leaderService.addJob(job, jmAddress);

        final SlotID returnedSlot = new SlotID(tmResourceId, 1);
        verify(rmGateway).notifySlotAvailable(eq(rmLeaderId), eq(tmRegistrationId), eq(returnedSlot));

        assertTrue(slotTable.existsActiveSlot(job, acceptedAllocation));
        assertFalse(slotTable.existsActiveSlot(job, rejectedAllocation));
        assertTrue(slotTable.isSlotFree(1));

        // check if a concurrent error occurred
        fatalErrors.rethrowError();
    } finally {
        rpcService.stopService();
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) InstanceID(org.apache.flink.runtime.instance.InstanceID) Time(org.apache.flink.api.common.time.Time) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) TestingSerialRpcService(org.apache.flink.runtime.rpc.TestingSerialRpcService) UUID(java.util.UUID) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) FileCache(org.apache.flink.runtime.filecache.FileCache) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) 
JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

ResourceManagerGateway (org.apache.flink.runtime.resourcemanager.ResourceManagerGateway)9 UUID (java.util.UUID)7 Time (org.apache.flink.api.common.time.Time)7 TestingSerialRpcService (org.apache.flink.runtime.rpc.TestingSerialRpcService)7 BroadcastVariableManager (org.apache.flink.runtime.broadcast.BroadcastVariableManager)6 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)6 FileCache (org.apache.flink.runtime.filecache.FileCache)6 HeartbeatServices (org.apache.flink.runtime.heartbeat.HeartbeatServices)6 IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)6 NetworkEnvironment (org.apache.flink.runtime.io.network.NetworkEnvironment)6 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)6 MetricRegistry (org.apache.flink.runtime.metrics.MetricRegistry)6 TaskManagerMetricGroup (org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup)6 TaskSlotTable (org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable)6 TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation)6 TestingFatalErrorHandler (org.apache.flink.runtime.util.TestingFatalErrorHandler)6 Test (org.junit.Test)6 JobID (org.apache.flink.api.common.JobID)5 SlotID (org.apache.flink.runtime.clusterframework.types.SlotID)5 TestingHighAvailabilityServices (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices)5