Search in sources :

Example 1 with ResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.ResourceManagerGateway in project flink by apache.

the class TaskExecutor method freeSlot.

private void freeSlot(AllocationID allocationId, Throwable cause) {
    Preconditions.checkNotNull(allocationId);
    try {
        int freedSlotIndex = taskSlotTable.freeSlot(allocationId, cause);
        if (freedSlotIndex != -1 && isConnectedToResourceManager()) {
            // the slot was freed. Tell the RM about it
            ResourceManagerGateway resourceManagerGateway = resourceManagerConnection.getTargetGateway();
            resourceManagerGateway.notifySlotAvailable(resourceManagerConnection.getTargetLeaderId(), resourceManagerConnection.getRegistrationId(), new SlotID(getResourceID(), freedSlotIndex));
        }
    } catch (SlotNotFoundException e) {
        log.debug("Could not free slot for allocation id {}.", allocationId, e);
    }
}
Also used : SlotNotFoundException(org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) RpcEndpoint(org.apache.flink.runtime.rpc.RpcEndpoint) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway)

Example 2 with ResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.ResourceManagerGateway in project flink by apache.

the class TaskExecutorTest method testJobLeaderDetection.

/**
	 * Tests that a TaskManager detects a job leader for which has reserved slots. Upon detecting
	 * the job leader, it will offer all reserved slots to the JobManager.
	 */
@Test
public void testJobLeaderDetection() throws Exception {
    final JobID jobId = new JobID();
    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    final Configuration configuration = new Configuration();
    final TaskManagerConfiguration taskManagerConfiguration = TaskManagerConfiguration.fromConfiguration(configuration);
    final ResourceID resourceId = new ResourceID("foobar");
    final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(resourceId, InetAddress.getLoopbackAddress(), 1234);
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    final TimerService<AllocationID> timerService = mock(TimerService.class);
    final TaskSlotTable taskSlotTable = new TaskSlotTable(Arrays.asList(mock(ResourceProfile.class)), timerService);
    final JobManagerTable jobManagerTable = new JobManagerTable();
    final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation);
    final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
    final TestingLeaderRetrievalService resourceManagerLeaderRetrievalService = new TestingLeaderRetrievalService();
    final TestingLeaderRetrievalService jobManagerLeaderRetrievalService = new TestingLeaderRetrievalService();
    haServices.setResourceManagerLeaderRetriever(resourceManagerLeaderRetrievalService);
    haServices.setJobMasterLeaderRetriever(jobId, jobManagerLeaderRetrievalService);
    final String resourceManagerAddress = "rm";
    final UUID resourceManagerLeaderId = UUID.randomUUID();
    final ResourceManagerGateway resourceManagerGateway = mock(ResourceManagerGateway.class);
    final InstanceID registrationId = new InstanceID();
    when(resourceManagerGateway.registerTaskExecutor(eq(resourceManagerLeaderId), any(String.class), eq(resourceId), any(SlotReport.class), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new TaskExecutorRegistrationSuccess(registrationId, 1000L)));
    final String jobManagerAddress = "jm";
    final UUID jobManagerLeaderId = UUID.randomUUID();
    final ResourceID jmResourceId = new ResourceID(jobManagerAddress);
    final int blobPort = 42;
    final JobMasterGateway jobMasterGateway = mock(JobMasterGateway.class);
    when(jobMasterGateway.registerTaskManager(any(String.class), eq(taskManagerLocation), eq(jobManagerLeaderId), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new JMTMRegistrationSuccess(jmResourceId, blobPort)));
    when(jobMasterGateway.getHostname()).thenReturn(jobManagerAddress);
    rpc.registerGateway(resourceManagerAddress, resourceManagerGateway);
    rpc.registerGateway(jobManagerAddress, jobMasterGateway);
    final AllocationID allocationId = new AllocationID();
    final SlotID slotId = new SlotID(resourceId, 0);
    final SlotOffer slotOffer = new SlotOffer(allocationId, 0, ResourceProfile.UNKNOWN);
    try {
        TaskExecutor taskManager = new TaskExecutor(taskManagerConfiguration, taskManagerLocation, rpc, mock(MemoryManager.class), mock(IOManager.class), mock(NetworkEnvironment.class), haServices, mock(HeartbeatServices.class, RETURNS_MOCKS), mock(MetricRegistry.class), mock(TaskManagerMetricGroup.class), mock(BroadcastVariableManager.class), mock(FileCache.class), taskSlotTable, jobManagerTable, jobLeaderService, testingFatalErrorHandler);
        taskManager.start();
        // tell the task manager about the rm leader
        resourceManagerLeaderRetrievalService.notifyListener(resourceManagerAddress, resourceManagerLeaderId);
        // request slots from the task manager under the given allocation id
        TMSlotRequestReply reply = taskManager.requestSlot(slotId, jobId, allocationId, jobManagerAddress, resourceManagerLeaderId);
        // this is hopefully successful :-)
        assertTrue(reply instanceof TMSlotRequestRegistered);
        // now inform the task manager about the new job leader
        jobManagerLeaderRetrievalService.notifyListener(jobManagerAddress, jobManagerLeaderId);
        // the job leader should get the allocation id offered
        verify(jobMasterGateway).offerSlots(any(ResourceID.class), (Iterable<SlotOffer>) Matchers.argThat(contains(slotOffer)), eq(jobManagerLeaderId), any(Time.class));
        // check if a concurrent error occurred
        testingFatalErrorHandler.rethrowError();
    } finally {
        rpc.stopService();
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TMSlotRequestReply(org.apache.flink.runtime.resourcemanager.messages.taskexecutor.TMSlotRequestReply) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) InstanceID(org.apache.flink.runtime.instance.InstanceID) Time(org.apache.flink.api.common.time.Time) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) TestingSerialRpcService(org.apache.flink.runtime.rpc.TestingSerialRpcService) TMSlotRequestRegistered(org.apache.flink.runtime.resourcemanager.messages.taskexecutor.TMSlotRequestRegistered) UUID(java.util.UUID) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) FileCache(org.apache.flink.runtime.filecache.FileCache) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 3 with ResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.ResourceManagerGateway in project flink by apache.

the class TaskExecutorTest method testTriggerRegistrationOnLeaderChange.

@Test
public void testTriggerRegistrationOnLeaderChange() throws Exception {
    final ResourceID resourceID = ResourceID.generate();
    final String address1 = "/resource/manager/address/one";
    final String address2 = "/resource/manager/address/two";
    final UUID leaderId1 = UUID.randomUUID();
    final UUID leaderId2 = UUID.randomUUID();
    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    try {
        // register the mock resource manager gateways
        ResourceManagerGateway rmGateway1 = mock(ResourceManagerGateway.class);
        ResourceManagerGateway rmGateway2 = mock(ResourceManagerGateway.class);
        when(rmGateway1.registerTaskExecutor(any(UUID.class), anyString(), any(ResourceID.class), any(SlotReport.class), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new RegistrationResponse.Success()));
        when(rmGateway2.registerTaskExecutor(any(UUID.class), anyString(), any(ResourceID.class), any(SlotReport.class), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new RegistrationResponse.Success()));
        rpc.registerGateway(address1, rmGateway1);
        rpc.registerGateway(address2, rmGateway2);
        TestingLeaderRetrievalService testLeaderService = new TestingLeaderRetrievalService();
        TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
        haServices.setResourceManagerLeaderRetriever(testLeaderService);
        TaskManagerConfiguration taskManagerServicesConfiguration = mock(TaskManagerConfiguration.class);
        when(taskManagerServicesConfiguration.getNumberSlots()).thenReturn(1);
        when(taskManagerServicesConfiguration.getConfiguration()).thenReturn(new Configuration());
        when(taskManagerServicesConfiguration.getTmpDirectories()).thenReturn(new String[1]);
        TaskManagerLocation taskManagerLocation = mock(TaskManagerLocation.class);
        when(taskManagerLocation.getResourceID()).thenReturn(resourceID);
        when(taskManagerLocation.getHostname()).thenReturn("foobar");
        final TaskSlotTable taskSlotTable = mock(TaskSlotTable.class);
        final SlotReport slotReport = new SlotReport();
        when(taskSlotTable.createSlotReport(any(ResourceID.class))).thenReturn(slotReport);
        final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
        TaskExecutor taskManager = new TaskExecutor(taskManagerServicesConfiguration, taskManagerLocation, rpc, mock(MemoryManager.class), mock(IOManager.class), mock(NetworkEnvironment.class), haServices, mock(HeartbeatServices.class, RETURNS_MOCKS), mock(MetricRegistry.class), mock(TaskManagerMetricGroup.class), mock(BroadcastVariableManager.class), mock(FileCache.class), taskSlotTable, mock(JobManagerTable.class), mock(JobLeaderService.class), testingFatalErrorHandler);
        taskManager.start();
        String taskManagerAddress = taskManager.getAddress();
        // no connection initially, since there is no leader
        assertNull(taskManager.getResourceManagerConnection());
        // define a leader and see that a registration happens
        testLeaderService.notifyListener(address1, leaderId1);
        verify(rmGateway1).registerTaskExecutor(eq(leaderId1), eq(taskManagerAddress), eq(resourceID), any(SlotReport.class), any(Time.class));
        assertNotNull(taskManager.getResourceManagerConnection());
        // cancel the leader 
        testLeaderService.notifyListener(null, null);
        // set a new leader, see that a registration happens 
        testLeaderService.notifyListener(address2, leaderId2);
        verify(rmGateway2).registerTaskExecutor(eq(leaderId2), eq(taskManagerAddress), eq(resourceID), eq(slotReport), any(Time.class));
        assertNotNull(taskManager.getResourceManagerConnection());
        // check if a concurrent error occurred
        testingFatalErrorHandler.rethrowError();
    } finally {
        rpc.stopService();
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) Time(org.apache.flink.api.common.time.Time) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) TestingSerialRpcService(org.apache.flink.runtime.rpc.TestingSerialRpcService) UUID(java.util.UUID) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) FileCache(org.apache.flink.runtime.filecache.FileCache) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) Test(org.junit.Test)

Example 4 with ResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.ResourceManagerGateway in project flink by apache.

the class TaskExecutorTest method testSubmitTaskBeforeAcceptSlot.

/**
	 * This tests task executor receive SubmitTask before OfferSlot response.
	 */
@Test
public void testSubmitTaskBeforeAcceptSlot() throws Exception {
    final JobID jobId = new JobID();
    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    final Configuration configuration = new Configuration();
    final TaskManagerConfiguration taskManagerConfiguration = TaskManagerConfiguration.fromConfiguration(configuration);
    final ResourceID resourceId = new ResourceID("foobar");
    final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(resourceId, InetAddress.getLoopbackAddress(), 1234);
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    final TimerService<AllocationID> timerService = mock(TimerService.class);
    final TaskSlotTable taskSlotTable = new TaskSlotTable(Arrays.asList(mock(ResourceProfile.class), mock(ResourceProfile.class)), timerService);
    final JobManagerTable jobManagerTable = new JobManagerTable();
    final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation);
    final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
    final String resourceManagerAddress = "rm";
    final UUID resourceManagerLeaderId = UUID.randomUUID();
    final String jobManagerAddress = "jm";
    final UUID jobManagerLeaderId = UUID.randomUUID();
    final LeaderRetrievalService resourceManagerLeaderRetrievalService = new TestingLeaderRetrievalService(resourceManagerAddress, resourceManagerLeaderId);
    final LeaderRetrievalService jobManagerLeaderRetrievalService = new TestingLeaderRetrievalService(jobManagerAddress, jobManagerLeaderId);
    haServices.setResourceManagerLeaderRetriever(resourceManagerLeaderRetrievalService);
    haServices.setJobMasterLeaderRetriever(jobId, jobManagerLeaderRetrievalService);
    final ResourceManagerGateway resourceManagerGateway = mock(ResourceManagerGateway.class);
    final InstanceID registrationId = new InstanceID();
    when(resourceManagerGateway.registerTaskExecutor(eq(resourceManagerLeaderId), any(String.class), eq(resourceId), any(SlotReport.class), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new TaskExecutorRegistrationSuccess(registrationId, 1000L)));
    final ResourceID jmResourceId = new ResourceID(jobManagerAddress);
    final int blobPort = 42;
    final AllocationID allocationId1 = new AllocationID();
    final AllocationID allocationId2 = new AllocationID();
    final SlotOffer offer1 = new SlotOffer(allocationId1, 0, ResourceProfile.UNKNOWN);
    final JobMasterGateway jobMasterGateway = mock(JobMasterGateway.class);
    when(jobMasterGateway.registerTaskManager(any(String.class), eq(taskManagerLocation), eq(jobManagerLeaderId), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new JMTMRegistrationSuccess(jmResourceId, blobPort)));
    when(jobMasterGateway.getHostname()).thenReturn(jobManagerAddress);
    rpc.registerGateway(resourceManagerAddress, resourceManagerGateway);
    rpc.registerGateway(jobManagerAddress, jobMasterGateway);
    final LibraryCacheManager libraryCacheManager = mock(LibraryCacheManager.class);
    when(libraryCacheManager.getClassLoader(eq(jobId))).thenReturn(getClass().getClassLoader());
    final JobManagerConnection jobManagerConnection = new JobManagerConnection(jobId, jmResourceId, jobMasterGateway, jobManagerLeaderId, mock(TaskManagerActions.class), mock(CheckpointResponder.class), libraryCacheManager, mock(ResultPartitionConsumableNotifier.class), mock(PartitionProducerStateChecker.class));
    jobManagerTable.put(jobId, jobManagerConnection);
    try {
        final TaskExecutor taskManager = new TaskExecutor(taskManagerConfiguration, taskManagerLocation, rpc, mock(MemoryManager.class), mock(IOManager.class), mock(NetworkEnvironment.class), haServices, mock(HeartbeatServices.class, RETURNS_MOCKS), mock(MetricRegistry.class), mock(TaskManagerMetricGroup.class), mock(BroadcastVariableManager.class), mock(FileCache.class), taskSlotTable, jobManagerTable, jobLeaderService, testingFatalErrorHandler);
        taskManager.start();
        taskSlotTable.allocateSlot(0, jobId, allocationId1, Time.milliseconds(10000L));
        taskSlotTable.allocateSlot(1, jobId, allocationId2, Time.milliseconds(10000L));
        final JobVertexID jobVertexId = new JobVertexID();
        JobInformation jobInformation = new JobInformation(jobId, name.getMethodName(), new SerializedValue<>(new ExecutionConfig()), new Configuration(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList());
        TaskInformation taskInformation = new TaskInformation(jobVertexId, "test task", 1, 1, TestInvokable.class.getName(), new Configuration());
        SerializedValue<JobInformation> serializedJobInformation = new SerializedValue<>(jobInformation);
        SerializedValue<TaskInformation> serializedJobVertexInformation = new SerializedValue<>(taskInformation);
        final TaskDeploymentDescriptor tdd = new TaskDeploymentDescriptor(serializedJobInformation, serializedJobVertexInformation, new ExecutionAttemptID(), allocationId1, 0, 0, 0, null, Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList());
        CompletableFuture<Iterable<SlotOffer>> offerResultFuture = new FlinkCompletableFuture<>();
        // submit task first and then return acceptance response
        when(jobMasterGateway.offerSlots(any(ResourceID.class), any(Iterable.class), eq(jobManagerLeaderId), any(Time.class))).thenReturn(offerResultFuture);
        // we have to add the job after the TaskExecutor, because otherwise the service has not
        // been properly started. This will also offer the slots to the job master
        jobLeaderService.addJob(jobId, jobManagerAddress);
        verify(jobMasterGateway).offerSlots(any(ResourceID.class), any(Iterable.class), eq(jobManagerLeaderId), any(Time.class));
        // submit the task without having acknowledge the offered slots
        taskManager.submitTask(tdd, jobManagerLeaderId);
        // acknowledge the offered slots
        offerResultFuture.complete(Collections.singleton(offer1));
        verify(resourceManagerGateway).notifySlotAvailable(eq(resourceManagerLeaderId), eq(registrationId), eq(new SlotID(resourceId, 1)));
        assertTrue(taskSlotTable.existsActiveSlot(jobId, allocationId1));
        assertFalse(taskSlotTable.existsActiveSlot(jobId, allocationId2));
        assertTrue(taskSlotTable.isSlotFree(1));
        // check if a concurrent error occurred
        testingFatalErrorHandler.rethrowError();
    } finally {
        rpc.stopService();
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) InstanceID(org.apache.flink.runtime.instance.InstanceID) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) TaskManagerActions(org.apache.flink.runtime.taskmanager.TaskManagerActions) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) TestingSerialRpcService(org.apache.flink.runtime.rpc.TestingSerialRpcService) PartitionProducerStateChecker(org.apache.flink.runtime.io.network.netty.PartitionProducerStateChecker) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) UUID(java.util.UUID) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) FileCache(org.apache.flink.runtime.filecache.FileCache) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) JobID(org.apache.flink.api.common.JobID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) Time(org.apache.flink.api.common.time.Time) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) LibraryCacheManager(org.apache.flink.runtime.execution.librarycache.LibraryCacheManager) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) SerializedValue(org.apache.flink.util.SerializedValue) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) Test(org.junit.Test)

Example 5 with ResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.ResourceManagerGateway in project flink by apache.

the class MiniCluster method waitUntilTaskManagerRegistrationsComplete.

public void waitUntilTaskManagerRegistrationsComplete() throws Exception {
    LeaderRetrievalService rmMasterListener = null;
    Future<LeaderAddressAndId> addressAndIdFuture;
    try {
        synchronized (lock) {
            checkState(running, "FlinkMiniCluster is not running");
            OneTimeLeaderListenerFuture listenerFuture = new OneTimeLeaderListenerFuture();
            rmMasterListener = haServices.getResourceManagerLeaderRetriever();
            rmMasterListener.start(listenerFuture);
            addressAndIdFuture = listenerFuture.future();
        }
        final LeaderAddressAndId addressAndId = addressAndIdFuture.get();
        final ResourceManagerGateway resourceManager = commonRpcService.connect(addressAndId.leaderAddress(), ResourceManagerGateway.class).get();
        final int numTaskManagersToWaitFor = taskManagerRunners.length;
        // poll and wait until enough TaskManagers are available
        while (true) {
            int numTaskManagersAvailable = resourceManager.getNumberOfRegisteredTaskManagers(addressAndId.leaderId()).get();
            if (numTaskManagersAvailable >= numTaskManagersToWaitFor) {
                break;
            }
            Thread.sleep(2);
        }
    } finally {
        try {
            if (rmMasterListener != null) {
                rmMasterListener.stop();
            }
        } catch (Exception e) {
            LOG.warn("Error shutting down leader listener for ResourceManager");
        }
    }
}
Also used : LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) LeaderAddressAndId(org.apache.flink.runtime.leaderelection.LeaderAddressAndId) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway)

Aggregations

ResourceManagerGateway (org.apache.flink.runtime.resourcemanager.ResourceManagerGateway)9 UUID (java.util.UUID)7 Time (org.apache.flink.api.common.time.Time)7 TestingSerialRpcService (org.apache.flink.runtime.rpc.TestingSerialRpcService)7 BroadcastVariableManager (org.apache.flink.runtime.broadcast.BroadcastVariableManager)6 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)6 FileCache (org.apache.flink.runtime.filecache.FileCache)6 HeartbeatServices (org.apache.flink.runtime.heartbeat.HeartbeatServices)6 IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)6 NetworkEnvironment (org.apache.flink.runtime.io.network.NetworkEnvironment)6 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)6 MetricRegistry (org.apache.flink.runtime.metrics.MetricRegistry)6 TaskManagerMetricGroup (org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup)6 TaskSlotTable (org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable)6 TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation)6 TestingFatalErrorHandler (org.apache.flink.runtime.util.TestingFatalErrorHandler)6 Test (org.junit.Test)6 JobID (org.apache.flink.api.common.JobID)5 SlotID (org.apache.flink.runtime.clusterframework.types.SlotID)5 TestingHighAvailabilityServices (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices)5