Search in sources :

Example 11 with TestingSerialRpcService

use of org.apache.flink.runtime.rpc.TestingSerialRpcService in project flink by apache.

the class TaskExecutorTest method testSlotAcceptance.

/**
	 * Tests that accepted slots go into state assigned and the others are returned to the resource
	 * manager.
	 */
@Test
public void testSlotAcceptance() throws Exception {
    final JobID jobId = new JobID();
    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    final Configuration configuration = new Configuration();
    final TaskManagerConfiguration taskManagerConfiguration = TaskManagerConfiguration.fromConfiguration(configuration);
    final ResourceID resourceId = new ResourceID("foobar");
    final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(resourceId, InetAddress.getLoopbackAddress(), 1234);
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    final TimerService<AllocationID> timerService = mock(TimerService.class);
    final TaskSlotTable taskSlotTable = new TaskSlotTable(Arrays.asList(mock(ResourceProfile.class), mock(ResourceProfile.class)), timerService);
    final JobManagerTable jobManagerTable = new JobManagerTable();
    final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation);
    final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
    final String resourceManagerAddress = "rm";
    final UUID resourceManagerLeaderId = UUID.randomUUID();
    final String jobManagerAddress = "jm";
    final UUID jobManagerLeaderId = UUID.randomUUID();
    final LeaderRetrievalService resourceManagerLeaderRetrievalService = new TestingLeaderRetrievalService(resourceManagerAddress, resourceManagerLeaderId);
    final LeaderRetrievalService jobManagerLeaderRetrievalService = new TestingLeaderRetrievalService(jobManagerAddress, jobManagerLeaderId);
    haServices.setResourceManagerLeaderRetriever(resourceManagerLeaderRetrievalService);
    haServices.setJobMasterLeaderRetriever(jobId, jobManagerLeaderRetrievalService);
    final ResourceManagerGateway resourceManagerGateway = mock(ResourceManagerGateway.class);
    final InstanceID registrationId = new InstanceID();
    when(resourceManagerGateway.registerTaskExecutor(eq(resourceManagerLeaderId), any(String.class), eq(resourceId), any(SlotReport.class), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new TaskExecutorRegistrationSuccess(registrationId, 1000L)));
    final ResourceID jmResourceId = new ResourceID(jobManagerAddress);
    final int blobPort = 42;
    final AllocationID allocationId1 = new AllocationID();
    final AllocationID allocationId2 = new AllocationID();
    final SlotOffer offer1 = new SlotOffer(allocationId1, 0, ResourceProfile.UNKNOWN);
    final JobMasterGateway jobMasterGateway = mock(JobMasterGateway.class);
    when(jobMasterGateway.registerTaskManager(any(String.class), eq(taskManagerLocation), eq(jobManagerLeaderId), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new JMTMRegistrationSuccess(jmResourceId, blobPort)));
    when(jobMasterGateway.getHostname()).thenReturn(jobManagerAddress);
    when(jobMasterGateway.offerSlots(any(ResourceID.class), any(Iterable.class), eq(jobManagerLeaderId), any(Time.class))).thenReturn(FlinkCompletableFuture.completed((Iterable<SlotOffer>) Collections.singleton(offer1)));
    rpc.registerGateway(resourceManagerAddress, resourceManagerGateway);
    rpc.registerGateway(jobManagerAddress, jobMasterGateway);
    try {
        TaskExecutor taskManager = new TaskExecutor(taskManagerConfiguration, taskManagerLocation, rpc, mock(MemoryManager.class), mock(IOManager.class), mock(NetworkEnvironment.class), haServices, mock(HeartbeatServices.class, RETURNS_MOCKS), mock(MetricRegistry.class), mock(TaskManagerMetricGroup.class), mock(BroadcastVariableManager.class), mock(FileCache.class), taskSlotTable, jobManagerTable, jobLeaderService, testingFatalErrorHandler);
        taskManager.start();
        taskSlotTable.allocateSlot(0, jobId, allocationId1, Time.milliseconds(10000L));
        taskSlotTable.allocateSlot(1, jobId, allocationId2, Time.milliseconds(10000L));
        // we have to add the job after the TaskExecutor, because otherwise the service has not
        // been properly started.
        jobLeaderService.addJob(jobId, jobManagerAddress);
        verify(resourceManagerGateway).notifySlotAvailable(eq(resourceManagerLeaderId), eq(registrationId), eq(new SlotID(resourceId, 1)));
        assertTrue(taskSlotTable.existsActiveSlot(jobId, allocationId1));
        assertFalse(taskSlotTable.existsActiveSlot(jobId, allocationId2));
        assertTrue(taskSlotTable.isSlotFree(1));
        // check if a concurrent error occurred
        testingFatalErrorHandler.rethrowError();
    } finally {
        rpc.stopService();
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) InstanceID(org.apache.flink.runtime.instance.InstanceID) Time(org.apache.flink.api.common.time.Time) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) TestingSerialRpcService(org.apache.flink.runtime.rpc.TestingSerialRpcService) UUID(java.util.UUID) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) FileCache(org.apache.flink.runtime.filecache.FileCache) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 12 with TestingSerialRpcService

use of org.apache.flink.runtime.rpc.TestingSerialRpcService in project flink by apache.

the class ResourceManagerHATest method testGrantAndRevokeLeadership.

@Test
public void testGrantAndRevokeLeadership() throws Exception {
    RpcService rpcService = new TestingSerialRpcService();
    TestingLeaderElectionService leaderElectionService = new TestingLeaderElectionService();
    TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServices();
    highAvailabilityServices.setResourceManagerLeaderElectionService(leaderElectionService);
    ResourceManagerConfiguration resourceManagerConfiguration = new ResourceManagerConfiguration(Time.seconds(5L), Time.seconds(5L), Time.minutes(5L));
    SlotManagerFactory slotManagerFactory = new TestingSlotManagerFactory();
    MetricRegistry metricRegistry = mock(MetricRegistry.class);
    JobLeaderIdService jobLeaderIdService = new JobLeaderIdService(highAvailabilityServices, rpcService.getScheduledExecutor(), resourceManagerConfiguration.getJobTimeout());
    TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
    final ResourceManager resourceManager = new StandaloneResourceManager(rpcService, resourceManagerConfiguration, highAvailabilityServices, slotManagerFactory, metricRegistry, jobLeaderIdService, testingFatalErrorHandler);
    resourceManager.start();
    // before grant leadership, resourceManager's leaderId is null
    Assert.assertEquals(null, resourceManager.getLeaderSessionId());
    final UUID leaderId = UUID.randomUUID();
    leaderElectionService.isLeader(leaderId);
    // after grant leadership, resourceManager's leaderId has value
    Assert.assertEquals(leaderId, resourceManager.getLeaderSessionId());
    // then revoke leadership, resourceManager's leaderId is null again
    leaderElectionService.notLeader();
    Assert.assertEquals(null, resourceManager.getLeaderSessionId());
    if (testingFatalErrorHandler.hasExceptionOccurred()) {
        testingFatalErrorHandler.rethrowError();
    }
}
Also used : TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) SlotManagerFactory(org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerFactory) RpcService(org.apache.flink.runtime.rpc.RpcService) TestingSerialRpcService(org.apache.flink.runtime.rpc.TestingSerialRpcService) TestingSerialRpcService(org.apache.flink.runtime.rpc.TestingSerialRpcService) UUID(java.util.UUID) Test(org.junit.Test)

Example 13 with TestingSerialRpcService

use of org.apache.flink.runtime.rpc.TestingSerialRpcService in project flink by apache.

the class JobMasterTest method testHeartbeatTimeoutWithTaskManager.

@Test
public void testHeartbeatTimeoutWithTaskManager() throws Exception {
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    final TestingLeaderRetrievalService rmLeaderRetrievalService = new TestingLeaderRetrievalService();
    haServices.setResourceManagerLeaderRetriever(rmLeaderRetrievalService);
    haServices.setCheckpointRecoveryFactory(mock(CheckpointRecoveryFactory.class));
    final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
    final String jobManagerAddress = "jm";
    final UUID jmLeaderId = UUID.randomUUID();
    final ResourceID jmResourceId = new ResourceID(jobManagerAddress);
    final String taskManagerAddress = "tm";
    final ResourceID tmResourceId = new ResourceID(taskManagerAddress);
    final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(tmResourceId, InetAddress.getLoopbackAddress(), 1234);
    final TaskExecutorGateway taskExecutorGateway = mock(TaskExecutorGateway.class);
    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    rpc.registerGateway(taskManagerAddress, taskExecutorGateway);
    final long heartbeatInterval = 1L;
    final long heartbeatTimeout = 5L;
    final ScheduledExecutor scheduledExecutor = mock(ScheduledExecutor.class);
    final HeartbeatServices heartbeatServices = new TestingHeartbeatServices(heartbeatInterval, heartbeatTimeout, scheduledExecutor);
    final JobGraph jobGraph = new JobGraph();
    try {
        final JobMaster jobMaster = new JobMaster(jmResourceId, jobGraph, new Configuration(), rpc, haServices, heartbeatServices, Executors.newScheduledThreadPool(1), mock(BlobLibraryCacheManager.class), mock(RestartStrategyFactory.class), Time.of(10, TimeUnit.SECONDS), null, mock(OnCompletionActions.class), testingFatalErrorHandler, new FlinkUserCodeClassLoader(new URL[0]));
        // also start the heartbeat manager in job manager
        jobMaster.start(jmLeaderId);
        // register task manager will trigger monitoring heartbeat target, schedule heartbeat request in interval time
        jobMaster.registerTaskManager(taskManagerAddress, taskManagerLocation, jmLeaderId);
        ArgumentCaptor<Runnable> heartbeatRunnableCaptor = ArgumentCaptor.forClass(Runnable.class);
        verify(scheduledExecutor, times(1)).scheduleAtFixedRate(heartbeatRunnableCaptor.capture(), eq(0L), eq(heartbeatInterval), eq(TimeUnit.MILLISECONDS));
        Runnable heartbeatRunnable = heartbeatRunnableCaptor.getValue();
        ArgumentCaptor<Runnable> timeoutRunnableCaptor = ArgumentCaptor.forClass(Runnable.class);
        verify(scheduledExecutor).schedule(timeoutRunnableCaptor.capture(), eq(heartbeatTimeout), eq(TimeUnit.MILLISECONDS));
        Runnable timeoutRunnable = timeoutRunnableCaptor.getValue();
        // run the first heartbeat request
        heartbeatRunnable.run();
        verify(taskExecutorGateway, times(1)).heartbeatFromJobManager(eq(jmResourceId));
        // run the timeout runnable to simulate a heartbeat timeout
        timeoutRunnable.run();
        verify(taskExecutorGateway).disconnectJobManager(eq(jobGraph.getJobID()), any(TimeoutException.class));
        // check if a concurrent error occurred
        testingFatalErrorHandler.rethrowError();
    } finally {
        rpc.stopService();
    }
}
Also used : BlobLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager) Configuration(org.apache.flink.configuration.Configuration) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) FlinkUserCodeClassLoader(org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader) URL(java.net.URL) ScheduledExecutor(org.apache.flink.runtime.concurrent.ScheduledExecutor) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TestingSerialRpcService(org.apache.flink.runtime.rpc.TestingSerialRpcService) UUID(java.util.UUID) TimeoutException(java.util.concurrent.TimeoutException) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) CheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory) OnCompletionActions(org.apache.flink.runtime.jobmanager.OnCompletionActions) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) RestartStrategyFactory(org.apache.flink.runtime.executiongraph.restart.RestartStrategyFactory) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Aggregations

TestingSerialRpcService (org.apache.flink.runtime.rpc.TestingSerialRpcService)13 UUID (java.util.UUID)12 TestingFatalErrorHandler (org.apache.flink.runtime.util.TestingFatalErrorHandler)12 Test (org.junit.Test)11 HeartbeatServices (org.apache.flink.runtime.heartbeat.HeartbeatServices)10 TestingHighAvailabilityServices (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices)10 MetricRegistry (org.apache.flink.runtime.metrics.MetricRegistry)10 TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation)10 Time (org.apache.flink.api.common.time.Time)9 BroadcastVariableManager (org.apache.flink.runtime.broadcast.BroadcastVariableManager)9 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)9 FileCache (org.apache.flink.runtime.filecache.FileCache)9 IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)9 NetworkEnvironment (org.apache.flink.runtime.io.network.NetworkEnvironment)9 TestingLeaderRetrievalService (org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService)9 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)9 TaskManagerMetricGroup (org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup)9 TaskSlotTable (org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable)9 JobID (org.apache.flink.api.common.JobID)8 Configuration (org.apache.flink.configuration.Configuration)8