Search in sources :

Example 1 with SlotStatus

use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.

the class SlotProtocolTest method testSlotAvailableRequest.

/**
 * Exercises a slot request issued against the ResourceManager while a TaskExecutor
 * with one matching slot is registered at the SlotManager. The test checks that
 * 1) a SlotRequest is routed to the SlotManager
 * 2) a SlotRequest is confirmed
 * 3) a SlotRequest leads to an allocation of a registered slot
 * 4) a SlotRequest is routed to the TaskExecutor
 *
 * @throws Exception if any step of the setup or of the exercised calls throws
 */
@Test
public void testSlotAvailableRequest() throws Exception {
    final String rmAddress = "/rm1";
    final String jmAddress = "/jm1";
    final String tmAddress = "/tm1";
    final JobID jobID = new JobID();
    testRpcService.registerGateway(jmAddress, mock(JobMasterGateway.class));
    final TestingHighAvailabilityServices testingHaServices = new TestingHighAvailabilityServices();
    final UUID rmLeaderID = UUID.randomUUID();
    final UUID jmLeaderID = UUID.randomUUID();
    TestingLeaderElectionService rmLeaderElectionService = configureHA(testingHaServices, jobID, rmAddress, rmLeaderID, jmAddress, jmLeaderID);
    // The mocked TaskExecutor answers every slot request with a future that is never completed by this test.
    TaskExecutorGateway taskExecutorGateway = mock(TaskExecutorGateway.class);
    Mockito.when(taskExecutorGateway.requestSlot(any(SlotID.class), any(JobID.class), any(AllocationID.class), any(String.class), any(UUID.class), any(Time.class))).thenReturn(new FlinkCompletableFuture<TMSlotRequestReply>());
    testRpcService.registerGateway(tmAddress, taskExecutorGateway);
    ResourceManagerConfiguration resourceManagerConfiguration = new ResourceManagerConfiguration(Time.seconds(5L), Time.seconds(5L), Time.minutes(5L));
    JobLeaderIdService jobLeaderIdService = new JobLeaderIdService(testingHaServices, testRpcService.getScheduledExecutor(), resourceManagerConfiguration.getJobTimeout());
    TestingSlotManagerFactory slotManagerFactory = new TestingSlotManagerFactory();
    ResourceManager<ResourceID> resourceManager = Mockito.spy(new StandaloneResourceManager(testRpcService, resourceManagerConfiguration, testingHaServices, slotManagerFactory, mock(MetricRegistry.class), jobLeaderIdService, mock(FatalErrorHandler.class)));
    resourceManager.start();
    rmLeaderElectionService.isLeader(rmLeaderID);
    // NOTE(review): fixed one-second pause after granting leadership, presumably so the
    // ResourceManager has processed the grant before the JobManager registers. A deterministic
    // wait would make the test less timing-sensitive - confirm what the leader election
    // test service offers before replacing it.
    Thread.sleep(1000);
    Future<RegistrationResponse> registrationFuture = resourceManager.registerJobManager(rmLeaderID, jmLeaderID, jmAddress, jobID);
    try {
        registrationFuture.get(5L, TimeUnit.SECONDS);
    } catch (Exception e) {
        // Same failure type and message as Assert.fail(...), but the underlying cause
        // (timeout, interruption, execution failure) is kept in the stack trace.
        throw new AssertionError("JobManager registration Future didn't become ready.", e);
    }
    final SlotManager slotManager = slotManagerFactory.slotManager;
    final ResourceID resourceID = ResourceID.generate();
    final AllocationID allocationID = new AllocationID();
    final ResourceProfile resourceProfile = new ResourceProfile(1.0, 100);
    final SlotID slotID = new SlotID(resourceID, 0);
    final SlotStatus slotStatus = new SlotStatus(slotID, resourceProfile);
    final SlotReport slotReport = new SlotReport(Collections.singletonList(slotStatus));
    // register slot at SlotManager
    slotManager.registerTaskExecutor(resourceID, new TaskExecutorRegistration(taskExecutorGateway), slotReport);
    SlotRequest slotRequest = new SlotRequest(jobID, allocationID, resourceProfile);
    RMSlotRequestReply slotRequestReply = resourceManager.requestSlot(jmLeaderID, rmLeaderID, slotRequest);
    // 1) a SlotRequest is routed to the SlotManager
    verify(slotManager).requestSlot(slotRequest);
    // 2) a SlotRequest is confirmed (expected value first, so a failure message reads correctly)
    Assert.assertEquals(allocationID, slotRequestReply.getAllocationID());
    // 3) a SlotRequest leads to an allocation of a registered slot
    Assert.assertTrue(slotManager.isAllocated(slotID));
    Assert.assertTrue(slotManager.isAllocated(allocationID));
    // 4) a SlotRequest is routed to the TaskExecutor
    verify(taskExecutorGateway, timeout(5000)).requestSlot(eq(slotID), eq(jobID), eq(allocationID), any(String.class), any(UUID.class), any(Time.class));
}
Also used : TMSlotRequestReply(org.apache.flink.runtime.resourcemanager.messages.taskexecutor.TMSlotRequestReply) TaskExecutorRegistration(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorRegistration) JobLeaderIdService(org.apache.flink.runtime.resourcemanager.JobLeaderIdService) Time(org.apache.flink.api.common.time.Time) StandaloneResourceManager(org.apache.flink.runtime.resourcemanager.StandaloneResourceManager) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) SlotRequest(org.apache.flink.runtime.resourcemanager.SlotRequest) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) UUID(java.util.UUID) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) RMSlotRequestReply(org.apache.flink.runtime.resourcemanager.messages.jobmanager.RMSlotRequestReply) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) ResourceManagerConfiguration(org.apache.flink.runtime.resourcemanager.ResourceManagerConfiguration) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) TestingSlotManager(org.apache.flink.runtime.resourcemanager.TestingSlotManager) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 2 with SlotStatus

use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.

the class SlotManagerTest method testNewlyAppearedFreeSlot.

/**
 * Registers a task executor whose slot report contains a single slot without any
 * allocation while no slot request is pending, and expects the slot manager to
 * count that slot as free.
 */
@Test
public void testNewlyAppearedFreeSlot() {
    final TestingSlotManager manager = new TestingSlotManager();
    final SlotID freeSlotId = SlotID.generate();
    // the report carries exactly one slot status, built without job or allocation
    final SlotReport report = new SlotReport(
        Collections.singletonList(new SlotStatus(freeSlotId, DEFAULT_TESTING_PROFILE)));

    manager.registerTaskExecutor(freeSlotId.getResourceID(), taskExecutorRegistration, report);

    assertEquals(0, manager.getAllocatedSlotCount());
    assertEquals(1, manager.getFreeSlotCount());
}
Also used : SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) Test(org.junit.Test)

Example 3 with SlotStatus

use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.

the class SlotManagerTest method testSlotAllocationFailedAtTaskManagerOccupiedByOther.

/**
 * Covers an allocation that is reported as failed / rejected by the TaskManager
 * while the slot in question is already held by a different request.
 * This can only occur after reconnect of the TaskExecutor.
 */
@Test
public void testSlotAllocationFailedAtTaskManagerOccupiedByOther() {
    final TestingSlotManager manager = new TestingSlotManager();
    final SlotID slotId = SlotID.generate();
    final SlotStatus status = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
    final SlotReport report = new SlotReport(status);
    manager.registerTaskExecutor(slotId.getResourceID(), taskExecutorRegistration, report);

    // the first request is expected to take the only slot
    final SlotRequest firstRequest = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
    manager.requestSlot(firstRequest);
    assertEquals(1, manager.getAllocatedSlotCount());
    assertEquals(0, manager.getFreeSlotCount());
    assertEquals(0, manager.getPendingRequestCount());

    // registering the same TaskExecutor again with the same report (a reconnect)
    // is expected to leave the slot free
    manager.registerTaskExecutor(slotId.getResourceID(), taskExecutorRegistration, report);
    assertEquals(0, manager.getAllocatedSlotCount());
    assertEquals(1, manager.getFreeSlotCount());
    assertEquals(0, manager.getPendingRequestCount());

    // a second request is expected to take over the slot
    final SlotRequest secondRequest = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
    manager.requestSlot(secondRequest);
    assertEquals(1, manager.getAllocatedSlotCount());
    assertEquals(0, manager.getFreeSlotCount());
    assertEquals(0, manager.getPendingRequestCount());
    assertFalse(manager.isAllocated(firstRequest.getAllocationId()));
    assertTrue(manager.isAllocated(secondRequest.getAllocationId()));

    // report the failure of the first request at the TaskManager; the counts and the
    // allocation held by the second request are expected to stay as they were
    manager.handleSlotRequestFailedAtTaskManager(firstRequest, slotId);
    assertEquals(1, manager.getAllocatedSlotCount());
    assertEquals(0, manager.getFreeSlotCount());
    assertEquals(0, manager.getPendingRequestCount());
    assertFalse(manager.isAllocated(firstRequest.getAllocationId()));
    assertTrue(manager.isAllocated(secondRequest.getAllocationId()));
}
Also used : SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) SlotRequest(org.apache.flink.runtime.resourcemanager.SlotRequest) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 4 with SlotStatus

use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.

the class SlotManagerTest method testMultipleSlotRequestsWithOneSlot.

/**
 * Tests multiple slot requests competing for one slot: the expectations below are that
 * the first request gets the slot, the second one stays pending until the slot is
 * reported available, and the slot ends up free after a second availability notification.
 */
@Test
public void testMultipleSlotRequestsWithOneSlot() {
    final TestingSlotManager manager = new TestingSlotManager();

    // first request arrives before any slot is registered
    final AllocationID firstAllocationId = new AllocationID();
    final SlotRequest firstRequest = new SlotRequest(new JobID(), firstAllocationId, DEFAULT_TESTING_PROFILE);
    manager.requestSlot(firstRequest);

    // a task executor with a single slot registers
    final ResourceID taskExecutorId = ResourceID.generate();
    final SlotStatus onlySlot = new SlotStatus(new SlotID(taskExecutorId, 0), DEFAULT_TESTING_PROFILE);
    final SlotReport report = new SlotReport(onlySlot);
    manager.registerTaskExecutor(taskExecutorId, taskExecutorRegistration, report);

    // a second request arrives and is expected to remain pending
    final SlotRequest secondRequest = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
    manager.requestSlot(secondRequest);
    assertEquals(1, manager.getAllocatedSlotCount());
    assertEquals(0, manager.getFreeSlotCount());
    assertEquals(1, manager.getPendingRequestCount());
    assertTrue(manager.isAllocated(firstAllocationId));
    assertTrue(manager.isAllocated(firstRequest.getAllocationId()));

    // the slot is reported available: it is expected to stay allocated, now for the
    // second request, leaving nothing pending
    manager.notifySlotAvailable(taskExecutorId, onlySlot.getSlotID());
    assertEquals(1, manager.getAllocatedSlotCount());
    assertEquals(0, manager.getFreeSlotCount());
    assertEquals(0, manager.getPendingRequestCount());
    assertTrue(manager.isAllocated(onlySlot.getSlotID()));
    assertTrue(manager.isAllocated(secondRequest.getAllocationId()));

    // the slot is reported available once more: with no request left it is expected to be free
    manager.notifySlotAvailable(taskExecutorId, onlySlot.getSlotID());
    assertEquals(0, manager.getAllocatedSlotCount());
    assertEquals(1, manager.getFreeSlotCount());
    assertEquals(0, manager.getPendingRequestCount());
}
Also used : SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) SlotRequest(org.apache.flink.runtime.resourcemanager.SlotRequest) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 5 with SlotStatus

use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.

the class SlotManagerTest method testExistingInUseSlotUpdateStatus.

/**
 * Registers a slot that is reported with a job and an allocation, expects it to be
 * counted as allocated, and then expects it to be counted as free once it is
 * reported available.
 */
@Test
public void testExistingInUseSlotUpdateStatus() {
    final TestingSlotManager manager = new TestingSlotManager();
    final SlotID usedSlotId = SlotID.generate();
    // the slot status carries a job id and an allocation id
    final SlotStatus usedSlot = new SlotStatus(usedSlotId, DEFAULT_TESTING_PROFILE, new JobID(), new AllocationID());
    final SlotReport report = new SlotReport(Collections.singletonList(usedSlot));
    manager.registerTaskExecutor(usedSlotId.getResourceID(), taskExecutorRegistration, report);

    assertEquals(1, manager.getAllocatedSlotCount());
    assertEquals(0, manager.getFreeSlotCount());
    assertTrue(manager.isAllocated(usedSlotId));

    // the slot is reported available again
    manager.notifySlotAvailable(usedSlotId.getResourceID(), usedSlotId);

    assertEquals(0, manager.getAllocatedSlotCount());
    assertEquals(1, manager.getFreeSlotCount());
    assertFalse(manager.isAllocated(usedSlotId));
}
Also used : SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

SlotStatus (org.apache.flink.runtime.taskexecutor.SlotStatus)28 SlotID (org.apache.flink.runtime.clusterframework.types.SlotID)23 SlotReport (org.apache.flink.runtime.taskexecutor.SlotReport)21 Test (org.junit.Test)20 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)17 JobID (org.apache.flink.api.common.JobID)15 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)10 TaskExecutorConnection (org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection)8 ResourceProfile (org.apache.flink.runtime.clusterframework.types.ResourceProfile)7 SlotRequest (org.apache.flink.runtime.resourcemanager.SlotRequest)6 Time (org.apache.flink.api.common.time.Time)5 TestingTaskExecutorGatewayBuilder (org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder)5 CompletableFuture (java.util.concurrent.CompletableFuture)4 RegistrationResponse (org.apache.flink.runtime.registration.RegistrationResponse)4 TaskExecutorGateway (org.apache.flink.runtime.taskexecutor.TaskExecutorGateway)4 FlinkException (org.apache.flink.util.FlinkException)4 ArrayList (java.util.ArrayList)3 TimeoutException (java.util.concurrent.TimeoutException)3 InstanceID (org.apache.flink.runtime.instance.InstanceID)3 Acknowledge (org.apache.flink.runtime.messages.Acknowledge)3