Search in sources :

Example 16 with SlotStatus

use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.

the class DefaultSlotStatusSyncerTest method testSlotStatusProcessing.

/**
 * Exercises slot report processing of the {@code DefaultSlotStatusSyncer}.
 *
 * <p>The test walks through three steps and checks the trackers after each one:
 * an initial report with one free and two allocated slots, an additional allocation
 * requested through the syncer, and a follow-up report in which one of the previously
 * allocated slots is reported as free.
 */
@Test
public void testSlotStatusProcessing() {
    // Trackers and the syncer under test.
    final FineGrainedTaskManagerTracker tmTracker = new FineGrainedTaskManagerTracker();
    final ResourceTracker resTracker = new DefaultResourceTracker();
    final SlotStatusSyncer syncer = new DefaultSlotStatusSyncer(TASK_MANAGER_REQUEST_TIMEOUT);
    syncer.initialize(
            tmTracker,
            resTracker,
            ResourceManagerId.generate(),
            TestingUtils.defaultExecutor());

    // The gateway answers slot requests with a future that this test never completes.
    final TestingTaskExecutorGateway gateway =
            new TestingTaskExecutorGatewayBuilder()
                    .setRequestSlotFunction(ignored -> new CompletableFuture<>())
                    .createTestingTaskExecutorGateway();
    final TaskExecutorConnection connection =
            new TaskExecutorConnection(ResourceID.generate(), gateway);

    final JobID job = new JobID();
    final AllocationID firstAllocation = new AllocationID();
    final AllocationID secondAllocation = new AllocationID();
    final SlotID slotA = new SlotID(connection.getResourceID(), 0);
    final SlotID slotB = new SlotID(connection.getResourceID(), 1);
    final SlotID slotC = new SlotID(connection.getResourceID(), 2);
    final ResourceProfile tmTotal = ResourceProfile.fromResources(5, 20);
    final ResourceProfile slotProfile = ResourceProfile.fromResources(1, 4);

    // Initial report: slotA is free, slotB and slotC are allocated to the job.
    final SlotReport initialReport =
            new SlotReport(
                    Arrays.asList(
                            new SlotStatus(slotA, tmTotal),
                            new SlotStatus(slotB, slotProfile, job, firstAllocation),
                            new SlotStatus(slotC, slotProfile, job, secondAllocation)));
    // Follow-up report: slotC is now free, slotB keeps its allocation.
    final SlotReport followUpReport =
            new SlotReport(
                    Arrays.asList(
                            new SlotStatus(slotC, slotProfile),
                            new SlotStatus(slotB, slotProfile, job, firstAllocation)));

    tmTracker.addTaskManager(connection, tmTotal, tmTotal);

    // Step 1: the initial report registers both allocations.
    syncer.reportSlotStatus(connection.getInstanceID(), initialReport);
    assertThat(
            resTracker.getAcquiredResources(job),
            contains(ResourceRequirement.create(slotProfile, 2)));
    assertThat(
            tmTracker.getRegisteredTaskManager(connection.getInstanceID()).get().getAvailableResource(),
            equalTo(ResourceProfile.fromResources(3, 12)));
    assertTrue(tmTracker.getAllocatedOrPendingSlot(firstAllocation).isPresent());
    assertTrue(tmTracker.getAllocatedOrPendingSlot(secondAllocation).isPresent());

    // Step 2: a third slot is requested through the syncer.
    syncer.allocateSlot(connection.getInstanceID(), job, "address", slotProfile);
    assertThat(
            resTracker.getAcquiredResources(job),
            contains(ResourceRequirement.create(slotProfile, 3)));
    assertThat(
            tmTracker.getRegisteredTaskManager(connection.getInstanceID()).get().getAvailableResource(),
            equalTo(ResourceProfile.fromResources(2, 8)));
    // The new allocation is whichever tracked id is neither of the two reported ones.
    final AllocationID pendingAllocation =
            tmTracker
                    .getRegisteredTaskManager(connection.getInstanceID())
                    .get()
                    .getAllocatedSlots()
                    .keySet()
                    .stream()
                    .filter(
                            candidate ->
                                    !candidate.equals(firstAllocation)
                                            && !candidate.equals(secondAllocation))
                    .findAny()
                    .get();

    // Step 3: after the follow-up report the first allocation should still be allocated,
    // the second allocation should be freed, and the third one should remain pending.
    syncer.reportSlotStatus(connection.getInstanceID(), followUpReport);
    assertThat(
            resTracker.getAcquiredResources(job),
            contains(ResourceRequirement.create(slotProfile, 2)));
    assertThat(
            tmTracker.getRegisteredTaskManager(connection.getInstanceID()).get().getAvailableResource(),
            equalTo(ResourceProfile.fromResources(3, 12)));
    assertTrue(tmTracker.getAllocatedOrPendingSlot(firstAllocation).isPresent());
    assertFalse(tmTracker.getAllocatedOrPendingSlot(secondAllocation).isPresent());
    assertTrue(tmTracker.getAllocatedOrPendingSlot(pendingAllocation).isPresent());
    assertThat(
            tmTracker.getAllocatedOrPendingSlot(firstAllocation).get().getState(),
            is(SlotState.ALLOCATED));
    assertThat(
            tmTracker.getAllocatedOrPendingSlot(pendingAllocation).get().getState(),
            is(SlotState.PENDING));
}
Also used : TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) Arrays(java.util.Arrays) IsEqual.equalTo(org.hamcrest.core.IsEqual.equalTo) Tuple6(org.apache.flink.api.java.tuple.Tuple6) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) TimeoutException(java.util.concurrent.TimeoutException) CompletableFuture(java.util.concurrent.CompletableFuture) Assert.assertThat(org.junit.Assert.assertThat) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) TestLogger(org.apache.flink.util.TestLogger) Is.is(org.hamcrest.core.Is.is) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Matchers.empty(org.hamcrest.Matchers.empty) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) TestingUtils(org.apache.flink.testutils.TestingUtils) JobID(org.apache.flink.api.common.JobID) TaskExecutorConnection(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection) Matchers.contains(org.hamcrest.Matchers.contains) Assert.assertFalse(org.junit.Assert.assertFalse) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) Time(org.apache.flink.api.common.time.Time) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) 
SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) CompletableFuture(java.util.concurrent.CompletableFuture) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) JobID(org.apache.flink.api.common.JobID) TaskExecutorConnection(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection) Test(org.junit.Test)

Example 17 with SlotStatus

use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.

the class SlotManager method registerTaskExecutor.

/**
 * Registers a TaskExecutor and every slot listed in its slot report.
 *
 * <p>If a registration already exists for {@code resourceID},
 * {@code notifyTaskManagerFailure} is invoked for it before the new registration is stored.
 * Each reported slot is then registered; a slot carrying an allocation id is recorded in
 * the allocation map, any other slot is handed to {@code handleFreeSlot}.
 *
 * <p>NOTE(review): when a reported slot refers to a ResourceID that has no registration,
 * the method returns immediately and the remaining slots of the report are not processed.
 * Confirm that aborting (rather than skipping the single slot) is intended.
 *
 * @param resourceID TaskExecutor's ResourceID
 * @param registration TaskExecutor's registration
 * @param slotReport TaskExecutor's free and allocated slots
 */
public void registerTaskExecutor(ResourceID resourceID, TaskExecutorRegistration registration, SlotReport slotReport) {
    final boolean alreadyRegistered = taskManagers.get(resourceID) != null;
    if (alreadyRegistered) {
        notifyTaskManagerFailure(resourceID);
    }
    this.taskManagers.put(resourceID, registration);

    for (SlotStatus reported : slotReport.getSlotsStatus()) {
        final SlotID reportedSlotId = reported.getSlotID();
        final ResourceID owner = reportedSlotId.getResourceID();
        final TaskExecutorRegistration ownerRegistration = taskManagers.get(owner);

        if (ownerRegistration == null) {
            LOG.info("Received SlotStatus but ResourceID {} is unknown to the SlotManager", owner);
            return;
        }

        final ResourceSlot newSlot = new ResourceSlot(reportedSlotId, reported.getProfiler(), ownerRegistration);
        registerNewSlot(newSlot);
        LOG.info("New slot appeared, SlotID:{}, AllocationID:{}", reportedSlotId, reported.getAllocationID());

        if (reported.getAllocationID() == null) {
            // no allocation reported, treat the slot as free
            handleFreeSlot(newSlot);
        } else {
            // slot in use, record this in bookkeeping
            allocationMap.addAllocation(reportedSlotId, reported.getAllocationID());
        }
    }
}
Also used : SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) TaskExecutorRegistration(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorRegistration) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) ResourceSlot(org.apache.flink.runtime.clusterframework.types.ResourceSlot)

Example 18 with SlotStatus

use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.

the class SlotManagerTest method testNewlyAppearedFreeSlotFulfillPendingRequest.

/**
 * Tests that a free slot which newly appears in a SlotReport is used to fulfill a
 * request that was already pending.
 */
@Test
public void testNewlyAppearedFreeSlotFulfillPendingRequest() {
    final TestingSlotManager manager = new TestingSlotManager();

    // Queue one request before any slot is known.
    final SlotRequest pendingRequest =
            new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
    manager.requestSlot(pendingRequest);
    assertEquals(1, manager.getPendingRequestCount());

    // Report a single free slot via a TaskExecutor registration.
    final SlotID freeSlotId = SlotID.generate();
    final SlotReport report =
            new SlotReport(
                    Collections.singletonList(
                            new SlotStatus(freeSlotId, DEFAULT_TESTING_PROFILE)));
    manager.registerTaskExecutor(freeSlotId.getResourceID(), taskExecutorRegistration, report);

    // The slot must have been consumed by the pending request.
    assertEquals(1, manager.getAllocatedSlotCount());
    assertEquals(0, manager.getFreeSlotCount());
    assertEquals(0, manager.getPendingRequestCount());
    assertTrue(manager.isAllocated(freeSlotId));
}
Also used : SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) SlotRequest(org.apache.flink.runtime.resourcemanager.SlotRequest) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 19 with SlotStatus

use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.

the class SlotManagerTest method testNewlyAppearedInUseSlot.

/**
 * Tests that a slot which newly appears in a SlotReport and is reported as already
 * in use by a job is tracked as allocated.
 */
@Test
public void testNewlyAppearedInUseSlot() {
    final TestingSlotManager manager = new TestingSlotManager();

    // Report a single slot that already carries a job id and an allocation id.
    final SlotID usedSlotId = SlotID.generate();
    final SlotStatus usedSlotStatus =
            new SlotStatus(usedSlotId, DEFAULT_TESTING_PROFILE, new JobID(), new AllocationID());
    final SlotReport report = new SlotReport(Collections.singletonList(usedSlotStatus));
    manager.registerTaskExecutor(usedSlotId.getResourceID(), taskExecutorRegistration, report);

    assertEquals(1, manager.getAllocatedSlotCount());
    assertEquals(0, manager.getFreeSlotCount());
    assertTrue(manager.isAllocated(usedSlotId));
}
Also used : SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 20 with SlotStatus

use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.

the class SlotProtocolTest method testSlotsUnavailableRequest.

/**
 * Tests the slot request protocol when no slot is available at request time.
 *
 * <p>Verifies that
 * <ol>
 *   <li>the SlotRequest is routed to the SlotManager,</li>
 *   <li>the SlotRequest is confirmed,</li>
 *   <li>the SlotRequest leads to a container allocation,</li>
 *   <li>once a slot becomes available, the TaskExecutor gets a SlotRequest.</li>
 * </ol>
 *
 * @throws Exception if setting up or driving the ResourceManager fails
 */
@Test
public void testSlotsUnavailableRequest() throws Exception {
    final String rmAddress = "/rm1";
    final String jmAddress = "/jm1";
    final JobID jobID = new JobID();
    testRpcService.registerGateway(jmAddress, mock(JobMasterGateway.class));
    final TestingHighAvailabilityServices testingHaServices = new TestingHighAvailabilityServices();
    final UUID rmLeaderID = UUID.randomUUID();
    final UUID jmLeaderID = UUID.randomUUID();
    TestingLeaderElectionService rmLeaderElectionService = configureHA(testingHaServices, jobID, rmAddress, rmLeaderID, jmAddress, jmLeaderID);
    ResourceManagerConfiguration resourceManagerConfiguration = new ResourceManagerConfiguration(Time.seconds(5L), Time.seconds(5L), Time.minutes(5L));
    JobLeaderIdService jobLeaderIdService = new JobLeaderIdService(testingHaServices, testRpcService.getScheduledExecutor(), resourceManagerConfiguration.getJobTimeout());
    final TestingSlotManagerFactory slotManagerFactory = new TestingSlotManagerFactory();
    SpiedResourceManager resourceManager = new SpiedResourceManager(testRpcService, resourceManagerConfiguration, testingHaServices, slotManagerFactory, mock(MetricRegistry.class), jobLeaderIdService, mock(FatalErrorHandler.class));
    resourceManager.start();
    rmLeaderElectionService.isLeader(rmLeaderID);
    Future<RegistrationResponse> registrationFuture = resourceManager.registerJobManager(rmLeaderID, jmLeaderID, jmAddress, jobID);
    try {
        registrationFuture.get(5, TimeUnit.SECONDS);
    } catch (Exception e) {
        // Fail with the same message as before, but keep the cause so the report shows
        // why the registration did not complete (timeout, execution failure, ...).
        throw new AssertionError("JobManager registration Future didn't become ready.", e);
    }
    final SlotManager slotManager = slotManagerFactory.slotManager;
    final AllocationID allocationID = new AllocationID();
    final ResourceProfile resourceProfile = new ResourceProfile(1.0, 100);
    SlotRequest slotRequest = new SlotRequest(jobID, allocationID, resourceProfile);
    RMSlotRequestReply slotRequestReply = resourceManager.requestSlot(jmLeaderID, rmLeaderID, slotRequest);
    // 1) SlotRequest is routed to the SlotManager
    verify(slotManager).requestSlot(slotRequest);
    // 2) SlotRequest is confirmed (expected value first, so failure messages read correctly)
    Assert.assertEquals(allocationID, slotRequestReply.getAllocationID());
    // 3) SlotRequest leads to a container allocation
    Assert.assertEquals(1, resourceManager.startNewWorkerCalled);
    Assert.assertFalse(slotManager.isAllocated(allocationID));
    // slot becomes available
    final String tmAddress = "/tm1";
    TaskExecutorGateway taskExecutorGateway = mock(TaskExecutorGateway.class);
    Mockito.when(taskExecutorGateway.requestSlot(any(SlotID.class), any(JobID.class), any(AllocationID.class), any(String.class), any(UUID.class), any(Time.class))).thenReturn(new FlinkCompletableFuture<TMSlotRequestReply>());
    testRpcService.registerGateway(tmAddress, taskExecutorGateway);
    final ResourceID resourceID = ResourceID.generate();
    final SlotID slotID = new SlotID(resourceID, 0);
    final SlotStatus slotStatus = new SlotStatus(slotID, resourceProfile);
    final SlotReport slotReport = new SlotReport(Collections.singletonList(slotStatus));
    // register slot at SlotManager
    slotManager.registerTaskExecutor(resourceID, new TaskExecutorRegistration(taskExecutorGateway), slotReport);
    // 4) Slot becomes available and TaskExecutor gets a SlotRequest
    verify(taskExecutorGateway, timeout(5000)).requestSlot(eq(slotID), eq(jobID), eq(allocationID), any(String.class), any(UUID.class), any(Time.class));
}
Also used : TMSlotRequestReply(org.apache.flink.runtime.resourcemanager.messages.taskexecutor.TMSlotRequestReply) TaskExecutorRegistration(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorRegistration) JobLeaderIdService(org.apache.flink.runtime.resourcemanager.JobLeaderIdService) Time(org.apache.flink.api.common.time.Time) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) SlotRequest(org.apache.flink.runtime.resourcemanager.SlotRequest) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) UUID(java.util.UUID) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) RMSlotRequestReply(org.apache.flink.runtime.resourcemanager.messages.jobmanager.RMSlotRequestReply) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) ResourceManagerConfiguration(org.apache.flink.runtime.resourcemanager.ResourceManagerConfiguration) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) TestingSlotManager(org.apache.flink.runtime.resourcemanager.TestingSlotManager) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

SlotStatus (org.apache.flink.runtime.taskexecutor.SlotStatus)28 SlotID (org.apache.flink.runtime.clusterframework.types.SlotID)23 SlotReport (org.apache.flink.runtime.taskexecutor.SlotReport)21 Test (org.junit.Test)20 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)17 JobID (org.apache.flink.api.common.JobID)15 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)10 TaskExecutorConnection (org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection)8 ResourceProfile (org.apache.flink.runtime.clusterframework.types.ResourceProfile)7 SlotRequest (org.apache.flink.runtime.resourcemanager.SlotRequest)6 Time (org.apache.flink.api.common.time.Time)5 TestingTaskExecutorGatewayBuilder (org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder)5 CompletableFuture (java.util.concurrent.CompletableFuture)4 RegistrationResponse (org.apache.flink.runtime.registration.RegistrationResponse)4 TaskExecutorGateway (org.apache.flink.runtime.taskexecutor.TaskExecutorGateway)4 FlinkException (org.apache.flink.util.FlinkException)4 ArrayList (java.util.ArrayList)3 TimeoutException (java.util.concurrent.TimeoutException)3 InstanceID (org.apache.flink.runtime.instance.InstanceID)3 Acknowledge (org.apache.flink.runtime.messages.Acknowledge)3