Search in sources :

Example 21 with SlotStatus

use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.

the class TaskSlotTable method createSlotReport.

// ---------------------------------------------------------------------
// Slot report methods
// ---------------------------------------------------------------------
/**
 * Builds a {@link SlotReport} holding one {@link SlotStatus} per entry of {@code taskSlots}.
 *
 * @param resourceId resource id that is combined with each slot's index to form its {@link SlotID}
 * @return report whose statuses appear in the same order as the entries of {@code taskSlots}
 */
public SlotReport createSlotReport(ResourceID resourceId) {
    final int slotCount = taskSlots.size();
    // Fill a plain array first; the fixed-size list handed to the report is a view over it.
    final SlotStatus[] statuses = new SlotStatus[slotCount];
    for (int index = 0; index < slotCount; index++) {
        final TaskSlot slot = taskSlots.get(index);
        statuses[index] = new SlotStatus(
                new SlotID(resourceId, slot.getIndex()),
                slot.getResourceProfile(),
                slot.getJobId(),
                slot.getAllocationId());
    }
    final List<SlotStatus> statusList = Arrays.asList(statuses);
    return new SlotReport(statusList);
}
Also used : SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport)

Example 22 with SlotStatus

use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.

the class ResourceManagerTaskExecutorTest method testDisconnectTaskExecutor.

/**
 * Verifies that disconnecting a TaskExecutor from the {@link ResourceManager} leaves the resource
 * overview without any task managers or registered slots.
 */
@Test
public void testDisconnectTaskExecutor() throws Exception {
    final int slotCount = 10;

    // Register the task executor and check that the registration was accepted.
    final TaskExecutorMemoryConfiguration memoryConfiguration =
            new TaskExecutorMemoryConfiguration(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L);
    final TaskExecutorRegistration registration = new TaskExecutorRegistration(
            taskExecutorGateway.getAddress(),
            taskExecutorResourceID,
            dataPort,
            jmxPort,
            hardwareDescription,
            memoryConfiguration,
            DEFAULT_SLOT_PROFILE,
            DEFAULT_SLOT_PROFILE.multiply(slotCount));
    final RegistrationResponse response = rmGateway.registerTaskExecutor(registration, TIMEOUT).get();
    assertThat(response, instanceOf(TaskExecutorRegistrationSuccess.class));
    final TaskExecutorRegistrationSuccess success = (TaskExecutorRegistrationSuccess) response;
    final InstanceID registrationId = success.getRegistrationId();

    // Report the slots and confirm they show up in the overview.
    final Collection<SlotStatus> reportedSlots = createSlots(slotCount);
    final SlotReport report = new SlotReport(reportedSlots);
    rmGateway.sendSlotReport(taskExecutorResourceID, registrationId, report, TIMEOUT).get();
    final ResourceOverview overviewBeforeDisconnect = rmGateway.requestResourceOverview(TIMEOUT).get();
    assertThat(overviewBeforeDisconnect.getNumberTaskManagers(), is(1));
    assertThat(overviewBeforeDisconnect.getNumberRegisteredSlots(), is(slotCount));

    // Disconnect and confirm that neither the task manager nor its slots are listed any more.
    final FlinkException disconnectCause = new FlinkException("testDisconnectTaskExecutor");
    rmGateway.disconnectTaskManager(taskExecutorResourceID, disconnectCause);
    final ResourceOverview overviewAfterDisconnect = rmGateway.requestResourceOverview(TIMEOUT).get();
    assertThat(overviewAfterDisconnect.getNumberTaskManagers(), is(0));
    assertThat(overviewAfterDisconnect.getNumberRegisteredSlots(), is(0));
}
Also used : InstanceID(org.apache.flink.runtime.instance.InstanceID) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) TaskExecutorMemoryConfiguration(org.apache.flink.runtime.taskexecutor.TaskExecutorMemoryConfiguration) TaskExecutorRegistrationSuccess(org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationSuccess) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) FlinkException(org.apache.flink.util.FlinkException) Test(org.junit.Test)

Example 23 with SlotStatus

use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.

the class TaskSlotTableImplTest method testGenerateSlotReport.

/**
 * Allocates slot 0 plus two slots requested with index -1, frees the first of those two again,
 * and asserts that the generated slot report contains exactly the slots with indices 0, 1, 2
 * and 4.
 */
@Test
public void testGenerateSlotReport() throws Exception {
    try (final TaskSlotTable<TaskSlotPayload> table = createTaskSlotTableAndStart(3)) {
        final JobID jobId = new JobID();
        final AllocationID firstAllocation = new AllocationID();
        final AllocationID secondAllocation = new AllocationID();
        final AllocationID thirdAllocation = new AllocationID();

        // index 0
        assertThat(table.allocateSlot(0, jobId, firstAllocation, SLOT_TIMEOUT), is(true));
        // index 3
        assertThat(table.allocateSlot(-1, jobId, secondAllocation, SLOT_TIMEOUT), is(true));
        // index 4
        assertThat(table.allocateSlot(-1, jobId, thirdAllocation, SLOT_TIMEOUT), is(true));

        // Freeing the second allocation is expected to return 3.
        assertThat(table.freeSlot(secondAllocation), is(3));

        ResourceID resourceId = ResourceID.generate();
        SlotReport report = table.createSlotReport(resourceId);
        List<SlotStatus> reportedStatuses = new ArrayList<>();
        report.iterator().forEachRemaining(status -> reportedStatuses.add(status));

        final SlotStatus expectedSlot0 = new SlotStatus(
                new SlotID(resourceId, 0), TaskSlotUtils.DEFAULT_RESOURCE_PROFILE, jobId, firstAllocation);
        final SlotStatus expectedSlot1 = new SlotStatus(
                new SlotID(resourceId, 1), TaskSlotUtils.DEFAULT_RESOURCE_PROFILE, null, null);
        final SlotStatus expectedSlot2 = new SlotStatus(
                new SlotID(resourceId, 2), TaskSlotUtils.DEFAULT_RESOURCE_PROFILE, null, null);
        final SlotStatus expectedSlot4 = new SlotStatus(
                new SlotID(resourceId, 4), TaskSlotUtils.DEFAULT_RESOURCE_PROFILE, jobId, thirdAllocation);

        assertThat(reportedStatuses.size(), is(4));
        assertThat(
                reportedStatuses,
                containsInAnyOrder(is(expectedSlot0), is(expectedSlot1), is(expectedSlot2), is(expectedSlot4)));
    }
}
Also used : SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) ArrayList(java.util.ArrayList) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 24 with SlotStatus

use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.

the class DeclarativeSlotManager method allocateSlot.

/**
 * Allocates the given slot by sending a slot request to the task executor that owns it and
 * handling the response (or failure) on the main thread executor.
 *
 * @param taskManagerSlot slot to allocate
 * @param jobId job for which the slot should be allocated
 * @param targetAddress address of the job master
 * @param resourceProfile resource profile for the requirement for which the slot is used
 * @throws IllegalStateException if the task manager instance of the slot is not registered
 */
private void allocateSlot(TaskManagerSlotInformation taskManagerSlot, JobID jobId, String targetAddress, ResourceProfile resourceProfile) {
    final SlotID slotId = taskManagerSlot.getSlotId();
    LOG.debug("Starting allocation of slot {} for job {} with resource profile {}.", slotId, jobId, resourceProfile);

    final InstanceID instanceId = taskManagerSlot.getInstanceId();
    final boolean taskManagerKnown = taskExecutorManager.isTaskManagerRegistered(instanceId);
    if (!taskManagerKnown) {
        throw new IllegalStateException("Could not find a registered task manager for instance id " + instanceId + '.');
    }

    final TaskExecutorGateway taskExecutorGateway =
            taskManagerSlot.getTaskManagerConnection().getTaskExecutorGateway();
    final AllocationID allocationId = new AllocationID();

    // Record the allocation as started/pending before the request is sent.
    slotTracker.notifyAllocationStart(slotId, jobId);
    taskExecutorManager.markUsed(instanceId);
    pendingSlotAllocations.put(slotId, allocationId);

    // RPC call to the task manager
    CompletableFuture<Acknowledge> slotRequest = taskExecutorGateway.requestSlot(
            slotId, jobId, allocationId, resourceProfile, targetAddress, resourceManagerId, taskManagerRequestTimeout);

    CompletableFuture<Void> responseHandling = slotRequest.handleAsync((Acknowledge ack, Throwable failure) -> {
        // Only react if this allocation is still the pending one for the slot.
        final AllocationID pendingAllocation = pendingSlotAllocations.get(slotId);
        final boolean stillPending = pendingAllocation != null && pendingAllocation.equals(allocationId);
        if (!stillPending) {
            LOG.debug("Ignoring slot allocation update from task executor {} for slot {} and job {}, because the allocation was already completed or cancelled.", instanceId, slotId, jobId);
            return null;
        }

        if (ack != null) {
            LOG.trace("Completed allocation of slot {} for job {}.", slotId, jobId);
            slotTracker.notifyAllocationComplete(slotId, jobId);
            return null;
        }

        // No acknowledgement: the request failed.
        if (failure instanceof SlotOccupiedException) {
            final SlotOccupiedException occupied = (SlotOccupiedException) failure;
            LOG.debug("Tried allocating slot {} for job {}, but it was already allocated for job {}.", slotId, jobId, occupied.getJobId());
            // report as a slot status to force the state transition
            // this could be a problem if we ever assume that the task
            // executor always reports about all slots
            final SlotStatus occupiedStatus = new SlotStatus(
                    slotId, taskManagerSlot.getResourceProfile(), occupied.getJobId(), occupied.getAllocationId());
            slotTracker.notifySlotStatus(Collections.singleton(occupiedStatus));
        } else {
            LOG.warn("Slot allocation for slot {} for job {} failed.", slotId, jobId, failure);
            slotTracker.notifyFree(slotId);
        }
        checkResourceRequirements();
        return null;
    }, mainThreadExecutor);

    FutureUtils.assertNoException(responseHandling);
}
Also used : InstanceID(org.apache.flink.runtime.instance.InstanceID) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) SlotOccupiedException(org.apache.flink.runtime.taskexecutor.exceptions.SlotOccupiedException) TaskExecutorConnection(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection)

Example 25 with SlotStatus

use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.

the class DefaultSlotTracker method notifySlotStatus.

// ---------------------------------------------------------------------------------------------
// TaskExecutor slot status API - acts as source of truth
// ---------------------------------------------------------------------------------------------
/**
 * Runs a state transition for every given slot status, using the tracked slot looked up by the
 * status' slot id and the job id carried by the status.
 *
 * @param slotStatuses slot statuses to process; must not be null
 * @return true if at least one executed state transition returned true
 */
@Override
public boolean notifySlotStatus(Iterable<SlotStatus> slotStatuses) {
    Preconditions.checkNotNull(slotStatuses);
    boolean changed = false;
    for (SlotStatus status : slotStatuses) {
        // Evaluate the transition first so it runs for every status, even after a change was seen.
        final boolean transitioned = slotStatusStateReconciler.executeStateTransition(
                slots.get(status.getSlotID()), status.getJobID());
        changed = changed || transitioned;
    }
    return changed;
}
Also used : SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus)

Aggregations

SlotStatus (org.apache.flink.runtime.taskexecutor.SlotStatus): 28, SlotID (org.apache.flink.runtime.clusterframework.types.SlotID): 23, SlotReport (org.apache.flink.runtime.taskexecutor.SlotReport): 21, Test (org.junit.Test): 20, AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID): 17, JobID (org.apache.flink.api.common.JobID): 15, ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID): 10, TaskExecutorConnection (org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection): 8, ResourceProfile (org.apache.flink.runtime.clusterframework.types.ResourceProfile): 7, SlotRequest (org.apache.flink.runtime.resourcemanager.SlotRequest): 6, Time (org.apache.flink.api.common.time.Time): 5, TestingTaskExecutorGatewayBuilder (org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder): 5, CompletableFuture (java.util.concurrent.CompletableFuture): 4, RegistrationResponse (org.apache.flink.runtime.registration.RegistrationResponse): 4, TaskExecutorGateway (org.apache.flink.runtime.taskexecutor.TaskExecutorGateway): 4, FlinkException (org.apache.flink.util.FlinkException): 4, ArrayList (java.util.ArrayList): 3, TimeoutException (java.util.concurrent.TimeoutException): 3, InstanceID (org.apache.flink.runtime.instance.InstanceID): 3, Acknowledge (org.apache.flink.runtime.messages.Acknowledge): 3