use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.
the class TaskSlotTable method createSlotReport.
// ---------------------------------------------------------------------
// Slot report methods
// ---------------------------------------------------------------------
/**
 * Builds a {@link SlotReport} containing one {@link SlotStatus} per entry of {@code taskSlots},
 * in the same order as the entries appear in that list.
 *
 * @param resourceId resource id used to build the {@link SlotID} of every reported slot
 * @return slot report covering all entries of {@code taskSlots}
 */
public SlotReport createSlotReport(ResourceID resourceId) {
    final int slotCount = taskSlots.size();
    final SlotStatus[] statuses = new SlotStatus[slotCount];

    for (int index = 0; index < slotCount; index++) {
        final TaskSlot slot = taskSlots.get(index);
        // The slot id combines the caller-supplied resource id with the slot's own index.
        statuses[index] =
                new SlotStatus(
                        new SlotID(resourceId, slot.getIndex()),
                        slot.getResourceProfile(),
                        slot.getJobId(),
                        slot.getAllocationId());
    }

    // Wrap the filled array as a fixed-size list for the report.
    return new SlotReport(Arrays.asList(statuses));
}
use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.
the class ResourceManagerTaskExecutorTest method testDisconnectTaskExecutor.
/**
 * Tests that a TaskExecutor can disconnect from the {@link ResourceManager}.
 *
 * <p>The test registers a task executor, sends a slot report for it, and checks the resource
 * overview before and after calling {@code disconnectTaskManager}.
 */
@Test
public void testDisconnectTaskExecutor() throws Exception {
    final int slotCount = 10;

    // Register the task executor and make sure the registration was accepted.
    final TaskExecutorRegistration registration =
            new TaskExecutorRegistration(
                    taskExecutorGateway.getAddress(),
                    taskExecutorResourceID,
                    dataPort,
                    jmxPort,
                    hardwareDescription,
                    new TaskExecutorMemoryConfiguration(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L),
                    DEFAULT_SLOT_PROFILE,
                    DEFAULT_SLOT_PROFILE.multiply(slotCount));
    final RegistrationResponse response =
            rmGateway.registerTaskExecutor(registration, TIMEOUT).get();
    assertThat(response, instanceOf(TaskExecutorRegistrationSuccess.class));
    final TaskExecutorRegistrationSuccess success = (TaskExecutorRegistrationSuccess) response;
    final InstanceID registrationId = success.getRegistrationId();

    // Report the slots so that they show up in the resource overview.
    final Collection<SlotStatus> slotStatuses = createSlots(slotCount);
    final SlotReport report = new SlotReport(slotStatuses);
    rmGateway.sendSlotReport(taskExecutorResourceID, registrationId, report, TIMEOUT).get();

    final ResourceOverview overviewBeforeDisconnect =
            rmGateway.requestResourceOverview(TIMEOUT).get();
    assertThat(overviewBeforeDisconnect.getNumberTaskManagers(), is(1));
    assertThat(overviewBeforeDisconnect.getNumberRegisteredSlots(), is(slotCount));

    // After the disconnect neither the task manager nor its slots may be listed.
    rmGateway.disconnectTaskManager(
            taskExecutorResourceID, new FlinkException("testDisconnectTaskExecutor"));

    final ResourceOverview overviewAfterDisconnect =
            rmGateway.requestResourceOverview(TIMEOUT).get();
    assertThat(overviewAfterDisconnect.getNumberTaskManagers(), is(0));
    assertThat(overviewAfterDisconnect.getNumberRegisteredSlots(), is(0));
}
use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.
the class TaskSlotTableImplTest method testGenerateSlotReport.
/**
 * Checks the slot report of a table started with 3 slots after one allocation on index 0, two
 * allocations with index -1, and freeing the first of those two again.
 */
@Test
public void testGenerateSlotReport() throws Exception {
    try (final TaskSlotTable<TaskSlotPayload> table = createTaskSlotTableAndStart(3)) {
        final JobID jobId = new JobID();
        final AllocationID firstAllocation = new AllocationID();
        final AllocationID secondAllocation = new AllocationID();
        final AllocationID thirdAllocation = new AllocationID();

        // Explicit index 0.
        assertThat(table.allocateSlot(0, jobId, firstAllocation, SLOT_TIMEOUT), is(true));
        // Index -1: this allocation ends up on index 3 (see the freeSlot assertion below).
        assertThat(table.allocateSlot(-1, jobId, secondAllocation, SLOT_TIMEOUT), is(true));
        // Index -1 again: this allocation ends up on index 4.
        assertThat(table.allocateSlot(-1, jobId, thirdAllocation, SLOT_TIMEOUT), is(true));

        assertThat(table.freeSlot(secondAllocation), is(3));

        final ResourceID resourceId = ResourceID.generate();
        final SlotReport report = table.createSlotReport(resourceId);
        final List<SlotStatus> reported = new ArrayList<>();
        report.iterator().forEachRemaining(status -> reported.add(status));

        assertThat(reported.size(), is(4));

        // Index 3 was freed again, so only indices 0, 1, 2 and 4 are expected in the report.
        final SlotStatus expectedSlot0 =
                new SlotStatus(
                        new SlotID(resourceId, 0),
                        TaskSlotUtils.DEFAULT_RESOURCE_PROFILE,
                        jobId,
                        firstAllocation);
        final SlotStatus expectedSlot1 =
                new SlotStatus(
                        new SlotID(resourceId, 1),
                        TaskSlotUtils.DEFAULT_RESOURCE_PROFILE,
                        null,
                        null);
        final SlotStatus expectedSlot2 =
                new SlotStatus(
                        new SlotID(resourceId, 2),
                        TaskSlotUtils.DEFAULT_RESOURCE_PROFILE,
                        null,
                        null);
        final SlotStatus expectedSlot4 =
                new SlotStatus(
                        new SlotID(resourceId, 4),
                        TaskSlotUtils.DEFAULT_RESOURCE_PROFILE,
                        jobId,
                        thirdAllocation);

        assertThat(
                reported,
                containsInAnyOrder(
                        is(expectedSlot0),
                        is(expectedSlot1),
                        is(expectedSlot2),
                        is(expectedSlot4)));
    }
}
use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.
the class DeclarativeSlotManager method allocateSlot.
/**
 * Allocates the given slot. This entails sending a slot request to the task manager and
 * handling both the acknowledgement and any failure of that request.
 *
 * <p>The response is processed on {@code mainThreadExecutor}. A response is ignored if the
 * allocation id recorded in {@code pendingSlotAllocations} for the slot is missing or differs
 * from the one created by this call.
 *
 * @param taskManagerSlot slot to allocate
 * @param jobId job for which the slot should be allocated for
 * @param targetAddress address of the job master
 * @param resourceProfile resource profile for the requirement for which the slot is used
 * @throws IllegalStateException if no task manager is registered for the slot's instance id
 */
private void allocateSlot(TaskManagerSlotInformation taskManagerSlot, JobID jobId, String targetAddress, ResourceProfile resourceProfile) {
    final SlotID slotId = taskManagerSlot.getSlotId();
    LOG.debug("Starting allocation of slot {} for job {} with resource profile {}.", slotId, jobId, resourceProfile);

    final InstanceID instanceId = taskManagerSlot.getInstanceId();
    if (!taskExecutorManager.isTaskManagerRegistered(instanceId)) {
        throw new IllegalStateException("Could not find a registered task manager for instance id " + instanceId + '.');
    }

    final TaskExecutorConnection connection = taskManagerSlot.getTaskManagerConnection();
    final TaskExecutorGateway gateway = connection.getTaskExecutorGateway();
    final AllocationID allocationId = new AllocationID();

    // Bookkeeping happens before the request is sent.
    slotTracker.notifyAllocationStart(slotId, jobId);
    taskExecutorManager.markUsed(instanceId);
    pendingSlotAllocations.put(slotId, allocationId);

    // RPC call to the task manager
    final CompletableFuture<Acknowledge> slotRequest =
            gateway.requestSlot(
                    slotId,
                    jobId,
                    allocationId,
                    resourceProfile,
                    targetAddress,
                    resourceManagerId,
                    taskManagerRequestTimeout);

    final CompletableFuture<Void> responseHandling =
            slotRequest.handleAsync(
                    (Acknowledge ack, Throwable failure) -> {
                        final AllocationID pendingAllocation = pendingSlotAllocations.get(slotId);
                        final boolean stillPending =
                                pendingAllocation != null
                                        && pendingAllocation.equals(allocationId);
                        if (!stillPending) {
                            LOG.debug("Ignoring slot allocation update from task executor {} for slot {} and job {}, because the allocation was already completed or cancelled.", instanceId, slotId, jobId);
                            return null;
                        }

                        if (ack != null) {
                            LOG.trace("Completed allocation of slot {} for job {}.", slotId, jobId);
                            slotTracker.notifyAllocationComplete(slotId, jobId);
                            return null;
                        }

                        // From here on the request was not acknowledged.
                        if (failure instanceof SlotOccupiedException) {
                            final SlotOccupiedException occupied = (SlotOccupiedException) failure;
                            LOG.debug("Tried allocating slot {} for job {}, but it was already allocated for job {}.", slotId, jobId, occupied.getJobId());
                            // Report the occupying allocation as a slot status to force the
                            // state transition. This could be a problem if we ever assume that
                            // the task executor always reports about all slots.
                            final SlotStatus occupiedStatus =
                                    new SlotStatus(
                                            slotId,
                                            taskManagerSlot.getResourceProfile(),
                                            occupied.getJobId(),
                                            occupied.getAllocationId());
                            slotTracker.notifySlotStatus(Collections.singleton(occupiedStatus));
                        } else {
                            LOG.warn("Slot allocation for slot {} for job {} failed.", slotId, jobId, failure);
                            slotTracker.notifyFree(slotId);
                        }
                        // Both failure paths re-check the resource requirements afterwards.
                        checkResourceRequirements();
                        return null;
                    },
                    mainThreadExecutor);

    FutureUtils.assertNoException(responseHandling);
}
use of org.apache.flink.runtime.taskexecutor.SlotStatus in project flink by apache.
the class DefaultSlotTracker method notifySlotStatus.
// ---------------------------------------------------------------------------------------------
// TaskExecutor slot status API - acts as source of truth
// ---------------------------------------------------------------------------------------------
/**
 * Runs a state transition for every reported slot status, using the tracked slot looked up by
 * the status' slot id and the job id carried by the status.
 *
 * @param slotStatuses slot statuses to process; must not be null
 * @return {@code true} if at least one state transition returned {@code true}, {@code false}
 *     otherwise
 */
@Override
public boolean notifySlotStatus(Iterable<SlotStatus> slotStatuses) {
    Preconditions.checkNotNull(slotStatuses);

    boolean changed = false;
    for (SlotStatus reported : slotStatuses) {
        // The transition is executed for every status, regardless of earlier results.
        final boolean transitioned =
                slotStatusStateReconciler.executeStateTransition(
                        slots.get(reported.getSlotID()), reported.getJobID());
        changed = changed || transitioned;
    }
    return changed;
}
Aggregations