use of org.apache.flink.runtime.resourcemanager.WorkerResourceSpec in project flink by apache.
the class ActiveResourceManager method clearStateForWorker.
/**
 * Removes all bookkeeping that is kept for a worker which has terminated.
 *
 * <p>The worker is dropped from the worker map and from both the current-attempt and
 * previous-attempt unregistered-worker maps. If it was still listed as an unregistered worker
 * of the current attempt, the pending counter for its resource spec is decreased as well.
 *
 * @param resourceId Identifier of the worker
 * @return {@code true} if the worker was tracked and its state has been removed; {@code false}
 *     if the worker is not tracked (e.g. it has already been cleared by an earlier call)
 */
private boolean clearStateForWorker(ResourceID resourceId) {
    // Nothing is tracked for this worker, so there is nothing to clean up.
    if (workerNodeMap.remove(resourceId) == null) {
        log.debug("Ignore unrecognized worker {}.", resourceId.getStringWithMetadata());
        return false;
    }

    final WorkerResourceSpec unregisteredSpec =
            currentAttemptUnregisteredWorkers.remove(resourceId);
    previousAttemptUnregisteredWorkers.remove(resourceId);

    // The worker was not an unregistered worker of the current attempt, so it is not counted
    // as pending.
    if (unregisteredSpec == null) {
        return true;
    }

    final int remainingPending = pendingWorkerCounter.decreaseAndGet(unregisteredSpec);
    log.info(
            "Worker {} with resource spec {} was requested in current attempt and has not registered."
                    + " Current pending count after removing: {}.",
            resourceId.getStringWithMetadata(),
            unregisteredSpec,
            remainingPending);
    return true;
}
use of org.apache.flink.runtime.resourcemanager.WorkerResourceSpec in project flink by apache.
the class ActiveResourceManager method requestNewWorker.
// ------------------------------------------------------------------------
// Internal
// ------------------------------------------------------------------------
/**
 * Requests a new worker with the given resource spec from the resource manager driver and
 * tracks it as pending.
 *
 * <p>The pending counter is increased before the request is issued. If the request fails, the
 * counter is decreased again, the failure is recorded and a new worker is requested if still
 * required. If it succeeds, the worker is stored as an unregistered worker of the current
 * attempt and a registration timeout check is scheduled for it.
 *
 * @param workerResourceSpec resource spec of the worker to request
 */
private void requestNewWorker(WorkerResourceSpec workerResourceSpec) {
    final TaskExecutorProcessSpec processSpec =
            TaskExecutorProcessUtils.processSpecFromWorkerResourceSpec(
                    flinkConfig, workerResourceSpec);
    final int pendingCount = pendingWorkerCounter.increaseAndGet(workerResourceSpec);
    log.info(
            "Requesting new worker with resource spec {}, current pending count: {}.",
            workerResourceSpec,
            pendingCount);

    // The request is chained behind the cool-down future: after start worker failures we wait
    // for an interval before starting new workers. Without that, the ActiveResourceManager
    // would keep re-requesting the worker and keep the main thread busy.
    final CompletableFuture<WorkerType> workerFuture =
            startWorkerCoolDown.thenCompose(
                    ignored -> resourceManagerDriver.requestResource(processSpec));

    FutureUtils.assertNoException(
            workerFuture.handle(
                    (worker, failure) -> {
                        if (failure == null) {
                            // Success: remember the worker until it registers.
                            final ResourceID workerId = worker.getResourceID();
                            workerNodeMap.put(workerId, worker);
                            currentAttemptUnregisteredWorkers.put(workerId, workerResourceSpec);
                            scheduleWorkerRegistrationTimeoutCheck(workerId);
                            log.info(
                                    "Requested worker {} with resource spec {}.",
                                    workerId.getStringWithMetadata(),
                                    workerResourceSpec);
                        } else {
                            // Failure: undo the pending count, then retry if still needed.
                            final int remainingPending =
                                    pendingWorkerCounter.decreaseAndGet(workerResourceSpec);
                            log.warn(
                                    "Failed requesting worker with resource spec {}, current pending count: {}",
                                    workerResourceSpec,
                                    remainingPending,
                                    failure);
                            recordWorkerFailureAndPauseWorkerCreationIfNeeded();
                            requestWorkerIfRequired();
                        }
                        return null;
                    }));
}
use of org.apache.flink.runtime.resourcemanager.WorkerResourceSpec in project flink by apache.
the class TaskExecutorManagerTest method testTimeoutForUnusedTaskManager.
/**
 * Verifies that a task manager which has been in use is released via the resource actions once
 * all of its slots have been freed and the configured task manager timeout applies.
 */
@Test
public void testTimeoutForUnusedTaskManager() throws Exception {
    final WorkerResourceSpec defaultWorkerSpec =
            new WorkerResourceSpec.Builder().setCpuCores(1).build();
    final ResourceProfile slotProfile = ResourceProfile.newBuilder().setCpuCores(1).build();
    final Time timeout = Time.milliseconds(50L);

    // Completed with the instance id handed to the release-resource callback.
    final CompletableFuture<InstanceID> releasedInstanceFuture = new CompletableFuture<>();
    final ResourceActions resourceActions =
            new TestingResourceActionsBuilder()
                    .setReleaseResourceConsumer(
                            (instanceId, cause) -> releasedInstanceFuture.complete(instanceId))
                    .build();

    final Executor mainThreadExecutor = TestingUtils.defaultExecutor();

    try (final TaskExecutorManager taskExecutorManager =
            createTaskExecutorManagerBuilder()
                    .setTaskManagerTimeout(timeout)
                    .setDefaultWorkerResourceSpec(defaultWorkerSpec)
                    .setResourceActions(resourceActions)
                    .setMainThreadExecutor(mainThreadExecutor)
                    .createTaskExecutorManager()) {

        // Run on the main thread executor: allocate a worker, register a task executor with a
        // single slot, then occupy and free that slot again.
        final CompletableFuture<InstanceID> registeredInstanceFuture =
                CompletableFuture.supplyAsync(
                        () -> {
                            taskExecutorManager.allocateWorker(slotProfile);
                            final InstanceID instanceId =
                                    createAndRegisterTaskExecutor(
                                            taskExecutorManager, 1, slotProfile);
                            taskExecutorManager.occupySlot(instanceId);
                            taskExecutorManager.freeSlot(instanceId);
                            return instanceId;
                        },
                        mainThreadExecutor);

        // The released instance must be the one that was registered above.
        registeredInstanceFuture
                .thenAcceptBoth(
                        releasedInstanceFuture,
                        (registered, released) -> assertThat(registered, is(released)))
                .get();
    }
}
use of org.apache.flink.runtime.resourcemanager.WorkerResourceSpec in project flink by apache.
the class TaskExecutorManagerTest method testPendingSlotNotFulfilledByAllocatedSlot.
/**
 * Verifies that registering a task executor whose only slot is already allocated does not
 * fulfil a pending slot.
 */
@Test
public void testPendingSlotNotFulfilledByAllocatedSlot() {
    final int workerCpuCores = 3;
    final WorkerResourceSpec defaultWorkerSpec =
            new WorkerResourceSpec.Builder().setCpuCores(workerCpuCores).build();
    final ResourceProfile slotProfile =
            ResourceProfile.newBuilder().setCpuCores(workerCpuCores).build();

    try (final TaskExecutorManager taskExecutorManager =
            createTaskExecutorManagerBuilder()
                    .setDefaultWorkerResourceSpec(defaultWorkerSpec)
                    // a single slot per worker, so that the worker spec and the requested slot
                    // profile use the same number of CPU cores
                    .setNumSlotsPerWorker(1)
                    .setMaxNumSlots(2)
                    .createTaskExecutorManager()) {

        // allocating a worker creates one pending slot
        taskExecutorManager.allocateWorker(slotProfile);
        assertThat(taskExecutorManager.getNumberPendingTaskManagerSlots(), is(1));

        // register a task executor reporting one slot that already carries a job id and an
        // allocation id
        final TaskExecutorConnection connection = createTaskExecutorConnection();
        final SlotID reportedSlotId = new SlotID(connection.getResourceID(), 0);
        final SlotStatus reportedSlot =
                new SlotStatus(reportedSlotId, slotProfile, JobID.generate(), new AllocationID());
        final SlotReport slotReport = new SlotReport(reportedSlot);
        taskExecutorManager.registerTaskManager(
                connection, slotReport, ResourceProfile.ANY, ResourceProfile.ANY);

        // the reported slot is registered, yet the originally requested slot is still pending
        assertThat(taskExecutorManager.getNumberRegisteredSlots(), is(1));
        assertThat(taskExecutorManager.getNumberPendingTaskManagerSlots(), is(1));
    }
}
use of org.apache.flink.runtime.resourcemanager.WorkerResourceSpec in project flink by apache.
the class SlotManagerUtilsTest method testGenerateDefaultSlotConsistentWithTaskExecutorResourceUtils.
/**
 * Verifies that {@code SlotManagerUtils.generateDefaultSlotResourceProfile} yields the same
 * default slot profile as {@code TaskExecutorResourceUtils.generateDefaultSlotResourceProfile},
 * both when given the total resource profile and when given the derived worker resource spec.
 */
@Test
public void testGenerateDefaultSlotConsistentWithTaskExecutorResourceUtils() {
    final int numSlots = 5;
    final TaskExecutorResourceSpec executorSpec =
            new TaskExecutorResourceSpec(
                    new CPUResource(1.0),
                    MemorySize.parse("1m"),
                    MemorySize.parse("2m"),
                    MemorySize.parse("3m"),
                    MemorySize.parse("4m"),
                    Collections.singleton(
                            new ExternalResource(EXTERNAL_RESOURCE_NAME, numSlots)));

    // Reference value computed by TaskExecutorResourceUtils.
    final ResourceProfile expectedSlotProfile =
            TaskExecutorResourceUtils.generateDefaultSlotResourceProfile(executorSpec, numSlots);

    final ResourceProfile totalProfile =
            TaskExecutorResourceUtils.generateTotalAvailableResourceProfile(executorSpec);
    final WorkerResourceSpec workerSpec =
            WorkerResourceSpec.fromTotalResourceProfile(totalProfile, numSlots);

    final ResourceProfile slotProfileFromTotal =
            SlotManagerUtils.generateDefaultSlotResourceProfile(totalProfile, numSlots);
    assertThat(slotProfileFromTotal, is(expectedSlotProfile));

    final ResourceProfile slotProfileFromWorkerSpec =
            SlotManagerUtils.generateDefaultSlotResourceProfile(workerSpec, numSlots);
    assertThat(slotProfileFromWorkerSpec, is(expectedSlotProfile));
}
Aggregations