Use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.
The class ActiveResourceManager, method requestNewWorker.
// ------------------------------------------------------------------------
// Internal
// ------------------------------------------------------------------------
private void requestNewWorker(WorkerResourceSpec workerResourceSpec) {
    final TaskExecutorProcessSpec taskExecutorProcessSpec = TaskExecutorProcessUtils.processSpecFromWorkerResourceSpec(flinkConfig, workerResourceSpec);
    final int pendingCount = pendingWorkerCounter.increaseAndGet(workerResourceSpec);
    log.info("Requesting new worker with resource spec {}, current pending count: {}.", workerResourceSpec, pendingCount);
    // In case of start worker failures, we should wait for an interval before
    // trying to start new workers.
    // Otherwise, ActiveResourceManager would keep re-requesting the worker,
    // which keeps the main thread busy.
    final CompletableFuture<WorkerType> requestResourceFuture = startWorkerCoolDown.thenCompose((ignore) -> resourceManagerDriver.requestResource(taskExecutorProcessSpec));
    FutureUtils.assertNoException(requestResourceFuture.handle((worker, exception) -> {
        if (exception != null) {
            final int count = pendingWorkerCounter.decreaseAndGet(workerResourceSpec);
            log.warn("Failed requesting worker with resource spec {}, current pending count: {}", workerResourceSpec, count, exception);
            recordWorkerFailureAndPauseWorkerCreationIfNeeded();
            requestWorkerIfRequired();
        } else {
            final ResourceID resourceId = worker.getResourceID();
            workerNodeMap.put(resourceId, worker);
            currentAttemptUnregisteredWorkers.put(resourceId, workerResourceSpec);
            scheduleWorkerRegistrationTimeoutCheck(resourceId);
            log.info("Requested worker {} with resource spec {}.", resourceId.getStringWithMetadata(), workerResourceSpec);
        }
        return null;
    }));
}
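The interesting part here is the cool-down gate: every new request is chained behind startWorkerCoolDown with thenCompose, so after a start-worker failure the resource manager waits out the pause instead of spinning on the main thread, while pendingWorkerCounter tracks how many requests per WorkerResourceSpec are still in flight. The following standalone sketch (hypothetical names such as CoolDownSketch, pauseWorkerCreation and the String-based spec, not Flink's actual classes) shows the same thenCompose-based gating in isolation.

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class CoolDownSketch {

    private final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();

    // A completed future means "no cool-down in effect"; after a failure it is replaced
    // with a future that only completes once the pause has elapsed.
    private volatile CompletableFuture<Void> startWorkerCoolDown = CompletableFuture.completedFuture(null);

    // Mirrors startWorkerCoolDown.thenCompose(ignore -> resourceManagerDriver.requestResource(...)):
    // the actual request only fires once the cool-down future has completed.
    CompletableFuture<String> requestNewWorker(String workerResourceSpec) {
        return startWorkerCoolDown.thenCompose(ignore -> requestResource(workerResourceSpec));
    }

    // Called after a start-worker failure to pause further worker creation for a while.
    void pauseWorkerCreation(long millis) {
        CompletableFuture<Void> coolDown = new CompletableFuture<>();
        scheduler.schedule(() -> { coolDown.complete(null); }, millis, TimeUnit.MILLISECONDS);
        startWorkerCoolDown = coolDown;
    }

    // Placeholder for the driver call; completes immediately in this sketch.
    private CompletableFuture<String> requestResource(String workerResourceSpec) {
        return CompletableFuture.completedFuture("worker-for-" + workerResourceSpec);
    }

    public static void main(String[] args) throws Exception {
        CoolDownSketch sketch = new CoolDownSketch();
        sketch.pauseWorkerCreation(100); // simulate a recent start-worker failure
        System.out.println(sketch.requestNewWorker("spec-1").get()); // completes only after ~100 ms
        sketch.scheduler.shutdown();
    }
}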
Use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.
The class KubernetesResourceManagerDriver, method requestResource.
@Override
public CompletableFuture<KubernetesWorkerNode> requestResource(TaskExecutorProcessSpec taskExecutorProcessSpec) {
    final KubernetesTaskManagerParameters parameters = createKubernetesTaskManagerParameters(taskExecutorProcessSpec);
    final KubernetesPod taskManagerPod = KubernetesTaskManagerFactory.buildTaskManagerKubernetesPod(taskManagerPodTemplate, parameters);
    final String podName = taskManagerPod.getName();
    final CompletableFuture<KubernetesWorkerNode> requestResourceFuture = new CompletableFuture<>();
    requestResourceFutures.put(podName, requestResourceFuture);
    log.info("Creating new TaskManager pod with name {} and resource <{},{}>.", podName, parameters.getTaskManagerMemoryMB(), parameters.getTaskManagerCPU());
    final CompletableFuture<Void> createPodFuture = flinkKubeClient.createTaskManagerPod(taskManagerPod);
    FutureUtils.assertNoException(createPodFuture.handleAsync((ignore, exception) -> {
        if (exception != null) {
            log.warn("Could not create pod {}, exception: {}", podName, exception);
            CompletableFuture<KubernetesWorkerNode> future = requestResourceFutures.remove(taskManagerPod.getName());
            if (future != null) {
                future.completeExceptionally(exception);
            }
        } else {
            log.info("Pod {} is created.", podName);
        }
        return null;
    }, getMainThreadExecutor()));
    return requestResourceFuture;
}
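The driver registers the pending future under the pod name before issuing the create call; if pod creation fails, the entry is removed and the future is completed exceptionally, otherwise the future stays pending until the pod is later observed as added. A minimal sketch of that bookkeeping, using hypothetical names (PendingPodRequests, requestPod, onPodAdded) and plain strings rather than Flink's actual types:

import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;

public class PendingPodRequests {

    // One pending future per pod name, registered before the create call is issued.
    private final Map<String, CompletableFuture<String>> requestResourceFutures = new ConcurrentHashMap<>();

    CompletableFuture<String> requestPod(String podName, CompletableFuture<Void> createPodFuture) {
        CompletableFuture<String> requestFuture = new CompletableFuture<>();
        requestResourceFutures.put(podName, requestFuture);
        createPodFuture.handle((ignore, exception) -> {
            if (exception != null) {
                // Creation failed: drop the bookkeeping entry and fail the caller's future.
                CompletableFuture<String> pending = requestResourceFutures.remove(podName);
                if (pending != null) {
                    pending.completeExceptionally(exception);
                }
            }
            // On success the future stays pending until the pod is observed by the event path.
            return null;
        });
        return requestFuture;
    }

    // Invoked from the pod-event path once the pod shows up; completes the pending request.
    void onPodAdded(String podName) {
        CompletableFuture<String> pending = requestResourceFutures.remove(podName);
        if (pending != null) {
            pending.complete(podName);
        }
    }

    public static void main(String[] args) {
        PendingPodRequests requests = new PendingPodRequests();
        CompletableFuture<String> worker = requests.requestPod("taskmanager-pod-1", CompletableFuture.completedFuture(null));
        requests.onPodAdded("taskmanager-pod-1");
        System.out.println(worker.join()); // prints taskmanager-pod-1
    }
}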
Use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.
The class BashJavaUtils, method getTmResourceParams.
/**
* Generate and print JVM parameters and dynamic configs of task executor resources. The last
* two lines of the output should be JVM parameters and dynamic configs respectively.
*/
private static List<String> getTmResourceParams(Configuration configuration) {
    Configuration configurationWithFallback = TaskExecutorProcessUtils.getConfigurationMapLegacyTaskManagerHeapSizeToConfigOption(configuration, TaskManagerOptions.TOTAL_FLINK_MEMORY);
    TaskExecutorProcessSpec taskExecutorProcessSpec = TaskExecutorProcessUtils.processSpecFromConfig(configurationWithFallback);
    logTaskExecutorConfiguration(taskExecutorProcessSpec);
    return Arrays.asList(ProcessMemoryUtils.generateJvmParametersStr(taskExecutorProcessSpec), TaskExecutorProcessUtils.generateDynamicConfigsStr(taskExecutorProcessSpec));
}
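The contract in the Javadoc matters because the caller picks up the result purely by position: whatever else gets logged, the second-to-last line of the output is the JVM parameters string and the last line is the dynamic configs string. A small sketch of consuming output under that positional contract; the sample lines and values are illustrative only, not real BashJavaUtils output:

import java.util.Arrays;
import java.util.List;

public class LastTwoLines {

    public static void main(String[] args) {
        // Stand-in for the utility's console output: arbitrary log lines first,
        // then the two result lines the contract promises (values are illustrative only).
        List<String> output = Arrays.asList(
                "INFO  [] - Final TaskExecutor Memory configuration ...",
                "-Xmx1073741824 -Xms1073741824",  // illustrative JVM parameters line
                "-D taskmanager.cpu.cores=1.0");  // illustrative dynamic configs line

        // Consumers read the result purely by position: second-to-last and last line.
        String jvmParams = output.get(output.size() - 2);
        String dynamicConfigs = output.get(output.size() - 1);
        System.out.println("JVM parameters:  " + jvmParams);
        System.out.println("Dynamic configs: " + dynamicConfigs);
    }
}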
Use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.
The class YarnResourceManagerDriver, method requestResource.
@Override
public CompletableFuture<YarnWorkerNode> requestResource(TaskExecutorProcessSpec taskExecutorProcessSpec) {
    checkInitialized();
    final CompletableFuture<YarnWorkerNode> requestResourceFuture = new CompletableFuture<>();
    final Optional<TaskExecutorProcessSpecContainerResourcePriorityAdapter.PriorityAndResource> priorityAndResourceOpt = taskExecutorProcessSpecContainerResourcePriorityAdapter.getPriorityAndResource(taskExecutorProcessSpec);
    if (!priorityAndResourceOpt.isPresent()) {
        requestResourceFuture.completeExceptionally(new ResourceManagerException(String.format("Could not compute the container Resource from the given TaskExecutorProcessSpec %s. This usually indicates the requested resource is larger than Yarn's max container resource limit.", taskExecutorProcessSpec)));
    } else {
        final Priority priority = priorityAndResourceOpt.get().getPriority();
        final Resource resource = priorityAndResourceOpt.get().getResource();
        resourceManagerClient.addContainerRequest(ContainerRequestReflector.INSTANCE.getContainerRequest(resource, priority, taskManagerNodeLabel));
        // make sure we transmit the request fast and receive fast news of granted allocations
        resourceManagerClient.setHeartbeatInterval(containerRequestHeartbeatIntervalMillis);
        requestResourceFutures.computeIfAbsent(taskExecutorProcessSpec, ignore -> new LinkedList<>()).add(requestResourceFuture);
        log.info("Requesting new TaskExecutor container with resource {}, priority {}.", taskExecutorProcessSpec, priority);
    }
    return requestResourceFuture;
}
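Because several outstanding requests can share the same TaskExecutorProcessSpec, the driver keeps a queue of pending futures per spec (the computeIfAbsent call above) and completes them in request order once containers are granted. A minimal sketch of that spec-keyed queue bookkeeping, with hypothetical names (PendingContainerRequests, onContainerGranted) and plain strings instead of Flink's and YARN's types:

import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.CompletableFuture;

public class PendingContainerRequests {

    // All pending request futures for a given process spec, kept in request order.
    private final Map<String, Queue<CompletableFuture<String>>> requestResourceFutures = new HashMap<>();

    CompletableFuture<String> addRequest(String processSpec) {
        CompletableFuture<String> future = new CompletableFuture<>();
        // Same shape as the computeIfAbsent call above: lazily create the queue and append.
        requestResourceFutures.computeIfAbsent(processSpec, ignore -> new LinkedList<>()).add(future);
        return future;
    }

    // Called when a container for this spec has been granted: complete the oldest pending request.
    void onContainerGranted(String processSpec, String containerId) {
        Queue<CompletableFuture<String>> pending = requestResourceFutures.getOrDefault(processSpec, new LinkedList<>());
        CompletableFuture<String> future = pending.poll();
        if (future != null) {
            future.complete(containerId);
        }
        if (pending.isEmpty()) {
            requestResourceFutures.remove(processSpec);
        }
    }

    public static void main(String[] args) {
        PendingContainerRequests requests = new PendingContainerRequests();
        CompletableFuture<String> worker = requests.addRequest("spec-A");
        requests.onContainerGranted("spec-A", "container_001");
        System.out.println(worker.join()); // prints container_001
    }
}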
Use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.
The class YarnResourceManagerDriver, method onContainersOfPriorityAllocated.
// ------------------------------------------------------------------------
// Internal
// ------------------------------------------------------------------------
private void onContainersOfPriorityAllocated(Priority priority, List<Container> containers) {
    final Optional<TaskExecutorProcessSpecContainerResourcePriorityAdapter.TaskExecutorProcessSpecAndResource> taskExecutorProcessSpecAndResourceOpt = taskExecutorProcessSpecContainerResourcePriorityAdapter.getTaskExecutorProcessSpecAndResource(priority);
    Preconditions.checkState(taskExecutorProcessSpecAndResourceOpt.isPresent(), "Receive %s containers with unrecognized priority %s. This should not happen.", containers.size(), priority.getPriority());
    final TaskExecutorProcessSpec taskExecutorProcessSpec = taskExecutorProcessSpecAndResourceOpt.get().getTaskExecutorProcessSpec();
    final Resource resource = taskExecutorProcessSpecAndResourceOpt.get().getResource();
    final Queue<CompletableFuture<YarnWorkerNode>> pendingRequestResourceFutures = requestResourceFutures.getOrDefault(taskExecutorProcessSpec, new LinkedList<>());
    log.info("Received {} containers with priority {}, {} pending container requests.", containers.size(), priority, pendingRequestResourceFutures.size());
    final Iterator<Container> containerIterator = containers.iterator();
    final Iterator<AMRMClient.ContainerRequest> pendingContainerRequestIterator = getPendingRequestsAndCheckConsistency(priority, resource, pendingRequestResourceFutures.size()).iterator();
    int numAccepted = 0;
    while (containerIterator.hasNext() && pendingContainerRequestIterator.hasNext()) {
        final Container container = containerIterator.next();
        final AMRMClient.ContainerRequest pendingRequest = pendingContainerRequestIterator.next();
        final ResourceID resourceId = getContainerResourceId(container);
        final CompletableFuture<YarnWorkerNode> requestResourceFuture = pendingRequestResourceFutures.poll();
        Preconditions.checkState(requestResourceFuture != null);
        if (pendingRequestResourceFutures.isEmpty()) {
            requestResourceFutures.remove(taskExecutorProcessSpec);
        }
        startTaskExecutorInContainerAsync(container, taskExecutorProcessSpec, resourceId, requestResourceFuture);
        removeContainerRequest(pendingRequest);
        numAccepted++;
    }
    int numExcess = 0;
    while (containerIterator.hasNext()) {
        returnExcessContainer(containerIterator.next());
        numExcess++;
    }
    log.info("Accepted {} requested containers, returned {} excess containers, {} pending container requests of resource {}.", numAccepted, numExcess, pendingRequestResourceFutures.size(), resource);
}
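The allocation handler pairs granted containers with pending requests in lockstep: while both iterators have elements, a container is accepted and the matching request is removed; any containers left over are returned to YARN as excess. The sketch below reproduces just that matching loop, with plain strings standing in for YARN's Container and ContainerRequest types:

import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;

public class ContainerMatching {

    public static void main(String[] args) {
        // Three granted containers but only two pending requests for this priority.
        List<String> allocatedContainers = Arrays.asList("container_1", "container_2", "container_3");
        Queue<String> pendingRequests = new LinkedList<>(Arrays.asList("request_1", "request_2"));

        Iterator<String> containerIterator = allocatedContainers.iterator();
        int numAccepted = 0;
        // Pair containers with pending requests in lockstep, as in the loop above.
        while (containerIterator.hasNext() && !pendingRequests.isEmpty()) {
            String container = containerIterator.next();
            String request = pendingRequests.poll();
            System.out.println("Starting TaskExecutor in " + container + " for " + request);
            numAccepted++;
        }

        // Anything left over was granted without a matching request and would be handed back.
        int numExcess = 0;
        while (containerIterator.hasNext()) {
            System.out.println("Returning excess " + containerIterator.next());
            numExcess++;
        }
        System.out.println("Accepted " + numAccepted + ", returned " + numExcess + " excess containers.");
    }
}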