Search in sources :

Example 1 with WorkerRegistration

Use of org.apache.flink.runtime.resourcemanager.registration.WorkerRegistration in the Apache Flink project.

From the class ResourceManager, method requestTaskManagerInfo:

/**
 * Builds one {@link TaskManagerInfo} for every entry currently held in {@code taskExecutors}.
 *
 * <p>The collection is assembled synchronously, so the returned future is already completed.
 *
 * @param timeout not referenced by this implementation
 * @return a completed future holding one {@link TaskManagerInfo} per registered task executor
 */
@Override
public CompletableFuture<Collection<TaskManagerInfo>> requestTaskManagerInfo(Time timeout) {
    final ArrayList<TaskManagerInfo> infos = new ArrayList<>(taskExecutors.size());

    for (Map.Entry<ResourceID, WorkerRegistration<WorkerType>> entry : taskExecutors.entrySet()) {
        final ResourceID workerId = entry.getKey();
        final WorkerRegistration<WorkerType> worker = entry.getValue();

        // Connection data comes from the registration itself, the heartbeat timestamp from the
        // heartbeat manager, and the slot/resource figures from the slot manager.
        final TaskManagerInfo info =
                new TaskManagerInfo(
                        workerId,
                        worker.getTaskExecutorGateway().getAddress(),
                        worker.getDataPort(),
                        worker.getJmxPort(),
                        taskManagerHeartbeatManager.getLastHeartbeatFrom(workerId),
                        slotManager.getNumberRegisteredSlotsOf(worker.getInstanceID()),
                        slotManager.getNumberFreeSlotsOf(worker.getInstanceID()),
                        slotManager.getRegisteredResourceOf(worker.getInstanceID()),
                        slotManager.getFreeResourceOf(worker.getInstanceID()),
                        worker.getHardwareDescription(),
                        worker.getMemoryConfiguration());
        infos.add(info);
    }

    return CompletableFuture.completedFuture(infos);
}
Also used : ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskManagerInfo(org.apache.flink.runtime.rest.messages.taskmanager.TaskManagerInfo) ArrayList(java.util.ArrayList) WorkerRegistration(org.apache.flink.runtime.resourcemanager.registration.WorkerRegistration) Map(java.util.Map) HashMap(java.util.HashMap)

Example 2 with WorkerRegistration

Use of org.apache.flink.runtime.resourcemanager.registration.WorkerRegistration in the Apache Flink project.

From the class ResourceManager, method registerTaskExecutorInternal:

/**
 * Handles the registration of a TaskExecutor.
 *
 * <p>A registration already stored under the same resource id is removed first, and its task
 * manager is unregistered from the slot manager. If {@code workerStarted} returns {@code null}
 * for the resource id, the registration is rejected. Otherwise a new {@link WorkerRegistration}
 * is stored and the TaskExecutor is handed to the heartbeat manager as a monitoring target.
 *
 * @param taskExecutorGateway gateway of the registering TaskExecutor
 * @param taskExecutorRegistration task executor registration parameters
 * @return a {@link TaskExecutorRegistrationRejection} when the worker is not recognized,
 *     otherwise a {@link TaskExecutorRegistrationSuccess}
 */
private RegistrationResponse registerTaskExecutorInternal(TaskExecutorGateway taskExecutorGateway, TaskExecutorRegistration taskExecutorRegistration) {
    final ResourceID taskExecutorResourceId = taskExecutorRegistration.getResourceId();

    final WorkerRegistration<WorkerType> previous = taskExecutors.remove(taskExecutorResourceId);
    if (previous != null) {
        // TODO :: suggest old taskExecutor to stop itself
        log.debug(
                "Replacing old registration of TaskExecutor {}.",
                taskExecutorResourceId.getStringWithMetadata());

        // The slot manager must forget the stale registration as well.
        final ResourceManagerException reconnectCause =
                new ResourceManagerException(
                        String.format(
                                "TaskExecutor %s re-connected to the ResourceManager.",
                                taskExecutorResourceId.getStringWithMetadata()));
        slotManager.unregisterTaskManager(previous.getInstanceID(), reconnectCause);
    }

    final WorkerType newWorker = workerStarted(taskExecutorResourceId);
    final String taskExecutorAddress = taskExecutorRegistration.getTaskExecutorAddress();

    // Guard clause: reject when workerStarted yields no worker for this resource id.
    if (newWorker == null) {
        log.warn(
                "Discard registration from TaskExecutor {} at ({}) because the framework did not recognize it",
                taskExecutorResourceId.getStringWithMetadata(),
                taskExecutorAddress);
        return new TaskExecutorRegistrationRejection(
                "The ResourceManager does not recognize this TaskExecutor.");
    }

    final WorkerRegistration<WorkerType> registration =
            new WorkerRegistration<>(
                    taskExecutorGateway,
                    newWorker,
                    taskExecutorRegistration.getDataPort(),
                    taskExecutorRegistration.getJmxPort(),
                    taskExecutorRegistration.getHardwareDescription(),
                    taskExecutorRegistration.getMemoryConfiguration(),
                    taskExecutorRegistration.getTotalResourceProfile(),
                    taskExecutorRegistration.getDefaultSlotResourceProfile());

    log.info(
            "Registering TaskManager with ResourceID {} ({}) at ResourceManager",
            taskExecutorResourceId.getStringWithMetadata(),
            taskExecutorAddress);

    taskExecutors.put(taskExecutorResourceId, registration);
    taskManagerHeartbeatManager.monitorTarget(
            taskExecutorResourceId, new TaskExecutorHeartbeatSender(taskExecutorGateway));

    return new TaskExecutorRegistrationSuccess(
            registration.getInstanceID(), resourceId, clusterInformation);
}
Also used : ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskExecutorRegistrationRejection(org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationRejection) TaskExecutorRegistrationSuccess(org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationSuccess) WorkerRegistration(org.apache.flink.runtime.resourcemanager.registration.WorkerRegistration) ResourceManagerException(org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException)

Example 3 with WorkerRegistration

Use of org.apache.flink.runtime.resourcemanager.registration.WorkerRegistration in the Apache Flink project.

From the class ResourceManager, method requestTaskManagerMetricQueryServiceAddresses:

/**
 * Asks every task executor in {@code taskExecutors} for its metric query service address.
 *
 * <p>Each reply is converted to an {@link Optional}; replies that convert to an empty
 * {@link Optional} are left out of the final collection.
 *
 * @param timeout timeout passed to each {@code requestMetricQueryServiceAddress} call
 * @return future holding (resource id, address) pairs for the task executors that reported an
 *     address
 */
@Override
public CompletableFuture<Collection<Tuple2<ResourceID, String>>> requestTaskManagerMetricQueryServiceAddresses(Time timeout) {
    final ArrayList<CompletableFuture<Optional<Tuple2<ResourceID, String>>>> pendingAddresses =
            new ArrayList<>(taskExecutors.size());

    for (Map.Entry<ResourceID, WorkerRegistration<WorkerType>> entry : taskExecutors.entrySet()) {
        final ResourceID tmResourceId = entry.getKey();
        final WorkerRegistration<WorkerType> worker = entry.getValue();
        final TaskExecutorGateway gateway = worker.getTaskExecutorGateway();

        // Pair each reported address with the id of the task executor it came from.
        final CompletableFuture<Optional<Tuple2<ResourceID, String>>> pendingAddress =
                gateway.requestMetricQueryServiceAddress(timeout)
                        .thenApply(
                                reply ->
                                        reply.toOptional()
                                                .map(address -> Tuple2.of(tmResourceId, address)));
        pendingAddresses.add(pendingAddress);
    }

    // Wait for all replies, then keep only those that actually carry an address.
    return FutureUtils.combineAll(pendingAddresses)
            .thenApply(
                    replies ->
                            replies.stream()
                                    .filter(Optional::isPresent)
                                    .map(Optional::get)
                                    .collect(Collectors.toList()));
}
Also used : TaskExecutorRegistrationRejection(org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationRejection) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) RpcServiceUtils(org.apache.flink.runtime.rpc.RpcServiceUtils) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) LogInfo(org.apache.flink.runtime.rest.messages.LogInfo) HeartbeatListener(org.apache.flink.runtime.heartbeat.HeartbeatListener) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) HeartbeatManager(org.apache.flink.runtime.heartbeat.HeartbeatManager) Map(java.util.Map) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) FileType(org.apache.flink.runtime.taskexecutor.FileType) JobMasterRegistrationSuccess(org.apache.flink.runtime.jobmaster.JobMasterRegistrationSuccess) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) TransientBlobKey(org.apache.flink.runtime.blob.TransientBlobKey) Collection(java.util.Collection) CompletionException(java.util.concurrent.CompletionException) ResourceManagerPartitionTracker(org.apache.flink.runtime.io.network.partition.ResourceManagerPartitionTracker) UUID(java.util.UUID) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) Collectors(java.util.stream.Collectors) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) Objects(java.util.Objects) MetricNames(org.apache.flink.runtime.metrics.MetricNames) Optional(java.util.Optional) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) Time(org.apache.flink.api.common.time.Time) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) 
JobManagerRegistration(org.apache.flink.runtime.resourcemanager.registration.JobManagerRegistration) FlinkException(org.apache.flink.util.FlinkException) ResourceIDRetrievable(org.apache.flink.runtime.clusterframework.types.ResourceIDRetrievable) HeartbeatSender(org.apache.flink.runtime.heartbeat.HeartbeatSender) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) ArrayList(java.util.ArrayList) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) ResourceManagerMetricGroup(org.apache.flink.runtime.metrics.groups.ResourceManagerMetricGroup) RpcService(org.apache.flink.runtime.rpc.RpcService) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) JobMaster(org.apache.flink.runtime.jobmaster.JobMaster) ResourceManagerException(org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) SlotManager(org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) Nullable(javax.annotation.Nullable) FencedRpcEndpoint(org.apache.flink.runtime.rpc.FencedRpcEndpoint) TaskManagerInfo(org.apache.flink.runtime.rest.messages.taskmanager.TaskManagerInfo) Executor(java.util.concurrent.Executor) UnknownTaskExecutorException(org.apache.flink.runtime.resourcemanager.exceptions.UnknownTaskExecutorException) ApplicationStatus(org.apache.flink.runtime.clusterframework.ApplicationStatus) TaskExecutorHeartbeatPayload(org.apache.flink.runtime.taskexecutor.TaskExecutorHeartbeatPayload) TaskExecutorRegistrationSuccess(org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationSuccess) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) 
ThreadDumpInfo(org.apache.flink.runtime.rest.messages.ThreadDumpInfo) InstanceID(org.apache.flink.runtime.instance.InstanceID) ResourceActions(org.apache.flink.runtime.resourcemanager.slotmanager.ResourceActions) TaskExecutorThreadInfoGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorThreadInfoGateway) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) ResourceManagerPartitionTrackerFactory(org.apache.flink.runtime.io.network.partition.ResourceManagerPartitionTrackerFactory) NoOpHeartbeatManager(org.apache.flink.runtime.heartbeat.NoOpHeartbeatManager) JobID(org.apache.flink.api.common.JobID) WorkerRegistration(org.apache.flink.runtime.resourcemanager.registration.WorkerRegistration) DataSetMetaInfo(org.apache.flink.runtime.io.network.partition.DataSetMetaInfo) Optional(java.util.Optional) ArrayList(java.util.ArrayList) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Tuple2(org.apache.flink.api.java.tuple.Tuple2) WorkerRegistration(org.apache.flink.runtime.resourcemanager.registration.WorkerRegistration) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)3 WorkerRegistration (org.apache.flink.runtime.resourcemanager.registration.WorkerRegistration)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 ResourceManagerException (org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException)2 TaskExecutorRegistrationRejection (org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationRejection)2 TaskExecutorRegistrationSuccess (org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationSuccess)2 Collection (java.util.Collection)1 Objects (java.util.Objects)1 Optional (java.util.Optional)1 UUID (java.util.UUID)1 CompletableFuture (java.util.concurrent.CompletableFuture)1 CompletionException (java.util.concurrent.CompletionException)1 Executor (java.util.concurrent.Executor)1 TimeoutException (java.util.concurrent.TimeoutException)1 Collectors (java.util.stream.Collectors)1 Nullable (javax.annotation.Nullable)1 VisibleForTesting (org.apache.flink.annotation.VisibleForTesting)1 JobID (org.apache.flink.api.common.JobID)1