Search in sources :

Example 1 with MasterWorkerInfo

Use of alluxio.job.MasterWorkerInfo in project alluxio by Alluxio.

From the class JobMaster, method registerWorker.

/**
 * Registers a worker with this job master and hands back the id assigned to it.
 *
 * <p>If a worker is already registered at the same address, the earlier registration is treated
 * as a dead worker: its tasks are failed on every plan coordinator, and its health record and
 * registration entry are dropped before the new id is issued.
 *
 * @param workerNetAddress the {@link WorkerNetAddress} of the registering worker
 * @return the newly generated id for this worker
 */
public long registerWorker(WorkerNetAddress workerNetAddress) {
    // The whole registration runs while holding the write lock that guards mWorkers.
    try (LockResource exclusiveLock = new LockResource(mWorkerRWLock.writeLock())) {
        boolean addressAlreadyRegistered = mWorkers.contains(mAddressIndex, workerNetAddress);
        if (addressAlreadyRegistered) {
            // A second registration from the same address means the previous worker process died
            // and was restarted, so everything belonging to the old registration is cleaned up.
            LOG.info("Worker at address {} is re-registering. Failing tasks for previous worker at that address", workerNetAddress);
            MasterWorkerInfo staleWorker = mWorkers.getFirstByField(mAddressIndex, workerNetAddress);
            for (PlanCoordinator coordinator : mPlanTracker.coordinators()) {
                coordinator.failTasksForWorker(staleWorker.getId());
            }
            mWorkerHealth.remove(staleWorker.getId());
            mWorkers.remove(staleWorker);
        }
        // Allocate the next id and record the worker under it.
        long newWorkerId = mNextWorkerId.getAndIncrement();
        MasterWorkerInfo registeredWorker = new MasterWorkerInfo(newWorkerId, workerNetAddress);
        mWorkers.add(registeredWorker);
        LOG.info("registerWorker(): WorkerNetAddress: {} id: {}", workerNetAddress, newWorkerId);
        return newWorkerId;
    }
}
Also used : LockResource(alluxio.resource.LockResource) MasterWorkerInfo(alluxio.job.MasterWorkerInfo) PlanCoordinator(alluxio.master.job.plan.PlanCoordinator)

Example 2 with MasterWorkerInfo

Use of alluxio.job.MasterWorkerInfo in project alluxio by Alluxio.

From the class JobMaster, method workerHeartbeat.

/**
 * Updates the tasks' status when a worker periodically heartbeats with the master, and sends the
 * commands for the worker to execute.
 *
 * <p>If no worker is registered under the id carried by {@code jobWorkerHealth}, nothing is
 * updated and a single register command is returned so that the worker registers again.
 * Otherwise the worker's last-updated time is refreshed, its health info is stored, every task
 * info is stamped with the worker's host name and forwarded to the plan coordinator of its job
 * (task infos of jobs without a coordinator are ignored), and the worker's pending commands are
 * returned.
 *
 * @param jobWorkerHealth the job worker health info
 * @param taskInfoList the list of the task information
 * @return the list of {@link JobCommand} to the worker
 * @throws ResourceExhaustedException declared by this method; which callee raises it is not
 *         visible in this excerpt (TODO confirm against the callees)
 */
public List<JobCommand> workerHeartbeat(JobWorkerHealth jobWorkerHealth, List<TaskInfo> taskInfoList) throws ResourceExhaustedException {
    long workerId = jobWorkerHealth.getWorkerId();
    String hostname;
    // Run under shared lock for mWorkers
    try (LockResource workersLockShared = new LockResource(mWorkerRWLock.readLock())) {
        MasterWorkerInfo worker = mWorkers.getFirstByField(mIdIndex, workerId);
        if (worker == null) {
            // Unknown worker id: ask the worker to register instead of processing its tasks.
            return Collections.singletonList(JobCommand.newBuilder().setRegisterCommand(RegisterCommand.getDefaultInstance()).build());
        }
        hostname = worker.getWorkerAddress().getHost();
        // Update last-update-time of this particular worker under lock
        // to prevent lost worker detector clearing it under race
        worker.updateLastUpdatedTimeMs();
    }
    mWorkerHealth.put(workerId, jobWorkerHealth);
    // Group the reported task infos by the job they belong to
    Map<Long, List<TaskInfo>> taskInfosPerJob = new HashMap<>();
    for (TaskInfo taskInfo : taskInfoList) {
        taskInfo.setWorkerHost(hostname);
        // Single lookup that creates the (properly typed) per-job list on first use
        taskInfosPerJob.computeIfAbsent(taskInfo.getJobId(), jobId -> new ArrayList<>()).add(taskInfo);
    }
    // Hand each job's task infos to its coordinator; jobs without a coordinator are skipped
    for (Map.Entry<Long, List<TaskInfo>> taskInfosPair : taskInfosPerJob.entrySet()) {
        PlanCoordinator planCoordinator = mPlanTracker.getCoordinator(taskInfosPair.getKey());
        if (planCoordinator != null) {
            planCoordinator.updateTasks(taskInfosPair.getValue());
        }
    }
    return mCommandManager.pollAllPendingCommands(workerId);
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TaskInfo(alluxio.job.wire.TaskInfo) LockResource(alluxio.resource.LockResource) MasterWorkerInfo(alluxio.job.MasterWorkerInfo) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) Map(java.util.Map) PlanCoordinator(alluxio.master.job.plan.PlanCoordinator)

Aggregations

MasterWorkerInfo (alluxio.job.MasterWorkerInfo)2 PlanCoordinator (alluxio.master.job.plan.PlanCoordinator)2 LockResource (alluxio.resource.LockResource)2 TaskInfo (alluxio.job.wire.TaskInfo)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1