Use of alluxio.job.MasterWorkerInfo in project alluxio by Alluxio.
Class JobMaster, method registerWorker.
/**
 * Registers a job worker with this master and hands back a freshly generated worker id.
 *
 * <p>When a worker is already registered under the same address, that earlier registration is
 * cleaned up first: its tasks are failed on every plan coordinator, its health entry is removed,
 * and it is dropped from the worker set before the new registration is added.
 *
 * @param workerNetAddress the {@link WorkerNetAddress} of the registering worker
 * @return the id newly assigned to this worker
 */
public long registerWorker(WorkerNetAddress workerNetAddress) {
  // The write lock guarding mWorkers is held for the whole registration.
  try (LockResource writeLock = new LockResource(mWorkerRWLock.writeLock())) {
    boolean alreadyRegistered = mWorkers.contains(mAddressIndex, workerNetAddress);
    if (alreadyRegistered) {
      // A second registration from the same address means the previous worker must have died
      // and been restarted, so everything tied to the old registration is discarded.
      LOG.info("Worker at address {} is re-registering. Failing tasks for previous worker at that "
          + "address", workerNetAddress);
      MasterWorkerInfo staleWorker = mWorkers.getFirstByField(mAddressIndex, workerNetAddress);
      for (PlanCoordinator coordinator : mPlanTracker.coordinators()) {
        coordinator.failTasksForWorker(staleWorker.getId());
      }
      mWorkerHealth.remove(staleWorker.getId());
      mWorkers.remove(staleWorker);
    }

    // Allocate the next id and record the new registration under it.
    final long newWorkerId = mNextWorkerId.getAndIncrement();
    MasterWorkerInfo registration = new MasterWorkerInfo(newWorkerId, workerNetAddress);
    mWorkers.add(registration);
    LOG.info("registerWorker(): WorkerNetAddress: {} id: {}", workerNetAddress, newWorkerId);
    return newWorkerId;
  }
}
Use of alluxio.job.MasterWorkerInfo in project alluxio by Alluxio.
Class JobMaster, method workerHeartbeat.
/**
 * Updates the tasks' status when a worker periodically heartbeats with the master, and sends the
 * commands for the worker to execute.
 *
 * <p>If no worker with the reported id is registered, nothing is updated and a single register
 * command is returned instead. Otherwise the worker's last-updated time is refreshed, its health
 * info is stored, the reported task infos are stamped with the worker's host and forwarded to
 * the coordinator of each job that has one, and the worker's pending commands are polled.
 *
 * @param jobWorkerHealth the job worker health info
 * @param taskInfoList the list of the task information
 * @return the list of {@link JobCommand} to the worker
 * @throws ResourceExhaustedException declared by this method; NOTE(review): the call that
 *         raises it is not visible in this block - confirm against the callees
 */
public List<JobCommand> workerHeartbeat(JobWorkerHealth jobWorkerHealth,
    List<TaskInfo> taskInfoList) throws ResourceExhaustedException {
  long workerId = jobWorkerHealth.getWorkerId();
  String hostname;
  // Run under shared lock for mWorkers
  try (LockResource workersLockShared = new LockResource(mWorkerRWLock.readLock())) {
    MasterWorkerInfo worker = mWorkers.getFirstByField(mIdIndex, workerId);
    if (worker == null) {
      // Unknown worker id: ask the worker to register instead of processing the heartbeat.
      return Collections.singletonList(JobCommand.newBuilder()
          .setRegisterCommand(RegisterCommand.getDefaultInstance()).build());
    }
    hostname = worker.getWorkerAddress().getHost();
    // Update last-update-time of this particular worker under lock
    // to prevent lost worker detector clearing it under race
    worker.updateLastUpdatedTimeMs();
  }
  mWorkerHealth.put(workerId, jobWorkerHealth);

  // Group the reported task infos by job id so each coordinator is updated once.
  Map<Long, List<TaskInfo>> taskInfosPerJob = new HashMap<>();
  for (TaskInfo taskInfo : taskInfoList) {
    taskInfo.setWorkerHost(hostname);
    Long jobId = taskInfo.getJobId();
    // Single lookup with get-or-create; the list is created with the diamond operator rather
    // than as a raw ArrayList so no unchecked conversion is needed.
    List<TaskInfo> tasksForJob = taskInfosPerJob.get(jobId);
    if (tasksForJob == null) {
      tasksForJob = new ArrayList<>();
      taskInfosPerJob.put(jobId, tasksForJob);
    }
    tasksForJob.add(taskInfo);
  }

  // Update task infos for all jobs involved; jobs without a coordinator are skipped.
  for (Map.Entry<Long, List<TaskInfo>> taskInfosPair : taskInfosPerJob.entrySet()) {
    PlanCoordinator planCoordinator = mPlanTracker.getCoordinator(taskInfosPair.getKey());
    if (planCoordinator != null) {
      planCoordinator.updateTasks(taskInfosPair.getValue());
    }
  }
  return mCommandManager.pollAllPendingCommands(workerId);
}
Aggregations