Search in sources :

Example 1 with AMWorker

use of com.tencent.angel.master.worker.worker.AMWorker in project angel by Tencent.

The class ProgressBlock, method render.

/**
 * Renders the "Angel Progress" page: a table with one row per task, sorted by task index,
 * showing the task id, its state, its iteration out of the total iteration number and a
 * link to the container logs of the worker attempt associated with the task.
 *
 * @param html the block to render into
 */
@Override
protected void render(Block html) {
    set(TITLE, join("Angel Progress"));
    Hamlet.TABLE<Hamlet.DIV<Hamlet>> table = html.div(_INFO_WRAP).table("#job");
    Hamlet.TR<Hamlet.THEAD<Hamlet.TABLE<Hamlet.DIV<Hamlet>>>> headTr = table.thead().tr();
    headTr.th(_TH, "taskid").th(_TH, "state").th(_TH, "current iteration").th(_TH, "workerlog");
    headTr._()._();
    Hamlet.TBODY<Hamlet.TABLE<Hamlet.DIV<Hamlet>>> tbody = table.tbody();

    // Map every task to a worker attempt that holds it. When a task shows up in several
    // attempts, the attempt visited last overwrites the earlier ones.
    Map<AMTask, WorkerAttempt> taskToAttempt = new HashMap<>();
    for (AMWorkerGroup amWorkerGroup : amContext.getWorkerManager().getWorkerGroupMap().values()) {
        for (AMWorker amWorker : amWorkerGroup.getWorkerSet()) {
            for (WorkerAttempt workerAttempt : amWorker.getAttempts().values()) {
                for (AMTask amTask : workerAttempt.getTaskMap().values()) {
                    taskToAttempt.put(amTask, workerAttempt);
                }
            }
        }
    }

    List<AMTask> amTaskList = new ArrayList<>();
    for (AMTask amTask : amContext.getTaskManager().getTasks()) {
        amTaskList.add(amTask);
    }
    Collections.sort(amTaskList, new Comparator<AMTask>() {

        @Override
        public int compare(AMTask task1, AMTask task2) {
            // Integer.compare avoids the overflow risk of comparing by subtraction.
            return Integer.compare(task1.getTaskId().getIndex(), task2.getTaskId().getIndex());
        }
    });

    for (AMTask task : amTaskList) {
        Hamlet.TR<Hamlet.TBODY<Hamlet.TABLE<Hamlet.DIV<Hamlet>>>> tr = tbody.tr();
        tr.td(task.getTaskId().toString()).td(task.getState().toString()).td(String.valueOf(task.getIteration()) + "/" + amContext.getTotalIterationNum());
        WorkerAttempt workerAttempt = taskToAttempt.get(task);
        if (workerAttempt == null) {
            // A task known to the task manager may not be held by any worker attempt in the
            // map; render a placeholder instead of failing the whole page with an NPE.
            tr.td("-");
        } else {
            tr.td().a(url(MRWebAppUtil.getYARNWebappScheme(), workerAttempt.getNodeHttpAddr(), "node", "containerlogs", workerAttempt.getContainerIdStr(), amContext.getUser().toString()), workerAttempt.getId().toString())._();
        }
        tr._();
    }
    tbody._()._()._();
}
Also used : Hamlet(org.apache.hadoop.yarn.webapp.hamlet.Hamlet) AMWorkerGroup(com.tencent.angel.master.worker.workergroup.AMWorkerGroup) WorkerAttempt(com.tencent.angel.master.worker.attempt.WorkerAttempt) AMWorker(com.tencent.angel.master.worker.worker.AMWorker) AMTask(com.tencent.angel.master.task.AMTask)

Example 2 with AMWorker

use of com.tencent.angel.master.worker.worker.AMWorker in project angel by Tencent.

The class WorkerBlock, method render.

@Override
protected void render(Block html) {
    set(TITLE, join("Angel Worker Attempt ", $(WORKER_ATTEMPT_ID)));
    String workerAttemptIdStr = $(WORKER_ATTEMPT_ID);
    if (workerAttemptIdStr == null || workerAttemptIdStr.isEmpty()) {
        html.p()._("Sorry, can't do anything without a WorkerId.")._();
        return;
    }
    WorkerAttemptId workerAttemptId = null;
    try {
        workerAttemptId = new WorkerAttemptId(workerAttemptIdStr);
    } catch (UnvalidIdStrException e) {
        LOG.error("unvalid id string, ", e);
        return;
    }
    AMWorker worker;
    worker = amContext.getWorkerManager().getWorker(workerAttemptId.getWorkerId());
    if (worker == null) {
        html.p()._("Sorry, can't find worker " + workerAttemptId.getWorkerId())._();
        return;
    }
    WorkerAttempt workerAttempt = worker.getWorkerAttempt(workerAttemptId);
    TABLE<DIV<Hamlet>> table = html.div(_INFO_WRAP).table("#job");
    TR<THEAD<TABLE<DIV<Hamlet>>>> headTr = table.thead().tr();
    headTr.th(_TH, "taskid").th(_TH, "state").th(_TH, "current iteration").th(_TH, "current iteration bar").th(_TH, "current progress").th(_TH, "current progress bar").th(_TH, "taskcounters");
    headTr._()._();
    float current_iteration_progress = (float) 0.0;
    float current_clock_progress = (float) 0.0;
    TBODY<TABLE<DIV<Hamlet>>> tbody = table.tbody();
    for (AMTask task : workerAttempt.getTaskMap().values()) {
        if (task.getProgress() >= 0 && task.getProgress() <= 1)
            current_iteration_progress = task.getProgress();
        current_clock_progress = ((float) task.getIteration()) / ((float) amContext.getTotalIterationNum());
        TR<TBODY<TABLE<DIV<Hamlet>>>> tr = tbody.tr();
        tr.td(task.getTaskId().toString()).td(task.getState().toString()).td(String.valueOf(task.getIteration()) + "/" + amContext.getTotalIterationNum()).td().div(_PROGRESSBAR).$title(// tooltip
        join(String.valueOf(current_clock_progress * 100), '%')).div(_PROGRESSBAR_VALUE).$style(join("width:", String.valueOf(current_clock_progress * 100), '%'))._()._()._().td(String.valueOf(current_iteration_progress)).td().div(_PROGRESSBAR).$title(join(String.valueOf(current_iteration_progress * 100), '%')).div(_PROGRESSBAR_VALUE).$style(join("width:", String.valueOf(current_iteration_progress * 100), '%'))._()._()._().td().a(url("angel/taskCountersPage/", task.getTaskId().toString()), "taskcounters")._();
        tr._();
    }
    tbody._()._()._();
}
Also used : Hamlet(org.apache.hadoop.yarn.webapp.hamlet.Hamlet) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) UnvalidIdStrException(com.tencent.angel.exception.UnvalidIdStrException) AMWorker(com.tencent.angel.master.worker.worker.AMWorker) WorkerAttempt(com.tencent.angel.master.worker.attempt.WorkerAttempt) AMTask(com.tencent.angel.master.task.AMTask)

Example 3 with AMWorker

use of com.tencent.angel.master.worker.worker.AMWorker in project angel by Tencent.

The class WorkerManager, method initWorkers.

/**
 * Creates all worker groups with their workers and task ids, registers them in the lookup
 * maps of this manager and sends an INIT event to every group.
 */
private void initWorkers() {
    for (int groupIndex = 0; groupIndex < workergroupNumber; groupIndex++) {
        WorkerGroupId groupId = new WorkerGroupId(groupIndex);
        Map<WorkerId, AMWorker> groupWorkers = new HashMap<>();
        WorkerId leader = null;

        for (int slot = 0; slot < workersInGroup; slot++) {
            int workerIndex = groupIndex * workersInGroup + slot;

            // Each worker receives a contiguous run of task ids, cut off at totalTaskNumber.
            int nextTaskIndex = workerIndex * taskNumberInEachWorker;
            List<TaskId> taskIds = new ArrayList<>(taskNumberInEachWorker);
            int assigned = 0;
            while (assigned < taskNumberInEachWorker && nextTaskIndex < totalTaskNumber) {
                taskIds.add(new TaskId(nextTaskIndex));
                assigned++;
                nextTaskIndex++;
            }

            WorkerId workerId = new WorkerId(groupId, workerIndex);
            AMWorker worker = new AMWorker(workerId, context, taskIds);
            workersMap.put(workerId, worker);
            groupWorkers.put(workerId, worker);

            // The first worker created in a group is its leader.
            if (slot == 0) {
                leader = workerId;
            }
        }

        AMWorkerGroup group = new AMWorkerGroup(groupId, context, groupWorkers, leader, groupIndex);
        for (Map.Entry<WorkerId, AMWorker> entry : groupWorkers.entrySet()) {
            AMWorker worker = entry.getValue();
            findWorkerGroupMap.put(entry.getKey(), group);
            for (TaskId taskId : worker.getTaskIds()) {
                taskIdToWorkerMap.put(taskId, worker);
            }
        }
        workerGroupMap.put(groupId, group);
        group.handle(new AMWorkerGroupEvent(AMWorkerGroupEventType.INIT, groupId));
    }
    LOG.info("to init taskClockManager!");
}
Also used : AMWorkerGroup(com.tencent.angel.master.worker.workergroup.AMWorkerGroup) TaskId(com.tencent.angel.worker.task.TaskId) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) AMWorkerGroupEvent(com.tencent.angel.master.worker.workergroup.AMWorkerGroupEvent) AMWorker(com.tencent.angel.master.worker.worker.AMWorker) WorkerId(com.tencent.angel.worker.WorkerId) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId)

Example 4 with AMWorker

use of com.tencent.angel.master.worker.worker.AMWorker in project angel by Tencent.

The class ProtobufUtil, method buildWorkerGroupMeta.

/**
 * Builds the protobuf meta information of a worker group: its id, its leader id, the meta of
 * every worker in the group and, when splits are given, the serialized splits.
 *
 * @param group the worker group to describe
 * @param splits the splits to attach; may be null, in which case no splits are added
 * @param conf configuration passed to the split serialization
 * @return the built worker group meta message
 * @throws IOException declared by the method; presumably raised by split serialization
 */
private static WorkerGroupMetaInfoProto buildWorkerGroupMeta(AMWorkerGroup group, SplitClassification splits, Configuration conf) throws IOException {
    WorkerGroupMetaInfoProto.Builder metaBuilder = WorkerGroupMetaInfoProto.newBuilder();
    metaBuilder.setWorkerGroupId(convertToIdProto(group.getId()));
    metaBuilder.setLeaderId(convertToIdProto(group.getLeader()));
    for (AMWorker worker : group.getWorkerSet()) {
        metaBuilder.addWorkers(buildWorkerMetaProto(worker));
    }

    // Without splits the message is complete at this point.
    if (splits == null) {
        return metaBuilder.build();
    }

    List<SplitInfo> serializedSplits = SerdeUtils.serilizeSplits(splits, conf);
    // One builder is reused for all splits; both of its fields are overwritten on each pass.
    SplitInfoProto.Builder splitBuilder = SplitInfoProto.newBuilder();
    for (SplitInfo info : serializedSplits) {
        splitBuilder.setSplitClass(info.getSplitClass());
        splitBuilder.setSplit(ByteString.copyFrom(info.getSplit()));
        metaBuilder.addSplits(splitBuilder.build());
    }
    return metaBuilder.build();
}
Also used : WorkerGroupMetaInfoProto(com.tencent.angel.protobuf.generated.WorkerMasterServiceProtos.WorkerGroupMetaInfoProto) SplitInfoProto(com.tencent.angel.protobuf.generated.WorkerMasterServiceProtos.SplitInfoProto) AMWorker(com.tencent.angel.master.worker.worker.AMWorker) SplitInfo(com.tencent.angel.split.SplitInfo)

Example 5 with AMWorker

use of com.tencent.angel.master.worker.worker.AMWorker in project angel by Tencent.

The class MasterService, method getWorkerLogDir.

/**
 * Returns the url of the log of a worker's running attempt.
 *
 * <p>The log dir in the response is empty when the worker has no running attempt, or when
 * the running attempt has no location or no container.
 *
 * @param controller rpc controller
 * @param request rpc request that carries the worker id
 * @return response holding the worker log url, or an empty string if it is not available
 * @throws ServiceException if the worker does not exist
 */
@Override
public GetWorkerLogDirResponse getWorkerLogDir(RpcController controller, GetWorkerLogDirRequest request) throws ServiceException {
    WorkerId workerId = ProtobufUtil.convertToId(request.getWorkerId());
    AMWorker worker = context.getWorkerManager().getWorker(workerId);
    if (worker == null) {
        throw new ServiceException("can not find worker " + workerId);
    }

    // Stays empty unless a running attempt with both a location and a container is found.
    String logDir = "";
    WorkerAttempt runningAttempt = worker.getRunningAttempt();
    if (runningAttempt != null) {
        Location location = runningAttempt.getLocation();
        Container container = runningAttempt.getContainer();
        if (location != null && container != null) {
            logDir = "http://" + location.getIp() + ":" + yarnNMWebPort + "/node/containerlogs/" + container.getId() + "/angel/syslog/?start=0";
        }
    }
    return GetWorkerLogDirResponse.newBuilder().setLogDir(logDir).build();
}
Also used : Container(org.apache.hadoop.yarn.api.records.Container) ServiceException(com.google.protobuf.ServiceException) AMWorker(com.tencent.angel.master.worker.worker.AMWorker) WorkerAttempt(com.tencent.angel.master.worker.attempt.WorkerAttempt) WorkerId(com.tencent.angel.worker.WorkerId) PSLocation(com.tencent.angel.ps.server.data.PSLocation) Location(com.tencent.angel.common.location.Location)

Aggregations

AMWorker (com.tencent.angel.master.worker.worker.AMWorker): 9, WorkerAttempt (com.tencent.angel.master.worker.attempt.WorkerAttempt): 5, AMWorkerGroup (com.tencent.angel.master.worker.workergroup.AMWorkerGroup): 5, AMTask (com.tencent.angel.master.task.AMTask): 4, WorkerGroupId (com.tencent.angel.worker.WorkerGroupId): 3, Hamlet (org.apache.hadoop.yarn.webapp.hamlet.Hamlet): 3, ServiceException (com.google.protobuf.ServiceException): 2, UnvalidIdStrException (com.tencent.angel.exception.UnvalidIdStrException): 2, AMTaskManager (com.tencent.angel.master.task.AMTaskManager): 2, WorkerManager (com.tencent.angel.master.worker.WorkerManager): 2, WorkerAttemptId (com.tencent.angel.worker.WorkerAttemptId): 2, WorkerId (com.tencent.angel.worker.WorkerId): 2, TaskId (com.tencent.angel.worker.task.TaskId): 2, Id (com.tencent.angel.common.Id): 1, Location (com.tencent.angel.common.location.Location): 1, AngelException (com.tencent.angel.exception.AngelException): 1, AMWorkerEvent (com.tencent.angel.master.worker.worker.AMWorkerEvent): 1, AMWorkerGroupEvent (com.tencent.angel.master.worker.workergroup.AMWorkerGroupEvent): 1, SplitInfoProto (com.tencent.angel.protobuf.generated.WorkerMasterServiceProtos.SplitInfoProto): 1, WorkerGroupMetaInfoProto (com.tencent.angel.protobuf.generated.WorkerMasterServiceProtos.WorkerGroupMetaInfoProto): 1