Search in sources:

Example 6 with AMWorkerGroup

use of com.tencent.angel.master.worker.workergroup.AMWorkerGroup in project angel by Tencent.

The method startAllWorker of the class WorkerManager.

/**
 * Initializes all workers and schedules every one of them.
 *
 * <p>While holding {@code writeLock}, this calls {@code initWorkers()}, then looks up the worker
 * groups by index (ids {@code 0} to {@code workerGroupMap.size() - 1}) and hands each worker of
 * each group a {@code SCHEDULE} event. {@code isInited} is set to {@code true} only when all of
 * that completed without an exception.
 */
public void startAllWorker() {
    LOG.info("to start all workers.....");
    // Acquire the lock BEFORE entering the try block: if lock() itself fails, the finally
    // clause must not call unlock() on a lock this thread never obtained.
    writeLock.lock();
    try {
        initWorkers();
        for (int i = 0; i < workerGroupMap.size(); i++) {
            AMWorkerGroup group = workerGroupMap.get(new WorkerGroupId(i));
            for (AMWorker worker : group.getWorkerSet()) {
                worker.handle(new AMWorkerEvent(AMWorkerEventType.SCHEDULE, worker.getId()));
            }
        }
        isInited = true;
    } finally {
        writeLock.unlock();
    }
}
Also used : AMWorkerGroup(com.tencent.angel.master.worker.workergroup.AMWorkerGroup) AMWorker(com.tencent.angel.master.worker.worker.AMWorker) AMWorkerEvent(com.tencent.angel.master.worker.worker.AMWorkerEvent) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId)

Example 7 with AMWorkerGroup

use of com.tencent.angel.master.worker.workergroup.AMWorkerGroup in project angel by Tencent.

The method render of the class WorkerGroupBlock.

@Override
protected void render(Block html) {
    String workerGroupIdSr = $(WORKERGROUP_ID);
    if (workerGroupIdSr.isEmpty()) {
        html.p()._("Sorry, can't do anything without a WorkerGroupId.")._();
        return;
    }
    WorkerGroupId workerGroupId;
    try {
        workerGroupId = new WorkerGroupId(workerGroupIdSr);
    } catch (UnvalidIdStrException e) {
        LOG.error("unvalid id string, ", e);
        return;
    }
    AMWorkerGroup workerGroup = amContext.getWorkerManager().getWorkerGroup(workerGroupId);
    if (workerGroup == null) {
        html.p()._("Sorry, can't find group " + workerGroupId)._();
        return;
    }
    set(TITLE, join("Angel WorkerGroup ", $(WORKERGROUP_ID)));
    html.h1(workerGroupIdSr);
    TABLE<DIV<Hamlet>> table = html.div(_INFO_WRAP).table("#job");
    TR<THEAD<TABLE<DIV<Hamlet>>>> headTr = table.thead().tr();
    headTr.th(_TH, "id").th(_TH, "state").th(_TH, "node address").th(_TH, "start time").th(_TH, "end time").th(_TH, "elapsed time").th(_TH, "log").th(_TH, "threadstack").th(_TH, "workercounter");
    headTr._()._();
    TBODY<TABLE<DIV<Hamlet>>> tbody = table.tbody();
    for (AMWorker worker : workerGroup.getWorkerSet()) {
        Map<WorkerAttemptId, WorkerAttempt> workerAttempts = worker.getAttempts();
        for (WorkerAttempt workerAttempt : workerAttempts.values()) {
            TR<TBODY<TABLE<DIV<Hamlet>>>> tr = tbody.tr();
            long elaspedTs = 0;
            if (workerAttempt.getLaunchTime() != 0 && workerAttempt.getFinishTime() != 0) {
                elaspedTs = workerAttempt.getFinishTime() - workerAttempt.getLaunchTime();
            } else if (workerAttempt.getLaunchTime() != 0 && workerAttempt.getFinishTime() == 0) {
                elaspedTs = System.currentTimeMillis() - workerAttempt.getLaunchTime();
            }
            if (workerAttempt.getNodeHttpAddr() == null) {
                tr.td().a(url("angel/workerPage", workerAttempt.getId().toString()), workerAttempt.getId().toString())._().td(workerAttempt.getState().toString()).td("N/A").td((workerAttempt.getLaunchTime() == 0) ? "N/A" : new Date(workerAttempt.getLaunchTime()).toString()).td((workerAttempt.getFinishTime() == 0) ? "N/A" : new Date(workerAttempt.getFinishTime()).toString()).td((elaspedTs == 0) ? "N/A" : StringUtils.formatTime(elaspedTs)).td("N/A").td("N/A").td("N/A");
            } else {
                tr.td().a(url("angel/workerPage", workerAttempt.getId().toString()), workerAttempt.getId().toString())._().td(workerAttempt.getState().toString()).td().a(url(MRWebAppUtil.getYARNWebappScheme(), workerAttempt.getNodeHttpAddr()), workerAttempt.getNodeHttpAddr())._().td((workerAttempt.getLaunchTime() == 0) ? "N/A" : new Date(workerAttempt.getLaunchTime()).toString()).td((workerAttempt.getFinishTime() == 0) ? "N/A" : new Date(workerAttempt.getFinishTime()).toString()).td((elaspedTs == 0) ? "N/A" : StringUtils.formatTime(elaspedTs)).td().a(url(MRWebAppUtil.getYARNWebappScheme(), workerAttempt.getNodeHttpAddr(), "node", "containerlogs", workerAttempt.getContainerIdStr(), amContext.getUser().toString()), "log")._().td().a(url("angel/workerThreadStackPage/", workerAttempt.getId().toString()), "workerthreadstack")._().td().a(url("angel/workerCounterPage/", workerAttempt.getId().toString()), "workercounter")._();
            }
            tr._();
        }
    }
    tbody._()._()._();
}
Also used : Hamlet(org.apache.hadoop.yarn.webapp.hamlet.Hamlet) AMWorkerGroup(com.tencent.angel.master.worker.workergroup.AMWorkerGroup) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) Date(java.util.Date) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId) UnvalidIdStrException(com.tencent.angel.exception.UnvalidIdStrException) AMWorker(com.tencent.angel.master.worker.worker.AMWorker) WorkerAttempt(com.tencent.angel.master.worker.attempt.WorkerAttempt)

Example 8 with AMWorkerGroup

use of com.tencent.angel.master.worker.workergroup.AMWorkerGroup in project angel by Tencent.

The method render of the class WorkerGroupsBlock.

/**
 * Renders the worker-group overview table.
 *
 * <p>The {@code WORKERGROUP_STATE} request parameter is translated into a set of internal group
 * states by {@code transformToInternalState}; only groups whose current state is in that set get
 * a row. Each row shows id (linked to the group page), state, leader, start time, end time and
 * elapsed time. Note that the state column shows the request's state string, not the value of
 * {@code workerGroup.getState()}.
 *
 * @param html the block the page content is written to
 */
@Override
protected void render(Block html) {
    String requestedState = $(WORKERGROUP_STATE);
    set(TITLE, join("Angel WorkerGroups ", requestedState));

    TABLE<Hamlet> groupTable = html.table("#job");
    TR<THEAD<TABLE<Hamlet>>> headerRow = groupTable.thead().tr();
    headerRow.th(_TH, "id");
    headerRow.th(_TH, "state");
    headerRow.th(_TH, "leader");
    headerRow.th(_TH, "start time");
    headerRow.th(_TH, "end time");
    headerRow.th(_TH, "elapsed time");
    headerRow._()._();

    Set<AMWorkerGroupState> acceptedStates = transformToInternalState(requestedState);
    TBODY<TABLE<Hamlet>> body = groupTable.tbody();
    LOG.info("before get groups, group size is " + amContext.getWorkerManager().getWorkerGroupMap().size());
    for (AMWorkerGroupState accepted : acceptedStates) {
        LOG.info("s = " + accepted);
    }

    for (AMWorkerGroup group : amContext.getWorkerManager().getWorkerGroupMap().values()) {
        LOG.info("group state is " + group.getState());
        if (!acceptedStates.contains(group.getState())) {
            // Not in the requested state family: no row for this group.
            continue;
        }

        long launchTime = group.getLaunchTime();
        long finishTime = group.getFinishTime();

        // Zero means "not set": a running group is measured against the current time.
        long elapsedMs = 0;
        if (launchTime != 0) {
            if (finishTime != 0) {
                elapsedMs = finishTime - launchTime;
            } else {
                elapsedMs = System.currentTimeMillis() - launchTime;
            }
        }

        String groupId = group.getId().toString();
        TR<TBODY<TABLE<Hamlet>>> row = body.tr();
        row.td().a(url("angel/workerGroupPage/", groupId), groupId)._();
        row.td(requestedState);
        row.td(group.getLeader().toString());
        row.td(launchTime == 0 ? "N/A" : new Date(launchTime).toString());
        row.td(finishTime == 0 ? "N/A" : new Date(finishTime).toString());
        row.td(elapsedMs == 0 ? "N/A" : StringUtils.formatTime(elapsedMs));
        row._();
    }
    body._()._();
}
Also used : Hamlet(org.apache.hadoop.yarn.webapp.hamlet.Hamlet) AMWorkerGroup(com.tencent.angel.master.worker.workergroup.AMWorkerGroup) TBODY(org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY) Date(java.util.Date) TABLE(org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE) THEAD(org.apache.hadoop.yarn.webapp.hamlet.Hamlet.THEAD) AMWorkerGroupState(com.tencent.angel.master.worker.workergroup.AMWorkerGroupState)

Aggregations

AMWorkerGroup (com.tencent.angel.master.worker.workergroup.AMWorkerGroup)8 AMWorker (com.tencent.angel.master.worker.worker.AMWorker)5 Hamlet (org.apache.hadoop.yarn.webapp.hamlet.Hamlet)4 WorkerAttempt (com.tencent.angel.master.worker.attempt.WorkerAttempt)3 WorkerGroupId (com.tencent.angel.worker.WorkerGroupId)3 Date (java.util.Date)3 ServiceException (com.google.protobuf.ServiceException)2 AMTask (com.tencent.angel.master.task.AMTask)2 WorkerAttemptId (com.tencent.angel.worker.WorkerAttemptId)2 AngelException (com.tencent.angel.exception.AngelException)1 UnvalidIdStrException (com.tencent.angel.exception.UnvalidIdStrException)1 App (com.tencent.angel.master.app.App)1 PSAttempt (com.tencent.angel.master.ps.attempt.PSAttempt)1 AMParameterServer (com.tencent.angel.master.ps.ps.AMParameterServer)1 AMTaskManager (com.tencent.angel.master.task.AMTaskManager)1 WorkerManager (com.tencent.angel.master.worker.WorkerManager)1 AMWorkerEvent (com.tencent.angel.master.worker.worker.AMWorkerEvent)1 AMWorkerGroupEvent (com.tencent.angel.master.worker.workergroup.AMWorkerGroupEvent)1 AMWorkerGroupState (com.tencent.angel.master.worker.workergroup.AMWorkerGroupState)1 MasterClient (com.tencent.angel.psagent.client.MasterClient)1