use of com.tencent.angel.master.worker.workergroup.AMWorkerGroup in project angel by Tencent.
the class AngelAppBlock method render.
@Override
protected void render(Block html) {
set(TITLE, join("Angel Application", amContext.getApplicationId()));
App app = amContext.getApp();
long elaspedTs = 0;
if (app.getLaunchTime() != 0 && app.getFinishTime() != 0) {
elaspedTs = app.getFinishTime() - app.getLaunchTime();
} else if (app.getLaunchTime() != 0 && app.getFinishTime() == 0) {
elaspedTs = System.currentTimeMillis() - app.getLaunchTime();
}
info("Job Overview")._("Job Name:", amContext.getApplicationName())._("State:", app.getExternAppState().toString())._("Started:", new Date(app.getLaunchTime()))._("Elapsed:", StringUtils.formatTime(elaspedTs))._("Environment:", "nomeaning" == null ? "#" : "angel/EnvironmentPage", "Runtime Information And Properties")._("Task Progress:", "nomeaning" == null ? "#" : "angel/ProgressPage", "progress")._("Master Threaddump:", "nomeaning" == null ? "#" : "angel/ExecutorsPage", "threaddump");
DIV<Hamlet> div = html._(InfoBlock.class).div(_INFO_WRAP);
TABLE<DIV<Hamlet>> table = div.table("#job");
table.tr().th(_TH, "module").th(_TH, "new").th(_TH, "running").th(_TH, "failed").th(_TH, "killed").th(_TH, "success")._();
int newGroupNum = 0;
int runningGroupNum = 0;
int failedGroupNum = 0;
int killedGroupNum = 0;
int successGroupNum = 0;
int newPSNum = 0;
int runningPSNum = 0;
int failedPSNum = 0;
int killedPSNum = 0;
int successPSNum = 0;
LOG.info("before compute worker state items");
if (amContext.getWorkerManager() != null) {
for (AMWorkerGroup group : amContext.getWorkerManager().getWorkerGroupMap().values()) {
switch(group.getState()) {
case NEW:
case INITED:
newGroupNum += 1;
break;
case RUNNING:
runningGroupNum += 1;
break;
case KILLED:
killedGroupNum += 1;
break;
case FAILED:
failedGroupNum += 1;
break;
case SUCCESS:
successGroupNum += 1;
break;
default:
break;
}
}
}
for (AMParameterServer ps : amContext.getParameterServerManager().getParameterServerMap().values()) {
for (PSAttempt psAttemp : ps.getPSAttempts().values()) {
switch(psAttemp.getInternalState()) {
case NEW:
case SCHEDULED:
case LAUNCHED:
newPSNum += 1;
break;
case RUNNING:
case COMMITTING:
runningPSNum += 1;
break;
case KILLED:
killedPSNum += 1;
break;
case FAILED:
failedPSNum += 1;
break;
case SUCCESS:
successPSNum += 1;
break;
default:
break;
}
}
}
table.tr().td("workergroups").td().a(url("angel/workerGroupsPage", "NEW"), String.valueOf(newGroupNum))._().td().a(url("angel/workerGroupsPage", "RUNNING"), String.valueOf(runningGroupNum))._().td().a(url("angel/workerGroupsPage", "FAILED"), String.valueOf(failedGroupNum))._().td().a(url("angel/workerGroupsPage", "KILLED"), String.valueOf(killedGroupNum))._().td().a(url("angel/workerGroupsPage", "SUCCESS"), String.valueOf(successGroupNum))._()._().tr().td("parameterservers").td().a(url("angel/parameterServersPage", "NEW"), String.valueOf(newPSNum))._().td().a(url("angel/parameterServersPage", "RUNNING"), String.valueOf(runningPSNum))._().td().a(url("angel/parameterServersPage", "FAILED"), String.valueOf(failedPSNum))._().td().a(url("angel/parameterServersPage", "KILLED"), String.valueOf(killedPSNum))._().td().a(url("angel/parameterServersPage", "SUCCESS"), String.valueOf(successPSNum))._()._();
table._();
div._();
}
use of com.tencent.angel.master.worker.workergroup.AMWorkerGroup in project angel by Tencent.
the class ProgressBlock method render.
/**
 * Renders the task progress page: one table row per task, ordered by task index, showing the
 * task id, its state, its iteration counter out of the total iteration number, and a link to
 * the container log of a worker attempt that holds the task.
 *
 * @param html the block the page is rendered into
 */
@Override
protected void render(Block html) {
  set(TITLE, join("Angel Progress"));
  Hamlet.TABLE<Hamlet.DIV<Hamlet>> table = html.div(_INFO_WRAP).table("#job");
  Hamlet.TR<Hamlet.THEAD<Hamlet.TABLE<Hamlet.DIV<Hamlet>>>> headTr = table.thead().tr();
  headTr.th(_TH, "taskid").th(_TH, "state").th(_TH, "current iteration").th(_TH, "workerlog");
  headTr._()._();
  Hamlet.TBODY<Hamlet.TABLE<Hamlet.DIV<Hamlet>>> tbody = table.tbody();

  // Index every task by a worker attempt that holds it. When several attempts hold the same
  // task, the attempt visited last replaces the earlier ones.
  Map<AMTask, WorkerAttempt> attemptByTask = new HashMap<>();
  // The application overview page treats a missing worker manager as "no workers yet";
  // do the same here instead of failing on a null reference.
  if (amContext.getWorkerManager() != null) {
    for (AMWorkerGroup amWorkerGroup : amContext.getWorkerManager().getWorkerGroupMap().values()) {
      for (AMWorker amWorker : amWorkerGroup.getWorkerSet()) {
        for (WorkerAttempt workerAttempt : amWorker.getAttempts().values()) {
          for (AMTask amTask : workerAttempt.getTaskMap().values()) {
            attemptByTask.put(amTask, workerAttempt);
          }
        }
      }
    }
  }

  List<AMTask> amTaskList = new ArrayList<AMTask>();
  for (AMTask amTask : amContext.getTaskManager().getTasks()) {
    amTaskList.add(amTask);
  }
  Collections.sort(amTaskList, new Comparator<AMTask>() {
    @Override
    public int compare(AMTask task1, AMTask task2) {
      // Integer.compare cannot overflow, unlike subtracting the two indexes.
      return Integer.compare(task1.getTaskId().getIndex(), task2.getTaskId().getIndex());
    }
  });

  for (AMTask task : amTaskList) {
    WorkerAttempt workerAttempt = attemptByTask.get(task);
    Hamlet.TR<Hamlet.TBODY<Hamlet.TABLE<Hamlet.DIV<Hamlet>>>> tr = tbody.tr();
    tr.td(task.getTaskId().toString())
        .td(task.getState().toString())
        .td(String.valueOf(task.getIteration()) + "/" + amContext.getTotalIterationNum());

    if (workerAttempt == null) {
      // No worker attempt holds this task, so there is no log to link to.
      tr.td("N/A");
    } else if (workerAttempt.getNodeHttpAddr() == null) {
      // The attempt has no node address, so a log URL cannot be built; show the id as text.
      tr.td(workerAttempt.getId().toString());
    } else {
      tr.td()
          .a(url(MRWebAppUtil.getYARNWebappScheme(), workerAttempt.getNodeHttpAddr(), "node",
              "containerlogs", workerAttempt.getContainerIdStr(), amContext.getUser().toString()),
              workerAttempt.getId().toString())
          ._();
    }
    tr._();
  }
  tbody._()._()._();
}
use of com.tencent.angel.master.worker.workergroup.AMWorkerGroup in project angel by Tencent.
the class WorkerManager method initWorkers.
/**
 * Creates every worker group with its workers and task ids, registers them in the lookup maps
 * of this manager and sends an INIT event to each new group.
 */
private void initWorkers() {
  // init all tasks , workers and worker groups and put them to the corresponding maps
  for (int groupIndex = 0; groupIndex < workergroupNumber; groupIndex++) {
    WorkerGroupId groupId = new WorkerGroupId(groupIndex);
    Map<WorkerId, AMWorker> groupWorkers = new HashMap<WorkerId, AMWorker>();
    WorkerId leader = null;

    for (int slot = 0; slot < workersInGroup; slot++) {
      int workerIndex = groupIndex * workersInGroup + slot;

      // Each worker takes the next run of task ids, cut short at the total task number.
      int nextTask = workerIndex * taskNumberInEachWorker;
      List<TaskId> taskIds = new ArrayList<TaskId>(taskNumberInEachWorker);
      int assigned = 0;
      while (assigned < taskNumberInEachWorker && nextTask < totalTaskNumber) {
        taskIds.add(new TaskId(nextTask));
        assigned++;
        nextTask++;
      }

      WorkerId workerId = new WorkerId(groupId, workerIndex);
      AMWorker worker = new AMWorker(workerId, context, taskIds);
      workersMap.put(workerId, worker);
      groupWorkers.put(workerId, worker);

      // The first worker created for a group is its leader.
      if (slot == 0) {
        leader = workerId;
      }
    }

    AMWorkerGroup group = new AMWorkerGroup(groupId, context, groupWorkers, leader, groupIndex);
    for (Map.Entry<WorkerId, AMWorker> entry : groupWorkers.entrySet()) {
      AMWorker worker = entry.getValue();
      findWorkerGroupMap.put(entry.getKey(), group);
      for (TaskId taskId : worker.getTaskIds()) {
        taskIdToWorkerMap.put(taskId, worker);
      }
    }
    workerGroupMap.put(groupId, group);
    group.handle(new AMWorkerGroupEvent(AMWorkerGroupEventType.INIT, groupId));
  }
  LOG.info("to init taskClockManager!");
}
use of com.tencent.angel.master.worker.workergroup.AMWorkerGroup in project angel by Tencent.
the class WorkerGroupBlock method render.
@Override
protected void render(Block html) {
String workerGroupIdSr = $(WORKERGROUP_ID);
if (workerGroupIdSr.isEmpty()) {
html.p()._("Sorry, can't do anything without a WorkerGroupId.")._();
return;
}
WorkerGroupId workerGroupId;
try {
workerGroupId = new WorkerGroupId(workerGroupIdSr);
} catch (UnvalidIdStrException e) {
LOG.error("unvalid id string, ", e);
return;
}
AMWorkerGroup workerGroup = amContext.getWorkerManager().getWorkerGroup(workerGroupId);
if (workerGroup == null) {
html.p()._("Sorry, can't find group " + workerGroupId)._();
return;
}
set(TITLE, join("Angel WorkerGroup ", $(WORKERGROUP_ID)));
html.h1(workerGroupIdSr);
TABLE<DIV<Hamlet>> table = html.div(_INFO_WRAP).table("#job");
TR<THEAD<TABLE<DIV<Hamlet>>>> headTr = table.thead().tr();
headTr.th(_TH, "id").th(_TH, "state").th(_TH, "node address").th(_TH, "start time").th(_TH, "end time").th(_TH, "elapsed time").th(_TH, "log").th(_TH, "threadstack").th(_TH, "workercounter");
headTr._()._();
TBODY<TABLE<DIV<Hamlet>>> tbody = table.tbody();
for (AMWorker worker : workerGroup.getWorkerSet()) {
Map<WorkerAttemptId, WorkerAttempt> workerAttempts = worker.getAttempts();
for (WorkerAttempt workerAttempt : workerAttempts.values()) {
TR<TBODY<TABLE<DIV<Hamlet>>>> tr = tbody.tr();
long elaspedTs = 0;
if (workerAttempt.getLaunchTime() != 0 && workerAttempt.getFinishTime() != 0) {
elaspedTs = workerAttempt.getFinishTime() - workerAttempt.getLaunchTime();
} else if (workerAttempt.getLaunchTime() != 0 && workerAttempt.getFinishTime() == 0) {
elaspedTs = System.currentTimeMillis() - workerAttempt.getLaunchTime();
}
if (workerAttempt.getNodeHttpAddr() == null) {
tr.td().a(url("angel/workerPage", workerAttempt.getId().toString()), workerAttempt.getId().toString())._().td(workerAttempt.getState().toString()).td("N/A").td((workerAttempt.getLaunchTime() == 0) ? "N/A" : new Date(workerAttempt.getLaunchTime()).toString()).td((workerAttempt.getFinishTime() == 0) ? "N/A" : new Date(workerAttempt.getFinishTime()).toString()).td((elaspedTs == 0) ? "N/A" : StringUtils.formatTime(elaspedTs)).td("N/A").td("N/A").td("N/A");
} else {
tr.td().a(url("angel/workerPage", workerAttempt.getId().toString()), workerAttempt.getId().toString())._().td(workerAttempt.getState().toString()).td().a(url(MRWebAppUtil.getYARNWebappScheme(), workerAttempt.getNodeHttpAddr()), workerAttempt.getNodeHttpAddr())._().td((workerAttempt.getLaunchTime() == 0) ? "N/A" : new Date(workerAttempt.getLaunchTime()).toString()).td((workerAttempt.getFinishTime() == 0) ? "N/A" : new Date(workerAttempt.getFinishTime()).toString()).td((elaspedTs == 0) ? "N/A" : StringUtils.formatTime(elaspedTs)).td().a(url(MRWebAppUtil.getYARNWebappScheme(), workerAttempt.getNodeHttpAddr(), "node", "containerlogs", workerAttempt.getContainerIdStr(), amContext.getUser().toString()), "log")._().td().a(url("angel/workerThreadStackPage/", workerAttempt.getId().toString()), "workerthreadstack")._().td().a(url("angel/workerCounterPage/", workerAttempt.getId().toString()), "workercounter")._();
}
tr._();
}
}
tbody._()._()._();
}
use of com.tencent.angel.master.worker.workergroup.AMWorkerGroup in project angel by Tencent.
the class WorkerGroupsBlock method render.
/**
 * Renders the list of worker groups whose state belongs to the set derived from the requested
 * state parameter: one table row per matching group with its id link, state, leader and
 * start/end/elapsed time.
 *
 * @param html the block the page is rendered into
 */
@Override
protected void render(Block html) {
  String requestedState = $(WORKERGROUP_STATE);
  set(TITLE, join("Angel WorkerGroups ", requestedState));

  TABLE<Hamlet> table = html.table("#job");
  TR<THEAD<TABLE<Hamlet>>> headRow = table.thead().tr();
  headRow.th(_TH, "id")
      .th(_TH, "state")
      .th(_TH, "leader")
      .th(_TH, "start time")
      .th(_TH, "end time")
      .th(_TH, "elapsed time");
  headRow._()._();

  Set<AMWorkerGroupState> wantedStates = transformToInternalState(requestedState);
  TBODY<TABLE<Hamlet>> tbody = table.tbody();

  LOG.info("before get groups, group size is " + amContext.getWorkerManager().getWorkerGroupMap().size());
  for (AMWorkerGroupState s : wantedStates) {
    LOG.info("s = " + s);
  }

  for (AMWorkerGroup workerGroup : amContext.getWorkerManager().getWorkerGroupMap().values()) {
    LOG.info("group state is " + workerGroup.getState());
    if (!wantedStates.contains(workerGroup.getState())) {
      continue;
    }

    // Elapsed time runs up to the finish time, or up to "now" while no finish time is
    // recorded; it stays 0 as long as the group has no launch time.
    long launchTime = workerGroup.getLaunchTime();
    long finishTime = workerGroup.getFinishTime();
    long elapsedTs = 0;
    if (launchTime != 0) {
      elapsedTs = (finishTime != 0 ? finishTime : System.currentTimeMillis()) - launchTime;
    }

    String groupId = workerGroup.getId().toString();
    String startText = launchTime == 0 ? "N/A" : new Date(launchTime).toString();
    String endText = finishTime == 0 ? "N/A" : new Date(finishTime).toString();
    String elapsedText = elapsedTs == 0 ? "N/A" : StringUtils.formatTime(elapsedTs);

    TR<TBODY<TABLE<Hamlet>>> row = tbody.tr();
    // NOTE(review): the "state" column shows the requested state parameter, not the group's
    // own state - confirm this is intended.
    row.td().a(url("angel/workerGroupPage/", groupId), groupId)._()
        .td(requestedState)
        .td(workerGroup.getLeader().toString())
        .td(startText)
        .td(endText)
        .td(elapsedText);
    row._();
  }
  tbody._()._();
}
Aggregations