use of com.tencent.angel.master.ps.attempt.PSAttempt in project angel by Tencent.
the class AngelAppBlock method render.
@Override
protected void render(Block html) {
set(TITLE, join("Angel Application", amContext.getApplicationId()));
App app = amContext.getApp();
long elaspedTs = 0;
if (app.getLaunchTime() != 0 && app.getFinishTime() != 0) {
elaspedTs = app.getFinishTime() - app.getLaunchTime();
} else if (app.getLaunchTime() != 0 && app.getFinishTime() == 0) {
elaspedTs = System.currentTimeMillis() - app.getLaunchTime();
}
info("Job Overview")._("Job Name:", amContext.getApplicationName())._("State:", app.getExternAppState().toString())._("Started:", new Date(app.getLaunchTime()))._("Elapsed:", StringUtils.formatTime(elaspedTs))._("Environment:", "nomeaning" == null ? "#" : "angel/EnvironmentPage", "Runtime Information And Properties")._("Task Progress:", "nomeaning" == null ? "#" : "angel/ProgressPage", "progress")._("Master Threaddump:", "nomeaning" == null ? "#" : "angel/ExecutorsPage", "threaddump");
DIV<Hamlet> div = html._(InfoBlock.class).div(_INFO_WRAP);
TABLE<DIV<Hamlet>> table = div.table("#job");
table.tr().th(_TH, "module").th(_TH, "new").th(_TH, "running").th(_TH, "failed").th(_TH, "killed").th(_TH, "success")._();
int newGroupNum = 0;
int runningGroupNum = 0;
int failedGroupNum = 0;
int killedGroupNum = 0;
int successGroupNum = 0;
int newPSNum = 0;
int runningPSNum = 0;
int failedPSNum = 0;
int killedPSNum = 0;
int successPSNum = 0;
LOG.info("before compute worker state items");
if (amContext.getWorkerManager() != null) {
for (AMWorkerGroup group : amContext.getWorkerManager().getWorkerGroupMap().values()) {
switch(group.getState()) {
case NEW:
case INITED:
newGroupNum += 1;
break;
case RUNNING:
runningGroupNum += 1;
break;
case KILLED:
killedGroupNum += 1;
break;
case FAILED:
failedGroupNum += 1;
break;
case SUCCESS:
successGroupNum += 1;
break;
default:
break;
}
}
}
for (AMParameterServer ps : amContext.getParameterServerManager().getParameterServerMap().values()) {
for (PSAttempt psAttemp : ps.getPSAttempts().values()) {
switch(psAttemp.getInternalState()) {
case NEW:
case SCHEDULED:
case LAUNCHED:
newPSNum += 1;
break;
case RUNNING:
case COMMITTING:
runningPSNum += 1;
break;
case KILLED:
killedPSNum += 1;
break;
case FAILED:
failedPSNum += 1;
break;
case SUCCESS:
successPSNum += 1;
break;
default:
break;
}
}
}
table.tr().td("workergroups").td().a(url("angel/workerGroupsPage", "NEW"), String.valueOf(newGroupNum))._().td().a(url("angel/workerGroupsPage", "RUNNING"), String.valueOf(runningGroupNum))._().td().a(url("angel/workerGroupsPage", "FAILED"), String.valueOf(failedGroupNum))._().td().a(url("angel/workerGroupsPage", "KILLED"), String.valueOf(killedGroupNum))._().td().a(url("angel/workerGroupsPage", "SUCCESS"), String.valueOf(successGroupNum))._()._().tr().td("parameterservers").td().a(url("angel/parameterServersPage", "NEW"), String.valueOf(newPSNum))._().td().a(url("angel/parameterServersPage", "RUNNING"), String.valueOf(runningPSNum))._().td().a(url("angel/parameterServersPage", "FAILED"), String.valueOf(failedPSNum))._().td().a(url("angel/parameterServersPage", "KILLED"), String.valueOf(killedPSNum))._().td().a(url("angel/parameterServersPage", "SUCCESS"), String.valueOf(successPSNum))._()._();
table._();
div._();
}
use of com.tencent.angel.master.ps.attempt.PSAttempt in project angel by Tencent.
the class ParameterServersBlock method render.
/**
 * Renders the table of parameter server attempts whose internal state is contained in
 * the state set derived from the {@code PARAMETERSERVER_STATE} request parameter.
 *
 * <p>Each matching attempt becomes one row with its id, state, node address, start time,
 * end time, elapsed time, and links to the container log and the thread stack page.
 * Attempts without a node HTTP address get "N/A" in the address, log and thread stack
 * columns. Times that are 0 are shown as "N/A".
 *
 * @param html the block the table is written to
 */
@Override
protected void render(Block html) {
  set(TITLE, join("Angel ParameterServers"));
  TABLE<Hamlet> table = html.table("#job");
  TR<THEAD<TABLE<Hamlet>>> headTr = table.thead().tr();
  headTr.th(_TH, "id")
      .th(_TH, "state")
      .th(_TH, "node address")
      .th(_TH, "start time")
      .th(_TH, "end time")
      .th(_TH, "elapsed time")
      .th(_TH, "log")
      .th(_TH, "threadstack");
  headTr._()._();

  Set<PSAttemptStateInternal> stateSet = transformToInternalState($(PARAMETERSERVER_STATE));
  TBODY<TABLE<Hamlet>> tbody = table.tbody();
  for (AMParameterServer ps : amContext.getParameterServerManager().getParameterServerMap().values()) {
    for (PSAttempt psAttempt : ps.getPSAttempts().values()) {
      if (!stateSet.contains(psAttempt.getInternalState())) {
        continue;
      }

      // A launch/finish time of 0 is treated as "not set yet".
      long launchTime = psAttempt.getLaunchTime();
      long finishTime = psAttempt.getFinishTime();
      long elapsedTs = 0;
      if (launchTime != 0) {
        // Still running: measure against the current time instead of the finish time.
        long endTime = finishTime != 0 ? finishTime : System.currentTimeMillis();
        elapsedTs = endTime - launchTime;
      }

      String attemptId = psAttempt.getId().toString();
      String started = launchTime == 0 ? "N/A" : new Date(launchTime).toString();
      String finished = finishTime == 0 ? "N/A" : new Date(finishTime).toString();
      // The elapsed value is a duration, so it must be formatted as one in both branches;
      // formatting it through java.util.Date would print a calendar date near the epoch.
      String elapsed = elapsedTs == 0 ? "N/A" : StringUtils.formatTime(elapsedTs);

      TR<TBODY<TABLE<Hamlet>>> tr = tbody.tr();
      String nodeHttpAddr = psAttempt.getNodeHttpAddr();
      if (nodeHttpAddr == null) {
        // No node address known: nothing to link to for address, log and thread stack.
        tr.td(attemptId)
            .td($(PARAMETERSERVER_STATE))
            .td("N/A")
            .td(started)
            .td(finished)
            .td(elapsed)
            .td("N/A")
            .td("N/A");
      } else {
        tr.td(attemptId)
            .td($(PARAMETERSERVER_STATE))
            .td().a(url(MRWebAppUtil.getYARNWebappScheme(), nodeHttpAddr), nodeHttpAddr)._()
            .td(started)
            .td(finished)
            .td(elapsed)
            .td().a(url(MRWebAppUtil.getYARNWebappScheme(), nodeHttpAddr, "node", "containerlogs",
                psAttempt.getContainerIdStr(), amContext.getUser().toString()), "log")._()
            .td().a(url("/angel/parameterServerThreadStackPage/", attemptId), "psthreadstack")._();
      }
      tr._();
    }
  }
  tbody._()._();
}
use of com.tencent.angel.master.ps.attempt.PSAttempt in project angel by Tencent.
the class AMParameterServer method addAndScheduleAttempt.
/**
 * Creates a new attempt for this parameter server, registers it as the running attempt
 * and asks the event handler to schedule it.
 *
 * <p>Creation and registration happen while holding the write lock; the schedule event
 * is dispatched after the lock has been released.
 */
@SuppressWarnings("unchecked")
private void addAndScheduleAttempt() {
  final PSAttempt newAttempt;

  writeLock.lock();
  try {
    newAttempt = createPSAttempt();
    attempts.put(newAttempt.getId(), newAttempt);
    LOG.info("scheduling " + newAttempt.getId());
    runningPSAttemptId = newAttempt.getId();
  } finally {
    writeLock.unlock();
  }

  // Dispatch outside the lock so the handler is not invoked while the lock is held.
  getContext()
      .getEventHandler()
      .handle(new PSAttemptEvent(PSAttemptEventType.PA_SCHEDULE, newAttempt.getId()));
}
use of com.tencent.angel.master.ps.attempt.PSAttempt in project angel by Tencent.
the class AMParameterServer method createPSAttempt.
/**
 * Builds a new attempt for this parameter server using the next attempt number and then
 * advances that number.
 *
 * @return the newly created attempt
 */
private PSAttempt createPSAttempt() {
  // Construct first: the counter only moves forward once the attempt object exists.
  PSAttempt created = new PSAttempt(ip, id, nextAttemptNumber, context);
  nextAttemptNumber += 1;
  return created;
}
use of com.tencent.angel.master.ps.attempt.PSAttempt in project angel by Tencent.
the class PSManagerTest method testPSDone.
/**
 * Verifies the master-side state after a parameter server attempt reports that it is done.
 *
 * <p>The test reports worker completion to the master, triggers an application COMMIT
 * event, reports PS completion, and then checks that the attempt and its parameter server
 * are in SUCCESS state with exactly one attempt and no running attempt.
 *
 * @throws Exception if any step of the scenario fails; the failure is logged and rethrown
 */
@SuppressWarnings("unchecked")
@Test
public void testPSDone() throws Exception {
  try {
    AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
    ParameterServer ps = LocalClusterContext.get().getPS(psAttempt0Id).getPS();
    Location masterLoc = ps.getMasterLocation();
    TConnection connection = TConnectionManager.getConnection(ps.getConf());
    MasterProtocol master = connection.getMasterService(masterLoc.getIp(), masterLoc.getPort());

    // Report the worker as done first.
    WorkerDoneRequest workerRequest = WorkerDoneRequest.newBuilder()
        .setWorkerAttemptId(ProtobufUtil.convertToIdProto(worker0Attempt0Id))
        .build();
    WorkerDoneResponse workerResponse = master.workerDone(null, workerRequest);
    assertEquals(WorkerCommandProto.W_SUCCESS, workerResponse.getCommand());
    // NOTE(review): presumably waits for the master to process the event asynchronously - confirm.
    Thread.sleep(5000);

    angelAppMaster.getAppContext().getEventHandler().handle(new AppEvent(AppEventType.COMMIT));

    PSDoneRequest request = PSDoneRequest.newBuilder()
        .setPsAttemptId(ProtobufUtil.convertToIdProto(psAttempt0Id))
        .build();
    master.psDone(null, request);
    // NOTE(review): presumably waits for the PS-done handling to complete - confirm.
    Thread.sleep(5000);

    ParameterServerManager psManager = angelAppMaster.getAppContext().getParameterServerManager();
    AMParameterServer amPs = psManager.getParameterServer(psId);
    PSAttempt psAttempt = amPs.getPSAttempt(psAttempt0Id);

    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure
    // messages report the right value as "expected".
    assertEquals(PSAttemptStateInternal.SUCCESS, psAttempt.getInternalState());
    assertEquals(AMParameterServerState.SUCCESS, amPs.getState());
    assertEquals(1, amPs.getNextAttemptNumber());
    assertNull(amPs.getRunningAttemptId());
    assertEquals(psAttempt0Id, amPs.getSuccessAttemptId());
    assertEquals(1, amPs.getPSAttempts().size());
  } catch (Exception x) {
    LOG.error("run testPSDone failed ", x);
    throw x;
  }
}
Aggregations