Use of com.tencent.angel.worker.WorkerGroupId in project angel by Tencent.
Class WorkerGroupBlock, method render.
@Override
protected void render(Block html) {
String workerGroupIdSr = $(WORKERGROUP_ID);
if (workerGroupIdSr.isEmpty()) {
html.p()._("Sorry, can't do anything without a WorkerGroupId.")._();
return;
}
WorkerGroupId workerGroupId;
try {
workerGroupId = new WorkerGroupId(workerGroupIdSr);
} catch (UnvalidIdStrException e) {
LOG.error("unvalid id string, ", e);
return;
}
AMWorkerGroup workerGroup = amContext.getWorkerManager().getWorkerGroup(workerGroupId);
if (workerGroup == null) {
html.p()._("Sorry, can't find group " + workerGroupId)._();
return;
}
set(TITLE, join("Angel WorkerGroup ", $(WORKERGROUP_ID)));
html.h1(workerGroupIdSr);
TABLE<DIV<Hamlet>> table = html.div(_INFO_WRAP).table("#job");
TR<THEAD<TABLE<DIV<Hamlet>>>> headTr = table.thead().tr();
headTr.th(_TH, "id").th(_TH, "state").th(_TH, "node address").th(_TH, "start time").th(_TH, "end time").th(_TH, "elapsed time").th(_TH, "log").th(_TH, "threadstack").th(_TH, "workercounter");
headTr._()._();
TBODY<TABLE<DIV<Hamlet>>> tbody = table.tbody();
for (AMWorker worker : workerGroup.getWorkerSet()) {
Map<WorkerAttemptId, WorkerAttempt> workerAttempts = worker.getAttempts();
for (WorkerAttempt workerAttempt : workerAttempts.values()) {
TR<TBODY<TABLE<DIV<Hamlet>>>> tr = tbody.tr();
long elaspedTs = 0;
if (workerAttempt.getLaunchTime() != 0 && workerAttempt.getFinishTime() != 0) {
elaspedTs = workerAttempt.getFinishTime() - workerAttempt.getLaunchTime();
} else if (workerAttempt.getLaunchTime() != 0 && workerAttempt.getFinishTime() == 0) {
elaspedTs = System.currentTimeMillis() - workerAttempt.getLaunchTime();
}
if (workerAttempt.getNodeHttpAddr() == null) {
tr.td().a(url("angel/workerPage", workerAttempt.getId().toString()), workerAttempt.getId().toString())._().td(workerAttempt.getState().toString()).td("N/A").td((workerAttempt.getLaunchTime() == 0) ? "N/A" : new Date(workerAttempt.getLaunchTime()).toString()).td((workerAttempt.getFinishTime() == 0) ? "N/A" : new Date(workerAttempt.getFinishTime()).toString()).td((elaspedTs == 0) ? "N/A" : StringUtils.formatTime(elaspedTs)).td("N/A").td("N/A").td("N/A");
} else {
tr.td().a(url("angel/workerPage", workerAttempt.getId().toString()), workerAttempt.getId().toString())._().td(workerAttempt.getState().toString()).td().a(url(MRWebAppUtil.getYARNWebappScheme(), workerAttempt.getNodeHttpAddr()), workerAttempt.getNodeHttpAddr())._().td((workerAttempt.getLaunchTime() == 0) ? "N/A" : new Date(workerAttempt.getLaunchTime()).toString()).td((workerAttempt.getFinishTime() == 0) ? "N/A" : new Date(workerAttempt.getFinishTime()).toString()).td((elaspedTs == 0) ? "N/A" : StringUtils.formatTime(elaspedTs)).td().a(url(MRWebAppUtil.getYARNWebappScheme(), workerAttempt.getNodeHttpAddr(), "node", "containerlogs", workerAttempt.getContainerIdStr(), amContext.getUser().toString()), "log")._().td().a(url("angel/workerThreadStackPage/", workerAttempt.getId().toString()), "workerthreadstack")._().td().a(url("angel/workerCounterPage/", workerAttempt.getId().toString()), "workercounter")._();
}
tr._();
}
}
tbody._()._()._();
}
Use of com.tencent.angel.worker.WorkerGroupId in project angel by Tencent.
Class GetNodeFeatsTest2, method setup.
/**
 * Prepares the test fixture: builds a configuration for the "LOCAL" deploy mode, registers one
 * matrix, starts the parameter server and the application, and creates the parameter-server and
 * worker ids used by the tests.
 *
 * @throws Exception if configuring or starting the application fails
 */
@Before
public void setup() throws Exception {
  // set basic configuration keys
  Configuration conf = new Configuration();
  conf.setBoolean("mapred.mapper.new-api", true);
  conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
  conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());

  // use local deploy mode and dummy dataspliter
  conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
  conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
  conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
  // Keep the model path under the same temporary directory as the data and log paths below
  // rather than on a machine-specific drive, so the test does not depend on the host layout.
  conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model_1");
  conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
  conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");

  // one worker group, one parameter server, one task per worker
  conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
  // conf.setInt(AngelConf.ANGEL_MODEL_PARTITIONER_PARTITION_SIZE, 1000);
  conf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10);
  conf.setInt(AngelConf.ANGEL_WORKER_HEARTBEAT_INTERVAL_MS, 1000);
  conf.setInt(AngelConf.ANGEL_PS_HEARTBEAT_INTERVAL_MS, 1000);
  conf.setInt(AngelConf.ANGEL_WORKER_MAX_ATTEMPTS, 1);
  conf.setInt(AngelConf.ANGEL_PS_MAX_ATTEMPTS, 1);

  // get a angel client
  angelClient = AngelClientFactory.get(conf);

  // add the node matrix: row type T_ANY_LONGKEY_SPARSE, 1 row x 10 columns, values of type Node
  MatrixContext nodeMatrix = new MatrixContext();
  nodeMatrix.setName(NODE);
  nodeMatrix.setRowType(RowType.T_ANY_LONGKEY_SPARSE);
  nodeMatrix.setRowNum(1);
  nodeMatrix.setColNum(10);
  nodeMatrix.setMaxColNumInBlock(5);
  nodeMatrix.setMaxRowNumInBlock(1);
  // nodeMatrix.setValidIndexNum(100);
  // nodeMatrix.setColNum(10000000000L);
  nodeMatrix.setValueType(Node.class);
  // nodeMatrix.setPartitionStorageClass(LongElementMapStorage.class);
  // nodeMatrix.setPartitionClass(CSRPartition.class);
  angelClient.addMatrix(nodeMatrix);

  // Start PS
  angelClient.startPSServer();
  // Start to run application
  angelClient.run();
  // NOTE(review): presumably gives the application time to come up before the tests run;
  // confirm whether a readiness check is available instead of a fixed delay.
  Thread.sleep(5000);

  psId = new ParameterServerId(0);
  psAttempt0Id = new PSAttemptId(psId, 0);
  WorkerGroupId workerGroupId = new WorkerGroupId(0);
  workerId = new WorkerId(workerGroupId, 0);
  workerAttempt0Id = new WorkerAttemptId(workerId, 0);
}
Use of com.tencent.angel.worker.WorkerGroupId in project angel by Tencent.
Class WorkerManager, method startAllWorker.
/**
 * Initializes all workers and sends a {@code SCHEDULE} event to every worker of every worker
 * group, then marks the manager as initialized.
 *
 * <p>Groups are looked up by consecutive ids {@code 0 .. workerGroupMap.size() - 1}. The whole
 * operation runs while holding the write lock.
 */
public void startAllWorker() {
  LOG.info("to start all workers.....");
  // Acquire the lock before entering the try block: if lock() itself failed inside the try,
  // the finally clause would call unlock() on a lock this thread does not hold and mask the
  // original failure.
  writeLock.lock();
  try {
    initWorkers();
    int groupCount = workerGroupMap.size();
    for (int groupIndex = 0; groupIndex < groupCount; groupIndex++) {
      AMWorkerGroup group = workerGroupMap.get(new WorkerGroupId(groupIndex));
      for (AMWorker worker : group.getWorkerSet()) {
        worker.handle(new AMWorkerEvent(AMWorkerEventType.SCHEDULE, worker.getId()));
      }
    }
    isInited = true;
  } finally {
    writeLock.unlock();
  }
}
Use of com.tencent.angel.worker.WorkerGroupId in project angel by Tencent.
Class WorkerManager, method getDetailWorkerExitMessage.
/**
 * Builds the diagnostic message reported when the number of killed and failed worker groups
 * exceeds the tolerated limit.
 *
 * <p>The message starts with the tolerated limit, followed by one section for the failed groups
 * and one for the killed groups; a section is omitted when its group collection is empty.
 *
 * @return the assembled message
 */
private String getDetailWorkerExitMessage() {
  StringBuilder sb = new StringBuilder();
  sb.append("killed and failed workergroup is over tolerate ").append(tolerateFailedGroup);
  // Separate the number from the next sentence; previously the two ran together.
  sb.append(". There are some Workers failed\n");
  if (!failedGroups.isEmpty()) {
    appendGroupDiagnostics(sb, "failed workergroups:", failedGroups);
  }
  if (!killedGroups.isEmpty()) {
    appendGroupDiagnostics(sb, "killed workergroups:", killedGroups);
  }
  return sb.toString();
}

/**
 * Appends a header line followed by one entry per group id, each consisting of the id and the
 * group's diagnostics joined by line breaks, and terminates the section with a line break.
 *
 * @param sb the builder to append to
 * @param header the section header
 * @param groupIds the ids of the groups to list; each id is looked up in {@code workerGroupMap}
 */
private void appendGroupDiagnostics(StringBuilder sb, String header,
    Iterable<? extends WorkerGroupId> groupIds) {
  sb.append(header);
  for (WorkerGroupId groupId : groupIds) {
    sb.append("\n").append(groupId).append(". ");
    sb.append(StringUtils.join("\n", workerGroupMap.get(groupId).getDiagnostics()));
  }
  sb.append("\n");
}
Aggregations