use of com.tencent.angel.ps.PSAttemptId in project angel by Tencent.
the class ParameterServerThreadStackBlock method render.
@Override
protected void render(Block html) {
set(TITLE, join("Angel ParameterServerThread ", $(PSATTEMPT_ID)));
PSAttemptId psAttempttId = null;
try {
psAttempttId = new PSAttemptId($(PSATTEMPT_ID));
} catch (UnvalidIdStrException e) {
LOG.error("unvalid id string, ", e);
return;
}
try {
LOG.info("start init PSClient");
PSClient psClient = new PSClient(amContext, psAttempttId);
String info = psClient.getThreadStack();
html.pre()._(info)._();
} catch (IOException | ServiceException e) {
LOG.error("get thread stack from ps " + psAttempttId + " failed. ", e);
}
}
use of com.tencent.angel.ps.PSAttemptId in project angel by Tencent.
the class MasterRecoverTest method setup.
@Before
public void setup() throws Exception {
try {
// set basic configuration keys
Configuration conf = new Configuration();
conf.setBoolean("mapred.mapper.new-api", true);
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());
// use local deploy mode and dummy dataspliter
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 2);
// get a angel client
angelClient = AngelClientFactory.get(conf);
// add matrix
MatrixContext mMatrix = new MatrixContext();
mMatrix.setName("w1");
mMatrix.setRowNum(1);
mMatrix.setColNum(100000);
mMatrix.setMaxRowNumInBlock(1);
mMatrix.setMaxColNumInBlock(50000);
mMatrix.setRowType(RowType.T_INT_DENSE);
mMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
mMatrix.set(MatrixConf.MATRIX_HOGWILD, "true");
mMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
mMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_INT");
angelClient.addMatrix(mMatrix);
MatrixContext mMatrix2 = new MatrixContext();
mMatrix2.setName("w2");
mMatrix2.setRowNum(1);
mMatrix2.setColNum(100000);
mMatrix2.setMaxRowNumInBlock(1);
mMatrix2.setMaxColNumInBlock(50000);
mMatrix2.setRowType(RowType.T_DOUBLE_DENSE);
mMatrix2.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
mMatrix2.set(MatrixConf.MATRIX_HOGWILD, "false");
mMatrix2.set(MatrixConf.MATRIX_AVERAGE, "false");
mMatrix2.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_DOUBLE");
angelClient.addMatrix(mMatrix2);
angelClient.startPSServer();
angelClient.run();
Thread.sleep(5000);
group0Id = new WorkerGroupId(0);
worker0Id = new WorkerId(group0Id, 0);
worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
task0Id = new TaskId(0);
task1Id = new TaskId(1);
psId = new ParameterServerId(0);
psAttempt0Id = new PSAttemptId(psId, 0);
} catch (Exception x) {
LOG.error("setup failed ", x);
throw x;
}
}
use of com.tencent.angel.ps.PSAttemptId in project angel by Tencent.
the class MatrixMetaManagerTest method setup.
@BeforeClass
public static void setup() throws Exception {
try {
// set basic configuration keys
Configuration conf = new Configuration();
conf.setBoolean("mapred.mapper.new-api", true);
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());
// use local deploy mode and dummy dataspliter
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 2);
conf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10000);
// get a angel client
angelClient = AngelClientFactory.get(conf);
// add matrix
MatrixContext mMatrix = new MatrixContext();
mMatrix.setName("w1");
mMatrix.setRowNum(1);
mMatrix.setColNum(100000);
mMatrix.setMaxRowNumInBlock(1);
mMatrix.setMaxColNumInBlock(50000);
mMatrix.setRowType(RowType.T_INT_DENSE);
mMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
mMatrix.set(MatrixConf.MATRIX_HOGWILD, "true");
mMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
mMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_INT");
angelClient.addMatrix(mMatrix);
MatrixContext mMatrix2 = new MatrixContext();
mMatrix2.setName("w2");
mMatrix2.setRowNum(1);
mMatrix2.setColNum(100000);
mMatrix2.setMaxRowNumInBlock(1);
mMatrix2.setMaxColNumInBlock(50000);
mMatrix2.setRowType(RowType.T_DOUBLE_DENSE);
mMatrix2.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
mMatrix2.set(MatrixConf.MATRIX_HOGWILD, "false");
mMatrix2.set(MatrixConf.MATRIX_AVERAGE, "false");
mMatrix2.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_DOUBLE");
angelClient.addMatrix(mMatrix2);
angelClient.startPSServer();
angelClient.run();
Thread.sleep(5000);
group0Id = new WorkerGroupId(0);
worker0Id = new WorkerId(group0Id, 0);
worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
task0Id = new TaskId(0);
task1Id = new TaskId(1);
psId = new ParameterServerId(0);
psAttempt0Id = new PSAttemptId(psId, 0);
} catch (Exception x) {
LOG.error("setup failed ", x);
throw x;
}
}
use of com.tencent.angel.ps.PSAttemptId in project angel by Tencent.
the class PSManagerTest method testPSManager.
@Test
public void testPSManager() throws Exception {
try {
LOG.info("===========================testPSManager===============================");
AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
assertTrue(angelAppMaster != null);
ParameterServerManager psManager = angelAppMaster.getAppContext().getParameterServerManager();
Map<ParameterServerId, AMParameterServer> psMap = psManager.getParameterServerMap();
assertEquals(psMap.size(), 1);
AMParameterServer ps = psMap.get(psId);
assertTrue(ps != null);
assertEquals(ps.getId(), psId);
assertEquals(ps.getState(), AMParameterServerState.RUNNING);
Map<PSAttemptId, PSAttempt> psAttempts = ps.getPSAttempts();
assertEquals(psAttempts.size(), 1);
PSAttempt psAttempt = psAttempts.get(psAttempt0Id);
assertEquals(psAttempt.getInternalState(), PSAttemptStateInternal.RUNNING);
} catch (Exception x) {
LOG.error("run testPSManager failed ", x);
throw x;
}
}
use of com.tencent.angel.ps.PSAttemptId in project angel by Tencent.
the class PeriodHATest method testHA.
@Test
public void testHA() throws Exception {
ParameterServerId ps1Id = new ParameterServerId(0);
final ParameterServerId ps2Id = new ParameterServerId(1);
PSAttemptId ps1Attempt0Id = new PSAttemptId(ps1Id, 0);
PSAttemptId ps2Attempt0Id = new PSAttemptId(ps2Id, 0);
PSAttemptId ps2Attempt1Id = new PSAttemptId(ps2Id, 1);
ParameterServer ps1Attempt0 = LocalClusterContext.get().getPS(ps1Attempt0Id).getPS();
ParameterServer ps2Attempt0 = LocalClusterContext.get().getPS(ps2Attempt0Id).getPS();
WorkerId worker0Id = new WorkerId(new WorkerGroupId(0), 0);
WorkerAttemptId worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
Worker worker0 = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
TaskContext task0Context = worker0.getTaskManager().getRunningTask().get(task0Id).getTaskContext();
MatrixClient matrixClient = task0Context.getMatrix("w1");
int iterNum = 20;
for (int i = 0; i < iterNum; i++) {
DenseIntVector update = new DenseIntVector(dim);
for (int j = 0; j < dim; j++) {
update.set(j, 1);
}
update.setMatrixId(matrixClient.getMatrixId());
update.setRowId(0);
matrixClient.increment(update);
matrixClient.clock().get();
Thread.sleep(1000);
MatrixStorageManager ps1Storage = ps1Attempt0.getMatrixStorageManager();
ServerMatrix ps1w1 = ps1Storage.getMatrix(matrixClient.getMatrixId());
assertNotNull(ps1w1.getPartition(0));
assertNotNull(ps1w1.getPartition(1));
IntBuffer row0Part0 = ((ServerDenseIntRow) ps1w1.getRow(0, 0)).getData();
int part0Size = ps1w1.getRow(0, 0).size();
IntBuffer row0Part1 = ((ServerDenseIntRow) ps1w1.getRow(1, 0)).getData();
int part1Size = ps1w1.getRow(1, 0).size();
assertEquals(sum(row0Part0, part0Size), (i + 1) * dim / 2);
assertEquals(sum(row0Part1, part1Size), (i + 1) * dim / 2);
MatrixStorageManager ps2Storage = ps2Attempt0.getMatrixStorageManager();
ServerMatrix ps2w1 = ps2Storage.getMatrix(matrixClient.getMatrixId());
assertNotNull(ps2w1.getPartition(0));
assertNotNull(ps2w1.getPartition(1));
row0Part0 = ((ServerDenseIntRow) ps2w1.getRow(0, 0)).getData();
part0Size = ps2w1.getRow(0, 0).size();
row0Part1 = ((ServerDenseIntRow) ps2w1.getRow(1, 0)).getData();
part1Size = ps2w1.getRow(1, 0).size();
assertEquals(sum(row0Part0, part0Size), (i + 1) * dim / 2);
assertEquals(sum(row0Part1, part1Size), (i + 1) * dim / 2);
}
LOG.info("===================================================================ps2 failed");
ps2Attempt0.failed("exit");
for (int i = iterNum; i < 2 * iterNum; i++) {
DenseIntVector update = new DenseIntVector(dim);
for (int j = 0; j < dim; j++) {
update.set(j, 1);
}
update.setMatrixId(matrixClient.getMatrixId());
update.setRowId(0);
matrixClient.increment(update);
matrixClient.clock().get();
Thread.sleep(1000);
MatrixStorageManager ps1Storage = ps1Attempt0.getMatrixStorageManager();
ServerMatrix ps1w1 = ps1Storage.getMatrix(matrixClient.getMatrixId());
assertNotNull(ps1w1.getPartition(0));
assertNotNull(ps1w1.getPartition(1));
IntBuffer row0Part0 = ((ServerDenseIntRow) ps1w1.getRow(0, 0)).getData();
int part0Size = ps1w1.getRow(0, 0).size();
IntBuffer row0Part1 = ((ServerDenseIntRow) ps1w1.getRow(1, 0)).getData();
int part1Size = ps1w1.getRow(1, 0).size();
assertEquals(sum(row0Part0, part0Size), (i + 1) * dim / 2);
assertEquals(sum(row0Part1, part1Size), (i + 1) * dim / 2);
}
ParameterServer ps2Attempt = LocalClusterContext.get().getPS(ps2Attempt1Id).getPS();
for (int i = iterNum * 2; i < 3 * iterNum; i++) {
DenseIntVector update = new DenseIntVector(dim);
for (int j = 0; j < dim; j++) {
update.set(j, 1);
}
update.setMatrixId(matrixClient.getMatrixId());
update.setRowId(0);
matrixClient.increment(update);
matrixClient.clock().get();
Thread.sleep(1000);
MatrixStorageManager ps1Storage = ps1Attempt0.getMatrixStorageManager();
ServerMatrix ps1w1 = ps1Storage.getMatrix(matrixClient.getMatrixId());
assertNotNull(ps1w1.getPartition(0));
assertNotNull(ps1w1.getPartition(1));
IntBuffer row0Part0 = ((ServerDenseIntRow) ps1w1.getRow(0, 0)).getData();
int part0Size = ps1w1.getRow(0, 0).size();
IntBuffer row0Part1 = ((ServerDenseIntRow) ps1w1.getRow(1, 0)).getData();
int part1Size = ps1w1.getRow(1, 0).size();
assertEquals(sum(row0Part0, part0Size), (i + 1) * dim / 2);
assertEquals(sum(row0Part1, part1Size), (i + 1) * dim / 2);
MatrixStorageManager ps2Storage = ps2Attempt.getMatrixStorageManager();
ServerMatrix ps2w1 = ps2Storage.getMatrix(matrixClient.getMatrixId());
assertNotNull(ps2w1.getPartition(0));
assertNotNull(ps2w1.getPartition(1));
row0Part0 = ((ServerDenseIntRow) ps2w1.getRow(0, 0)).getData();
part0Size = ps2w1.getRow(0, 0).size();
row0Part1 = ((ServerDenseIntRow) ps2w1.getRow(1, 0)).getData();
part1Size = ps2w1.getRow(1, 0).size();
assertEquals(sum(row0Part0, part0Size), (i + 1) * dim / 2);
assertEquals(sum(row0Part1, part1Size), (i + 1) * dim / 2);
}
}
Aggregations