use of com.tencent.angel.worker.WorkerAttemptId in project angel by Tencent.
the class LocalContainerLauncher method launch.
/**
 * Launches the in-process component that matches the id carried by the event.
 *
 * <p>For a {@code PSAttemptId} a {@code LocalPS} is created, for a {@code WorkerAttemptId} a
 * {@code LocalWorker}. In both cases the "container launched" event is dispatched before
 * {@code start()} is called; if starting (or registering with {@code LocalClusterContext})
 * throws, the error is logged and the matching "container launch failed" event is dispatched.
 * Ids of any other type are ignored.
 *
 * @param event launcher event whose id selects what to launch
 */
@SuppressWarnings("unchecked")
private void launch(ContainerLauncherEvent event) {
  final Id attemptId = event.getId();

  if (attemptId instanceof PSAttemptId) {
    final PSAttemptId psAttemptId = (PSAttemptId) attemptId;
    final LocalPS localPs =
        new LocalPS(psAttemptId, context.getMasterService().getLocation(), context.getConf());

    // The launched notification is sent before the server is actually started.
    context.getEventHandler().handle(
        new PSAttemptEvent(PSAttemptEventType.PA_CONTAINER_LAUNCHED, psAttemptId));
    try {
      localPs.start();
      LocalClusterContext.get().addPS(psAttemptId, localPs);
    } catch (Exception startFailure) {
      LOG.error("launch ps failed.", startFailure);
      context.getEventHandler().handle(
          new PSAttemptEvent(PSAttemptEventType.PA_CONTAINER_LAUNCH_FAILED, psAttemptId));
    }
    return;
  }

  if (!(attemptId instanceof WorkerAttemptId)) {
    // Neither a PS attempt nor a worker attempt: nothing to launch.
    return;
  }

  final WorkerAttemptId workerAttemptId = (WorkerAttemptId) attemptId;
  final LocalWorker localWorker = new LocalWorker(
      context.getConf(),
      context.getApplicationId(),
      context.getUser(),
      workerAttemptId,
      context.getMasterService().getLocation(),
      0,
      false);

  // The launched notification is sent before the worker is actually started.
  context.getEventHandler().handle(
      new WorkerAttemptEvent(WorkerAttemptEventType.CONTAINER_LAUNCHED, workerAttemptId));
  try {
    localWorker.start();
    LocalClusterContext.get().addWorker(workerAttemptId, localWorker);
  } catch (Exception startFailure) {
    LOG.error("launch worker failed.", startFailure);
    context.getEventHandler().handle(
        new WorkerAttemptEvent(WorkerAttemptEventType.CONTAINER_LAUNCH_FAILED, workerAttemptId));
  }
}
use of com.tencent.angel.worker.WorkerAttemptId in project angel by Tencent.
the class WorkerManager method checkHBTimeOut.
/**
 * Scans the recorded last-heartbeat timestamps and handles every worker attempt whose last
 * heartbeat is older than {@code workerTimeOutMS} relative to the current time.
 *
 * <p>For each such attempt an error is logged, a diagnostics-update event with the message
 * "heartbeat timeout" and an {@code ERROR} worker-attempt event are dispatched, and the
 * attempt's entry is removed from the heartbeat map through the iterator.
 */
public void checkHBTimeOut() {
  Iterator<Map.Entry<WorkerAttemptId, Long>> heartbeats =
      workerLastHeartbeatTS.entrySet().iterator();
  // One timestamp for the whole scan so every entry is judged against the same instant.
  final long now = System.currentTimeMillis();

  for (; heartbeats.hasNext(); ) {
    Map.Entry<WorkerAttemptId, Long> heartbeat = heartbeats.next();
    if (now - heartbeat.getValue() <= workerTimeOutMS) {
      continue;
    }

    WorkerAttemptId timedOutAttempt = heartbeat.getKey();
    LOG.error(timedOutAttempt + " heartbeat timeout!!!");
    context.getEventHandler().handle(
        new WorkerAttemptDiagnosticsUpdateEvent(timedOutAttempt, "heartbeat timeout"));
    context.getEventHandler().handle(
        new WorkerAttemptEvent(WorkerAttemptEventType.ERROR, timedOutAttempt));
    // Remove via the iterator so the map is not modified behind the iteration's back.
    heartbeats.remove();
  }
}
use of com.tencent.angel.worker.WorkerAttemptId in project angel by Tencent.
the class IndexGetRowsHashTest method setup.
/**
 * Prepares the fixture for each test: builds a LOCAL-deploy-mode configuration, registers
 * eight hash-partitioned sparse matrices (double/float/int/long values, with int keys and
 * with long keys), starts the PS and the application, and initializes the PS/worker attempt
 * ids used by the tests.
 *
 * @throws Exception if configuring, registering a matrix, or starting the application fails
 */
@Before
public void setup() throws Exception {
  // set basic configuration keys
  Configuration conf = new Configuration();
  conf.setBoolean("mapred.mapper.new-api", true);
  conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
  conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());

  // use local deploy mode and dummy dataspliter
  conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
  conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
  conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
  conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
  conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
  conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
  conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_PS_NUMBER, 2);
  conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_MODEL_PARTITIONER_PARTITION_SIZE, 1000);
  conf.setBoolean("use.new.split", true);
  conf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10);
  conf.setInt(AngelConf.ANGEL_WORKER_HEARTBEAT_INTERVAL_MS, 1000);
  conf.setInt(AngelConf.ANGEL_PS_HEARTBEAT_INTERVAL_MS, 1000);
  conf.setInt(AngelConf.ANGEL_WORKER_MAX_ATTEMPTS, 1);
  conf.setInt(AngelConf.ANGEL_PS_MAX_ATTEMPTS, 1);

  // get an angel client
  angelClient = AngelClientFactory.get(conf);

  // sparse matrices: double, float, int, long values
  addSparseHashMatrix(SPARSE_DOUBLE_MAT, RowType.T_DOUBLE_SPARSE);
  addSparseHashMatrix(SPARSE_FLOAT_MAT, RowType.T_FLOAT_SPARSE);
  addSparseHashMatrix(SPARSE_INT_MAT, RowType.T_INT_SPARSE);
  addSparseHashMatrix(SPARSE_LONG_MAT, RowType.T_LONG_SPARSE);

  // sparse long-key matrices: double, float, int, long values
  addSparseHashMatrix(SPARSE_DOUBLE_LONG_MAT, RowType.T_DOUBLE_SPARSE_LONGKEY);
  addSparseHashMatrix(SPARSE_FLOAT_LONG_MAT, RowType.T_FLOAT_SPARSE_LONGKEY);
  addSparseHashMatrix(SPARSE_INT_LONG_MAT, RowType.T_INT_SPARSE_LONGKEY);
  addSparseHashMatrix(SPARSE_LONG_LONG_MAT, RowType.T_LONG_SPARSE_LONGKEY);

  // Start PS
  angelClient.startPSServer();
  // Start to run application
  angelClient.run();
  // NOTE(review): fixed 5 s wait with no readiness check; presumably gives the local
  // application time to come up before tests run - confirm.
  Thread.sleep(5000);

  psId = new ParameterServerId(0);
  psAttempt0Id = new PSAttemptId(psId, 0);
  WorkerGroupId workerGroupId = new WorkerGroupId(0);
  workerId = new WorkerId(workerGroupId, 0);
  workerAttempt0Id = new WorkerAttemptId(workerId, 0);
}

/**
 * Registers one matrix with {@code angelClient}, using {@code rowNum} rows, {@code partNum}
 * partitions and {@code HashPartitioner}.
 *
 * @param name matrix name
 * @param rowType row storage type of the matrix
 * @throws Exception if the client rejects the matrix
 */
private void addSparseHashMatrix(String name, RowType rowType) throws Exception {
  MatrixContext matrix = new MatrixContext();
  matrix.setName(name);
  matrix.setRowNum(rowNum);
  matrix.setPartitionNum(partNum);
  matrix.setPartitionerClass(HashPartitioner.class);
  matrix.setRowType(rowType);
  angelClient.addMatrix(matrix);
}
use of com.tencent.angel.worker.WorkerAttemptId in project angel by Tencent.
the class PSAgentTest method setup.
/**
 * One-time fixture for the test class: builds a LOCAL-deploy-mode configuration with the
 * "range" PS router type, registers two identically configured dense double matrices
 * ("w1" and "w2"), starts the PS and the application, and initializes the worker, task and
 * PS ids used by the tests.
 *
 * @throws Exception if any step fails; the failure is logged and then rethrown unchanged
 */
@BeforeClass
public static void setup() throws Exception {
  try {
    // set basic configuration keys
    Configuration conf = new Configuration();
    conf.setBoolean("mapred.mapper.new-api", true);
    conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
    conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());
    conf.set(AngelConf.ANGEL_PS_ROUTER_TYPE, "range");

    // use local deploy mode and dummy dataspliter
    conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
    conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
    conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
    conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
    conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
    conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
    conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 2);
    conf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10);
    conf.setInt(AngelConf.ANGEL_WORKER_HEARTBEAT_INTERVAL_MS, 1000);
    conf.setInt(AngelConf.ANGEL_PS_HEARTBEAT_INTERVAL_MS, 1000);
    conf.setInt(AngelConf.ANGEL_WORKER_MAX_ATTEMPTS, 1);
    conf.setInt(AngelConf.ANGEL_PS_MAX_ATTEMPTS, 1);

    // get an angel client
    angelClient = AngelClientFactory.get(conf);

    // add the two matrices; they differ only by name
    addDenseDoubleMatrix("w1");
    addDenseDoubleMatrix("w2");

    angelClient.startPSServer();
    angelClient.run();
    // NOTE(review): fixed 5 s wait with no readiness check; presumably gives the local
    // application time to come up before tests run - confirm.
    Thread.sleep(5000);

    group0Id = new WorkerGroupId(0);
    worker0Id = new WorkerId(group0Id, 0);
    worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
    task0Id = new TaskId(0);
    task1Id = new TaskId(1);
    psId = new ParameterServerId(0);
    psAttempt0Id = new PSAttemptId(psId, 0);
  } catch (Exception x) {
    // Log here so the cause is visible in the test log, then let the failure propagate.
    LOG.error("setup failed ", x);
    throw x;
  }
}

/**
 * Registers a 1 x 100000 {@code T_DOUBLE_DENSE} matrix with {@code angelClient}, using
 * 1 x 50000 blocks, {@code ColumnRangePartitioner}, and the oplog/hogwild/average matrix
 * options set below.
 *
 * @param name matrix name
 * @throws Exception if the client rejects the matrix
 */
private static void addDenseDoubleMatrix(String name) throws Exception {
  MatrixContext matrix = new MatrixContext();
  matrix.setName(name);
  matrix.setRowNum(1);
  matrix.setColNum(100000);
  matrix.setMaxRowNumInBlock(1);
  matrix.setMaxColNumInBlock(50000);
  matrix.setRowType(RowType.T_DOUBLE_DENSE);
  matrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
  matrix.set(MatrixConf.MATRIX_HOGWILD, "true");
  matrix.set(MatrixConf.MATRIX_AVERAGE, "false");
  matrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_DOUBLE");
  matrix.setPartitionerClass(ColumnRangePartitioner.class);
  angelClient.addMatrix(matrix);
}
use of com.tencent.angel.worker.WorkerAttemptId in project angel by Tencent.
the class UpdateRowsHashTest method setup.
/**
 * Prepares the fixture for each test: builds a LOCAL-deploy-mode configuration, registers
 * eight hash-partitioned sparse matrices (double/float/int/long values, with int keys and
 * with long keys), starts the PS and the application, and initializes the PS/worker attempt
 * ids used by the tests.
 *
 * @throws Exception if configuring, registering a matrix, or starting the application fails
 */
@Before
public void setup() throws Exception {
  // set basic configuration keys
  Configuration conf = new Configuration();
  conf.setBoolean("mapred.mapper.new-api", true);
  conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
  conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());

  // use local deploy mode and dummy dataspliter
  conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
  conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
  conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
  conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
  conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
  conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
  conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_PS_NUMBER, 2);
  conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_MODEL_PARTITIONER_PARTITION_SIZE, 1000);
  conf.setBoolean("use.new.split", false);
  conf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10);
  conf.setInt(AngelConf.ANGEL_WORKER_HEARTBEAT_INTERVAL_MS, 1000);
  conf.setInt(AngelConf.ANGEL_PS_HEARTBEAT_INTERVAL_MS, 1000);
  conf.setInt(AngelConf.ANGEL_WORKER_MAX_ATTEMPTS, 1);
  conf.setInt(AngelConf.ANGEL_PS_MAX_ATTEMPTS, 1);

  // get an angel client
  angelClient = AngelClientFactory.get(conf);

  // sparse matrices: double, float, int, long values
  addSparseHashMatrix(SPARSE_DOUBLE_MAT, RowType.T_DOUBLE_SPARSE);
  addSparseHashMatrix(SPARSE_FLOAT_MAT, RowType.T_FLOAT_SPARSE);
  addSparseHashMatrix(SPARSE_INT_MAT, RowType.T_INT_SPARSE);
  addSparseHashMatrix(SPARSE_LONG_MAT, RowType.T_LONG_SPARSE);

  // sparse long-key matrices: double, float, int, long values
  addSparseHashMatrix(SPARSE_DOUBLE_LONG_MAT, RowType.T_DOUBLE_SPARSE_LONGKEY);
  addSparseHashMatrix(SPARSE_FLOAT_LONG_MAT, RowType.T_FLOAT_SPARSE_LONGKEY);
  addSparseHashMatrix(SPARSE_INT_LONG_MAT, RowType.T_INT_SPARSE_LONGKEY);
  addSparseHashMatrix(SPARSE_LONG_LONG_MAT, RowType.T_LONG_SPARSE_LONGKEY);

  // Start PS
  angelClient.startPSServer();
  // Start to run application
  angelClient.run();
  // NOTE(review): fixed 5 s wait with no readiness check; presumably gives the local
  // application time to come up before tests run - confirm.
  Thread.sleep(5000);

  psId = new ParameterServerId(0);
  psAttempt0Id = new PSAttemptId(psId, 0);
  WorkerGroupId workerGroupId = new WorkerGroupId(0);
  workerId = new WorkerId(workerGroupId, 0);
  workerAttempt0Id = new WorkerAttemptId(workerId, 0);
}

/**
 * Registers one matrix with {@code angelClient}, using {@code rowNum} rows,
 * {@code HashPartitioner} and {@code partNum} partitions.
 *
 * @param name matrix name
 * @param rowType row storage type of the matrix
 * @throws Exception if the client rejects the matrix
 */
private void addSparseHashMatrix(String name, RowType rowType) throws Exception {
  MatrixContext matrix = new MatrixContext();
  matrix.setName(name);
  matrix.setRowNum(rowNum);
  matrix.setPartitionerClass(HashPartitioner.class);
  matrix.setPartitionNum(partNum);
  matrix.setRowType(rowType);
  angelClient.addMatrix(matrix);
}
Aggregations