use of com.tencent.angel.worker.WorkerAttemptId in project angel by Tencent.
the class MasterService method workerError.
/**
 * Handles a failure report sent by a worker attempt.
 *
 * <p>If the worker manager does not report the attempt as alive, the response carries
 * {@code W_SHUTDOWN} and nothing else happens. Otherwise the attempt is unregistered from the
 * worker manager, a diagnostics-update event (carrying the reported message) followed by an
 * {@code ERROR} attempt event are passed to the event handler, and the response carries
 * {@code W_SUCCESS}.
 *
 * @param controller rpc controller of protobuf; not read by this method
 * @param request contains the worker attempt id and the error message
 * @return response whose command is {@code W_SHUTDOWN} or {@code W_SUCCESS}
 * @throws ServiceException declared by the signature; this body does not throw it directly
 */
@SuppressWarnings("unchecked")
@Override
public WorkerErrorResponse workerError(RpcController controller, WorkerErrorRequest request) throws ServiceException {
  WorkerAttemptId failedAttemptId = ProtobufUtil.convertToId(request.getWorkerAttemptId());
  LOG.info("worker attempt " + failedAttemptId + " failed, details=" + request.getMsg());

  // An attempt that is not in the monitor set is simply told to shut down.
  if (!context.getWorkerManager().isAlive(failedAttemptId)) {
    return WorkerErrorResponse.newBuilder()
        .setCommand(WorkerCommandProto.W_SHUTDOWN)
        .build();
  }

  // Stop monitoring the attempt, then publish the diagnostics before the ERROR event.
  context.getWorkerManager().unRegister(failedAttemptId);
  context.getEventHandler().handle(
      new WorkerAttemptDiagnosticsUpdateEvent(failedAttemptId, request.getMsg()));
  context.getEventHandler().handle(
      new WorkerAttemptEvent(WorkerAttemptEventType.ERROR, failedAttemptId));

  return WorkerErrorResponse.newBuilder()
      .setCommand(WorkerCommandProto.W_SUCCESS)
      .build();
}
use of com.tencent.angel.worker.WorkerAttemptId in project angel by Tencent.
the class ProtobufUtil method convertToId.
/**
 * Converts a protobuf worker attempt id to a {@link WorkerAttemptId}, reusing a previously
 * converted instance when one is stored in {@code protoToWorkerAttemptIdMap}.
 *
 * @param idProto protobuf form of the worker attempt id
 * @return the cached id for {@code idProto}, or a newly built (and now cached) one
 */
public static WorkerAttemptId convertToId(WorkerAttemptIdProto idProto) {
  WorkerAttemptId cached = protoToWorkerAttemptIdMap.get(idProto);
  if (cached != null) {
    return cached;
  }

  // Cache miss: build the id from its worker id and attempt index, then remember it.
  WorkerAttemptId created =
      new WorkerAttemptId(convertToId(idProto.getWorkerId()), idProto.getAttemptIndex());
  protoToWorkerAttemptIdMap.put(idProto, created);
  return created;
}
use of com.tencent.angel.worker.WorkerAttemptId in project angel by Tencent.
the class IncrementRowHashTest method setup.
/**
 * Prepares the fixture: builds a LOCAL-deploy-mode configuration, registers eight single-row
 * hash-partitioned sparse matrices, starts the PS and the application, and initialises the
 * PS / worker id fields.
 *
 * @throws Exception if any client call fails or the sleep is interrupted
 */
@Before
public void setup() throws Exception {
  // Basic configuration keys.
  Configuration localConf = new Configuration();
  localConf.setBoolean("mapred.mapper.new-api", true);
  localConf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
  localConf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());

  // Local deploy mode with the dummy data splitter.
  localConf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
  localConf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
  localConf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());

  // Model, training-data and log locations under the temporary path.
  localConf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
  localConf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
  localConf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");

  // One worker group, one PS, one task; partition size, sync and heartbeat intervals.
  localConf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
  localConf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
  localConf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
  localConf.setInt(AngelConf.ANGEL_MODEL_PARTITIONER_PARTITION_SIZE, 1000);
  localConf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10);
  localConf.setInt(AngelConf.ANGEL_WORKER_HEARTBEAT_INTERVAL_MS, 1000);
  localConf.setInt(AngelConf.ANGEL_PS_HEARTBEAT_INTERVAL_MS, 1000);
  localConf.setBoolean("use.new.split", true);

  // A single attempt for both workers and PS.
  localConf.setInt(AngelConf.ANGEL_WORKER_MAX_ATTEMPTS, 1);
  localConf.setInt(AngelConf.ANGEL_PS_MAX_ATTEMPTS, 1);

  // Get an angel client for this configuration.
  angelClient = AngelClientFactory.get(localConf);

  // Int-key sparse matrices: double, float, int, long.
  registerHashMatrix(SPARSE_DOUBLE_MAT, RowType.T_DOUBLE_SPARSE);
  registerHashMatrix(SPARSE_FLOAT_MAT, RowType.T_FLOAT_SPARSE);
  registerHashMatrix(SPARSE_INT_MAT, RowType.T_INT_SPARSE);
  registerHashMatrix(SPARSE_LONG_MAT, RowType.T_LONG_SPARSE);

  // Long-key sparse matrices: double, float, int, long.
  registerHashMatrix(SPARSE_DOUBLE_LONG_MAT, RowType.T_DOUBLE_SPARSE_LONGKEY);
  registerHashMatrix(SPARSE_FLOAT_LONG_MAT, RowType.T_FLOAT_SPARSE_LONGKEY);
  registerHashMatrix(SPARSE_INT_LONG_MAT, RowType.T_INT_SPARSE_LONGKEY);
  registerHashMatrix(SPARSE_LONG_LONG_MAT, RowType.T_LONG_SPARSE_LONGKEY);

  // Start PS, then run the application.
  angelClient.startPSServer();
  angelClient.run();
  // NOTE(review): fixed 5 s pause, presumably to let the local cluster come up; confirm.
  Thread.sleep(5000);

  // Ids of the first PS attempt and the first worker attempt.
  psId = new ParameterServerId(0);
  psAttempt0Id = new PSAttemptId(psId, 0);
  workerId = new WorkerId(new WorkerGroupId(0), 0);
  workerAttempt0Id = new WorkerAttemptId(workerId, 0);
}

/**
 * Adds a one-row matrix with {@code partNum} partitions and the hash partitioner to the client.
 *
 * @param matrixName name to give the matrix
 * @param rowType row type of the matrix
 * @throws Exception if the client rejects the matrix
 */
private void registerHashMatrix(String matrixName, RowType rowType) throws Exception {
  MatrixContext matrix = new MatrixContext();
  matrix.setName(matrixName);
  matrix.setRowNum(1);
  matrix.setPartitionNum(partNum);
  matrix.setPartitionerClass(HashPartitioner.class);
  matrix.setRowType(rowType);
  angelClient.addMatrix(matrix);
}
use of com.tencent.angel.worker.WorkerAttemptId in project angel by Tencent.
the class IndexGetRowHashTest method setup.
/**
 * Prepares the fixture: builds a LOCAL-deploy-mode configuration, registers eight single-row
 * hash-partitioned sparse matrices (all but the first with a valid index number of
 * {@code modelSize}), starts the PS and the application, and initialises the PS / worker id
 * fields.
 *
 * @throws Exception if any client call fails or the sleep is interrupted
 */
@Before
public void setup() throws Exception {
  // Basic configuration keys.
  Configuration localConf = new Configuration();
  localConf.setBoolean("mapred.mapper.new-api", true);
  localConf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
  localConf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());

  // Local deploy mode with the dummy data splitter.
  localConf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
  localConf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
  localConf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());

  // Model, training-data and log locations under the temporary path.
  localConf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
  localConf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
  localConf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");

  // One worker group, one PS, one task; partition size, sync and heartbeat intervals.
  localConf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
  localConf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
  localConf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
  localConf.setInt(AngelConf.ANGEL_MODEL_PARTITIONER_PARTITION_SIZE, 100000);
  localConf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10);
  localConf.setInt(AngelConf.ANGEL_WORKER_HEARTBEAT_INTERVAL_MS, 1000);
  localConf.setInt(AngelConf.ANGEL_PS_HEARTBEAT_INTERVAL_MS, 1000);

  // A single attempt for both workers and PS.
  localConf.setInt(AngelConf.ANGEL_WORKER_MAX_ATTEMPTS, 1);
  localConf.setInt(AngelConf.ANGEL_PS_MAX_ATTEMPTS, 1);

  // Get an angel client for this configuration.
  angelClient = AngelClientFactory.get(localConf);

  // Int-key sparse matrices: double (no valid index number set), float, int, long.
  registerHashMatrix(SPARSE_DOUBLE_MAT, RowType.T_DOUBLE_SPARSE, false);
  registerHashMatrix(SPARSE_FLOAT_MAT, RowType.T_FLOAT_SPARSE, true);
  registerHashMatrix(SPARSE_INT_MAT, RowType.T_INT_SPARSE, true);
  registerHashMatrix(SPARSE_LONG_MAT, RowType.T_LONG_SPARSE, true);

  // Long-key sparse matrices: double, float, int, long.
  registerHashMatrix(SPARSE_DOUBLE_LONG_MAT, RowType.T_DOUBLE_SPARSE_LONGKEY, true);
  registerHashMatrix(SPARSE_FLOAT_LONG_MAT, RowType.T_FLOAT_SPARSE_LONGKEY, true);
  registerHashMatrix(SPARSE_INT_LONG_MAT, RowType.T_INT_SPARSE_LONGKEY, true);
  registerHashMatrix(SPARSE_LONG_LONG_MAT, RowType.T_LONG_SPARSE_LONGKEY, true);

  // Start PS, then run the application.
  angelClient.startPSServer();
  angelClient.run();
  // NOTE(review): fixed 5 s pause, presumably to let the local cluster come up; confirm.
  Thread.sleep(5000);

  // Ids of the first PS attempt and the first worker attempt.
  psId = new ParameterServerId(0);
  psAttempt0Id = new PSAttemptId(psId, 0);
  workerId = new WorkerId(new WorkerGroupId(0), 0);
  workerAttempt0Id = new WorkerAttemptId(workerId, 0);
}

/**
 * Adds a one-row matrix with {@code partNum} partitions and the hash partitioner to the client.
 *
 * @param matrixName name to give the matrix
 * @param rowType row type of the matrix
 * @param withValidIndexNum whether to set the valid index number to {@code modelSize}
 * @throws Exception if the client rejects the matrix
 */
private void registerHashMatrix(String matrixName, RowType rowType, boolean withValidIndexNum)
    throws Exception {
  MatrixContext matrix = new MatrixContext();
  matrix.setName(matrixName);
  matrix.setRowNum(1);
  matrix.setRowType(rowType);
  if (withValidIndexNum) {
    matrix.setValidIndexNum(modelSize);
  }
  matrix.setPartitionNum(partNum);
  matrix.setPartitionerClass(HashPartitioner.class);
  angelClient.addMatrix(matrix);
}
use of com.tencent.angel.worker.WorkerAttemptId in project angel by Tencent.
the class MasterService method workerDone.
/**
 * Handles a successful-completion report sent by a worker attempt.
 *
 * <p>If the worker manager does not report the attempt as alive, the response carries
 * {@code W_SHUTDOWN} and nothing else happens. Otherwise the attempt is unregistered from the
 * worker manager, a {@code DONE} attempt event is passed to the event handler, and the response
 * carries {@code W_SUCCESS}.
 *
 * @param controller rpc controller of protobuf; not read by this method
 * @param request contains the worker attempt id
 * @return response whose command is {@code W_SHUTDOWN} or {@code W_SUCCESS}
 * @throws ServiceException declared by the signature; this body does not throw it directly
 */
@SuppressWarnings("unchecked")
@Override
public WorkerDoneResponse workerDone(RpcController controller, WorkerDoneRequest request) throws ServiceException {
  WorkerAttemptId finishedAttemptId = ProtobufUtil.convertToId(request.getWorkerAttemptId());
  LOG.info("worker attempt " + finishedAttemptId + " is done");

  // An attempt that is not in the monitor set is simply told to shut down.
  if (!context.getWorkerManager().isAlive(finishedAttemptId)) {
    return WorkerDoneResponse.newBuilder()
        .setCommand(WorkerCommandProto.W_SHUTDOWN)
        .build();
  }

  // Stop monitoring the attempt and announce that it finished.
  context.getWorkerManager().unRegister(finishedAttemptId);
  context.getEventHandler().handle(
      new WorkerAttemptEvent(WorkerAttemptEventType.DONE, finishedAttemptId));

  return WorkerDoneResponse.newBuilder()
      .setCommand(WorkerCommandProto.W_SUCCESS)
      .build();
}
Aggregations