use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.
the class PSAgentTest method testLocationCache.
/**
 * Checks the location data exposed by the PS agent of worker attempt
 * {@code worker0Attempt0Id}: the master location, the location of {@code psId},
 * and the array of all PS ids.
 *
 * @throws Exception any exception raised by the test body; it is logged and rethrown
 */
@Test
public void testLocationCache() throws Exception {
  try {
    AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
    assertTrue(angelAppMaster != null);
    AMTaskManager taskManager = angelAppMaster.getAppContext().getTaskManager();
    assertTrue(taskManager != null);
    WorkerManager workerManager = angelAppMaster.getAppContext().getWorkerManager();
    assertTrue(workerManager != null);

    Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
    PSAgent psAgent = worker.getPSAgent();
    assertTrue(psAgent != null);
    PSAgentLocationManager locationCache = psAgent.getLocationManager();
    assertTrue(locationCache != null);

    // Dotted-quad IPv4 pattern, each octet restricted to 0-255.
    String ipRegex = "(2[5][0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})";
    Pattern pattern = Pattern.compile(ipRegex);

    // test master location
    Location masterLoc = locationCache.getMasterLocation();
    // Fail with an assertion instead of a NullPointerException when no location is returned.
    assertTrue(masterLoc != null);
    Matcher matcher = pattern.matcher(masterLoc.getIp());
    assertTrue(matcher.matches());
    assertTrue(masterLoc.getPort() >= 1 && masterLoc.getPort() <= 65535);

    // test ps location
    Location psLoc = locationCache.getPsLocation(psId);
    assertTrue(psLoc != null);
    matcher = pattern.matcher(psLoc.getIp());
    assertTrue(matcher.matches());
    assertTrue(psLoc.getPort() >= 1 && psLoc.getPort() <= 65535);
    // assertEquals(psLoc, locationCache.updateAndGetPSLocation(psId));

    // test all ps ids; JUnit expects (expected, actual) so failure messages read correctly
    ParameterServerId[] allPSIds = locationCache.getPsIds();
    assertEquals(1, allPSIds.length);
    assertEquals(psId, allPSIds[0]);
  } catch (Exception x) {
    LOG.error("run testLocationCache failed ", x);
    throw x;
  }
}
use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.
the class IncrementRowHashTest method setup.
/**
 * Prepares the test environment: builds a LOCAL-mode configuration with one worker
 * group, one PS and one task per worker, registers eight hash-partitioned sparse
 * matrices (int-key and long-key variants of double/float/int/long), starts the PS
 * and the application, and initialises the PS and worker ids used by the tests.
 *
 * @throws Exception if matrix registration, start-up, or the pause is interrupted/fails
 */
@Before
public void setup() throws Exception {
  // set basic configuration keys
  Configuration conf = new Configuration();
  conf.setBoolean("mapred.mapper.new-api", true);
  conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
  conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());

  // use local deploy mode and dummy dataspliter
  conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
  conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
  conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
  conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
  conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
  conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
  conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_MODEL_PARTITIONER_PARTITION_SIZE, 1000);
  conf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10);
  conf.setInt(AngelConf.ANGEL_WORKER_HEARTBEAT_INTERVAL_MS, 1000);
  conf.setInt(AngelConf.ANGEL_PS_HEARTBEAT_INTERVAL_MS, 1000);
  conf.setBoolean("use.new.split", true);
  conf.setInt(AngelConf.ANGEL_WORKER_MAX_ATTEMPTS, 1);
  conf.setInt(AngelConf.ANGEL_PS_MAX_ATTEMPTS, 1);

  // get a angel client
  angelClient = AngelClientFactory.get(conf);

  // int-key sparse matrices: double, float, int, long
  addSparseMatrix(SPARSE_DOUBLE_MAT, RowType.T_DOUBLE_SPARSE);
  addSparseMatrix(SPARSE_FLOAT_MAT, RowType.T_FLOAT_SPARSE);
  addSparseMatrix(SPARSE_INT_MAT, RowType.T_INT_SPARSE);
  addSparseMatrix(SPARSE_LONG_MAT, RowType.T_LONG_SPARSE);

  // long-key sparse matrices: double, float, int, long
  addSparseMatrix(SPARSE_DOUBLE_LONG_MAT, RowType.T_DOUBLE_SPARSE_LONGKEY);
  addSparseMatrix(SPARSE_FLOAT_LONG_MAT, RowType.T_FLOAT_SPARSE_LONGKEY);
  addSparseMatrix(SPARSE_INT_LONG_MAT, RowType.T_INT_SPARSE_LONGKEY);
  addSparseMatrix(SPARSE_LONG_LONG_MAT, RowType.T_LONG_SPARSE_LONGKEY);

  // Start PS
  angelClient.startPSServer();
  // Start to run application
  angelClient.run();
  // NOTE(review): fixed 5 s pause, presumably to let the local cluster come up — confirm
  Thread.sleep(5000);

  psId = new ParameterServerId(0);
  psAttempt0Id = new PSAttemptId(psId, 0);
  WorkerGroupId workerGroupId = new WorkerGroupId(0);
  workerId = new WorkerId(workerGroupId, 0);
  workerAttempt0Id = new WorkerAttemptId(workerId, 0);
}

/**
 * Registers a single-row, hash-partitioned matrix with the Angel client.
 *
 * @param name matrix name
 * @param rowType row type of the matrix
 * @throws Exception if the client rejects the matrix
 */
private void addSparseMatrix(String name, RowType rowType) throws Exception {
  MatrixContext matrix = new MatrixContext();
  matrix.setName(name);
  matrix.setRowNum(1);
  matrix.setPartitionNum(partNum);
  matrix.setPartitionerClass(HashPartitioner.class);
  matrix.setRowType(rowType);
  angelClient.addMatrix(matrix);
}
use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.
the class IndexGetRowHashTest method setup.
/**
 * Prepares the test environment: builds a LOCAL-mode configuration with one worker
 * group, one PS and one task per worker, registers eight hash-partitioned sparse
 * matrices (int-key and long-key variants of double/float/int/long), starts the PS
 * and the application, and initialises the PS and worker ids used by the tests.
 *
 * @throws Exception if matrix registration, start-up, or the pause is interrupted/fails
 */
@Before
public void setup() throws Exception {
  // set basic configuration keys
  Configuration conf = new Configuration();
  conf.setBoolean("mapred.mapper.new-api", true);
  conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
  conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());

  // use local deploy mode and dummy dataspliter
  conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
  conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
  conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
  conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
  conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
  conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
  conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_MODEL_PARTITIONER_PARTITION_SIZE, 100000);
  conf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10);
  conf.setInt(AngelConf.ANGEL_WORKER_HEARTBEAT_INTERVAL_MS, 1000);
  conf.setInt(AngelConf.ANGEL_PS_HEARTBEAT_INTERVAL_MS, 1000);
  conf.setInt(AngelConf.ANGEL_WORKER_MAX_ATTEMPTS, 1);
  conf.setInt(AngelConf.ANGEL_PS_MAX_ATTEMPTS, 1);

  // get a angel client
  angelClient = AngelClientFactory.get(conf);

  // int-key sparse matrices: double, float, int, long
  // NOTE(review): the double matrix is the only one registered without a valid index
  // number; kept as-is — confirm whether that omission is intentional.
  addSparseMatrix(SPARSE_DOUBLE_MAT, RowType.T_DOUBLE_SPARSE, false);
  addSparseMatrix(SPARSE_FLOAT_MAT, RowType.T_FLOAT_SPARSE, true);
  addSparseMatrix(SPARSE_INT_MAT, RowType.T_INT_SPARSE, true);
  addSparseMatrix(SPARSE_LONG_MAT, RowType.T_LONG_SPARSE, true);

  // long-key sparse matrices: double, float, int, long
  addSparseMatrix(SPARSE_DOUBLE_LONG_MAT, RowType.T_DOUBLE_SPARSE_LONGKEY, true);
  addSparseMatrix(SPARSE_FLOAT_LONG_MAT, RowType.T_FLOAT_SPARSE_LONGKEY, true);
  addSparseMatrix(SPARSE_INT_LONG_MAT, RowType.T_INT_SPARSE_LONGKEY, true);
  addSparseMatrix(SPARSE_LONG_LONG_MAT, RowType.T_LONG_SPARSE_LONGKEY, true);

  // Start PS
  angelClient.startPSServer();
  // Start to run application
  angelClient.run();
  // NOTE(review): fixed 5 s pause, presumably to let the local cluster come up — confirm
  Thread.sleep(5000);

  psId = new ParameterServerId(0);
  psAttempt0Id = new PSAttemptId(psId, 0);
  WorkerGroupId workerGroupId = new WorkerGroupId(0);
  workerId = new WorkerId(workerGroupId, 0);
  workerAttempt0Id = new WorkerAttemptId(workerId, 0);
}

/**
 * Registers a single-row, hash-partitioned matrix with the Angel client.
 *
 * @param name matrix name
 * @param rowType row type of the matrix
 * @param withValidIndexNum whether to set the valid index number to {@code modelSize}
 * @throws Exception if the client rejects the matrix
 */
private void addSparseMatrix(String name, RowType rowType, boolean withValidIndexNum)
    throws Exception {
  MatrixContext matrix = new MatrixContext();
  matrix.setName(name);
  matrix.setRowNum(1);
  matrix.setRowType(rowType);
  if (withValidIndexNum) {
    matrix.setValidIndexNum(modelSize);
  }
  matrix.setPartitionNum(partNum);
  matrix.setPartitionerClass(HashPartitioner.class);
  angelClient.addMatrix(matrix);
}
use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.
the class MasterService method getAllPSLocation.
/**
 * Collects the location of every parameter server id reported by the location manager.
 *
 * @param controller rpc controller of protobuf
 * @param request the request message; not read by this method
 * @return a response holding one PS location entry per PS id
 */
@Override
public GetAllPSLocationResponse getAllPSLocation(RpcController controller, GetAllPSLocationRequest request) {
  LocationManager locations = context.getLocationManager();
  GetAllPSLocationResponse.Builder response = GetAllPSLocationResponse.newBuilder();
  for (ParameterServerId id : locations.getPsIds()) {
    response.addPsLocations(ProtobufUtil.convertToPSLocProto(id, locations.getPsLocation(id)));
  }
  return response.build();
}
use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.
the class MasterService method killPS.
/**
 * Handles a client request to kill a parameter server attempt: unregisters the attempt
 * from the parameter server manager, then dispatches a diagnostics update event and a
 * {@code PA_FAILMSG} attempt event for it.
 *
 * @param controller rpc controller of protobuf
 * @param request carries the PS id and attempt index of the attempt to kill
 * @return the default {@link KillPSResponse} instance
 * @throws ServiceException declared by the service interface
 */
@Override
public KillPSResponse killPS(RpcController controller, KillPSRequest request) throws ServiceException {
  ParameterServerId targetPs = new ParameterServerId(request.getPsId());
  PSAttemptId targetAttempt = new PSAttemptId(targetPs, request.getAttemptIndex());

  // remove this parameter server attempt from monitor set
  context.getParameterServerManager().unRegister(targetAttempt);

  // record why the attempt is ending before signalling the failure itself
  PSAttemptDiagnosticsUpdateEvent diagnostics =
      new PSAttemptDiagnosticsUpdateEvent("kill by client", targetAttempt);
  context.getEventHandler().handle(diagnostics);

  PSAttemptEvent failure = new PSAttemptEvent(PSAttemptEventType.PA_FAILMSG, targetAttempt);
  context.getEventHandler().handle(failure);

  return KillPSResponse.getDefaultInstance();
}
Aggregations