use of com.tencent.angel.psagent.client.MasterClient in project angel by Tencent.
the class PSAgent method initAndStart.
/**
 * Initializes every PSAgent component and then starts the background services.
 *
 * <p>The steps run in this order: create the location manager and record the master location,
 * build the master RPC client, build the local location, load matrix metadata from the master,
 * load PS locations from the master, create the transport/cache/storage components and the
 * consistency controller, mark initialization as finished, optionally start the heartbeat
 * thread, and finally start the services.
 *
 * @throws Exception if any of the initialization or start steps fails
 */
public void initAndStart() throws Exception {
  // Create the location manager and record the master location in it.
  locationManager = new PSAgentLocationManager(PSAgentContext.get());
  locationManager.setMasterLocation(masterLocation);

  // Build and initialize rpc client to master
  masterClient = new MasterClient();
  masterClient.init();

  // Build local location
  String localIp = NetUtils.getRealLocalIP();
  int port = NetUtils.chooseAListenPort(conf);
  location = new Location(localIp, port);

  // Initialize matrix meta information
  clockCache = new ClockCache();
  List<MatrixMeta> matrixMetas = masterClient.getMatrices();
  LOG.info("===========================PSAgent get matrices from master," + matrixMetas.size());
  this.matrixMetaManager = new PSAgentMatrixMetaManager(clockCache);
  matrixMetaManager.addMatrices(matrixMetas);

  // Get ps locations from master and put them to the location cache.
  Map<ParameterServerId, Location> psIdToLocMap = masterClient.getPSLocations();
  List<ParameterServerId> psIds = new ArrayList<>(psIdToLocMap.keySet());
  Collections.sort(psIds, new Comparator<ParameterServerId>() {
    @Override
    public int compare(ParameterServerId s1, ParameterServerId s2) {
      // Integer.compare avoids the overflow that plain subtraction can produce.
      return Integer.compare(s1.getIndex(), s2.getIndex());
    }
  });
  locationManager.setPsIds(psIds.toArray(new ParameterServerId[0]));
  for (ParameterServerId psId : psIds) {
    if (psIdToLocMap.containsKey(psId)) {
      locationManager.setPsLocation(psId, psIdToLocMap.get(psId));
    }
  }

  // Create the transport, cache and storage components (they are started further below).
  matrixTransClient = new MatrixTransportClient();
  matrixClientAdapter = new MatrixClientAdapter();
  opLogCache = new MatrixOpLogCache();
  matrixStorageManager = new MatrixStorageManager();
  matricesCache = new MatricesCache();

  int staleness = conf.getInt(AngelConf.ANGEL_STALENESS, AngelConf.DEFAULT_ANGEL_STALENESS);
  consistencyController = new ConsistencyController(staleness);
  consistencyController.init();

  // The flag is raised before the heartbeat thread and the services are started.
  psAgentInitFinishedFlag.set(true);

  // Start heartbeat thread if need
  if (needHeartBeat) {
    startHeartbeatThread();
  }

  // Start all services
  matrixTransClient.start();
  matrixClientAdapter.start();
  clockCache.start();
  opLogCache.start();
  matricesCache.start();
}
use of com.tencent.angel.psagent.client.MasterClient in project angel by Tencent.
the class WorkerTest method testMaster.
/**
 * Checks that the worker sees the same master location as the master service reports, and that
 * a worker registration issued through the PSAgent's master client is answered with
 * {@code W_SUCCESS}.
 *
 * @throws Exception any failure is logged and then rethrown so the test fails
 */
@Test
public void testMaster() throws Exception {
  try {
    // Resolve the local worker and the application master from the local cluster.
    localWorker = LocalClusterContext.get().getWorker(worker0Attempt0Id);
    worker = localWorker.getWorker();
    localMaster = LocalClusterContext.get().getMaster();
    master = localMaster.getAppMaster();
    assertTrue(master != null);

    // The location published by the master service must match what the worker holds.
    Location expectedMasterLoc = LocalClusterContext.get().getMaster().getAppMaster()
        .getAppContext().getMasterService().getLocation();
    assertEquals(expectedMasterLoc, worker.getMasterLocation());

    // Register through the master client and verify the returned command.
    MasterClient client = worker.getPSAgent().getMasterClient();
    WorkerMasterServiceProtos.WorkerRegisterResponse registerResponse = client.workerRegister();
    assertTrue(registerResponse != null);
    assertEquals(WorkerMasterServiceProtos.WorkerCommandProto.W_SUCCESS,
        registerResponse.getCommand());
  } catch (Exception e) {
    LOG.error("run testMaster failed ", e);
    throw e;
  }
}
use of com.tencent.angel.psagent.client.MasterClient in project angel by Tencent.
the class TaskContext method createMatrix.
/**
 * Asks the master to create a matrix and then fetches the meta of that matrix by name.
 *
 * <p>Both calls go through the master client obtained from the current worker's PSAgent.
 *
 * @param matrixContext the matrix context describing the matrix to create; its name is used to
 *        look up the resulting meta
 * @param timeOutMs the time out, in milliseconds, passed to the create call
 * @return the matrix meta returned by the master client for {@code matrixContext.getName()}
 * @throws ServiceException propagated from the master client calls
 * @throws TimeOutException propagated from the master client calls
 * @throws InterruptedException propagated from the master client calls
 * @throws IOException propagated from the master client calls
 * @throws ClassNotFoundException propagated from the master client calls
 */
public MatrixMeta createMatrix(MatrixContext matrixContext, long timeOutMs) throws ServiceException, TimeOutException, InterruptedException, IOException, ClassNotFoundException {
  final MasterClient client = WorkerContext.get().getPSAgent().getMasterClient();
  client.createMatrix(matrixContext, timeOutMs);
  // The name is read only after creation, exactly as the lookup key for the new meta.
  final String matrixName = matrixContext.getName();
  return client.getMatrix(matrixName);
}
use of com.tencent.angel.psagent.client.MasterClient in project angel by Tencent.
the class MatrixMetaManagerTest method testCreateMatrix.
/**
 * End-to-end check of matrix creation, update and release on the local cluster.
 *
 * <p>The test creates two matrices ("w3" and "w4") through the master client, then verifies the
 * metadata seen by the worker's PSAgent, by the application master and by the parameter server.
 * It then runs five increment/clock rounds on "w4" from two task clients, checks the iteration
 * and matrix clocks recorded by the master as well as the row sums, releases "w3", and finally
 * verifies that "w3" is gone while "w4" is still readable.
 *
 * @throws Exception any failure is logged and then rethrown so the test fails
 */
@Test
public void testCreateMatrix() throws Exception {
  try {
    LOG.info("===========================testCreateMatrix===============================");
    Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
    MasterClient masterClient = worker.getPSAgent().getMasterClient();

    // add matrix
    MatrixContext mMatrix = new MatrixContext();
    mMatrix.setName("w3");
    mMatrix.setRowNum(1);
    mMatrix.setColNum(100000);
    mMatrix.setMaxRowNumInBlock(1);
    mMatrix.setMaxColNumInBlock(50000);
    mMatrix.setRowType(RowType.T_DOUBLE_DENSE);
    mMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
    mMatrix.set(MatrixConf.MATRIX_HOGWILD, "true");
    mMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
    mMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_DOUBLE");
    masterClient.createMatrix(mMatrix, 10000);

    // The same context object is reconfigured and reused for the second matrix.
    mMatrix.setName("w4");
    mMatrix.setRowNum(1);
    mMatrix.setColNum(100000);
    mMatrix.setMaxRowNumInBlock(1);
    mMatrix.setMaxColNumInBlock(50000);
    mMatrix.setRowType(RowType.T_DOUBLE_DENSE);
    mMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
    mMatrix.set(MatrixConf.MATRIX_HOGWILD, "true");
    mMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
    mMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_DOUBLE");
    masterClient.createMatrix(mMatrix, 10000);

    // Metadata as seen by the worker's PSAgent.
    MatrixMeta w3Meta = worker.getPSAgent().getMatrixMetaManager().getMatrixMeta("w3");
    MatrixMeta w4Meta = worker.getPSAgent().getMatrixMetaManager().getMatrixMeta("w4");
    assertEquals(1, w3Meta.getRowNum());
    assertEquals(100000, w3Meta.getColNum());
    assertEquals(RowType.T_DOUBLE_DENSE, w3Meta.getRowType());
    assertEquals(1, w4Meta.getRowNum());
    assertEquals(100000, w4Meta.getColNum());
    assertEquals(RowType.T_DOUBLE_DENSE, w4Meta.getRowType());
    int w3Id = w3Meta.getId();
    int w4Id = w4Meta.getId();

    // Metadata as seen by the application master.
    AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
    assertNotNull(angelAppMaster);
    AMMatrixMetaManager matrixMetaManager = angelAppMaster.getAppContext().getMatrixMetaManager();
    MatrixMeta matrixw3Proto = matrixMetaManager.getMatrix("w3");
    MatrixMeta matrixw4Proto = matrixMetaManager.getMatrix("w4");
    assertNotNull(matrixw3Proto);
    assertNotNull(matrixw4Proto);
    assertEquals(1, matrixw3Proto.getRowNum());
    assertEquals(100000, matrixw3Proto.getColNum());
    assertEquals(2, matrixw3Proto.getPartitionMetas().size());

    Map<Integer, PartitionMeta> w3Parts = matrixw3Proto.getPartitionMetas();
    assertEquals(psId, w3Parts.get(0).getPss().get(0));
    assertEquals(0, w3Parts.get(0).getPartId());
    assertEquals(0, w3Parts.get(0).getStartRow());
    assertEquals(1, w3Parts.get(0).getEndRow());
    assertEquals(0, w3Parts.get(0).getStartCol());
    assertEquals(50000, w3Parts.get(0).getEndCol());
    assertEquals(1, w3Parts.get(1).getPartId());
    assertEquals(0, w3Parts.get(1).getStartRow());
    assertEquals(1, w3Parts.get(1).getEndRow());
    assertEquals(50000, w3Parts.get(1).getStartCol());
    assertEquals(100000, w3Parts.get(1).getEndCol());

    Map<Integer, PartitionMeta> w4Parts = matrixw4Proto.getPartitionMetas();
    assertEquals(psId, w4Parts.get(0).getPss().get(0));
    assertEquals(0, w4Parts.get(0).getPartId());
    assertEquals(0, w4Parts.get(0).getStartRow());
    assertEquals(1, w4Parts.get(0).getEndRow());
    assertEquals(0, w4Parts.get(0).getStartCol());
    assertEquals(50000, w4Parts.get(0).getEndCol());
    assertEquals(1, w4Parts.get(1).getPartId());
    assertEquals(0, w4Parts.get(1).getStartRow());
    assertEquals(1, w4Parts.get(1).getEndRow());
    assertEquals(50000, w4Parts.get(1).getStartCol());
    assertEquals(100000, w4Parts.get(1).getEndCol());

    // Partition metadata as seen by the parameter server.
    ParameterServer ps = LocalClusterContext.get().getPS(psAttempt0Id).getPS();
    PSMatrixMetaManager matrixPartManager = ps.getMatrixMetaManager();
    PartitionMeta w3Part0 = matrixPartManager.getPartMeta(w3Id, 0);
    PartitionMeta w3Part1 = matrixPartManager.getPartMeta(w3Id, 1);
    assertNotNull(w3Part0);
    assertNotNull(w3Part1);
    assertEquals(0, w3Part0.getPartitionKey().getStartRow());
    assertEquals(1, w3Part0.getPartitionKey().getEndRow());
    assertEquals(0, w3Part0.getPartitionKey().getStartCol());
    assertEquals(50000, w3Part0.getPartitionKey().getEndCol());
    assertEquals(0, w3Part1.getPartitionKey().getStartRow());
    assertEquals(1, w3Part1.getPartitionKey().getEndRow());
    assertEquals(50000, w3Part1.getPartitionKey().getStartCol());
    assertEquals(100000, w3Part1.getPartitionKey().getEndCol());

    PartitionMeta w4Part0 = matrixPartManager.getPartMeta(w4Id, 0);
    PartitionMeta w4Part1 = matrixPartManager.getPartMeta(w4Id, 1);
    assertNotNull(w4Part0);
    assertNotNull(w4Part1);
    assertEquals(0, w4Part0.getPartitionKey().getStartRow());
    assertEquals(1, w4Part0.getPartitionKey().getEndRow());
    assertEquals(0, w4Part0.getPartitionKey().getStartCol());
    assertEquals(50000, w4Part0.getPartitionKey().getEndCol());
    assertEquals(0, w4Part1.getPartitionKey().getStartRow());
    assertEquals(1, w4Part1.getPartitionKey().getEndRow());
    assertEquals(50000, w4Part1.getPartitionKey().getStartCol());
    assertEquals(100000, w4Part1.getPartitionKey().getEndCol());

    // Two task-scoped clients for "w4"; each adds an all-ones delta to row 0 per round.
    MatrixClient w4ClientForTask0 = worker.getPSAgent().getMatrixClient("w4", 0);
    MatrixClient w4ClientForTask1 = worker.getPSAgent().getMatrixClient("w4", 1);
    TaskContext task0Context = w4ClientForTask0.getTaskContext();
    TaskContext task1Context = w4ClientForTask1.getTaskContext();
    double[] delta = new double[100000];
    for (int i = 0; i < delta.length; i++) {
      delta[i] = 1.0;
    }

    for (int iterIndex = 0; iterIndex < 5; iterIndex++) {
      DenseDoubleVector row1 = (DenseDoubleVector) w4ClientForTask0.getRow(0);
      double sum1 = sum(row1.getValues());
      LOG.info("taskid=" + task0Context.getIndex() + ", matrixId=" + w4ClientForTask0.getMatrixId() + ", rowIndex=0, local row sum=" + sum1);
      DenseDoubleVector deltaRow1 = new DenseDoubleVector(delta.length, delta);
      deltaRow1.setMatrixId(w4ClientForTask0.getMatrixId());
      deltaRow1.setRowId(0);
      w4ClientForTask0.increment(deltaRow1);
      w4ClientForTask0.clock().get();
      task0Context.increaseEpoch();

      DenseDoubleVector row2 = (DenseDoubleVector) w4ClientForTask1.getRow(0);
      double sum2 = sum(row2.getValues());
      // Row 0 is the row being read here, so the message reports rowIndex=0.
      LOG.info("taskid=" + task1Context.getIndex() + ", matrixId=" + w4ClientForTask1.getMatrixId() + ", rowIndex=0, local row sum=" + sum2);
      DenseDoubleVector deltaRow2 = new DenseDoubleVector(delta.length, delta);
      deltaRow2.setMatrixId(w4ClientForTask1.getMatrixId());
      deltaRow2.setRowId(0);
      w4ClientForTask1.increment(deltaRow2);
      w4ClientForTask1.clock().get();
      task1Context.increaseEpoch();
    }

    // Iterations and matrix clocks recorded by the master for both tasks.
    AMTaskManager amTaskManager = angelAppMaster.getAppContext().getTaskManager();
    AMTask amTask0 = amTaskManager.getTask(task0Id);
    AMTask amTask1 = amTaskManager.getTask(task1Id);
    assertEquals(5, amTask0.getIteration());
    assertEquals(5, amTask1.getIteration());
    Int2IntOpenHashMap task0MatrixClocks = amTask0.getMatrixClocks();
    assertEquals(1, task0MatrixClocks.size());
    assertEquals(5, task0MatrixClocks.get(w4Id));
    Int2IntOpenHashMap task1MatrixClocks = amTask1.getMatrixClocks();
    assertEquals(1, task1MatrixClocks.size());
    assertEquals(5, task1MatrixClocks.get(w4Id));

    // 2 tasks * 5 rounds * 100000 columns of +1.0 each.
    DenseDoubleVector row1 = (DenseDoubleVector) w4ClientForTask0.getRow(0);
    double sum1 = sum(row1.getValues());
    assertEquals(1000000.0, sum1, 0.000001);
    DenseDoubleVector row2 = (DenseDoubleVector) w4ClientForTask1.getRow(0);
    double sum2 = sum(row2.getValues());
    assertEquals(1000000.0, sum2, 0.000001);

    // Release "w3" and wait a fixed period before checking that it has been removed.
    masterClient.releaseMatrix(w3Meta.getName());
    Thread.sleep(10000);
    matrixw3Proto = matrixMetaManager.getMatrix("w3");
    assertTrue(matrixw3Proto == null);
    MatrixStorageManager matrixStorageManager = LocalClusterContext.get().getPS(psAttempt0Id).getPS().getMatrixStorageManager();
    ServerMatrix sw3 = matrixStorageManager.getMatrix(w3Id);
    assertTrue(sw3 == null);

    // "w4" must still be usable and unchanged after "w3" was released.
    w4ClientForTask0.clock().get();
    w4ClientForTask1.clock().get();
    row1 = (DenseDoubleVector) w4ClientForTask0.getRow(0);
    sum1 = sum(row1.getValues());
    assertEquals(1000000.0, sum1, 0.000001);
    row2 = (DenseDoubleVector) w4ClientForTask1.getRow(0);
    sum2 = sum(row2.getValues());
    assertEquals(1000000.0, sum2, 0.000001);
  } catch (Exception x) {
    LOG.error("run testCreateMatrix failed ", x);
    throw x;
  }
}
use of com.tencent.angel.psagent.client.MasterClient in project angel by Tencent.
the class TaskManagerTest method testTaskMatrixClock.
/**
 * Verifies that clock updates sent through the worker's master client are reflected in the
 * per-task matrix clocks held by the application master's task manager.
 *
 * <p>For each of the two tasks the test sets the clocks of matrices "w1" and "w2" to 1, checks
 * both, then advances only "w1" to 2 and checks that "w2" stays at 1.
 *
 * @throws ServiceException any failure is logged and then rethrown so the test fails
 */
@Test
public void testTaskMatrixClock() throws ServiceException {
  try {
    LOG.info("===========================testTaskMatrixClock===============================");
    AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
    assertTrue(angelAppMaster != null);
    AMTaskManager taskManager = angelAppMaster.getAppContext().getTaskManager();
    Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
    PSAgentMatrixMetaManager matrixMetaManager = worker.getPSAgent().getMatrixMetaManager();
    int w1Id = matrixMetaManager.getMatrixId("w1");
    int w2Id = matrixMetaManager.getMatrixId("w2");
    MasterClient masterClient = worker.getPSAgent().getMasterClient();
    AMTask task0 = taskManager.getTask(task0Id);
    AMTask task1 = taskManager.getTask(task1Id);

    // Task 0: set both clocks to 1, then advance only w1.
    masterClient.updateClock(task0Id.getIndex(), w1Id, 1);
    masterClient.updateClock(task0Id.getIndex(), w2Id, 1);
    Int2IntOpenHashMap matrixClocks = task0.getMatrixClocks();
    assertEquals(2, matrixClocks.size());
    assertEquals(1, matrixClocks.get(w1Id));
    assertEquals(1, matrixClocks.get(w2Id));
    masterClient.updateClock(task0Id.getIndex(), w1Id, 2);
    assertEquals(2, task0.getMatrixClock(w1Id));
    assertEquals(1, task0.getMatrixClock(w2Id));

    // Task 1: same sequence.
    masterClient.updateClock(task1Id.getIndex(), w1Id, 1);
    masterClient.updateClock(task1Id.getIndex(), w2Id, 1);
    matrixClocks = task1.getMatrixClocks();
    assertEquals(2, matrixClocks.size());
    assertEquals(1, matrixClocks.get(w1Id));
    assertEquals(1, matrixClocks.get(w2Id));
    masterClient.updateClock(task1Id.getIndex(), w1Id, 2);
    assertEquals(2, task1.getMatrixClock(w1Id));
    assertEquals(1, task1.getMatrixClock(w2Id));
  } catch (Exception x) {
    LOG.error("run testTaskMatrixClock failed ", x);
    throw x;
  }
}
Aggregations