use of com.tencent.angel.ml.matrix.transport.PSLocation in project angel by Tencent.
the class MasterService method getPSLocation.
/**
* get a specific parameter server location.
* @param controller rpc controller of protobuf
* @param request parameter server id
* @throws ServiceException
*/
@Override
public GetPSLocationReponse getPSLocation(RpcController controller, GetPSLocationRequest request) throws ServiceException {
GetPSLocationReponse.Builder resBuilder = GetPSLocationReponse.newBuilder();
ParameterServerId psId = ProtobufUtil.convertToId(request.getPsId());
Location psLocation = context.getLocationManager().getPsLocation(psId);
if (psLocation == null) {
resBuilder.setPsLocation(PSLocationProto.newBuilder().setPsId(request.getPsId()).setPsStatus(PSStatus.PS_NOTREADY).build());
} else {
resBuilder.setPsLocation(ProtobufUtil.convertToPSLocProto(psId, psLocation));
}
return resBuilder.build();
}
use of com.tencent.angel.ml.matrix.transport.PSLocation in project angel by Tencent.
the class MasterService method getPartLocation.
/**
* Get locations for a partition
* @param controller
* @param request
* @return
* @throws ServiceException
*/
@Override
public GetPartLocationResponse getPartLocation(RpcController controller, GetPartLocationRequest request) throws ServiceException {
GetPartLocationResponse.Builder builder = GetPartLocationResponse.newBuilder();
List<ParameterServerId> psIds = context.getMatrixMetaManager().getPss(request.getMatrixId(), request.getPartId());
if (psIds != null) {
int size = psIds.size();
for (int i = 0; i < size; i++) {
Location psLocation = context.getLocationManager().getPsLocation(psIds.get(i));
if (psLocation == null) {
builder.addLocations((PSLocationProto.newBuilder().setPsId(ProtobufUtil.convertToIdProto(psIds.get(i))).setPsStatus(PSStatus.PS_NOTREADY).build()));
} else {
builder.addLocations(ProtobufUtil.convertToPSLocProto(psIds.get(i), psLocation));
}
}
}
return builder.build();
}
use of com.tencent.angel.ml.matrix.transport.PSLocation in project angel by Tencent.
the class PSFailedReportTest method testPSFailedReport.
@Test
public void testPSFailedReport() throws Exception {
ParameterServerId ps1Id = new ParameterServerId(0);
final ParameterServerId ps2Id = new ParameterServerId(1);
PSAttemptId ps1Attempt0Id = new PSAttemptId(ps1Id, 0);
PSAttemptId ps2Attempt0Id = new PSAttemptId(ps2Id, 0);
PSAttemptId ps2Attempt1Id = new PSAttemptId(ps2Id, 1);
ParameterServer ps1Attempt0 = LocalClusterContext.get().getPS(ps1Attempt0Id).getPS();
ParameterServer ps2Attempt0 = LocalClusterContext.get().getPS(ps2Attempt0Id).getPS();
WorkerId worker0Id = new WorkerId(new WorkerGroupId(0), 0);
WorkerAttemptId worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
Worker worker0 = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
TaskContext task0Context = worker0.getTaskManager().getRunningTask().get(task0Id).getTaskContext();
MatrixClient matrixClient = task0Context.getMatrix("w1");
int iterNum = 20;
for (int i = 0; i < iterNum; i++) {
DenseIntVector update = new DenseIntVector(dim);
for (int j = 0; j < dim; j++) {
update.set(j, 1);
}
update.setMatrixId(matrixClient.getMatrixId());
update.setRowId(0);
matrixClient.increment(update);
matrixClient.clock().get();
Thread.sleep(1000);
MatrixStorageManager ps1Storage = ps1Attempt0.getMatrixStorageManager();
ServerMatrix ps1w1 = ps1Storage.getMatrix(matrixClient.getMatrixId());
assertNotNull(ps1w1.getPartition(0));
assertNotNull(ps1w1.getPartition(1));
IntBuffer row0Part0 = ((ServerDenseIntRow) ps1w1.getRow(0, 0)).getData();
int part0Size = ps1w1.getRow(0, 0).size();
IntBuffer row0Part1 = ((ServerDenseIntRow) ps1w1.getRow(1, 0)).getData();
int part1Size = ps1w1.getRow(1, 0).size();
assertEquals(sum(row0Part0, part0Size), (i + 1) * dim / 2);
assertEquals(sum(row0Part1, part1Size), (i + 1) * dim / 2);
MatrixStorageManager ps2Storage = ps2Attempt0.getMatrixStorageManager();
ServerMatrix ps2w1 = ps2Storage.getMatrix(matrixClient.getMatrixId());
assertNotNull(ps2w1.getPartition(0));
assertNotNull(ps2w1.getPartition(1));
row0Part0 = ((ServerDenseIntRow) ps2w1.getRow(0, 0)).getData();
part0Size = ps2w1.getRow(0, 0).size();
row0Part1 = ((ServerDenseIntRow) ps2w1.getRow(1, 0)).getData();
part1Size = ps2w1.getRow(1, 0).size();
assertEquals(sum(row0Part0, part0Size), (i + 1) * dim / 2);
assertEquals(sum(row0Part1, part1Size), (i + 1) * dim / 2);
}
LOG.info("===================================================================ps2 failed");
HashMap<PSLocation, Integer> failedCounters = new HashMap<>();
PSLocation psLoc = new PSLocation(ps2Id, ps2Attempt0.getLocationManager().getPsLocation(ps2Id));
failedCounters.put(psLoc, 10000);
worker0.getPSAgent().getMasterClient().psFailedReport(failedCounters);
Thread.sleep(20000);
for (int i = iterNum; i < 2 * iterNum; i++) {
DenseIntVector update = new DenseIntVector(dim);
for (int j = 0; j < dim; j++) {
update.set(j, 1);
}
update.setMatrixId(matrixClient.getMatrixId());
update.setRowId(0);
matrixClient.increment(update);
matrixClient.clock().get();
Thread.sleep(1000);
MatrixStorageManager ps1Storage = ps1Attempt0.getMatrixStorageManager();
ServerMatrix ps1w1 = ps1Storage.getMatrix(matrixClient.getMatrixId());
assertNotNull(ps1w1.getPartition(0));
assertNotNull(ps1w1.getPartition(1));
IntBuffer row0Part0 = ((ServerDenseIntRow) ps1w1.getRow(0, 0)).getData();
int part0Size = ps1w1.getRow(0, 0).size();
IntBuffer row0Part1 = ((ServerDenseIntRow) ps1w1.getRow(1, 0)).getData();
int part1Size = ps1w1.getRow(1, 0).size();
assertEquals(sum(row0Part0, part0Size), (i + 1) * dim / 2);
assertEquals(sum(row0Part1, part1Size), (i + 1) * dim / 2);
}
ParameterServer ps2Attempt = LocalClusterContext.get().getPS(ps2Attempt1Id).getPS();
for (int i = iterNum * 2; i < 3 * iterNum; i++) {
DenseIntVector update = new DenseIntVector(dim);
for (int j = 0; j < dim; j++) {
update.set(j, 1);
}
update.setMatrixId(matrixClient.getMatrixId());
update.setRowId(0);
matrixClient.increment(update);
matrixClient.clock().get();
Thread.sleep(1000);
MatrixStorageManager ps1Storage = ps1Attempt0.getMatrixStorageManager();
ServerMatrix ps1w1 = ps1Storage.getMatrix(matrixClient.getMatrixId());
assertNotNull(ps1w1.getPartition(0));
assertNotNull(ps1w1.getPartition(1));
IntBuffer row0Part0 = ((ServerDenseIntRow) ps1w1.getRow(0, 0)).getData();
int part0Size = ps1w1.getRow(0, 0).size();
IntBuffer row0Part1 = ((ServerDenseIntRow) ps1w1.getRow(1, 0)).getData();
int part1Size = ps1w1.getRow(1, 0).size();
assertEquals(sum(row0Part0, part0Size), (i + 1) * dim / 2);
assertEquals(sum(row0Part1, part1Size), (i + 1) * dim / 2);
MatrixStorageManager ps2Storage = ps2Attempt.getMatrixStorageManager();
ServerMatrix ps2w1 = ps2Storage.getMatrix(matrixClient.getMatrixId());
assertNotNull(ps2w1.getPartition(0));
assertNotNull(ps2w1.getPartition(1));
row0Part0 = ((ServerDenseIntRow) ps2w1.getRow(0, 0)).getData();
part0Size = ps2w1.getRow(0, 0).size();
row0Part1 = ((ServerDenseIntRow) ps2w1.getRow(1, 0)).getData();
part1Size = ps2w1.getRow(1, 0).size();
assertEquals(sum(row0Part0, part0Size), (i + 1) * dim / 2);
assertEquals(sum(row0Part1, part1Size), (i + 1) * dim / 2);
}
}
use of com.tencent.angel.ml.matrix.transport.PSLocation in project angel by Tencent.
the class ProtobufUtil method convert.
public static HashMap<PSLocation, Integer> convert(PSFailedReportsProto reportsProto) {
HashMap<PSLocation, Integer> reports = new HashMap<>();
List<PSFailedReportProto> reportList = reportsProto.getPsFailedReportsList();
int size = reportList.size();
for (int i = 0; i < size; i++) {
reports.put(new PSLocation(convertToId(reportList.get(i).getPsLoc().getPsId()), convert(reportList.get(i).getPsLoc().getLocation())), reportList.get(i).getFailedCounter());
}
return reports;
}
use of com.tencent.angel.ml.matrix.transport.PSLocation in project angel by Tencent.
the class AMMatrixMetaManager method handlePartReport.
private void handlePartReport(ParameterServerId psId, int matrixId, PartReport partReport) {
ParameterServerId master = matrixMetaManager.getMasterPs(matrixId, partReport.partId);
if (!psId.equals(master)) {
MatrixMeta matrixMeta = matrixMetaManager.getMatrixMeta(matrixId);
if (matrixMeta == null) {
return;
}
matrixMeta.getPartitionMeta(partReport.partId).addReplicationPS(psId);
if (partReport.state == PartitionState.INITIALIZING) {
addNeedRecoverPart(master, new RecoverPartKey(new PartitionKey(matrixId, partReport.partId), new PSLocation(psId, context.getLocationManager().getPsLocation(psId))));
} else if (partReport.state == PartitionState.READ_AND_WRITE) {
ParameterServerId orignalMaster = matrixPartitionsOnPS.get(psId).get(matrixId).getPartitionMeta(partReport.partId).getMasterPs();
if (orignalMaster.equals(psId)) {
matrixMetaManager.getMatrixMeta(matrixId).getPartitionMeta(partReport.partId).makePsToMaster(psId);
}
}
}
}
Aggregations