use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.
the class MatrixTransportClient method getRowsSplit.
/**
 * Queues a request for a batch of row splits of one matrix partition.
 *
 * <p>The master parameter server of {@code partKey} is looked up through the
 * matrix meta manager, a {@link GetRowsSplitRequest} is built, a pending
 * {@link FutureResult} is registered for it in {@code requestToResultMap},
 * and the request is appended to that server's get queue before the get
 * dispatcher is signalled through {@code startGet()}.
 *
 * @param partKey    partition the rows belong to
 * @param rowIndexes indexes of the rows to fetch
 * @param clock      clock value passed to the request
 * @return the future registered for this request (how and when it is completed
 *         is decided outside this method)
 */
@Override
public Future<List<ServerRow>> getRowsSplit(PartitionKey partKey, List<Integer> rowIndexes, int clock) {
  // Resolve the target server first, before any request state is created.
  ParameterServerId masterPs =
      PSAgentContext.get().getMatrixMetaManager().getMasterPS(partKey);

  GetRowsSplitRequest rowsRequest = new GetRowsSplitRequest(clock, partKey, rowIndexes);
  FutureResult<List<ServerRow>> pendingResult = new FutureResult<>();

  // Register the future before enqueueing so it is present once the request is visible to the queue.
  requestToResultMap.put(rowsRequest, pendingResult);
  addToGetQueueForServer(masterPs, rowsRequest);
  startGet();

  return pendingResult;
}
use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.
the class MatrixTransportClient method getPart.
/**
 * Queues a request for a whole matrix partition.
 *
 * <p>Looks up the master parameter server of {@code partKey}, creates a
 * {@link GetPartitionRequest}, registers a pending {@link FutureResult} for it
 * in {@code requestToResultMap}, appends the request to that server's get
 * queue and then calls {@code startGet()}.
 *
 * @param partKey partition to fetch
 * @param clock   clock value passed to the request
 * @return the future registered for this request
 */
@Override
public Future<ServerPartition> getPart(PartitionKey partKey, int clock) {
  // Resolve the target server first, before any request state is created.
  ParameterServerId masterPs =
      PSAgentContext.get().getMatrixMetaManager().getMasterPS(partKey);

  GetPartitionRequest partRequest = new GetPartitionRequest(partKey, clock);
  FutureResult<ServerPartition> pendingResult = new FutureResult<>();

  // Register the future before enqueueing so it is present once the request is visible to the queue.
  requestToResultMap.put(partRequest, pendingResult);
  addToGetQueueForServer(masterPs, partRequest);
  startGet();

  return pendingResult;
}
use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.
the class MatrixTransportClient method putPart.
/**
 * Queues an update for one matrix partition.
 *
 * <p>Looks up the master parameter server of {@code partKey}, wraps the row
 * update splits in a {@link PutPartitionUpdateRequest}, registers a pending
 * {@link FutureResult} for it in {@code requestToResultMap}, appends the
 * request to that server's put queue and then calls {@code startPut()}.
 *
 * @param partKey     partition to update
 * @param rowsSplit   row update splits for that partition
 * @param taskIndex   task index passed to the request
 * @param clock       clock value passed to the request
 * @param updateClock update-clock flag passed to the request
 * @return the future registered for this request
 */
@Override
public Future<VoidResult> putPart(PartitionKey partKey, List<RowUpdateSplit> rowsSplit, int taskIndex, int clock, boolean updateClock) {
  // Resolve the target server first, before any request state is created.
  ParameterServerId masterPs =
      PSAgentContext.get().getMatrixMetaManager().getMasterPS(partKey);

  PutPartitionUpdateRequest updateRequest =
      new PutPartitionUpdateRequest(taskIndex, clock, partKey, rowsSplit, updateClock);
  FutureResult<VoidResult> pendingResult = new FutureResult<>();

  // Register the future before enqueueing so it is present once the request is visible to the queue.
  requestToResultMap.put(updateRequest, pendingResult);
  addToPutQueueForServer(masterPs, updateRequest);
  startPut();

  return pendingResult;
}
use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.
the class PeriodHATest method testHA.
/**
 * Exercises matrix "w1" across a failure of the second parameter server.
 *
 * <p>The test runs three phases of {@code iterNum} iterations each. Every
 * iteration increments all {@code dim} elements of row 0 by one, advances the
 * clock, sleeps one second and then checks that the row-0 data of partitions 0
 * and 1 each sum to {@code (i + 1) * dim / 2}:
 * <ol>
 *   <li>both ps1 (attempt 0) and ps2 (attempt 0) are checked;</li>
 *   <li>ps2 attempt 0 is failed via {@code failed("exit")} and only ps1 is checked;</li>
 *   <li>ps2 attempt 1 is fetched from the local cluster context and both ps1 and
 *       the new ps2 attempt are checked.</li>
 * </ol>
 *
 * @throws Exception if any cluster call, clock wait or sleep fails
 */
@Test
public void testHA() throws Exception {
  ParameterServerId ps1Id = new ParameterServerId(0);
  final ParameterServerId ps2Id = new ParameterServerId(1);
  PSAttemptId ps1Attempt0Id = new PSAttemptId(ps1Id, 0);
  PSAttemptId ps2Attempt0Id = new PSAttemptId(ps2Id, 0);
  PSAttemptId ps2Attempt1Id = new PSAttemptId(ps2Id, 1);
  ParameterServer ps1Attempt0 = LocalClusterContext.get().getPS(ps1Attempt0Id).getPS();
  ParameterServer ps2Attempt0 = LocalClusterContext.get().getPS(ps2Attempt0Id).getPS();
  WorkerId worker0Id = new WorkerId(new WorkerGroupId(0), 0);
  WorkerAttemptId worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
  Worker worker0 = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
  TaskContext task0Context = worker0.getTaskManager().getRunningTask().get(task0Id).getTaskContext();
  MatrixClient matrixClient = task0Context.getMatrix("w1");
  int iterNum = 20;

  // Phase 1: both parameter servers are alive and must hold identical sums.
  for (int i = 0; i < iterNum; i++) {
    haIncrementRow0AndClock(matrixClient);
    haAssertRow0PartSums(ps1Attempt0, matrixClient, i);
    haAssertRow0PartSums(ps2Attempt0, matrixClient, i);
  }

  LOG.info("===================================================================ps2 failed");
  ps2Attempt0.failed("exit");

  // Phase 2: ps2 attempt 0 has been failed; only ps1 is verified.
  for (int i = iterNum; i < 2 * iterNum; i++) {
    haIncrementRow0AndClock(matrixClient);
    haAssertRow0PartSums(ps1Attempt0, matrixClient, i);
  }

  // Phase 3: verify ps1 together with the second attempt of ps2.
  ParameterServer ps2Attempt = LocalClusterContext.get().getPS(ps2Attempt1Id).getPS();
  for (int i = iterNum * 2; i < 3 * iterNum; i++) {
    haIncrementRow0AndClock(matrixClient);
    haAssertRow0PartSums(ps1Attempt0, matrixClient, i);
    haAssertRow0PartSums(ps2Attempt, matrixClient, i);
  }
}

/**
 * Adds 1 to every element of row 0 of the client's matrix, waits for the clock
 * operation to finish, then sleeps one second before the caller inspects the servers.
 */
private void haIncrementRow0AndClock(MatrixClient matrixClient) throws Exception {
  DenseIntVector update = new DenseIntVector(dim);
  for (int j = 0; j < dim; j++) {
    update.set(j, 1);
  }
  update.setMatrixId(matrixClient.getMatrixId());
  update.setRowId(0);
  matrixClient.increment(update);
  matrixClient.clock().get();
  Thread.sleep(1000);
}

/**
 * Asserts that, on the given parameter server, partitions 0 and 1 of the client's
 * matrix exist and that row 0 of each sums to {@code (iteration + 1) * dim / 2}.
 */
private void haAssertRow0PartSums(ParameterServer ps, MatrixClient matrixClient, int iteration)
    throws Exception {
  MatrixStorageManager storage = ps.getMatrixStorageManager();
  ServerMatrix matrix = storage.getMatrix(matrixClient.getMatrixId());
  assertNotNull(matrix.getPartition(0));
  assertNotNull(matrix.getPartition(1));
  for (int partId = 0; partId < 2; partId++) {
    ServerDenseIntRow row = (ServerDenseIntRow) matrix.getRow(partId, 0);
    IntBuffer data = row.getData();
    // JUnit's assertEquals takes (expected, actual); the expected value goes first.
    assertEquals((iteration + 1) * dim / 2, sum(data, row.size()));
  }
}
use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.
the class PeriodPusher method start.
/**
 * Starts the pusher.
 *
 * <p>Calls {@code super.start()} and then launches the {@code psha-push-dispatcher}
 * thread. Until {@code stopped} is set or the thread is interrupted, that thread
 * repeatedly:
 * <ol>
 *   <li>sleeps {@code pushIntervalMs} milliseconds;</li>
 *   <li>takes (and clears) the set of partitions that need recovery;</li>
 *   <li>for each partition whose location lists more than one PS and whose first
 *       listed PS is this one, calls {@code recover} once for every other listed PS;</li>
 *   <li>waits for all resulting futures via {@code waitResults}.</li>
 * </ol>
 *
 * <p>An interrupt ends the dispatcher loop. Any other exception raised during a
 * round is logged (unless the pusher has been stopped) and the loop continues
 * with the next round.
 */
public void start() {
  super.start();
  dispatcher = new Thread(() -> {
    ParameterServerId psId = context.getPSAttemptId().getPsId();
    while (!stopped.get() && !Thread.interrupted()) {
      try {
        Thread.sleep(pushIntervalMs);
        Map<PartitionKey, Integer> parts = getAndClearAllNeedRecoverParts();
        Map<RecoverPartKey, FutureResult> futures = new HashMap<>(parts.size());
        for (PartitionKey part : parts.keySet()) {
          PartitionLocation partLoc =
              context.getMaster().getPartLocation(part.getMatrixId(), part.getPartitionId());
          // Only the first PS in the location list pushes, and only when there are other PS to push to.
          if ((partLoc.psLocs.size() > 1) && psId.equals(partLoc.psLocs.get(0).psId)) {
            int size = partLoc.psLocs.size();
            for (int i = 1; i < size; i++) {
              RecoverPartKey partKey = new RecoverPartKey(part, partLoc.psLocs.get(i));
              LOG.info("Start to backup partition " + partKey.partKey + " to " + partKey.psLoc);
              futures.put(partKey, recover(partKey));
            }
          }
        }
        waitResults(futures);
      } catch (InterruptedException e) {
        // Thread.sleep cleared the interrupt flag when it threw; restore it and
        // leave the loop so an interrupt really terminates the dispatcher.
        Thread.currentThread().interrupt();
        if (!stopped.get()) {
          LOG.info("psha-push-dispatcher interrupted, exit");
        }
        break;
      } catch (Exception e) {
        if (!stopped.get()) {
          LOG.error("recover parts failed ", e);
        }
      }
    }
  });
  dispatcher.setName("psha-push-dispatcher");
  dispatcher.start();
}
Aggregations