Search in sources:

Example 1 with MatrixStorageManager

use of com.tencent.angel.ps.impl.MatrixStorageManager in project angel by Tencent.

In class PSFailedReportTest, method testPSFailedReport.

/**
 * Pushes increments to row 0 of matrix "w1" in three phases of {@code iterNum} rounds each and
 * checks the per-part sums stored on the parameter servers after every round:
 * <ol>
 *   <li>before any failure: attempt 0 of ps1 and attempt 0 of ps2 are both checked;</li>
 *   <li>after ps2 has been reported as failed to the master: only ps1 is checked;</li>
 *   <li>after looking up attempt 1 of ps2: ps1 and the new ps2 attempt are both checked.</li>
 * </ol>
 *
 * @throws Exception if any cluster lookup, matrix operation, or sleep fails
 */
@Test
public void testPSFailedReport() throws Exception {
    ParameterServerId ps1Id = new ParameterServerId(0);
    final ParameterServerId ps2Id = new ParameterServerId(1);
    PSAttemptId ps1Attempt0Id = new PSAttemptId(ps1Id, 0);
    PSAttemptId ps2Attempt0Id = new PSAttemptId(ps2Id, 0);
    PSAttemptId ps2Attempt1Id = new PSAttemptId(ps2Id, 1);
    ParameterServer ps1Attempt0 = LocalClusterContext.get().getPS(ps1Attempt0Id).getPS();
    ParameterServer ps2Attempt0 = LocalClusterContext.get().getPS(ps2Attempt0Id).getPS();
    WorkerId worker0Id = new WorkerId(new WorkerGroupId(0), 0);
    WorkerAttemptId worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
    Worker worker0 = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
    TaskContext task0Context = worker0.getTaskManager().getRunningTask().get(task0Id).getTaskContext();
    MatrixClient matrixClient = task0Context.getMatrix("w1");
    int iterNum = 20;

    // Phase 1: both original PS attempts are inspected after every round.
    for (int i = 0; i < iterNum; i++) {
        incrementRow0AndClock(matrixClient);
        assertRow0PartSums(ps1Attempt0, matrixClient, i);
        assertRow0PartSums(ps2Attempt0, matrixClient, i);
    }

    LOG.info("===================================================================ps2 failed");
    // Report ps2 as failed to the master through the worker's PS agent.
    HashMap<PSLocation, Integer> failedCounters = new HashMap<>();
    PSLocation psLoc = new PSLocation(ps2Id, ps2Attempt0.getLocationManager().getPsLocation(ps2Id));
    failedCounters.put(psLoc, 10000);
    worker0.getPSAgent().getMasterClient().psFailedReport(failedCounters);
    // Fixed pause after the report; presumably gives the master time to react - TODO confirm.
    Thread.sleep(20000);

    // Phase 2: only ps1 is inspected.
    for (int i = iterNum; i < 2 * iterNum; i++) {
        incrementRow0AndClock(matrixClient);
        assertRow0PartSums(ps1Attempt0, matrixClient, i);
    }

    // Phase 3: ps1 and attempt 1 of ps2 are both inspected.
    ParameterServer ps2Attempt = LocalClusterContext.get().getPS(ps2Attempt1Id).getPS();
    for (int i = iterNum * 2; i < 3 * iterNum; i++) {
        incrementRow0AndClock(matrixClient);
        assertRow0PartSums(ps1Attempt0, matrixClient, i);
        assertRow0PartSums(ps2Attempt, matrixClient, i);
    }
}

/**
 * Builds a {@code dim}-sized vector of ones addressed to row 0 of the client's matrix, sends it
 * as an increment, waits for the clock future, and then sleeps for one second.
 *
 * @param matrixClient client of the matrix being updated
 * @throws Exception if the increment, the clock wait, or the sleep fails
 */
private void incrementRow0AndClock(MatrixClient matrixClient) throws Exception {
    DenseIntVector update = new DenseIntVector(dim);
    for (int j = 0; j < dim; j++) {
        update.set(j, 1);
    }
    update.setMatrixId(matrixClient.getMatrixId());
    update.setRowId(0);
    matrixClient.increment(update);
    matrixClient.clock().get();
    // Fixed pause before the caller inspects server-side storage.
    Thread.sleep(1000);
}

/**
 * Asserts that the given parameter server holds partitions 0 and 1 of the client's matrix and
 * that the data returned by {@code getRow(0, 0)} and {@code getRow(1, 0)} each sum to
 * {@code (iteration + 1) * dim / 2}.
 *
 * @param ps parameter server whose matrix storage is inspected
 * @param matrixClient client of the matrix being checked
 * @param iteration zero-based count of rounds completed before the current one
 * @throws Exception if reading the server-side storage fails
 */
private void assertRow0PartSums(ParameterServer ps, MatrixClient matrixClient, int iteration) throws Exception {
    MatrixStorageManager storage = ps.getMatrixStorageManager();
    ServerMatrix serverMatrix = storage.getMatrix(matrixClient.getMatrixId());
    assertNotNull(serverMatrix.getPartition(0));
    assertNotNull(serverMatrix.getPartition(1));
    IntBuffer row0Part0 = ((ServerDenseIntRow) serverMatrix.getRow(0, 0)).getData();
    int part0Size = serverMatrix.getRow(0, 0).size();
    IntBuffer row0Part1 = ((ServerDenseIntRow) serverMatrix.getRow(1, 0)).getData();
    int part1Size = serverMatrix.getRow(1, 0).size();
    // JUnit expects (expected, actual); this order keeps failure messages accurate.
    assertEquals((iteration + 1) * dim / 2, sum(row0Part0, part0Size));
    assertEquals((iteration + 1) * dim / 2, sum(row0Part1, part1Size));
}
Also used : TaskContext(com.tencent.angel.worker.task.TaskContext) HashMap(java.util.HashMap) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) ServerMatrix(com.tencent.angel.ps.impl.matrix.ServerMatrix) WorkerId(com.tencent.angel.worker.WorkerId) ParameterServer(com.tencent.angel.ps.impl.ParameterServer) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId) DenseIntVector(com.tencent.angel.ml.math.vector.DenseIntVector) PSAttemptId(com.tencent.angel.ps.PSAttemptId) PSLocation(com.tencent.angel.ml.matrix.transport.PSLocation) MatrixStorageManager(com.tencent.angel.ps.impl.MatrixStorageManager) IntBuffer(java.nio.IntBuffer) ServerDenseIntRow(com.tencent.angel.ps.impl.matrix.ServerDenseIntRow) Worker(com.tencent.angel.worker.Worker) MatrixClient(com.tencent.angel.psagent.matrix.MatrixClient) ParameterServerId(com.tencent.angel.ps.ParameterServerId) Test(org.junit.Test)

Example 2 with MatrixStorageManager

use of com.tencent.angel.ps.impl.MatrixStorageManager in project angel by Tencent.

In class PeriodHATest, method testHA.

/**
 * Pushes increments to row 0 of matrix "w1" in three phases of {@code iterNum} rounds each and
 * checks the per-part sums stored on the parameter servers after every round:
 * <ol>
 *   <li>before any failure: attempt 0 of ps1 and attempt 0 of ps2 are both checked;</li>
 *   <li>after {@code failed("exit")} has been called on attempt 0 of ps2: only ps1 is checked;</li>
 *   <li>after looking up attempt 1 of ps2: ps1 and the new ps2 attempt are both checked.</li>
 * </ol>
 *
 * @throws Exception if any cluster lookup, matrix operation, or sleep fails
 */
@Test
public void testHA() throws Exception {
    ParameterServerId ps1Id = new ParameterServerId(0);
    final ParameterServerId ps2Id = new ParameterServerId(1);
    PSAttemptId ps1Attempt0Id = new PSAttemptId(ps1Id, 0);
    PSAttemptId ps2Attempt0Id = new PSAttemptId(ps2Id, 0);
    PSAttemptId ps2Attempt1Id = new PSAttemptId(ps2Id, 1);
    ParameterServer ps1Attempt0 = LocalClusterContext.get().getPS(ps1Attempt0Id).getPS();
    ParameterServer ps2Attempt0 = LocalClusterContext.get().getPS(ps2Attempt0Id).getPS();
    WorkerId worker0Id = new WorkerId(new WorkerGroupId(0), 0);
    WorkerAttemptId worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
    Worker worker0 = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
    TaskContext task0Context = worker0.getTaskManager().getRunningTask().get(task0Id).getTaskContext();
    MatrixClient matrixClient = task0Context.getMatrix("w1");
    int iterNum = 20;

    // Phase 1: both original PS attempts are inspected after every round.
    for (int i = 0; i < iterNum; i++) {
        incrementRow0AndClock(matrixClient);
        assertRow0PartSums(ps1Attempt0, matrixClient, i);
        assertRow0PartSums(ps2Attempt0, matrixClient, i);
    }

    LOG.info("===================================================================ps2 failed");
    // Mark attempt 0 of ps2 as failed directly on the server object.
    ps2Attempt0.failed("exit");

    // Phase 2: only ps1 is inspected.
    for (int i = iterNum; i < 2 * iterNum; i++) {
        incrementRow0AndClock(matrixClient);
        assertRow0PartSums(ps1Attempt0, matrixClient, i);
    }

    // Phase 3: ps1 and attempt 1 of ps2 are both inspected.
    ParameterServer ps2Attempt = LocalClusterContext.get().getPS(ps2Attempt1Id).getPS();
    for (int i = iterNum * 2; i < 3 * iterNum; i++) {
        incrementRow0AndClock(matrixClient);
        assertRow0PartSums(ps1Attempt0, matrixClient, i);
        assertRow0PartSums(ps2Attempt, matrixClient, i);
    }
}

/**
 * Builds a {@code dim}-sized vector of ones addressed to row 0 of the client's matrix, sends it
 * as an increment, waits for the clock future, and then sleeps for one second.
 *
 * @param matrixClient client of the matrix being updated
 * @throws Exception if the increment, the clock wait, or the sleep fails
 */
private void incrementRow0AndClock(MatrixClient matrixClient) throws Exception {
    DenseIntVector update = new DenseIntVector(dim);
    for (int j = 0; j < dim; j++) {
        update.set(j, 1);
    }
    update.setMatrixId(matrixClient.getMatrixId());
    update.setRowId(0);
    matrixClient.increment(update);
    matrixClient.clock().get();
    // Fixed pause before the caller inspects server-side storage.
    Thread.sleep(1000);
}

/**
 * Asserts that the given parameter server holds partitions 0 and 1 of the client's matrix and
 * that the data returned by {@code getRow(0, 0)} and {@code getRow(1, 0)} each sum to
 * {@code (iteration + 1) * dim / 2}.
 *
 * @param ps parameter server whose matrix storage is inspected
 * @param matrixClient client of the matrix being checked
 * @param iteration zero-based count of rounds completed before the current one
 * @throws Exception if reading the server-side storage fails
 */
private void assertRow0PartSums(ParameterServer ps, MatrixClient matrixClient, int iteration) throws Exception {
    MatrixStorageManager storage = ps.getMatrixStorageManager();
    ServerMatrix serverMatrix = storage.getMatrix(matrixClient.getMatrixId());
    assertNotNull(serverMatrix.getPartition(0));
    assertNotNull(serverMatrix.getPartition(1));
    IntBuffer row0Part0 = ((ServerDenseIntRow) serverMatrix.getRow(0, 0)).getData();
    int part0Size = serverMatrix.getRow(0, 0).size();
    IntBuffer row0Part1 = ((ServerDenseIntRow) serverMatrix.getRow(1, 0)).getData();
    int part1Size = serverMatrix.getRow(1, 0).size();
    // JUnit expects (expected, actual); this order keeps failure messages accurate.
    assertEquals((iteration + 1) * dim / 2, sum(row0Part0, part0Size));
    assertEquals((iteration + 1) * dim / 2, sum(row0Part1, part1Size));
}
Also used : TaskContext(com.tencent.angel.worker.task.TaskContext) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) ServerMatrix(com.tencent.angel.ps.impl.matrix.ServerMatrix) WorkerId(com.tencent.angel.worker.WorkerId) ParameterServer(com.tencent.angel.ps.impl.ParameterServer) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId) DenseIntVector(com.tencent.angel.ml.math.vector.DenseIntVector) PSAttemptId(com.tencent.angel.ps.PSAttemptId) MatrixStorageManager(com.tencent.angel.ps.impl.MatrixStorageManager) IntBuffer(java.nio.IntBuffer) ServerDenseIntRow(com.tencent.angel.ps.impl.matrix.ServerDenseIntRow) Worker(com.tencent.angel.worker.Worker) MatrixClient(com.tencent.angel.psagent.matrix.MatrixClient) ParameterServerId(com.tencent.angel.ps.ParameterServerId) Test(org.junit.Test)

Example 3 with MatrixStorageManager

use of com.tencent.angel.ps.impl.MatrixStorageManager in project angel by Tencent.

In class WorkerPool, method getPart.

/**
 * Reads a partition request from {@code in}, builds a reply buffer that starts with
 * {@code seqId} and {@code methodId}, and writes that buffer to the channel.
 *
 * <p>The request is consumed as consecutive ints: partition id, matrix id, clock, start row
 * index, and a length that sizes the reply buffer ({@code 8 + len * 4} bytes). When the clock
 * is ready, a row count is read and, for each row, a per-row length followed by the row's
 * {@code encode(in, buf, len)} call.
 *
 * <p>If anything throws after the reply buffer has been allocated, the buffer is released
 * before the exception propagates, since it would otherwise never reach
 * {@code ctx.writeAndFlush}.
 *
 * @param ctx channel context the reply is written to
 * @param seqId sequence id echoed as the first int of the reply
 * @param methodId method id echoed as the second int of the reply
 * @param in request payload positioned at the partition id
 */
@SuppressWarnings("unused")
private void getPart(ChannelHandlerContext ctx, int seqId, int methodId, ByteBuf in) {
    int partId = in.readInt();
    int matId = in.readInt();
    int clock = in.readInt();
    int str = in.readInt();
    PartitionKey partKey = new PartitionKey();
    partKey.setMatrixId(matId);
    partKey.setPartitionId(partId);
    int len = in.readInt();
    ByteBuf buf = ByteBufUtils.newByteBuf(8 + len * 4, useDirectorBuffer);
    try {
        buf.writeInt(seqId);
        buf.writeInt(methodId);
        Response response = null;
        if (!isClockReady(partKey, clock)) {
            response = new Response(ResponseType.CLOCK_NOTREADY);
            // TODO: the response is not encoded into buf yet.
        } else {
            response = new Response(ResponseType.SUCCESS);
            // TODO: the response is not encoded into buf yet.
            MatrixStorageManager matPartManager = context.getMatrixStorageManager();
            int rslen = in.readInt();
            // NOTE(review): the loop runs rslen - 1 times; confirm against the request encoder
            // that this is the intended row count.
            for (int i = 0; i < rslen - 1; i++) {
                int rowId = str + i;
                len = in.readInt();
                matPartManager.getRow(matId, rowId, partId).encode(in, buf, len);
            }
        }
    } catch (RuntimeException e) {
        // buf was never handed to the channel, so nobody else will release it.
        buf.release();
        throw e;
    }
    ctx.writeAndFlush(buf);
}
Also used : MatrixStorageManager(com.tencent.angel.ps.impl.MatrixStorageManager) PartitionKey(com.tencent.angel.PartitionKey) ByteBuf(io.netty.buffer.ByteBuf)

Aggregations

MatrixStorageManager (com.tencent.angel.ps.impl.MatrixStorageManager): 3; DenseIntVector (com.tencent.angel.ml.math.vector.DenseIntVector): 2; PSAttemptId (com.tencent.angel.ps.PSAttemptId): 2; ParameterServerId (com.tencent.angel.ps.ParameterServerId): 2; ParameterServer (com.tencent.angel.ps.impl.ParameterServer): 2; ServerDenseIntRow (com.tencent.angel.ps.impl.matrix.ServerDenseIntRow): 2; ServerMatrix (com.tencent.angel.ps.impl.matrix.ServerMatrix): 2; MatrixClient (com.tencent.angel.psagent.matrix.MatrixClient): 2; Worker (com.tencent.angel.worker.Worker): 2; WorkerAttemptId (com.tencent.angel.worker.WorkerAttemptId): 2; WorkerGroupId (com.tencent.angel.worker.WorkerGroupId): 2; WorkerId (com.tencent.angel.worker.WorkerId): 2; TaskContext (com.tencent.angel.worker.task.TaskContext): 2; IntBuffer (java.nio.IntBuffer): 2; Test (org.junit.Test): 2; PartitionKey (com.tencent.angel.PartitionKey): 1; PSLocation (com.tencent.angel.ml.matrix.transport.PSLocation): 1; ByteBuf (io.netty.buffer.ByteBuf): 1; HashMap (java.util.HashMap): 1