Search in sources :

Example 76 with PartitionKey

use of com.tencent.angel.PartitionKey in project angel by Tencent.

the class WorkerPool method getPart.

/**
 * Serves a "get partition" request read from {@code in} and flushes the reply through
 * {@code ctx}.
 * <p>
 * The request is consumed in this order: partition id, matrix id, clock, base row id and a
 * length used to size the reply buffer. The reply starts with {@code seqId} and
 * {@code methodId}. When the clock check passes, a row count is read and, for each loop
 * iteration, a per-row length is read before the matching stored row is encoded into the reply.
 *
 * @param ctx channel context the reply buffer is written and flushed to
 * @param seqId request sequence id, echoed as the first int of the reply
 * @param methodId method id, echoed as the second int of the reply
 * @param in serialized request
 */
@SuppressWarnings("unused")
private void getPart(ChannelHandlerContext ctx, int seqId, int methodId, ByteBuf in) {
    final int partitionId = in.readInt();
    final int matrixId = in.readInt();
    final int requestClock = in.readInt();
    final int baseRowId = in.readInt();

    PartitionKey key = new PartitionKey();
    key.setMatrixId(matrixId);
    key.setPartitionId(partitionId);

    // This first length only sizes the reply buffer; it is overwritten per row below.
    int length = in.readInt();
    ByteBuf reply = ByteBufUtils.newByteBuf(8 + length * 4, useDirectorBuffer);
    reply.writeInt(seqId);
    reply.writeInt(methodId);

    // NOTE(review): the Response object is created but never encoded into the reply (see TODOs).
    Response status;
    if (isClockReady(key, requestClock)) {
        status = new Response(ResponseType.SUCCESS);
        // TODO: status.encode(reply);
        MatrixStorageManager storage = context.getMatrixStorageManager();
        final int rowCount = in.readInt();
        // NOTE(review): the loop runs rowCount - 1 times, exactly as before — confirm intended.
        for (int offset = 0; offset < rowCount - 1; offset++) {
            length = in.readInt();
            storage.getRow(matrixId, baseRowId + offset, partitionId).encode(in, reply, length);
        }
    } else {
        status = new Response(ResponseType.CLOCK_NOTREADY);
        // TODO: status.encode(reply);
    }
    ctx.writeAndFlush(reply);
}
Also used : MatrixStorageManager(com.tencent.angel.ps.impl.MatrixStorageManager) PartitionKey(com.tencent.angel.PartitionKey) ByteBuf(io.netty.buffer.ByteBuf)

Example 77 with PartitionKey

use of com.tencent.angel.PartitionKey in project angel by Tencent.

the class WorkerPool method putPartUpdate.

/**
 * Applies a serialized update to a locally stored matrix partition.
 * <p>
 * The returned buffer always begins with {@code seqId}, followed by the serialized
 * {@code PutPartitionUpdateResponse}. The request is refused when the partition is not stored
 * here, when a non-PS request hits a partition whose state is not {@code READ_AND_WRITE}, when
 * the partition location cannot be obtained, or when a non-PS request reaches a PS for which
 * {@code isPartMasterPs} is false. After a successful local update the request (and the clock,
 * if requested) is handed to the PS-to-PS pusher when the location lists more than one PS.
 *
 * @param seqId rpc request id
 * @param request rpc request
 * @param in serialized request
 * @return serialized rpc response
 */
private ByteBuf putPartUpdate(int seqId, PutPartitionUpdateRequest request, ByteBuf in) {
    if (LOG.isDebugEnabled()) {
        LOG.debug("put update request=" + request + " with seqId=" + seqId);
    }
    final long beginMs = System.currentTimeMillis();

    // The request id leads the reply on every return path.
    ByteBuf out = ByteBufUtils.newByteBuf(8 + 4);
    out.writeInt(seqId);

    // Locate the partition; it must be stored on this server.
    PartitionKey partKey = request.getPartKey();
    ServerPartition part = context.getMatrixStorageManager().getPart(partKey.getMatrixId(), partKey.getPartitionId());
    if (part == null) {
        String msg = "update " + request + " failed. The partition " + partKey + " does not exist";
        LOG.fatal(msg);
        new PutPartitionUpdateResponse(ResponseType.SERVER_HANDLE_FATAL, msg).serialize(out);
        return out;
    }

    // Requests that do not come from another PS need a writable partition.
    PartitionState state = part.getState();
    if (!request.isComeFromPs()) {
        if (state != PartitionState.READ_AND_WRITE) {
            String msg = "update " + request + " failed. The partition " + partKey + " state is " + state;
            LOG.error(msg);
            new PutPartitionUpdateResponse(ResponseType.SERVER_HANDLE_FAILED, msg).serialize(out);
            return out;
        }
    }

    // Look up which servers hold this partition.
    PartitionLocation partLoc;
    try {
        partLoc = context.getMatrixMetaManager().getPartLocation(request.getPartKey(), disableRouterCache);
    } catch (Throwable t) {
        String msg = "update " + request + " failed, get partition location from master failed " + t.getMessage();
        LOG.error(msg, t);
        new PutPartitionUpdateResponse(ResponseType.SERVER_HANDLE_FAILED, msg).serialize(out);
        return out;
    }

    PutPartitionUpdateResponse response;
    // Accept the update if it was forwarded by a PS or if this PS is the partition's master.
    final boolean accepted = request.isComeFromPs() || isPartMasterPs(partLoc);
    if (accepted) {
        int requestClock = request.getClock();
        partKey = request.getPartKey();
        int taskIndex = request.getTaskIndex();
        boolean updateClock = request.isUpdateClock();
        if (LOG.isDebugEnabled()) {
            LOG.debug("seqId = " + seqId + " update split request matrixId = " + partKey.getMatrixId() + ", partId = " + partKey.getPartitionId() + " clock = " + requestClock + ", taskIndex=" + taskIndex + ", updateClock = " + updateClock);
        }
        try {
            // Re-read the state right before applying the update.
            state = part.getState();
            if (state != PartitionState.READ_AND_WRITE) {
                String msg = "update " + request + " failed. The partition " + partKey + " state is " + state;
                LOG.error(msg);
                new PutPartitionUpdateResponse(ResponseType.SERVER_HANDLE_FAILED, msg).serialize(out);
                return out;
            }
            part.update(in, rowUpdater);
            if (updateClock) {
                context.getClockVectorManager().updateClock(partKey.getMatrixId(), partKey.getPartitionId(), taskIndex, requestClock);
            }
            response = new PutPartitionUpdateResponse(ResponseType.SUCCESS);
            // More than one PS location: hand the update (and clock) to the PS-to-PS pusher.
            if (partLoc.psLocs.size() > 1) {
                context.getPS2PSPusher().put(request, in, partLoc);
                if (updateClock) {
                    context.getPS2PSPusher().updateClock(request.getPartKey(), taskIndex, requestClock, partLoc);
                }
            }
        } catch (Throwable t) {
            String msg = "update " + request + " failed " + t.getMessage();
            LOG.fatal(msg, t);
            response = new PutPartitionUpdateResponse(ResponseType.SERVER_HANDLE_FATAL, msg);
        }
    } else {
        String msg = "local ps is " + context.getPSAttemptId().getPsId() + " update " + request + " failed, update to slave ps for partition " + request.getPartKey();
        LOG.error(msg);
        response = new PutPartitionUpdateResponse(ResponseType.SERVER_HANDLE_FAILED, msg);
    }

    response.serialize(out);
    if (LOG.isDebugEnabled()) {
        LOG.debug("update partition for request " + request + " use time=" + (System.currentTimeMillis() - beginMs) + ", response buf=" + out);
    }
    return out;
}
Also used : PartitionKey(com.tencent.angel.PartitionKey) ByteBuf(io.netty.buffer.ByteBuf) PartitionLocation(com.tencent.angel.ml.matrix.PartitionLocation)

Example 78 with PartitionKey

use of com.tencent.angel.PartitionKey in project angel by Tencent.

the class WorkerPool method update.

/**
 * Update a partition use PSF.
 * <p>
 * The returned buffer always starts with {@code seqId}, followed by the serialized
 * {@code UpdaterResponse}; this holds for the error paths as well as the success path so that
 * the reply layout does not depend on the outcome. The request is refused when the partition
 * is not stored here, when its state is not {@code READ_AND_WRITE}, when the partition location
 * cannot be obtained, or when a non-PS request reaches a PS for which {@code isPartMasterPs}
 * is false. After a successful local update the request is handed to the PS-to-PS pusher when
 * the location lists more than one PS.
 *
 * @param seqId rpc request id
 * @param request rpc request
 * @param in serialized rpc request
 * @return serialized rpc response
 */
private ByteBuf update(int seqId, UpdaterRequest request, ByteBuf in) {
    UpdaterResponse response = null;
    ByteBuf buf = ByteBufUtils.newByteBuf(4 + 8, useDirectorBuffer);
    // Write the request id first so that every return path, including the early error
    // returns below, produces the same "seqId + response" layout.
    buf.writeInt(seqId);
    // Get partition and check the partition state
    PartitionKey partKey = request.getPartKey();
    ServerPartition part = context.getMatrixStorageManager().getPart(partKey.getMatrixId(), partKey.getPartitionId());
    if (part == null) {
        String log = "update " + request + " failed. The partition " + partKey + " does not exist";
        LOG.fatal(log);
        response = new UpdaterResponse(ResponseType.SERVER_HANDLE_FATAL, log);
        response.serialize(buf);
        return buf;
    }
    PartitionState state = part.getState();
    if (state != PartitionState.READ_AND_WRITE) {
        String log = "update " + request + " failed. The partition " + partKey + " state is " + state;
        LOG.error(log);
        response = new UpdaterResponse(ResponseType.PARTITION_READ_ONLY, log);
        response.serialize(buf);
        return buf;
    }
    // Get the stored pss for this partition
    PartitionLocation partLoc = null;
    try {
        partLoc = context.getMatrixMetaManager().getPartLocation(request.getPartKey(), disableRouterCache);
    } catch (Throwable x) {
        String log = "update " + request + " failed, get partition location from master failed " + x.getMessage();
        LOG.error(log, x);
        response = new UpdaterResponse(ResponseType.SERVER_HANDLE_FAILED, log);
        response.serialize(buf);
        return buf;
    }
    // Check this ps is the master ps for this location, only master ps can accept the update
    if (!request.isComeFromPs() && !isPartMasterPs(partLoc)) {
        String log = "update " + request + " failed, update to slave ps for partition " + request.getPartKey();
        LOG.error(log);
        response = new UpdaterResponse(ResponseType.SERVER_HANDLE_FAILED, log);
    } else {
        try {
            // Instantiate the update function named in the request through its no-arg
            // constructor; asSubclass performs a checked cast instead of an unchecked one.
            Class<? extends UpdateFunc> funcClass = Class.forName(request.getUpdaterFuncClass()).asSubclass(UpdateFunc.class);
            Constructor<? extends UpdateFunc> constructor = funcClass.getConstructor();
            constructor.setAccessible(true);
            UpdateFunc func = constructor.newInstance();
            func.setPsContext(context);
            // Check the partition state again
            state = part.getState();
            if (state != PartitionState.READ_AND_WRITE) {
                String log = "update " + request + " failed. The partition " + partKey + " state is " + state;
                LOG.error(log);
                response = new UpdaterResponse(ResponseType.SERVER_HANDLE_FAILED, log);
                response.serialize(buf);
                return buf;
            }
            part.update(func, request.getPartParam());
            response = new UpdaterResponse();
            response.setResponseType(ResponseType.SUCCESS);
            if (partLoc.psLocs.size() > 1) {
                // Start to put the update to the slave pss
                context.getPS2PSPusher().put(request, in, partLoc);
            }
        } catch (Throwable e) {
            // Any failure while loading the function or applying the update is reported as fatal.
            String log = "update " + request + " failed " + e.getMessage();
            LOG.fatal(log, e);
            response = new UpdaterResponse(ResponseType.SERVER_HANDLE_FATAL, log);
        }
    }
    response.serialize(buf);
    return buf;
}
Also used : UpdateFunc(com.tencent.angel.ml.matrix.psf.update.enhance.UpdateFunc) PartitionKey(com.tencent.angel.PartitionKey) ByteBuf(io.netty.buffer.ByteBuf) PartitionLocation(com.tencent.angel.ml.matrix.PartitionLocation)

Example 79 with PartitionKey

use of com.tencent.angel.PartitionKey in project angel by Tencent.

the class CompSparseIntMatrixOpLog method split.

/**
 * Splits the cached row updates of this matrix by matrix partition and appends the pieces to
 * {@code psUpdateData}.
 * <p>
 * Every row index from 0 up to the matrix row count is visited; rows with no cached vector are
 * skipped. A visited row is optionally filtered, optionally scaled by
 * {@code 1.0 / totalTaskNum} when the matrix meta reports averaging, split by partition, and
 * then removed from this op log.
 *
 * @param psUpdateData partition -> row split list map; lists are created on demand
 */
public void split(Map<PartitionKey, List<RowUpdateSplit>> psUpdateData) {
    final long beginMs = System.currentTimeMillis();
    MatrixMeta meta = PSAgentContext.get().getMatrixMetaManager().getMatrixMeta(matrixId);
    List<PartitionKey> partKeys = PSAgentContext.get().getMatrixMetaManager().getPartitions(matrixId);
    final int rowCount = meta.getRowNum();

    for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) {
        TVector rowVector = getRow(rowIndex);
        if (rowVector != null) {
            // Optional filtering step (the original comment describes it as removing zero values).
            if (enableFilter && isNeedFilter(rowVector)) {
                rowVector = rowVector.filter(0.0);
            }
            // Scale by the reciprocal of the total task number when averaging is enabled.
            if (meta.isAverage()) {
                rowVector.timesBy(1.0 / PSAgentContext.get().getTotalTaskNum());
            }
            // Cut the row along the partition boundaries, then drop it from this op log.
            Map<PartitionKey, RowUpdateSplit> pieces = RowUpdateSplitUtils.split(rowVector, partKeys);
            removeRow(rowIndex);

            // Append each piece to the list kept for its partition.
            for (Map.Entry<PartitionKey, RowUpdateSplit> piece : pieces.entrySet()) {
                PartitionKey target = piece.getKey();
                List<RowUpdateSplit> bucket = psUpdateData.get(target);
                if (bucket == null) {
                    bucket = new ArrayList<>();
                    psUpdateData.put(target, bucket);
                }
                bucket.add(piece.getValue());
            }
        }
    }
    LOG.debug("taking " + (System.currentTimeMillis() - beginMs) + " ms to split logs for matrix=" + matrixId);
}
Also used : MatrixMeta(com.tencent.angel.ml.matrix.MatrixMeta) PartitionKey(com.tencent.angel.PartitionKey) TVector(com.tencent.angel.ml.math.TVector) Map(java.util.Map)

Example 80 with PartitionKey

use of com.tencent.angel.PartitionKey in project angel by Tencent.

the class AMMatrixMetaManager method handlePartReport.

/**
 * Handles a partition report from a PS that is not the current master of that partition.
 * <p>
 * Nothing happens when the reporter is the current master or when the matrix meta is unknown.
 * Otherwise the reporter is added as a replication PS of the partition. If the reported state
 * is {@code INITIALIZING}, a recover task targeting the reporter is registered for the current
 * master. If it is {@code READ_AND_WRITE} and the reporter is recorded as the partition's
 * master in {@code matrixPartitionsOnPS}, the reporter is made master.
 *
 * @param psId id of the reporting PS
 * @param matrixId matrix the reported partition belongs to
 * @param partReport reported partition id and state
 */
private void handlePartReport(ParameterServerId psId, int matrixId, PartReport partReport) {
    ParameterServerId currentMaster = matrixMetaManager.getMasterPs(matrixId, partReport.partId);
    if (psId.equals(currentMaster)) {
        return;
    }

    MatrixMeta meta = matrixMetaManager.getMatrixMeta(matrixId);
    if (meta == null) {
        return;
    }
    meta.getPartitionMeta(partReport.partId).addReplicationPS(psId);

    if (partReport.state == PartitionState.INITIALIZING) {
        // Register the reporter's copy of the partition for recovery by the current master.
        PartitionKey reportedPart = new PartitionKey(matrixId, partReport.partId);
        PSLocation reporterLocation = new PSLocation(psId, context.getLocationManager().getPsLocation(psId));
        addNeedRecoverPart(currentMaster, new RecoverPartKey(reportedPart, reporterLocation));
        return;
    }

    if (partReport.state == PartitionState.READ_AND_WRITE) {
        // Promote the reporter only if matrixPartitionsOnPS records it as this partition's master.
        ParameterServerId originalMaster = matrixPartitionsOnPS.get(psId).get(matrixId).getPartitionMeta(partReport.partId).getMasterPs();
        if (originalMaster.equals(psId)) {
            matrixMetaManager.getMatrixMeta(matrixId).getPartitionMeta(partReport.partId).makePsToMaster(psId);
        }
    }
}
Also used : RecoverPartKey(com.tencent.angel.ps.recovery.ha.RecoverPartKey) PSLocation(com.tencent.angel.ml.matrix.transport.PSLocation) PartitionKey(com.tencent.angel.PartitionKey) ParameterServerId(com.tencent.angel.ps.ParameterServerId)

Aggregations

PartitionKey (com.tencent.angel.PartitionKey)80 ArrayList (java.util.ArrayList)17 ByteBuf (io.netty.buffer.ByteBuf)12 Test (org.junit.Test)9 PartitionGetResult (com.tencent.angel.ml.matrix.psf.get.base.PartitionGetResult)8 MatrixMeta (com.tencent.angel.ml.matrix.MatrixMeta)7 PartitionGetParam (com.tencent.angel.ml.matrix.psf.get.base.PartitionGetParam)7 PartitionLocation (com.tencent.angel.ml.matrix.PartitionLocation)4 ServerRow (com.tencent.angel.ps.impl.matrix.ServerRow)4 ParameterServerId (com.tencent.angel.ps.ParameterServerId)3 RecoverPartKey (com.tencent.angel.ps.recovery.ha.RecoverPartKey)3 FutureResult (com.tencent.angel.psagent.matrix.transport.FutureResult)3 Map (java.util.Map)3 Location (com.tencent.angel.common.location.Location)2 TVector (com.tencent.angel.ml.math.TVector)2 RowType (com.tencent.angel.ml.matrix.RowType)2 PSLocation (com.tencent.angel.ml.matrix.transport.PSLocation)2 MatrixStorageManager (com.tencent.angel.ps.impl.MatrixStorageManager)2 ClockCache (com.tencent.angel.psagent.clock.ClockCache)2 Worker (com.tencent.angel.worker.Worker)2