Search in sources :

Example 1 with FutureResult

use of com.tencent.angel.psagent.matrix.transport.FutureResult in project angel by Tencent.

the class PS2PSPusherImpl method recover.

/**
 * Schedules an asynchronous recovery of one locally stored partition onto another ps.
 *
 * The work is submitted to {@code workerPool}; the returned future is completed with the
 * response of the remote recover call, or with an error response when the partition cannot
 * be found locally or the transfer throws.
 *
 * @param partKey identifies the partition to recover and the destination ps location
 * @return future that will hold the recover response
 */
@Override
public FutureResult<Response> recover(final RecoverPartKey partKey) {
    final FutureResult<Response> future = new FutureResult<>();
    workerPool.execute(() -> pushPartitionForRecovery(partKey, future));
    return future;
}

/**
 * Looks up the partition named by {@code partKey}, sends it to the destination ps and
 * completes {@code future} with the outcome.
 */
private void pushPartitionForRecovery(RecoverPartKey partKey, FutureResult<Response> future) {
    ServerPartition partition = context.getMatrixStorageManager()
        .getPart(partKey.partKey.getMatrixId(), partKey.partKey.getPartitionId());
    if (partition == null) {
        String reason = "Can not find partition " + partKey.partKey.getMatrixId() + ":" + partKey.partKey.getPartitionId();
        future.set(new Response(ResponseType.UNKNOWN_ERROR, reason));
        return;
    }

    try {
        // The partition is switched to read-only before it is handed to the client.
        partition.waitAndSetReadOnly();
        Response response = psClient.recoverPart(partKey.psLoc.psId, partKey.psLoc.loc, partition).get();
        future.set(response);
    } catch (Throwable e) {
        // Any failure is reported to the caller as a network error, then logged.
        future.set(new Response(ResponseType.NETWORK_ERROR, e.getMessage()));
        LOG.error("handle recover event " + partKey + " failed ", e);
    } finally {
        // Always reopen the partition for writes, whether or not the transfer succeeded.
        partition.setState(PartitionState.READ_AND_WRITE);
        LOG.info("ps " + context.getPSAttemptId() + " set partition " + partition.getPartitionKey() + " to " + partition.getState());
    }
}
Also used : Response(com.tencent.angel.ml.matrix.transport.Response) FutureResult(com.tencent.angel.psagent.matrix.transport.FutureResult) ServerPartition(com.tencent.angel.ps.impl.matrix.ServerPartition)

Example 2 with FutureResult

use of com.tencent.angel.psagent.matrix.transport.FutureResult in project angel by Tencent.

the class SyncEventPusher method put.

/**
 * Forwards an update request to every other ps that stores the same partition and waits
 * for all of them to answer.
 *
 * Nothing is forwarded when the partition has a single location, or when this ps is not
 * the one listed first in {@code partLoc.psLocs}.
 *
 * @param request the partition request being replicated; marked as coming from a ps
 * @param msg     serialized request; each target receives its own copy and the original
 *                buffer is released once all copies have been handed to the client
 * @param partLoc locations of all ps that store the partition
 */
@Override
public void put(PartitionRequest request, ByteBuf msg, PartitionLocation partLoc) {
    request.setComeFromPs(true);
    msg.resetReaderIndex();
    // Patch the flag at byte offset 8 of the already-serialized message.
    msg.setBoolean(8, true);
    PartitionKey partKey = request.getPartKey();

    // Only one location: there is nobody to sync with.
    if (partLoc.psLocs.size() == 1) {
        return;
    }
    // Only the ps listed first pushes to the others.
    if (!partLoc.psLocs.get(0).psId.equals(context.getPSAttemptId().getPsId())) {
        return;
    }

    int locNum = partLoc.psLocs.size();
    List<FutureResult> pending = new ArrayList<>(locNum - 1);
    for (int locIndex = 1; locIndex < locNum; locIndex++) {
        pending.add(psClient.put(partLoc.psLocs.get(locIndex).psId, partLoc.psLocs.get(locIndex).loc, request, msg.copy()));
    }
    msg.release();

    // pending[locIndex - 1] is the answer of psLocs[locIndex].
    for (int locIndex = 1; locIndex < locNum; locIndex++) {
        try {
            Response response = (Response) pending.get(locIndex - 1).get();
            if (response.getResponseType() != ResponseType.SUCCESS) {
                increaseFailedCounter(partKey, partLoc.psLocs.get(locIndex));
            }
        } catch (Exception e) {
            LOG.error("wait for result for sync failed ", e);
            increaseFailedCounter(partKey, partLoc.psLocs.get(locIndex));
        }
    }
}
Also used : Response(com.tencent.angel.ml.matrix.transport.Response) FutureResult(com.tencent.angel.psagent.matrix.transport.FutureResult) ArrayList(java.util.ArrayList) PartitionKey(com.tencent.angel.PartitionKey)

Example 3 with FutureResult

use of com.tencent.angel.psagent.matrix.transport.FutureResult in project angel by Tencent.

the class PSClient method recoverPart.

/**
 * Sends a matrix partition to another ps so that it can be recovered there.
 *
 * A fresh sequence id is generated and used to register both the pending result and the
 * request before the serialized message is handed to {@code send}.
 *
 * @param serverId dest ps id
 * @param location dest ps location
 * @param part     the partition that needs to be recovered
 * @return future holding the recover result
 */
public FutureResult<Response> recoverPart(ParameterServerId serverId, Location location, ServerPartition part) {
    // Allocate the sequence id and register the pending result under it.
    final int seqId = seqIdGen.incrementAndGet();
    final FutureResult<Response> future = new FutureResult<>();
    seqIdToResultMap.put(seqId, future);

    // Build the recover request for this partition and register it under the same id.
    final PartitionKey key = part.getPartitionKey();
    final RecoverPartRequest recoverRequest = new RecoverPartRequest(
        context.getClockVectorManager().getClockVec(key.getMatrixId(), key.getPartitionId()),
        new PartitionKey(key.getMatrixId(), key.getPartitionId()),
        part);
    recoverRequest.getContext().setServerId(serverId);
    seqIdToRequestMap.put(seqId, recoverRequest);

    // Message layout: two ints (sequence id, method id) followed by the request body.
    final int headerLen = 8;
    final ByteBuf buf = ByteBufUtils.newByteBuf(headerLen + recoverRequest.bufferLen(), useDirectBuf);
    buf.writeInt(seqId);
    buf.writeInt(recoverRequest.getType().getMethodId());
    recoverRequest.serialize(buf);

    send(serverId, location, seqId, recoverRequest, buf, future);
    return future;
}
Also used : FutureResult(com.tencent.angel.psagent.matrix.transport.FutureResult) PartitionKey(com.tencent.angel.PartitionKey) ByteBuf(io.netty.buffer.ByteBuf)

Example 4 with FutureResult

use of com.tencent.angel.psagent.matrix.transport.FutureResult in project angel by Tencent.

the class MatrixClientAdapter method flush.

/**
 * Flushes a matrix oplog to the parameter servers.
 *
 * When there is no oplog and no clock update is requested, an already-completed successful
 * result is returned and nothing is sent.
 *
 * @param matrixId    matrix id
 * @param taskContext task context
 * @param matrixOpLog matrix oplog, may be null
 * @param updateClock true means the clock value should be updated after the matrix update
 * @return future of the merged flush result
 */
public Future<VoidResult> flush(int matrixId, TaskContext taskContext, MatrixOpLog matrixOpLog, boolean updateClock) {
    // Nothing to push and no clock to advance: answer immediately.
    if (matrixOpLog == null && !updateClock) {
        FutureResult<VoidResult> done = new FutureResult<>();
        done.set(new VoidResult(ResponseType.SUCCESS));
        return done;
    }

    Map<PartitionKey, List<RowUpdateSplit>> updatesByPart = new HashMap<>();
    FlushRequest flushRequest = new FlushRequest(taskContext.getMatrixClock(matrixId), taskContext.getIndex(), matrixId, matrixOpLog, updateClock);
    LOG.debug("start to flush update for matrix=" + matrixId + ", taskIndex=" + taskContext.getIndex());

    // Split the matrix oplog according to the matrix partitions.
    long splitStart = System.currentTimeMillis();
    if (matrixOpLog != null) {
        matrixOpLog.split(updatesByPart);
    }
    LOG.debug("split use time=" + (System.currentTimeMillis() - splitStart));

    // A clock update has to reach all partitions, so add requests for them as well.
    if (updateClock) {
        fillPartRequestForClock(matrixId, updatesByPart, taskContext);
    }

    FlushResponseCache responseCache = new FlushResponseCache(updatesByPart.size());
    pushUpdates(matrixId, updatesByPart, taskContext, updateClock, responseCache);
    requestToResponseMap.put(flushRequest, responseCache);
    return responseCache.getMergedResult();
}
Also used : VoidResult(com.tencent.angel.ml.matrix.psf.update.enhance.VoidResult) FutureResult(com.tencent.angel.psagent.matrix.transport.FutureResult) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) Int2IntOpenHashMap(it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap) PartitionKey(com.tencent.angel.PartitionKey) RowUpdateSplit(com.tencent.angel.psagent.matrix.oplog.cache.RowUpdateSplit)

Example 5 with FutureResult

use of com.tencent.angel.psagent.matrix.transport.FutureResult in project angel by Tencent.

the class PS2PSPusherImpl method updateClock.

/**
 * Forwards a clock update for a partition to every other ps that stores it and waits for
 * all of them to answer.
 *
 * Nothing is forwarded when the partition has a single location, or when this ps is not
 * the one listed first in {@code partLoc.psLocs}. A non-successful answer increases the
 * failed counter of the ps that produced it; network errors and timeouts are additionally
 * reported through the ps-failed report.
 *
 * @param partKey   partition whose clock is updated
 * @param taskIndex index of the task that advanced the clock
 * @param clock     new clock value
 * @param partLoc   locations of all ps that store the partition
 */
@Override
public void updateClock(PartitionKey partKey, int taskIndex, int clock, PartitionLocation partLoc) {
    // Only one location: there is nobody to sync with.
    if (partLoc.psLocs.size() == 1) {
        return;
    }
    // Only the ps listed first pushes to the others.
    if (!partLoc.psLocs.get(0).psId.equals(context.getPSAttemptId().getPsId())) {
        return;
    }

    int size = partLoc.psLocs.size();
    List<FutureResult> results = new ArrayList<>(size - 1);
    for (int i = 1; i < size; i++) {
        results.add(psClient.updateClock(partLoc.psLocs.get(i).psId, partLoc.psLocs.get(i).loc, partKey, taskIndex, clock));
    }

    // results[i - 1] is the answer of psLocs[i] because the send loop starts at 1.
    // Failures must be attributed to psLocs[i], not to psLocs[i - 1] (which for the
    // first result would be this ps itself).
    for (int i = 1; i < size; i++) {
        try {
            Response result = (Response) results.get(i - 1).get();
            if (result.getResponseType() != ResponseType.SUCCESS) {
                if (result.getResponseType() == ResponseType.NETWORK_ERROR || result.getResponseType() == ResponseType.TIMEOUT) {
                    context.getPSFailedReport().psFailed(partLoc.psLocs.get(i));
                }
                increaseFailedCounter(partKey, partLoc.psLocs.get(i));
            }
        } catch (Exception e) {
            LOG.error("wait for result for sync failed ", e);
            increaseFailedCounter(partKey, partLoc.psLocs.get(i));
        }
    }
}
Also used : Response(com.tencent.angel.ml.matrix.transport.Response) FutureResult(com.tencent.angel.psagent.matrix.transport.FutureResult) ArrayList(java.util.ArrayList) ExecutionException(java.util.concurrent.ExecutionException)

Aggregations

FutureResult (com.tencent.angel.psagent.matrix.transport.FutureResult)7 Response (com.tencent.angel.ml.matrix.transport.Response)4 PartitionKey (com.tencent.angel.PartitionKey)3 ServerPartition (com.tencent.angel.ps.impl.matrix.ServerPartition)2 ByteBuf (io.netty.buffer.ByteBuf)2 ArrayList (java.util.ArrayList)2 VoidResult (com.tencent.angel.ml.matrix.psf.update.enhance.VoidResult)1 RowUpdateSplit (com.tencent.angel.psagent.matrix.oplog.cache.RowUpdateSplit)1 Int2IntOpenHashMap (it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap)1 Int2ObjectOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap)1 ExecutionException (java.util.concurrent.ExecutionException)1