Search in sources:

Example 1 with Response

use of com.tencent.angel.ml.matrix.transport.Response in project angel by Tencent.

In class PS2PSPusherImpl, method recover.

@Override
public FutureResult<Response> recover(final RecoverPartKey partKey) {
    FutureResult<Response> result = new FutureResult<>();
    workerPool.execute(() -> {
        ServerPartition part = context.getMatrixStorageManager().getPart(partKey.partKey.getMatrixId(), partKey.partKey.getPartitionId());
        if (part == null) {
            result.set(new Response(ResponseType.UNKNOWN_ERROR, "Can not find partition " + partKey.partKey.getMatrixId() + ":" + partKey.partKey.getPartitionId()));
            return;
        }
        try {
            part.waitAndSetReadOnly();
            result.set(psClient.recoverPart(partKey.psLoc.psId, partKey.psLoc.loc, part).get());
        } catch (Throwable e) {
            result.set(new Response(ResponseType.NETWORK_ERROR, e.getMessage()));
            LOG.error("handle recover event " + partKey + " failed ", e);
        } finally {
            part.setState(PartitionState.READ_AND_WRITE);
            LOG.info("ps " + context.getPSAttemptId() + " set partition " + part.getPartitionKey() + " to " + part.getState());
        }
    });
    return result;
}
Also used : Response(com.tencent.angel.ml.matrix.transport.Response) FutureResult(com.tencent.angel.psagent.matrix.transport.FutureResult) ServerPartition(com.tencent.angel.ps.impl.matrix.ServerPartition)

Example 2 with Response

use of com.tencent.angel.ml.matrix.transport.Response in project angel by Tencent.

In class SyncEventPusher, method put.

@Override
public void put(PartitionRequest request, ByteBuf msg, PartitionLocation partLoc) {
    request.setComeFromPs(true);
    msg.resetReaderIndex();
    msg.setBoolean(8, true);
    PartitionKey partKey = request.getPartKey();
    if (partLoc.psLocs.size() == 1) {
        return;
    } else {
        if (partLoc.psLocs.get(0).psId.equals(context.getPSAttemptId().getPsId())) {
            int size = partLoc.psLocs.size();
            List<FutureResult> results = new ArrayList<>(size - 1);
            for (int i = 1; i < size; i++) {
                results.add(psClient.put(partLoc.psLocs.get(i).psId, partLoc.psLocs.get(i).loc, request, msg.copy()));
            }
            msg.release();
            for (int i = 0; i < size - 1; i++) {
                try {
                    Response result = (Response) results.get(i).get();
                    if (result.getResponseType() != ResponseType.SUCCESS) {
                        increaseFailedCounter(partKey, partLoc.psLocs.get(i + 1));
                    }
                } catch (Exception e) {
                    LOG.error("wait for result for sync failed ", e);
                    increaseFailedCounter(partKey, partLoc.psLocs.get(i + 1));
                }
            }
        }
    }
}
Also used : Response(com.tencent.angel.ml.matrix.transport.Response) FutureResult(com.tencent.angel.psagent.matrix.transport.FutureResult) ArrayList(java.util.ArrayList) PartitionKey(com.tencent.angel.PartitionKey)

Example 3 with Response

use of com.tencent.angel.ml.matrix.transport.Response in project angel by Tencent.

In class PS2PSPusherImpl, method updateClock.

@Override
public void updateClock(PartitionKey partKey, int taskIndex, int clock, PartitionLocation partLoc) {
    if (partLoc.psLocs.size() == 1) {
        return;
    } else {
        if (partLoc.psLocs.get(0).psId.equals(context.getPSAttemptId().getPsId())) {
            int size = partLoc.psLocs.size();
            List<FutureResult> results = new ArrayList<>(size - 1);
            for (int i = 1; i < size; i++) {
                results.add(psClient.updateClock(partLoc.psLocs.get(i).psId, partLoc.psLocs.get(i).loc, partKey, taskIndex, clock));
            }
            size = results.size();
            for (int i = 0; i < size; i++) {
                try {
                    Response result = (Response) results.get(i).get();
                    if (result.getResponseType() != ResponseType.SUCCESS) {
                        if (result.getResponseType() == ResponseType.NETWORK_ERROR || result.getResponseType() == ResponseType.TIMEOUT) {
                            context.getPSFailedReport().psFailed(partLoc.psLocs.get(i));
                        }
                        increaseFailedCounter(partKey, partLoc.psLocs.get(i));
                    }
                } catch (Exception e) {
                    LOG.error("wait for result for sync failed ", e);
                    increaseFailedCounter(partKey, partLoc.psLocs.get(i));
                }
            }
        }
    }
}
Also used : Response(com.tencent.angel.ml.matrix.transport.Response) FutureResult(com.tencent.angel.psagent.matrix.transport.FutureResult) ArrayList(java.util.ArrayList) ExecutionException(java.util.concurrent.ExecutionException)

Example 4 with Response

use of com.tencent.angel.ml.matrix.transport.Response in project angel by Tencent.

In class PeriodPusher, method recover.

@Override
public FutureResult<Response> recover(RecoverPartKey partKey) {
    FutureResult<Response> result = new FutureResult<>();
    workerPool.execute(() -> {
        ServerPartition part = context.getMatrixStorageManager().getPart(partKey.partKey);
        if (part == null) {
            result.set(new Response(ResponseType.UNKNOWN_ERROR, "Can not find partition " + partKey.partKey.getMatrixId() + ":" + partKey.partKey.getPartitionId()));
            return;
        }
        try {
            result.set(psClient.recoverPart(partKey.psLoc.psId, partKey.psLoc.loc, part).get());
        } catch (Throwable e) {
            LOG.error("recover part " + partKey + " falied ", e);
            result.set(new Response(ResponseType.UNKNOWN_ERROR, e.getMessage()));
        }
    });
    return result;
}
Also used : Response(com.tencent.angel.ml.matrix.transport.Response) FutureResult(com.tencent.angel.psagent.matrix.transport.FutureResult) ServerPartition(com.tencent.angel.ps.impl.matrix.ServerPartition)

Aggregations

Response (com.tencent.angel.ml.matrix.transport.Response): 4, FutureResult (com.tencent.angel.psagent.matrix.transport.FutureResult): 4, ServerPartition (com.tencent.angel.ps.impl.matrix.ServerPartition): 2, ArrayList (java.util.ArrayList): 2, PartitionKey (com.tencent.angel.PartitionKey): 1, ExecutionException (java.util.concurrent.ExecutionException): 1