Use of com.tencent.angel.psagent.matrix.transport.FutureResult in the project angel by Tencent.
Class PS2PSPusherImpl, method recover.
/**
 * Asynchronously sends a locally stored partition to another ps.
 *
 * <p>The work is submitted to {@code workerPool}. The returned future is set by that task to
 * one of: an {@code UNKNOWN_ERROR} response when the partition is not found in the local
 * matrix storage, the response obtained from {@code psClient.recoverPart}, or a
 * {@code NETWORK_ERROR} response when anything is thrown while recovering.
 *
 * @param partKey the partition key together with the destination ps location
 * @return a future that is set by the submitted task
 */
@Override
public FutureResult<Response> recover(final RecoverPartKey partKey) {
  final FutureResult<Response> future = new FutureResult<>();
  workerPool.execute(() -> {
    final int matrixId = partKey.partKey.getMatrixId();
    final int partitionId = partKey.partKey.getPartitionId();
    final ServerPartition partition =
        context.getMatrixStorageManager().getPart(matrixId, partitionId);

    if (partition != null) {
      try {
        // Switch the partition to read-only before it is sent
        partition.waitAndSetReadOnly();
        Response response =
            psClient.recoverPart(partKey.psLoc.psId, partKey.psLoc.loc, partition).get();
        future.set(response);
      } catch (Throwable cause) {
        future.set(new Response(ResponseType.NETWORK_ERROR, cause.getMessage()));
        LOG.error("handle recover event " + partKey + " failed ", cause);
      } finally {
        // Always make the partition writable again, whether or not the transfer succeeded
        partition.setState(PartitionState.READ_AND_WRITE);
        LOG.info("ps " + context.getPSAttemptId() + " set partition " + partition.getPartitionKey() + " to " + partition.getState());
      }
    } else {
      future.set(new Response(ResponseType.UNKNOWN_ERROR, "Can not find partition " + matrixId + ":" + partitionId));
    }
  });
  return future;
}
Use of com.tencent.angel.psagent.matrix.transport.FutureResult in the project angel by Tencent.
Class SyncEventPusher, method put.
/**
 * Forwards a serialized partition request to the other ps listed for the partition.
 *
 * <p>The request is first marked as coming from a ps. Nothing more happens when
 * {@code partLoc.psLocs} holds a single entry, or when its first entry is not this ps.
 * Otherwise a copy of {@code msg} is sent to every entry after the first, {@code msg} itself
 * is released, and the responses are awaited in order; each non-success response or failed
 * wait increases the failed counter for the corresponding ps location.
 *
 * @param request the partition request being forwarded
 * @param msg the serialized request
 * @param partLoc the ps locations of the partition
 */
@Override
public void put(PartitionRequest request, ByteBuf msg, PartitionLocation partLoc) {
  request.setComeFromPs(true);
  msg.resetReaderIndex();
  // NOTE(review): index 8 is presumably the serialized "come from ps" flag - confirm
  msg.setBoolean(8, true);
  PartitionKey partKey = request.getPartKey();

  if (partLoc.psLocs.size() == 1) {
    return;
  }
  boolean firstLocIsThisPs =
      partLoc.psLocs.get(0).psId.equals(context.getPSAttemptId().getPsId());
  if (!firstLocIsThisPs) {
    return;
  }

  final int locCount = partLoc.psLocs.size();
  List<FutureResult> pending = new ArrayList<>(locCount - 1);
  for (int locIdx = 1; locIdx < locCount; locIdx++) {
    pending.add(psClient.put(partLoc.psLocs.get(locIdx).psId, partLoc.psLocs.get(locIdx).loc, request, msg.copy()));
  }
  // Every destination received its own copy, so the original buffer is released here
  msg.release();

  // pending[locIdx - 1] holds the result for psLocs[locIdx]
  for (int locIdx = 1; locIdx < locCount; locIdx++) {
    try {
      Response response = (Response) pending.get(locIdx - 1).get();
      if (response.getResponseType() != ResponseType.SUCCESS) {
        increaseFailedCounter(partKey, partLoc.psLocs.get(locIdx));
      }
    } catch (Exception e) {
      LOG.error("wait for result for sync failed ", e);
      increaseFailedCounter(partKey, partLoc.psLocs.get(locIdx));
    }
  }
}
Use of com.tencent.angel.psagent.matrix.transport.FutureResult in the project angel by Tencent.
Class PSClient, method recoverPart.
/**
 * Sends a matrix partition to a ps so that the ps can recover it.
 *
 * <p>A new sequence id is generated, and both the pending result and the request are
 * registered under it before the serialized request is handed to {@code send}.
 *
 * @param serverId dest ps id
 * @param location dest ps location
 * @param part the partition that needs to be recovered
 * @return the future result registered for this request
 */
public FutureResult<Response> recoverPart(ParameterServerId serverId, Location location, ServerPartition part) {
  // Allocate a sequence id and register the pending result under it
  final int seqId = seqIdGen.incrementAndGet();
  final FutureResult<Response> future = new FutureResult<>();
  seqIdToResultMap.put(seqId, future);

  // Build the request from the partition, a fresh copy of its key and its clock vector
  final PartitionKey sourceKey = part.getPartitionKey();
  final int matrixId = sourceKey.getMatrixId();
  final int partitionId = sourceKey.getPartitionId();
  final RecoverPartRequest recoverRequest = new RecoverPartRequest(
      context.getClockVectorManager().getClockVec(matrixId, partitionId),
      new PartitionKey(matrixId, partitionId),
      part);
  recoverRequest.getContext().setServerId(serverId);
  seqIdToRequestMap.put(seqId, recoverRequest);

  // Serialize: two ints (sequence id, method id) followed by the request body
  final int headerLen = 8;
  ByteBuf buf = ByteBufUtils.newByteBuf(headerLen + recoverRequest.bufferLen(), useDirectBuf);
  buf.writeInt(seqId);
  buf.writeInt(recoverRequest.getType().getMethodId());
  recoverRequest.serialize(buf);

  send(serverId, location, seqId, recoverRequest, buf, future);
  return future;
}
Use of com.tencent.angel.psagent.matrix.transport.FutureResult in the project angel by Tencent.
Class MatrixClientAdapter, method flush.
/**
 * Flushes the oplog of a matrix to the parameter servers.
 *
 * <p>When there is no oplog and no clock update is requested, a result already set to
 * {@code SUCCESS} is returned. Otherwise the oplog is split by partition, requests for
 * the clock update are added when {@code updateClock} is set, the updates are pushed, and
 * the merged result of the response cache is returned.
 *
 * @param matrixId matrix id
 * @param taskContext task context
 * @param matrixOpLog matrix oplog, may be null
 * @param updateClock true means the clock value should be updated after the matrix update
 * @return the future result of the flush
 */
public Future<VoidResult> flush(int matrixId, TaskContext taskContext, MatrixOpLog matrixOpLog, boolean updateClock) {
  // Nothing to push and no clock to update
  if (matrixOpLog == null && !updateClock) {
    FutureResult<VoidResult> done = new FutureResult<>();
    done.set(new VoidResult(ResponseType.SUCCESS));
    return done;
  }

  Map<PartitionKey, List<RowUpdateSplit>> updatesByPart = new HashMap<>();
  FlushRequest flushRequest = new FlushRequest(
      taskContext.getMatrixClock(matrixId), taskContext.getIndex(), matrixId, matrixOpLog, updateClock);
  LOG.debug("start to flush update for matrix=" + matrixId + ", taskIndex=" + taskContext.getIndex());

  // Split the matrix oplog according to the matrix partitions
  final long splitStartTs = System.currentTimeMillis();
  if (matrixOpLog != null) {
    matrixOpLog.split(updatesByPart);
  }
  LOG.debug("split use time=" + (System.currentTimeMillis() - splitStartTs));

  // A clock update has to reach every partition, not only those with updates
  if (updateClock) {
    fillPartRequestForClock(matrixId, updatesByPart, taskContext);
  }

  FlushResponseCache responseCache = new FlushResponseCache(updatesByPart.size());
  pushUpdates(matrixId, updatesByPart, taskContext, updateClock, responseCache);
  requestToResponseMap.put(flushRequest, responseCache);
  return responseCache.getMergedResult();
}
Use of com.tencent.angel.psagent.matrix.transport.FutureResult in the project angel by Tencent.
Class PS2PSPusherImpl, method updateClock.
/**
 * Forwards a clock update for a partition to the other ps listed for that partition.
 *
 * <p>Nothing happens when {@code partLoc.psLocs} holds a single entry, or when its first
 * entry is not this ps. Otherwise the clock update is sent to every entry after the first
 * and the responses are awaited in order. A non-success response increases the failed
 * counter for the ps that produced it; if the response type is {@code NETWORK_ERROR} or
 * {@code TIMEOUT}, that ps is additionally reported as failed. A failed wait also increases
 * the failed counter.
 *
 * @param partKey the partition whose clock is updated
 * @param taskIndex the index of the task that advanced the clock
 * @param clock the new clock value
 * @param partLoc the ps locations of the partition
 */
@Override
public void updateClock(PartitionKey partKey, int taskIndex, int clock, PartitionLocation partLoc) {
  if (partLoc.psLocs.size() == 1) {
    return;
  }
  if (!partLoc.psLocs.get(0).psId.equals(context.getPSAttemptId().getPsId())) {
    return;
  }

  final int locCount = partLoc.psLocs.size();
  List<FutureResult> results = new ArrayList<>(locCount - 1);
  for (int locIdx = 1; locIdx < locCount; locIdx++) {
    results.add(psClient.updateClock(partLoc.psLocs.get(locIdx).psId, partLoc.psLocs.get(locIdx).loc, partKey, taskIndex, clock));
  }

  // results[locIdx - 1] holds the response from psLocs[locIdx]. Iterating over the psLocs
  // index keeps failures attributed to the ps that actually produced them, instead of
  // shifting every report one slot towards psLocs[0] (this ps).
  for (int locIdx = 1; locIdx < locCount; locIdx++) {
    try {
      Response result = (Response) results.get(locIdx - 1).get();
      if (result.getResponseType() != ResponseType.SUCCESS) {
        if (result.getResponseType() == ResponseType.NETWORK_ERROR || result.getResponseType() == ResponseType.TIMEOUT) {
          context.getPSFailedReport().psFailed(partLoc.psLocs.get(locIdx));
        }
        increaseFailedCounter(partKey, partLoc.psLocs.get(locIdx));
      }
    } catch (Exception e) {
      LOG.error("wait for result for sync failed ", e);
      increaseFailedCounter(partKey, partLoc.psLocs.get(locIdx));
    }
  }
}
Aggregations