use of com.tencent.angel.PartitionKey in project angel by Tencent.
the class PS2PSPusherImpl method start.
/**
 * Start the PS client, create the worker pool and launch the background
 * "Recover-checker" thread.
 *
 * <p>The checker thread wakes up every 30 seconds and scans {@code failedUpdateCounters}
 * under the read lock. For every partition that has a non-empty failed-counter map it asks the
 * master for the partition location; when the partition has more than one PS location and the
 * first location is this PS, it calls {@code recover} for each of the other locations whose
 * failed counter is positive. After releasing the lock it waits for all submitted recoveries
 * via {@code waitResults}.
 *
 * <p>The thread exits when {@code stopped} is set or when it is interrupted.
 */
public void start() {
  psClient.start();
  workerPool = Executors.newFixedThreadPool(16);
  recoverChecker = new Thread(() -> {
    while (!stopped.get() && !Thread.interrupted()) {
      try {
        Thread.sleep(30000);
        Map<RecoverPartKey, FutureResult> futures = new HashMap<>();

        // Acquire the lock BEFORE entering the try block, so the finally clause can never
        // try to release a read lock that was not actually taken.
        lock.readLock().lock();
        try {
          for (Map.Entry<PartitionKey, Map<PSLocation, Integer>> partEntry : failedUpdateCounters.entrySet()) {
            PartitionKey partKey = partEntry.getKey();
            Map<PSLocation, Integer> failedCounters = partEntry.getValue();
            if (failedCounters.isEmpty()) {
              continue;
            }

            PartitionLocation partLoc = context.getMaster().getPartLocation(partKey.getMatrixId(), partKey.getPartitionId());
            // Only the PS stored in the first slot of the location list triggers recovery.
            if (partLoc.psLocs.size() > 1 && partLoc.psLocs.get(0).psId.equals(context.getPSAttemptId().getPsId())) {
              for (int i = 1; i < partLoc.psLocs.size(); i++) {
                PSLocation psLoc = partLoc.psLocs.get(i);
                // Single lookup; a missing (or null) counter means nothing to recover.
                Integer failedNum = failedCounters.get(psLoc);
                if (failedNum != null && failedNum > 0) {
                  RecoverPartKey recoverPartKey = new RecoverPartKey(partKey, psLoc);
                  futures.put(recoverPartKey, recover(recoverPartKey));
                }
              }
            }
          }
        } finally {
          lock.readLock().unlock();
        }

        waitResults(futures);
      } catch (InterruptedException e) {
        // Catching the exception clears the interrupt flag, so the loop guard would never
        // observe it. Restore the flag and leave the loop explicitly.
        if (!stopped.get()) {
          LOG.error("Recover checker is interrupted ", e);
        }
        Thread.currentThread().interrupt();
        return;
      } catch (Throwable e) {
        // Keep the checker alive on unexpected failures, but never drop the cause.
        if (!stopped.get()) {
          LOG.error("Recover checker failed ", e);
        }
      }
    }
  });
  recoverChecker.setName("Recover-checker");
  recoverChecker.start();
}
use of com.tencent.angel.PartitionKey in project angel by Tencent.
the class SyncEventPusher method put.
/**
 * Forward a partition request to the other PS locations of the partition and wait for
 * every response.
 *
 * <p>Nothing is sent when the partition has a single PS location, or when this PS is not the
 * one stored in the first slot of {@code partLoc.psLocs}. Otherwise a copy of {@code msg} is
 * sent to each remaining location, {@code msg} itself is released, and the failed counter of a
 * location is increased whenever its response is not {@code SUCCESS} or waiting for it throws.
 *
 * @param request the partition request to forward
 * @param msg the serialized request
 * @param partLoc the PS locations of the partition
 */
@Override
public void put(PartitionRequest request, ByteBuf msg, PartitionLocation partLoc) {
  // Mark the request as coming from a PS, on the object and in the serialized buffer.
  // NOTE(review): byte 8 is presumably the serialized come-from-ps flag; confirm against
  // the request wire layout.
  request.setComeFromPs(true);
  msg.resetReaderIndex();
  msg.setBoolean(8, true);
  PartitionKey partKey = request.getPartKey();

  // Single location: there is nobody to forward to.
  if (partLoc.psLocs.size() == 1) {
    return;
  }

  // Only the PS in the first slot forwards the request.
  if (!partLoc.psLocs.get(0).psId.equals(context.getPSAttemptId().getPsId())) {
    return;
  }

  int locNum = partLoc.psLocs.size();
  List<FutureResult> pending = new ArrayList<>(locNum - 1);
  for (int idx = 1; idx < locNum; idx++) {
    pending.add(psClient.put(partLoc.psLocs.get(idx).psId, partLoc.psLocs.get(idx).loc, request, msg.copy()));
  }
  // Every target received its own copy, so the source buffer is released here.
  msg.release();

  // pending[idx - 1] holds the future of the location stored at psLocs[idx].
  for (int idx = 1; idx < locNum; idx++) {
    try {
      Response reply = (Response) pending.get(idx - 1).get();
      if (reply.getResponseType() != ResponseType.SUCCESS) {
        increaseFailedCounter(partKey, partLoc.psLocs.get(idx));
      }
    } catch (Exception e) {
      LOG.error("wait for result for sync failed ", e);
      increaseFailedCounter(partKey, partLoc.psLocs.get(idx));
    }
  }
}
use of com.tencent.angel.PartitionKey in project angel by Tencent.
the class PSClient method recoverPart.
/**
 * Send a recover request for a matrix partition to a ps.
 *
 * <p>The returned future and the request are both registered under a freshly generated
 * sequence id before the serialized request is handed to {@code send}.
 *
 * @param serverId dest ps id
 * @param location dest ps location
 * @param part the partition that needs to be recovered
 * @return the future result of the recover request
 */
public FutureResult<Response> recoverPart(ParameterServerId serverId, Location location, ServerPartition part) {
  // Allocate the sequence id and register the pending result under it
  final int seqId = seqIdGen.incrementAndGet();
  final FutureResult<Response> future = new FutureResult<>();
  seqIdToResultMap.put(seqId, future);

  // Build the request from the partition's clock vector, a new key with the same ids,
  // and the partition itself
  final PartitionKey sourceKey = part.getPartitionKey();
  final int matrixId = sourceKey.getMatrixId();
  final int partitionId = sourceKey.getPartitionId();
  final RecoverPartRequest recoverRequest = new RecoverPartRequest(
      context.getClockVectorManager().getClockVec(matrixId, partitionId),
      new PartitionKey(matrixId, partitionId),
      part);
  recoverRequest.getContext().setServerId(serverId);
  seqIdToRequestMap.put(seqId, recoverRequest);

  // Serialize: 8 header bytes (two ints: sequence id and method id) followed by the request
  final ByteBuf buffer = ByteBufUtils.newByteBuf(8 + recoverRequest.bufferLen(), useDirectBuf);
  buffer.writeInt(seqId);
  buffer.writeInt(recoverRequest.getType().getMethodId());
  recoverRequest.serialize(buffer);

  send(serverId, location, seqId, recoverRequest, buffer, future);
  return future;
}
use of com.tencent.angel.PartitionKey in project angel by Tencent.
the class ServerMatrix method savePartitions.
/**
 * Write the partitions {@code partitionIds[startPos .. endPos)} of this matrix into a single
 * file and record the meta data of each written partition.
 *
 * <p>The data is first written to a temporary path derived from the destination file and the
 * temporary file is renamed to the destination only after the stream has been flushed and
 * closed. The output stream is closed even when writing a partition fails.
 *
 * @param matrixPath the directory the file is created in
 * @param fs the file system to write to
 * @param partitionIds ids of the partitions of this matrix
 * @param startPos index in {@code partitionIds} of the first partition to write (inclusive);
 *     this partition id is also used to build the file name
 * @param endPos index in {@code partitionIds} after the last partition to write (exclusive)
 * @param serverMatrixMeta receives one {@code ModelPartitionMeta} per written partition
 * @throws IOException if creating, writing, closing or renaming the file fails
 */
private void savePartitions(Path matrixPath, FileSystem fs, List<Integer> partitionIds, int startPos, int endPos, PSModelFilesMeta serverMatrixMeta) throws IOException {
  Path destFile = new Path(matrixPath, ModelFilesUtils.fileName(context.getPs().getServerId(), partitionIds.get(startPos)));
  Path tmpDestFile = HdfsUtil.toTmpPath(destFile);

  // try-with-resources: the stream must not leak when saving a partition throws
  try (FSDataOutputStream out = fs.create(tmpDestFile)) {
    for (int i = startPos; i < endPos; i++) {
      int partitionId = partitionIds.get(i);
      LOG.info("Write partition " + partitionId + " of matrix " + matrixName + " to " + tmpDestFile);

      // Offset of this partition inside the file
      long streamPos = out.getPos();
      ServerPartition partition = partitionMaps.get(partitionId);
      PartitionKey partKey = partition.getPartitionKey();

      // The length is not known yet: create the meta with 0 and fix it up after the write
      ModelPartitionMeta partMeta = new ModelPartitionMeta(partKey.getPartitionId(), partKey.getStartRow(), partKey.getEndRow(), partKey.getStartCol(), partKey.getEndCol(), partition.elementNum(), destFile.getName(), streamPos, 0);
      partition.save(out, partMeta);
      partMeta.setLength(out.getPos() - streamPos);
      serverMatrixMeta.addPartitionMeta(partitionId, partMeta);
    }
    out.flush();
  }

  // Publish the file only after it has been completely written and closed
  HdfsUtil.rename(tmpDestFile, destFile, fs);
}
use of com.tencent.angel.PartitionKey in project angel by Tencent.
the class WorkerPool method recoverPart.
/**
 * Handle a recover-partition request.
 *
 * <p>If the request carries a clock vector, it is stored in the clock vector manager first.
 * Then the local partition is looked up: when it does not exist a
 * {@code SERVER_HANDLE_FATAL} response is returned, otherwise the partition is recovered from
 * the partition carried by the request and a {@code SUCCESS} response is returned.
 *
 * @param seqId rpc request id
 * @param request the recover request
 * @return serialized rpc response: the sequence id followed by the response
 */
private ByteBuf recoverPart(int seqId, RecoverPartRequest request) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("recover part request=" + request + " with seqId=" + seqId);
  }

  final long beginTs = System.currentTimeMillis();
  final ByteBuf reply = ByteBufUtils.newByteBuf(8 + 4);
  reply.writeInt(seqId);

  final PartitionKey key = request.getPartKey();

  // Install the clock vector shipped with the request, if there is one
  final Int2IntOpenHashMap clocks = request.getTaskIndexToClockMap();
  if (clocks != null) {
    context.getClockVectorManager().setClockVec(key.getMatrixId(), key.getPartitionId(), clocks);
  }

  final ServerPartition localPart = context.getMatrixStorageManager().getPart(key.getMatrixId(), key.getPartitionId());
  if (localPart == null) {
    // Unknown partition: answer with a fatal response instead of recovering
    Response failure = new Response(ResponseType.SERVER_HANDLE_FATAL, "can not find the partition " + key);
    failure.serialize(reply);
    return reply;
  }

  localPart.recover(request.getPart());
  Response success = new Response(ResponseType.SUCCESS);
  success.serialize(reply);

  if (LOG.isDebugEnabled()) {
    LOG.debug("recover partition request " + request + " use time=" + (System.currentTimeMillis() - beginTs));
  }
  return reply;
}
Aggregations