Search in sources :

Example 1 with PSLocation

use of com.tencent.angel.ml.matrix.transport.PSLocation in project angel by Tencent.

the class PS2PSPusherImpl method start.

/**
 * Start the pusher.
 *
 * <p>Starts {@code psClient}, creates the fixed-size worker pool and launches the
 * "Recover-checker" thread. That thread periodically scans {@code failedUpdateCounters};
 * for every partition whose first PS in the location list is this PS, it triggers
 * {@code recover} for each other PS of that partition with a positive failed-update
 * counter, then waits for all triggered recoveries via {@code waitResults}.
 *
 * <p>The checker exits when {@code stopped} is set or when its thread is interrupted.
 * Any other failure in a round is logged (unless already stopped) and the checker retries
 * in the next round.
 */
public void start() {
    psClient.start();
    workerPool = Executors.newFixedThreadPool(16);
    recoverChecker = new Thread(() -> {
        // Pause before each recovery scan, in milliseconds.
        final long checkIntervalMs = 30000L;
        while (!stopped.get() && !Thread.interrupted()) {
            try {
                Thread.sleep(checkIntervalMs);
                Map<RecoverPartKey, FutureResult> futures = new HashMap<>();
                // Acquire the lock BEFORE the try block: if acquisition itself fails, the
                // finally clause must not attempt to release a lock that was never taken.
                lock.readLock().lock();
                try {
                    // NOTE(review): getPartLocation looks like a remote call to the master and
                    // is made while holding the read lock - confirm this is acceptable.
                    for (Map.Entry<PartitionKey, Map<PSLocation, Integer>> partEntry : failedUpdateCounters.entrySet()) {
                        PartitionKey partKey = partEntry.getKey();
                        Map<PSLocation, Integer> failedCounters = partEntry.getValue();
                        if (failedCounters.isEmpty()) {
                            continue;
                        }
                        PartitionLocation partLoc = context.getMaster().getPartLocation(partKey.getMatrixId(), partKey.getPartitionId());
                        // Only act when this PS is the first entry of the partition's PS list
                        // and there is at least one other PS for the partition.
                        if (partLoc.psLocs.size() > 1 && partLoc.psLocs.get(0).psId.equals(context.getPSAttemptId().getPsId())) {
                            for (int i = 1; i < partLoc.psLocs.size(); i++) {
                                PSLocation psLoc = partLoc.psLocs.get(i);
                                // Single lookup; a missing or null counter means nothing to recover.
                                Integer failedCount = failedCounters.get(psLoc);
                                if (failedCount != null && failedCount > 0) {
                                    RecoverPartKey recoverPartKey = new RecoverPartKey(partKey, psLoc);
                                    futures.put(recoverPartKey, recover(recoverPartKey));
                                }
                            }
                        }
                    }
                } finally {
                    lock.readLock().unlock();
                }
                waitResults(futures);
            } catch (InterruptedException e) {
                // Restore the interrupt status and leave the loop instead of silently
                // clearing the flag and sleeping for another round.
                Thread.currentThread().interrupt();
                break;
            } catch (Throwable e) {
                if (!stopped.get()) {
                    // Keep the checker alive, but make the failure diagnosable.
                    LOG.error("Recover checker failed, will retry in the next round", e);
                }
            }
        }
    });
    recoverChecker.setName("Recover-checker");
    recoverChecker.start();
}
Also used : PSLocation(com.tencent.angel.ml.matrix.transport.PSLocation) RecoverPartKey(com.tencent.angel.ps.recovery.ha.RecoverPartKey) PartitionKey(com.tencent.angel.PartitionKey) HashMap(java.util.HashMap) Map(java.util.Map) PartitionLocation(com.tencent.angel.ml.matrix.PartitionLocation)

Example 2 with PSLocation

use of com.tencent.angel.ml.matrix.transport.PSLocation in project angel by Tencent.

the class MasterClient method getPartLocation.

/**
 * Query the master for the parameter servers that store a matrix partition, together
 * with the location of each of them.
 *
 * @param matrixId matrix id
 * @param partId partition id
 * @return the partition location, built from the PS entries of the master's response
 *         in response order
 * @throws ServiceException if the call to the master fails
 */
public PartitionLocation getPartLocation(int matrixId, int partId) throws ServiceException {
    MLProtos.GetPartLocationRequest request = MLProtos.GetPartLocationRequest.newBuilder()
        .setMatrixId(matrixId)
        .setPartId(partId)
        .build();
    MLProtos.GetPartLocationResponse reply = masterProxy.getPartLocation(null, request);

    List<MLProtos.PSLocationProto> locationProtos = reply.getLocationsList();
    List<PSLocation> locations = new ArrayList<>(locationProtos.size());
    // Convert every protobuf entry into a (PS id, location) pair.
    for (MLProtos.PSLocationProto proto : locationProtos) {
        locations.add(new PSLocation(
            ProtobufUtil.convertToId(proto.getPsId()),
            ProtobufUtil.convertToLocation(proto)));
    }
    return new PartitionLocation(locations);
}
Also used : PSLocation(com.tencent.angel.ml.matrix.transport.PSLocation) MLProtos(com.tencent.angel.protobuf.generated.MLProtos) PartitionLocation(com.tencent.angel.ml.matrix.PartitionLocation)

Example 3 with PSLocation

use of com.tencent.angel.ml.matrix.transport.PSLocation in project angel by Tencent.

the class PSMatrixMetaManager method getPartLocation.

/**
 * Build the location of a partition: the parameter servers returned by
 * {@code getPss} for the partition, each paired with its location as reported by the
 * location manager.
 *
 * @param partitionKey partition information
 * @return partition location, with one entry per PS in the order returned by {@code getPss}
 * @throws ServiceException declared by this method; presumably raised by the lookups it
 *         delegates to
 */
public PartitionLocation getPartLocation(PartitionKey partitionKey) throws ServiceException {
    List<ParameterServerId> serverIds = getPss(partitionKey);
    List<PSLocation> locations = new ArrayList<>(serverIds.size());
    for (ParameterServerId serverId : serverIds) {
        // Resolve the current location of this PS through the location manager.
        locations.add(new PSLocation(serverId, context.getLocationManager().getPsLocation(serverId)));
    }
    return new PartitionLocation(locations);
}
Also used : PSLocation(com.tencent.angel.ml.matrix.transport.PSLocation) ArrayList(java.util.ArrayList) ParameterServerId(com.tencent.angel.ps.ParameterServerId) PartitionLocation(com.tencent.angel.ml.matrix.PartitionLocation)

Example 4 with PSLocation

use of com.tencent.angel.ml.matrix.transport.PSLocation in project angel by Tencent.

the class MasterClient method getPartLocation.

/**
 * Query the master for the parameter servers that store a matrix partition, together
 * with the location of each of them.
 *
 * @param matrixId matrix id
 * @param partId partition id
 * @return the partition location, built from the PS entries of the master's response
 *         in response order
 * @throws ServiceException if the call to the master fails
 */
public PartitionLocation getPartLocation(int matrixId, int partId) throws ServiceException {
    GetPartLocationRequest request = GetPartLocationRequest.newBuilder()
        .setMatrixId(matrixId)
        .setPartId(partId)
        .build();
    GetPartLocationResponse reply = master.getPartLocation(null, request);

    List<PSLocationProto> locationProtos = reply.getLocationsList();
    List<PSLocation> locations = new ArrayList<>(locationProtos.size());
    // Convert every protobuf entry into a (PS id, location) pair.
    for (PSLocationProto proto : locationProtos) {
        locations.add(new PSLocation(
            ProtobufUtil.convertToId(proto.getPsId()),
            ProtobufUtil.convertToLocation(proto)));
    }
    return new PartitionLocation(locations);
}
Also used : PSLocation(com.tencent.angel.ml.matrix.transport.PSLocation) PartitionLocation(com.tencent.angel.ml.matrix.PartitionLocation)

Example 5 with PSLocation

use of com.tencent.angel.ml.matrix.transport.PSLocation in project angel by Tencent.

the class PSAgentMatrixMetaManager method getPartLocation.

/**
 * Build the location of a partition: the parameter servers returned by
 * {@code getPss} for the partition, each paired with its location as reported by the
 * PS agent's location manager.
 *
 * @param partitionKey partition information
 * @return partition location, with one entry per PS in the order returned by {@code getPss}
 */
public PartitionLocation getPartLocation(PartitionKey partitionKey) {
    List<ParameterServerId> serverIds = getPss(partitionKey);
    List<PSLocation> locations = new ArrayList<>(serverIds.size());
    for (ParameterServerId serverId : serverIds) {
        // Resolve the current location of this PS through the agent-wide location manager.
        locations.add(new PSLocation(serverId, PSAgentContext.get().getLocationManager().getPsLocation(serverId)));
    }
    return new PartitionLocation(locations);
}
Also used : PSLocation(com.tencent.angel.ml.matrix.transport.PSLocation) ParameterServerId(com.tencent.angel.ps.ParameterServerId) PartitionLocation(com.tencent.angel.ml.matrix.PartitionLocation)

Aggregations

PSLocation (com.tencent.angel.ml.matrix.transport.PSLocation)10 ParameterServerId (com.tencent.angel.ps.ParameterServerId)6 PartitionLocation (com.tencent.angel.ml.matrix.PartitionLocation)5 HashMap (java.util.HashMap)3 PartitionKey (com.tencent.angel.PartitionKey)2 Location (com.tencent.angel.common.location.Location)2 RecoverPartKey (com.tencent.angel.ps.recovery.ha.RecoverPartKey)2 DenseIntVector (com.tencent.angel.ml.math.vector.DenseIntVector)1 MLProtos (com.tencent.angel.protobuf.generated.MLProtos)1 PSAttemptId (com.tencent.angel.ps.PSAttemptId)1 MatrixStorageManager (com.tencent.angel.ps.impl.MatrixStorageManager)1 ParameterServer (com.tencent.angel.ps.impl.ParameterServer)1 ServerDenseIntRow (com.tencent.angel.ps.impl.matrix.ServerDenseIntRow)1 ServerMatrix (com.tencent.angel.ps.impl.matrix.ServerMatrix)1 MatrixClient (com.tencent.angel.psagent.matrix.MatrixClient)1 Worker (com.tencent.angel.worker.Worker)1 WorkerAttemptId (com.tencent.angel.worker.WorkerAttemptId)1 WorkerGroupId (com.tencent.angel.worker.WorkerGroupId)1 WorkerId (com.tencent.angel.worker.WorkerId)1 TaskContext (com.tencent.angel.worker.task.TaskContext)1