Search in sources :

Example 1 with MatricesCache

use of com.tencent.angel.psagent.matrix.cache.MatricesCache in project angel by Tencent.

In the class PSAgent, method initAndStart:

/**
 * Initializes the components of this PS agent and then starts its services.
 *
 * <p>The steps run in this order: create the location manager and the master client, build the
 * local location, load the matrix metadata from the master, register the parameter server ids
 * (sorted by index) together with their locations, create the transport/cache/storage
 * components, initialize the consistency controller, and finally start the services.
 *
 * @throws Exception if any initialization or start-up step fails
 */
public void initAndStart() throws Exception {
    // Create the location manager and record the master location in it.
    locationManager = new PSAgentLocationManager(PSAgentContext.get());
    locationManager.setMasterLocation(masterLocation);

    // Build and initialize the rpc client to the master.
    masterClient = new MasterClient();
    masterClient.init();

    // Build the local location from the local ip and a chosen listen port.
    String localIp = NetUtils.getRealLocalIP();
    int port = NetUtils.chooseAListenPort(conf);
    location = new Location(localIp, port);

    // Initialize matrix meta information with the matrices reported by the master.
    clockCache = new ClockCache();
    List<MatrixMeta> matrixMetas = masterClient.getMatrices();
    LOG.info("===========================PSAgent get matrices from master," + matrixMetas.size());
    this.matrixMetaManager = new PSAgentMatrixMetaManager(clockCache);
    matrixMetaManager.addMatrices(matrixMetas);

    // Fetch the ps locations from the master and register them, ordered by ps index.
    Map<ParameterServerId, Location> psIdToLocMap = masterClient.getPSLocations();
    List<ParameterServerId> psIds = new ArrayList<>(psIdToLocMap.keySet());
    Collections.sort(psIds, new Comparator<ParameterServerId>() {

        @Override
        public int compare(ParameterServerId s1, ParameterServerId s2) {
            // Integer.compare avoids the overflow that "s1 - s2" can produce.
            return Integer.compare(s1.getIndex(), s2.getIndex());
        }
    });
    locationManager.setPsIds(psIds.toArray(new ParameterServerId[0]));
    for (ParameterServerId psId : psIds) {
        if (psIdToLocMap.containsKey(psId)) {
            locationManager.setPsLocation(psId, psIdToLocMap.get(psId));
        }
    }

    // Create the transport, cache and storage components.
    matrixTransClient = new MatrixTransportClient();
    matrixClientAdapter = new MatrixClientAdapter();
    opLogCache = new MatrixOpLogCache();
    matrixStorageManager = new MatrixStorageManager();
    matricesCache = new MatricesCache();

    // Create the consistency controller with the configured staleness.
    int staleness = conf.getInt(AngelConf.ANGEL_STALENESS, AngelConf.DEFAULT_ANGEL_STALENESS);
    consistencyController = new ConsistencyController(staleness);
    consistencyController.init();

    // The init-finished flag is raised before the services below are started.
    psAgentInitFinishedFlag.set(true);

    // Start heartbeat thread if need
    if (needHeartBeat) {
        startHeartbeatThread();
    }

    // Start all services
    matrixTransClient.start();
    matrixClientAdapter.start();
    clockCache.start();
    opLogCache.start();
    matricesCache.start();
}
Also used : MatrixClientAdapter(com.tencent.angel.psagent.matrix.transport.adapter.MatrixClientAdapter) ClockCache(com.tencent.angel.psagent.clock.ClockCache) MasterClient(com.tencent.angel.psagent.client.MasterClient) MatrixMeta(com.tencent.angel.ml.matrix.MatrixMeta) PSAgentMatrixMetaManager(com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager) MatricesCache(com.tencent.angel.psagent.matrix.cache.MatricesCache) ConsistencyController(com.tencent.angel.psagent.consistency.ConsistencyController) MatrixTransportClient(com.tencent.angel.psagent.matrix.transport.MatrixTransportClient) MatrixOpLogCache(com.tencent.angel.psagent.matrix.oplog.cache.MatrixOpLogCache) PSAgentLocationManager(com.tencent.angel.psagent.matrix.PSAgentLocationManager) MatrixStorageManager(com.tencent.angel.psagent.matrix.storage.MatrixStorageManager) ParameterServerId(com.tencent.angel.ps.ParameterServerId) Location(com.tencent.angel.common.location.Location)

Example 2 with MatricesCache

use of com.tencent.angel.psagent.matrix.cache.MatricesCache in project angel by Tencent.

In the class MatrixClientAdapter, method getRow:

/**
 * Get a matrix row from parameter servers.
 *
 * <p>The call first waits until the clock of the row reaches {@code clock}. It then returns the
 * row from the local matrix storage if the stored row is recent enough; otherwise it collects
 * the row splits (from the matrices cache when the cached split is recent enough, else from the
 * parameter servers), waits for the merged row, stores it in the matrix storage and returns it.
 *
 * @param matrixId matrix id
 * @param rowIndex row index
 * @param clock    clock value
 * @return TVector matrix row
 * @throws ExecutionException   exception thrown when attempting to retrieve the result of a task
 *                              that aborted by throwing an exception
 * @throws InterruptedException if interrupted while waiting for the result
 */
public TVector getRow(int matrixId, int rowIndex, int clock) throws InterruptedException, ExecutionException {
    LOG.debug("start to getRow request, matrix=" + matrixId + ", rowIndex=" + rowIndex + ", clock=" + clock);
    long startTs = System.currentTimeMillis();
    // Wait until the clock value of this row is greater than or equal to the value
    PSAgentContext.get().getConsistencyController().waitForClock(matrixId, rowIndex, clock);
    LOG.debug("getRow wait clock time=" + (System.currentTimeMillis() - startTs));
    startTs = System.currentTimeMillis();
    // Get partitions for this row
    List<PartitionKey> partList = PSAgentContext.get().getMatrixMetaManager().getPartitions(matrixId, rowIndex);
    GetRowRequest request = new GetRowRequest(matrixId, rowIndex, clock);
    MatrixMeta meta = PSAgentContext.get().getMatrixMetaManager().getMatrixMeta(matrixId);
    // Reuse the response cache of an identical pending request, or register a new one
    GetRowPipelineCache responseCache = (GetRowPipelineCache) requestToResponseMap.get(request);
    if (responseCache == null) {
        responseCache = new GetRowPipelineCache(partList.size(), meta.getRowType());
        GetRowPipelineCache oldCache = (GetRowPipelineCache) requestToResponseMap.putIfAbsent(request, responseCache);
        if (oldCache != null) {
            // Another caller registered a cache for the same request first; use that one
            responseCache = oldCache;
        }
    }
    // First get this row from matrix storage
    MatrixStorage matrixStorage = PSAgentContext.get().getMatrixStorageManager().getMatrixStoage(matrixId);
    try {
        // Acquire the lock outside the try block that releases it, so that unlock() is never
        // called when lock() itself failed
        responseCache.getDistinctLock().lock();
        try {
            // If the row exists in the matrix storage and the clock value meets the requirements,
            // just return
            TVector row = matrixStorage.getRow(rowIndex);
            if (row != null && row.getClock() >= clock) {
                return row;
            }
            // Get row splits of this row from the matrix cache first
            MatricesCache matricesCache = PSAgentContext.get().getMatricesCache();
            MatrixTransportClient matrixClient = PSAgentContext.get().getMatrixTransportClient();
            for (PartitionKey partKey : partList) {
                ServerRow rowSplit = matricesCache.getRowSplit(matrixId, partKey, rowIndex);
                if (rowSplit != null && rowSplit.getClock() >= clock) {
                    responseCache.addRowSplit(rowSplit);
                } else {
                    // If the row split does not exist in cache (or is too old), get it from
                    // parameter server
                    responseCache.addRowSplit(matrixClient.getRowSplit(partKey, rowIndex, clock));
                }
            }
            // Wait the final result
            row = responseCache.getMergedResult().get();
            LOG.debug("get row use time=" + (System.currentTimeMillis() - startTs));
            // Put it to the matrix storage
            matrixStorage.addRow(rowIndex, row);
            return row;
        } finally {
            responseCache.getDistinctLock().unlock();
        }
    } finally {
        // Always drop the pending-request entry, as the original did, after the lock is released
        requestToResponseMap.remove(request);
    }
}
Also used : MatrixTransportClient(com.tencent.angel.psagent.matrix.transport.MatrixTransportClient) MatrixMeta(com.tencent.angel.ml.matrix.MatrixMeta) PartitionKey(com.tencent.angel.PartitionKey) ServerRow(com.tencent.angel.ps.impl.matrix.ServerRow) MatricesCache(com.tencent.angel.psagent.matrix.cache.MatricesCache) TVector(com.tencent.angel.ml.math.TVector) MatrixStorage(com.tencent.angel.psagent.matrix.storage.MatrixStorage)

Aggregations

MatrixMeta (com.tencent.angel.ml.matrix.MatrixMeta)2 MatricesCache (com.tencent.angel.psagent.matrix.cache.MatricesCache)2 MatrixTransportClient (com.tencent.angel.psagent.matrix.transport.MatrixTransportClient)2 PartitionKey (com.tencent.angel.PartitionKey)1 Location (com.tencent.angel.common.location.Location)1 TVector (com.tencent.angel.ml.math.TVector)1 ParameterServerId (com.tencent.angel.ps.ParameterServerId)1 ServerRow (com.tencent.angel.ps.impl.matrix.ServerRow)1 MasterClient (com.tencent.angel.psagent.client.MasterClient)1 ClockCache (com.tencent.angel.psagent.clock.ClockCache)1 ConsistencyController (com.tencent.angel.psagent.consistency.ConsistencyController)1 PSAgentLocationManager (com.tencent.angel.psagent.matrix.PSAgentLocationManager)1 PSAgentMatrixMetaManager (com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager)1 MatrixOpLogCache (com.tencent.angel.psagent.matrix.oplog.cache.MatrixOpLogCache)1 MatrixStorage (com.tencent.angel.psagent.matrix.storage.MatrixStorage)1 MatrixStorageManager (com.tencent.angel.psagent.matrix.storage.MatrixStorageManager)1 MatrixClientAdapter (com.tencent.angel.psagent.matrix.transport.adapter.MatrixClientAdapter)1