Search in sources :

Example 1 with PSAgentLocationManager

use of com.tencent.angel.psagent.matrix.PSAgentLocationManager in project angel by Tencent.

In class PSAgent, the method initAndStart.

/**
 * Builds every component of this PS agent and then starts its services.
 *
 * <p>The steps run in this order:
 * <ol>
 *   <li>create the location manager and record the master location;</li>
 *   <li>create and initialize the RPC client to the master;</li>
 *   <li>determine the local ip and listen port and store them as this agent's location;</li>
 *   <li>fetch the matrix metadata from the master and register it;</li>
 *   <li>fetch the parameter-server locations from the master and store them in the location
 *       manager, with the server ids sorted by ascending index;</li>
 *   <li>create the transport client, client adapter, op-log cache, storage manager, matrices
 *       cache and consistency controller;</li>
 *   <li>set the "init finished" flag, start the heartbeat thread when {@code needHeartBeat} is
 *       set, and start the transport client, client adapter and caches.</li>
 * </ol>
 *
 * @throws Exception if any of the initialization or start-up calls fails
 */
public void initAndStart() throws Exception {
    // Create the location manager and record where the master is.
    locationManager = new PSAgentLocationManager(PSAgentContext.get());
    locationManager.setMasterLocation(masterLocation);

    // Build and initialize rpc client to master
    masterClient = new MasterClient();
    masterClient.init();

    // Build local location
    String localIp = NetUtils.getRealLocalIP();
    int port = NetUtils.chooseAListenPort(conf);
    location = new Location(localIp, port);

    // Initialize matrix meta information
    clockCache = new ClockCache();
    List<MatrixMeta> matrixMetas = masterClient.getMatrices();
    LOG.info("===========================PSAgent get matrices from master," + matrixMetas.size());
    this.matrixMetaManager = new PSAgentMatrixMetaManager(clockCache);
    matrixMetaManager.addMatrices(matrixMetas);

    // Get ps locations from master and put them to the location cache.
    Map<ParameterServerId, Location> psIdToLocMap = masterClient.getPSLocations();
    List<ParameterServerId> psIds = new ArrayList<>(psIdToLocMap.keySet());
    Collections.sort(psIds, new Comparator<ParameterServerId>() {

        @Override
        public int compare(ParameterServerId s1, ParameterServerId s2) {
            // Integer.compare cannot overflow, unlike subtracting the two indexes.
            return Integer.compare(s1.getIndex(), s2.getIndex());
        }
    });
    locationManager.setPsIds(psIds.toArray(new ParameterServerId[0]));
    for (ParameterServerId psId : psIds) {
        if (psIdToLocMap.containsKey(psId)) {
            locationManager.setPsLocation(psId, psIdToLocMap.get(psId));
        }
    }

    // Create the matrix transport, caching and storage components.
    matrixTransClient = new MatrixTransportClient();
    matrixClientAdapter = new MatrixClientAdapter();
    opLogCache = new MatrixOpLogCache();
    matrixStorageManager = new MatrixStorageManager();
    matricesCache = new MatricesCache();

    // Create the consistency controller with the configured staleness.
    int staleness = conf.getInt(AngelConf.ANGEL_STALENESS, AngelConf.DEFAULT_ANGEL_STALENESS);
    consistencyController = new ConsistencyController(staleness);
    consistencyController.init();
    psAgentInitFinishedFlag.set(true);

    // Start heartbeat thread if need
    if (needHeartBeat) {
        startHeartbeatThread();
    }

    // Start all services
    matrixTransClient.start();
    matrixClientAdapter.start();
    clockCache.start();
    opLogCache.start();
    matricesCache.start();
}
Also used : MatrixClientAdapter(com.tencent.angel.psagent.matrix.transport.adapter.MatrixClientAdapter) ClockCache(com.tencent.angel.psagent.clock.ClockCache) MasterClient(com.tencent.angel.psagent.client.MasterClient) MatrixMeta(com.tencent.angel.ml.matrix.MatrixMeta) PSAgentMatrixMetaManager(com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager) MatricesCache(com.tencent.angel.psagent.matrix.cache.MatricesCache) ConsistencyController(com.tencent.angel.psagent.consistency.ConsistencyController) MatrixTransportClient(com.tencent.angel.psagent.matrix.transport.MatrixTransportClient) MatrixOpLogCache(com.tencent.angel.psagent.matrix.oplog.cache.MatrixOpLogCache) PSAgentLocationManager(com.tencent.angel.psagent.matrix.PSAgentLocationManager) MatrixStorageManager(com.tencent.angel.psagent.matrix.storage.MatrixStorageManager) ParameterServerId(com.tencent.angel.ps.ParameterServerId) Location(com.tencent.angel.common.location.Location)

Example 2 with PSAgentLocationManager

use of com.tencent.angel.psagent.matrix.PSAgentLocationManager in project angel by Tencent.

In class PSAgentTest, the method testMatrixLocationManager.

/**
 * Checks the parameter-server location and the matrix partition metadata exposed by the PS
 * agent of the worker identified by {@code worker0Attempt0Id}.
 *
 * <p>Any exception is logged and rethrown so the test still fails.
 */
@Test
public void testMatrixLocationManager() throws Exception {
    try {
        AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
        assertTrue(angelAppMaster != null);
        AMTaskManager taskManager = angelAppMaster.getAppContext().getTaskManager();
        assertTrue(taskManager != null);
        WorkerManager workerManager = angelAppMaster.getAppContext().getWorkerManager();
        assertTrue(workerManager != null);
        Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
        assertTrue(worker != null);
        PSAgent psAgent = worker.getPSAgent();
        assertTrue(psAgent != null);
        PSAgentMatrixMetaManager matrixPartitionRouter = psAgent.getMatrixMetaManager();
        PSAgentLocationManager locationCache = psAgent.getLocationManager();
        assertTrue(matrixPartitionRouter != null);
        // Fail with an assertion instead of a NullPointerException on the next use.
        assertTrue(locationCache != null);
        // test ps location: the ip must be a dotted-quad IPv4 address and the port must be valid
        Location psLoc = locationCache.getPsLocation(psId);
        String ipRegex = "(2[5][0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})";
        Pattern pattern = Pattern.compile(ipRegex);
        Matcher matcher = pattern.matcher(psLoc.getIp());
        assertTrue(matcher.matches());
        assertTrue(psLoc.getPort() >= 1 && psLoc.getPort() <= 65535);
        int matrix1Id = LocalClusterContext.get().getMaster().getAppMaster().getAppContext().getMatrixMetaManager().getMatrix("w1").getId();
        // NOTE(review): matrix2Id is never used below; every "partition2" lookup reuses matrix1Id.
        // Confirm whether those lookups were meant to query "w2".
        int matrix2Id = LocalClusterContext.get().getMaster().getAppMaster().getAppContext().getMatrixMetaManager().getMatrix("w2").getId();
        // test partitions (JUnit's assertEquals takes the expected value first)
        List<PartitionKey> partition1Keys = matrixPartitionRouter.getPartitions(matrix1Id);
        assertEquals(2, partition1Keys.size());
        List<PartitionKey> partition2Keys = matrixPartitionRouter.getPartitions(matrix1Id);
        assertEquals(2, partition2Keys.size());
        partition1Keys.clear();
        partition1Keys = matrixPartitionRouter.getPartitions(matrix1Id, 0);
        assertEquals(2, partition1Keys.size());
        partition2Keys.clear();
        partition2Keys = matrixPartitionRouter.getPartitions(matrix1Id, 0);
        assertEquals(2, partition2Keys.size());
        int rowPartSize = matrixPartitionRouter.getRowPartitionSize(matrix1Id, 0);
        assertEquals(2, rowPartSize);
        rowPartSize = matrixPartitionRouter.getRowPartitionSize(matrix1Id, 0);
        assertEquals(2, rowPartSize);
    } catch (Exception x) {
        LOG.error("run testMatrixLocationManager failed ", x);
        throw x;
    }
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) PSAgentMatrixMetaManager(com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager) WorkerManager(com.tencent.angel.master.worker.WorkerManager) AMTaskManager(com.tencent.angel.master.task.AMTaskManager) AngelApplicationMaster(com.tencent.angel.master.AngelApplicationMaster) PSAgentLocationManager(com.tencent.angel.psagent.matrix.PSAgentLocationManager) Worker(com.tencent.angel.worker.Worker) PartitionKey(com.tencent.angel.PartitionKey) Location(com.tencent.angel.common.location.Location) Test(org.junit.Test)

Example 3 with PSAgentLocationManager

use of com.tencent.angel.psagent.matrix.PSAgentLocationManager in project angel by Tencent.

In class PSAgentTest, the method testLocationCache.

/**
 * Checks the master location, the parameter-server location and the list of parameter-server
 * ids held by the location manager of the worker identified by {@code worker0Attempt0Id}.
 *
 * <p>Any exception is logged and rethrown so the test still fails.
 */
@Test
public void testLocationCache() throws Exception {
    try {
        AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
        assertTrue(angelAppMaster != null);
        AMTaskManager taskManager = angelAppMaster.getAppContext().getTaskManager();
        assertTrue(taskManager != null);
        WorkerManager workerManager = angelAppMaster.getAppContext().getWorkerManager();
        assertTrue(workerManager != null);
        Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
        // Fail with an assertion instead of a NullPointerException on the next line.
        assertTrue(worker != null);
        PSAgent psAgent = worker.getPSAgent();
        assertTrue(psAgent != null);
        PSAgentLocationManager locationCache = psAgent.getLocationManager();
        assertTrue(locationCache != null);
        // test master location: the ip must be a dotted-quad IPv4 address and the port must be valid
        Location masterLoc = locationCache.getMasterLocation();
        String ipRegex = "(2[5][0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})";
        Pattern pattern = Pattern.compile(ipRegex);
        Matcher matcher = pattern.matcher(masterLoc.getIp());
        assertTrue(matcher.matches());
        assertTrue(masterLoc.getPort() >= 1 && masterLoc.getPort() <= 65535);
        // test ps location
        Location psLoc = locationCache.getPsLocation(psId);
        matcher = pattern.matcher(psLoc.getIp());
        assertTrue(matcher.matches());
        assertTrue(psLoc.getPort() >= 1 && psLoc.getPort() <= 65535);
        // assertEquals(psLoc, locationCache.updateAndGetPSLocation(psId));
        // test all ps ids (JUnit's assertEquals takes the expected value first)
        ParameterServerId[] allPSIds = locationCache.getPsIds();
        assertEquals(1, allPSIds.length);
        assertEquals(psId, allPSIds[0]);
    } catch (Exception x) {
        LOG.error("run testLocationCache failed ", x);
        throw x;
    }
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) WorkerManager(com.tencent.angel.master.worker.WorkerManager) AMTaskManager(com.tencent.angel.master.task.AMTaskManager) AngelApplicationMaster(com.tencent.angel.master.AngelApplicationMaster) PSAgentLocationManager(com.tencent.angel.psagent.matrix.PSAgentLocationManager) Worker(com.tencent.angel.worker.Worker) ParameterServerId(com.tencent.angel.ps.ParameterServerId) Location(com.tencent.angel.common.location.Location) Test(org.junit.Test)

Aggregations

Location (com.tencent.angel.common.location.Location)3 PSAgentLocationManager (com.tencent.angel.psagent.matrix.PSAgentLocationManager)3 AngelApplicationMaster (com.tencent.angel.master.AngelApplicationMaster)2 AMTaskManager (com.tencent.angel.master.task.AMTaskManager)2 WorkerManager (com.tencent.angel.master.worker.WorkerManager)2 ParameterServerId (com.tencent.angel.ps.ParameterServerId)2 PSAgentMatrixMetaManager (com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager)2 Worker (com.tencent.angel.worker.Worker)2 Matcher (java.util.regex.Matcher)2 Pattern (java.util.regex.Pattern)2 Test (org.junit.Test)2 PartitionKey (com.tencent.angel.PartitionKey)1 MatrixMeta (com.tencent.angel.ml.matrix.MatrixMeta)1 MasterClient (com.tencent.angel.psagent.client.MasterClient)1 ClockCache (com.tencent.angel.psagent.clock.ClockCache)1 ConsistencyController (com.tencent.angel.psagent.consistency.ConsistencyController)1 MatricesCache (com.tencent.angel.psagent.matrix.cache.MatricesCache)1 MatrixOpLogCache (com.tencent.angel.psagent.matrix.oplog.cache.MatrixOpLogCache)1 MatrixStorageManager (com.tencent.angel.psagent.matrix.storage.MatrixStorageManager)1 MatrixTransportClient (com.tencent.angel.psagent.matrix.transport.MatrixTransportClient)1