Search in sources :

Example 1 with PSAgentMatrixMetaManager

use of com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager in project angel by Tencent.

Shown below: the method initAndStart of the class PSAgent.

/**
 * Builds the PSAgent components and then starts its services.
 *
 * <p>In order: creates the location manager and the RPC client to the master, determines the
 * local {@link Location}, loads matrix metadata and PS locations through the master client,
 * creates the transport, cache and storage components plus the consistency controller, sets the
 * init-finished flag, optionally starts the heartbeat thread, and finally starts the transport
 * client, the client adapter and the caches.
 *
 * @throws Exception if any step fails; nothing is caught here
 */
public void initAndStart() throws Exception {
    // Create the location manager and record the master location.
    locationManager = new PSAgentLocationManager(PSAgentContext.get());
    locationManager.setMasterLocation(masterLocation);

    // Build and initialize rpc client to master
    masterClient = new MasterClient();
    masterClient.init();

    // Build local location
    String localIp = NetUtils.getRealLocalIP();
    int port = NetUtils.chooseAListenPort(conf);
    location = new Location(localIp, port);

    // Initialize matrix meta information
    clockCache = new ClockCache();
    List<MatrixMeta> matrixMetas = masterClient.getMatrices();
    LOG.info("===========================PSAgent get matrices from master," + matrixMetas.size());
    this.matrixMetaManager = new PSAgentMatrixMetaManager(clockCache);
    matrixMetaManager.addMatrices(matrixMetas);

    // Get ps locations from master and put them to the location manager, ordered by PS index.
    Map<ParameterServerId, Location> psIdToLocMap = masterClient.getPSLocations();
    List<ParameterServerId> psIds = new ArrayList<>(psIdToLocMap.keySet());
    Collections.sort(psIds, new Comparator<ParameterServerId>() {

        @Override
        public int compare(ParameterServerId s1, ParameterServerId s2) {
            // Integer.compare cannot overflow, unlike subtracting the two indices.
            return Integer.compare(s1.getIndex(), s2.getIndex());
        }
    });
    locationManager.setPsIds(psIds.toArray(new ParameterServerId[0]));
    for (ParameterServerId psId : psIds) {
        // The ids come from the map's own key set; the guard is kept from the original code.
        if (psIdToLocMap.containsKey(psId)) {
            locationManager.setPsLocation(psId, psIdToLocMap.get(psId));
        }
    }

    // Create the matrix transport, op-log, storage and cache components.
    matrixTransClient = new MatrixTransportClient();
    matrixClientAdapter = new MatrixClientAdapter();
    opLogCache = new MatrixOpLogCache();
    matrixStorageManager = new MatrixStorageManager();
    matricesCache = new MatricesCache();

    // The consistency controller is configured with the staleness value read from conf.
    int staleness = conf.getInt(AngelConf.ANGEL_STALENESS, AngelConf.DEFAULT_ANGEL_STALENESS);
    consistencyController = new ConsistencyController(staleness);
    consistencyController.init();

    // Note: the flag is set before the services below are started.
    psAgentInitFinishedFlag.set(true);

    // Start heartbeat thread if need
    if (needHeartBeat) {
        startHeartbeatThread();
    }

    // Start all services
    matrixTransClient.start();
    matrixClientAdapter.start();
    clockCache.start();
    opLogCache.start();
    matricesCache.start();
}
Also used : MatrixClientAdapter(com.tencent.angel.psagent.matrix.transport.adapter.MatrixClientAdapter) ClockCache(com.tencent.angel.psagent.clock.ClockCache) MasterClient(com.tencent.angel.psagent.client.MasterClient) MatrixMeta(com.tencent.angel.ml.matrix.MatrixMeta) PSAgentMatrixMetaManager(com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager) MatricesCache(com.tencent.angel.psagent.matrix.cache.MatricesCache) ConsistencyController(com.tencent.angel.psagent.consistency.ConsistencyController) MatrixTransportClient(com.tencent.angel.psagent.matrix.transport.MatrixTransportClient) MatrixOpLogCache(com.tencent.angel.psagent.matrix.oplog.cache.MatrixOpLogCache) PSAgentLocationManager(com.tencent.angel.psagent.matrix.PSAgentLocationManager) MatrixStorageManager(com.tencent.angel.psagent.matrix.storage.MatrixStorageManager) ParameterServerId(com.tencent.angel.ps.ParameterServerId) Location(com.tencent.angel.common.location.Location)

Example 2 with PSAgentMatrixMetaManager

use of com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager in project angel by Tencent.

Shown below: the method testTaskMatrixClock of the class TaskManagerTest.

/**
 * Checks that matrix clock updates sent through the worker's {@code MasterClient} show up on the
 * {@code AMTask} objects held by the application master's task manager.
 *
 * <p>For each of the two tasks, both matrices ("w1" and "w2") are first clocked to 1, then "w1"
 * alone is advanced to 2 and the per-matrix clocks are re-read.
 *
 * @throws ServiceException rethrown after logging if the test body fails with it
 */
@Test
public void testTaskMatrixClock() throws ServiceException {
    try {
        LOG.info("===========================testTaskMatrixClock===============================");
        AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
        assertTrue(angelAppMaster != null);
        AMTaskManager taskManager = angelAppMaster.getAppContext().getTaskManager();
        Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
        PSAgentMatrixMetaManager matrixMetaManager = worker.getPSAgent().getMatrixMetaManager();
        int w1Id = matrixMetaManager.getMatrixId("w1");
        int w2Id = matrixMetaManager.getMatrixId("w2");
        MasterClient masterClient = worker.getPSAgent().getMasterClient();
        AMTask task0 = taskManager.getTask(task0Id);
        AMTask task1 = taskManager.getTask(task1Id);

        // Task 0: clock both matrices once. Assertions use JUnit's (expected, actual) order.
        masterClient.updateClock(task0Id.getIndex(), w1Id, 1);
        masterClient.updateClock(task0Id.getIndex(), w2Id, 1);
        Int2IntOpenHashMap matrixClocks = task0.getMatrixClocks();
        assertEquals(2, matrixClocks.size());
        assertEquals(1, matrixClocks.get(w1Id));
        assertEquals(1, matrixClocks.get(w2Id));

        // Task 0: advancing w1 must leave w2 untouched.
        masterClient.updateClock(task0Id.getIndex(), w1Id, 2);
        assertEquals(2, task0.getMatrixClock(w1Id));
        assertEquals(1, task0.getMatrixClock(w2Id));

        // Task 1: same sequence as task 0.
        masterClient.updateClock(task1Id.getIndex(), w1Id, 1);
        masterClient.updateClock(task1Id.getIndex(), w2Id, 1);
        matrixClocks = task1.getMatrixClocks();
        assertEquals(2, matrixClocks.size());
        assertEquals(1, matrixClocks.get(w1Id));
        assertEquals(1, matrixClocks.get(w2Id));
        masterClient.updateClock(task1Id.getIndex(), w1Id, 2);
        assertEquals(2, task1.getMatrixClock(w1Id));
        assertEquals(1, task1.getMatrixClock(w2Id));
    } catch (Exception x) {
        // Log with the stack trace, then let the test fail with the original exception.
        LOG.error("run testTaskMatrixClock failed ", x);
        throw x;
    }
}
Also used : AMTaskManager(com.tencent.angel.master.task.AMTaskManager) PSAgentMatrixMetaManager(com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager) MasterClient(com.tencent.angel.psagent.client.MasterClient) AMWorker(com.tencent.angel.master.worker.worker.AMWorker) Worker(com.tencent.angel.worker.Worker) Int2IntOpenHashMap(it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap) AMTask(com.tencent.angel.master.task.AMTask) ServiceException(com.google.protobuf.ServiceException) AngelException(com.tencent.angel.exception.AngelException) Test(org.junit.Test)

Example 3 with PSAgentMatrixMetaManager

use of com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager in project angel by Tencent.

Shown below: the method testMatrixMetaManager of the class PSAgentTest.

/**
 * Checks the matrix metadata exposed by the worker-side {@code PSAgentMatrixMetaManager}.
 *
 * <p>Verifies that exactly two matrices ("w1" and "w2") are known, that both report the hogwild
 * attribute as "true" (which is also the default passed to {@code getAttribute}), and that the
 * meta of "w1" looked up by the master-side id equals the one looked up by name and carries the
 * expected name, dimensions, row type and staleness.
 *
 * @throws Exception rethrown after logging if any step of the test fails
 */
@Test
public void testMatrixMetaManager() throws Exception {
    try {
        AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
        assertNotNull(angelAppMaster);
        AMTaskManager taskManager = angelAppMaster.getAppContext().getTaskManager();
        assertNotNull(taskManager);
        WorkerManager workerManager = angelAppMaster.getAppContext().getWorkerManager();
        assertNotNull(workerManager);
        Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
        assertNotNull(worker);
        PSAgent psAgent = worker.getPSAgent();
        assertNotNull(psAgent);
        PSAgentMatrixMetaManager matrixMetaManager = psAgent.getMatrixMetaManager();
        assertNotNull(matrixMetaManager);
        // test all matrix ids (assertions use JUnit's (expected, actual) order)
        assertEquals(2, matrixMetaManager.getMatrixMetas().size());
        // test all matrix names
        assertNotNull(matrixMetaManager.getMatrixMeta("w1"));
        assertNotNull(matrixMetaManager.getMatrixMeta("w2"));
        // test matrix attribute
        int matrixId1 = matrixMetaManager.getMatrixId("w1");
        int matrixId2 = matrixMetaManager.getMatrixId("w2");
        String hogwildAttr = matrixMetaManager.getMatrixMeta(matrixId1).getAttribute(MatrixConf.MATRIX_HOGWILD, "true");
        assertEquals("true", hogwildAttr);
        hogwildAttr = matrixMetaManager.getMatrixMeta(matrixId2).getAttribute(MatrixConf.MATRIX_HOGWILD, "true");
        assertEquals("true", hogwildAttr);
        // The master-side id of w1 is used below to query the agent-side manager.
        int matrix1Id = LocalClusterContext.get().getMaster().getAppMaster().getAppContext().getMatrixMetaManager().getMatrix("w1").getId();
        // The master must know w2 as well; its id is not needed further in this test.
        assertNotNull(LocalClusterContext.get().getMaster().getAppMaster().getAppContext().getMatrixMetaManager().getMatrix("w2"));
        // test matrix meta
        MatrixMeta matrixMetaById = matrixMetaManager.getMatrixMeta(matrix1Id);
        MatrixMeta matrixMetaByName = matrixMetaManager.getMatrixMeta("w1");
        assertEquals(matrixMetaByName, matrixMetaById);
        assertEquals("w1", matrixMetaById.getName());
        assertEquals(1, matrixMetaById.getRowNum());
        assertEquals(100000, matrixMetaById.getColNum());
        assertEquals(RowType.T_DOUBLE_DENSE, matrixMetaById.getRowType());
        assertEquals("true", matrixMetaById.getAttribute(MatrixConf.MATRIX_HOGWILD, "true"));
        assertEquals(0, matrixMetaById.getStaleness());
    } catch (Exception x) {
        // Log with the stack trace, then let the test fail with the original exception.
        LOG.error("run testMatrixMetaManager failed ", x);
        throw x;
    }
}
Also used : WorkerManager(com.tencent.angel.master.worker.WorkerManager) AMTaskManager(com.tencent.angel.master.task.AMTaskManager) AngelApplicationMaster(com.tencent.angel.master.AngelApplicationMaster) PSAgentMatrixMetaManager(com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager) MatrixMeta(com.tencent.angel.ml.matrix.MatrixMeta) Worker(com.tencent.angel.worker.Worker) Test(org.junit.Test)

Example 4 with PSAgentMatrixMetaManager

use of com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager in project angel by Tencent.

Shown below: the method testMatrixLocationManager of the class PSAgentTest.

/**
 * Checks the PS location and the partition layout visible from the worker's PSAgent.
 *
 * <p>Verifies that the location stored for {@code psId} has a dotted-quad IPv4 address and a
 * port in [1, 65535], then checks the partition counts of matrices "w1" and "w2", both overall
 * and for row 0.
 *
 * @throws Exception rethrown after logging if any step of the test fails
 */
@Test
public void testMatrixLocationManager() throws Exception {
    try {
        AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
        assertNotNull(angelAppMaster);
        AMTaskManager taskManager = angelAppMaster.getAppContext().getTaskManager();
        assertNotNull(taskManager);
        WorkerManager workerManager = angelAppMaster.getAppContext().getWorkerManager();
        assertNotNull(workerManager);
        Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
        assertNotNull(worker);
        PSAgent psAgent = worker.getPSAgent();
        assertNotNull(psAgent);
        PSAgentMatrixMetaManager matrixPartitionRouter = psAgent.getMatrixMetaManager();
        PSAgentLocationManager locationCache = psAgent.getLocationManager();
        assertNotNull(matrixPartitionRouter);
        // test ps location
        Location psLoc = locationCache.getPsLocation(psId);
        String ipRegex = "(2[5][0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})";
        Pattern pattern = Pattern.compile(ipRegex);
        Matcher matcher = pattern.matcher(psLoc.getIp());
        assertTrue(matcher.matches());
        assertTrue(psLoc.getPort() >= 1 && psLoc.getPort() <= 65535);
        int matrix1Id = LocalClusterContext.get().getMaster().getAppMaster().getAppContext().getMatrixMetaManager().getMatrix("w1").getId();
        int matrix2Id = LocalClusterContext.get().getMaster().getAppMaster().getAppContext().getMatrixMetaManager().getMatrix("w2").getId();
        // test partitions
        // NOTE(review): the w2 checks previously queried matrix1Id again, which looks like a
        // copy-paste slip. They now use matrix2Id and assume w2 is partitioned like w1 —
        // confirm against the test fixture that creates the matrices.
        List<PartitionKey> partition1Keys = matrixPartitionRouter.getPartitions(matrix1Id);
        assertEquals(2, partition1Keys.size());
        List<PartitionKey> partition2Keys = matrixPartitionRouter.getPartitions(matrix2Id);
        assertEquals(2, partition2Keys.size());
        // Partitions that contain row 0. The returned lists are never modified here, so the
        // test cannot alter anything the manager may hold on to.
        partition1Keys = matrixPartitionRouter.getPartitions(matrix1Id, 0);
        assertEquals(2, partition1Keys.size());
        partition2Keys = matrixPartitionRouter.getPartitions(matrix2Id, 0);
        assertEquals(2, partition2Keys.size());
        int rowPartSize = matrixPartitionRouter.getRowPartitionSize(matrix1Id, 0);
        assertEquals(2, rowPartSize);
        rowPartSize = matrixPartitionRouter.getRowPartitionSize(matrix2Id, 0);
        assertEquals(2, rowPartSize);
    } catch (Exception x) {
        // Log with the stack trace, then let the test fail with the original exception.
        LOG.error("run testMatrixLocationManager failed ", x);
        throw x;
    }
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) PSAgentMatrixMetaManager(com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager) WorkerManager(com.tencent.angel.master.worker.WorkerManager) AMTaskManager(com.tencent.angel.master.task.AMTaskManager) AngelApplicationMaster(com.tencent.angel.master.AngelApplicationMaster) PSAgentLocationManager(com.tencent.angel.psagent.matrix.PSAgentLocationManager) Worker(com.tencent.angel.worker.Worker) PartitionKey(com.tencent.angel.PartitionKey) Location(com.tencent.angel.common.location.Location) Test(org.junit.Test)

Example 5 with PSAgentMatrixMetaManager

use of com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager in project angel by Tencent.

Shown below: the method testMasterClient of the class PSAgentTest.

/**
 * Checks the PS location information returned by the worker's {@code MasterClient}.
 *
 * <p>Verifies that the location returned for {@code psId} has a dotted-quad IPv4 address and a
 * port in [1, 65535], and that the master reports exactly one PS location in total.
 *
 * @throws Exception rethrown after logging if any step of the test fails
 */
@Test
public void testMasterClient() throws Exception {
    try {
        AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
        assertNotNull(angelAppMaster);
        AMTaskManager taskManager = angelAppMaster.getAppContext().getTaskManager();
        assertNotNull(taskManager);
        WorkerManager workerManager = angelAppMaster.getAppContext().getWorkerManager();
        assertNotNull(workerManager);
        Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
        assertNotNull(worker);
        MasterClient masterClient = worker.getPSAgent().getMasterClient();
        assertNotNull(masterClient);
        // Location of a single PS, as reported by the master.
        Location location = masterClient.getPSLocation(psId);
        String ipRegex = "(2[5][0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})";
        Pattern pattern = Pattern.compile(ipRegex);
        Matcher matcher = pattern.matcher(location.getIp());
        assertTrue(matcher.matches());
        assertTrue(location.getPort() >= 1 && location.getPort() <= 65535);
        // All PS locations; assertion uses JUnit's (expected, actual) order.
        Map<ParameterServerId, Location> psLocations = masterClient.getPSLocations();
        assertEquals(1, psLocations.size());
    } catch (Exception x) {
        // Log with the stack trace, then let the test fail with the original exception.
        LOG.error("run testMasterClient failed ", x);
        throw x;
    }
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) PSAgentMatrixMetaManager(com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager) MasterClient(com.tencent.angel.psagent.client.MasterClient) WorkerManager(com.tencent.angel.master.worker.WorkerManager) AMTaskManager(com.tencent.angel.master.task.AMTaskManager) AngelApplicationMaster(com.tencent.angel.master.AngelApplicationMaster) Worker(com.tencent.angel.worker.Worker) ParameterServerId(com.tencent.angel.ps.ParameterServerId) Location(com.tencent.angel.common.location.Location) Test(org.junit.Test)

Aggregations

PSAgentMatrixMetaManager (com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager)5 AMTaskManager (com.tencent.angel.master.task.AMTaskManager)4 Worker (com.tencent.angel.worker.Worker)4 Test (org.junit.Test)4 Location (com.tencent.angel.common.location.Location)3 AngelApplicationMaster (com.tencent.angel.master.AngelApplicationMaster)3 WorkerManager (com.tencent.angel.master.worker.WorkerManager)3 MasterClient (com.tencent.angel.psagent.client.MasterClient)3 MatrixMeta (com.tencent.angel.ml.matrix.MatrixMeta)2 ParameterServerId (com.tencent.angel.ps.ParameterServerId)2 PSAgentLocationManager (com.tencent.angel.psagent.matrix.PSAgentLocationManager)2 Matcher (java.util.regex.Matcher)2 Pattern (java.util.regex.Pattern)2 ServiceException (com.google.protobuf.ServiceException)1 PartitionKey (com.tencent.angel.PartitionKey)1 AngelException (com.tencent.angel.exception.AngelException)1 AMTask (com.tencent.angel.master.task.AMTask)1 AMWorker (com.tencent.angel.master.worker.worker.AMWorker)1 ClockCache (com.tencent.angel.psagent.clock.ClockCache)1 ConsistencyController (com.tencent.angel.psagent.consistency.ConsistencyController)1