Search in sources :

Example 1 with AMTaskManager

use of com.tencent.angel.master.task.AMTaskManager in project angel by Tencent.

the class PSAgentTest method testClockCache.

/**
 * Checks that the master, its task/worker managers, worker 0 and its PS agent
 * are all available in the local cluster, that the PS agent exposes a clock
 * cache, and that {@code getClock(1, 0)} on that cache returns 0.
 *
 * @throws Exception any exception raised while accessing the local cluster;
 *         it is logged and rethrown so the test fails
 */
@Test
public void testClockCache() throws Exception {
    try {
        AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
        assertTrue(angelAppMaster != null);
        AMTaskManager taskManager = angelAppMaster.getAppContext().getTaskManager();
        assertTrue(taskManager != null);
        WorkerManager workerManager = angelAppMaster.getAppContext().getWorkerManager();
        assertTrue(workerManager != null);
        Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
        assertTrue(worker != null);
        PSAgent psAgent = worker.getPSAgent();
        assertTrue(psAgent != null);
        ClockCache clockCache = psAgent.getClockCache();
        assertTrue(clockCache != null);
        int rowClock = clockCache.getClock(1, 0);
        // JUnit convention: expected value first, actual value second, so that
        // a failure reports "expected:<0> but was:<...>" correctly.
        assertEquals(0, rowClock);
    } catch (Exception x) {
        // Only exceptions are caught here; assertion failures are Errors and
        // propagate to JUnit directly without being logged.
        LOG.error("run testClockCache failed ", x);
        throw x;
    }
}
Also used : WorkerManager(com.tencent.angel.master.worker.WorkerManager) AMTaskManager(com.tencent.angel.master.task.AMTaskManager) ClockCache(com.tencent.angel.psagent.clock.ClockCache) AngelApplicationMaster(com.tencent.angel.master.AngelApplicationMaster) Worker(com.tencent.angel.worker.Worker) Test(org.junit.Test)

Example 2 with AMTaskManager

use of com.tencent.angel.master.task.AMTaskManager in project angel by Tencent.

the class PSAgentTest method testPSAgentContext.

/**
 * Checks that {@code PSAgentContext.get()} is fully populated and that the
 * values it exposes agree with those of the PS agent of worker 0.
 * <p>
 * The PS agent (and its configuration) is treated as the source of expected
 * values; the context is the object under test.
 *
 * @throws Exception any exception raised while accessing the local cluster;
 *         it is logged and rethrown so the test fails
 */
@Test
public void testPSAgentContext() throws Exception {
    try {
        AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
        assertTrue(angelAppMaster != null);
        AMTaskManager taskManager = angelAppMaster.getAppContext().getTaskManager();
        assertTrue(taskManager != null);
        WorkerManager workerManager = angelAppMaster.getAppContext().getWorkerManager();
        assertTrue(workerManager != null);
        Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
        assertTrue(worker != null);
        PSAgent psAgent = worker.getPSAgent();
        assertTrue(psAgent != null);

        // Every component reachable through the context must be initialized.
        PSAgentContext psAgentContext = PSAgentContext.get();
        assertTrue(psAgentContext.getPsAgent() != null);
        assertTrue(psAgentContext.getConf() != null);
        assertTrue(psAgentContext.getMetrics() != null);
        assertTrue(psAgentContext.getMasterClient() != null);
        assertTrue(psAgentContext.getIdProto() != null);
        assertTrue(psAgentContext.getOpLogCache() != null);
        assertTrue(psAgentContext.getMatrixTransportClient() != null);
        assertTrue(psAgentContext.getMatrixMetaManager() != null);
        assertTrue(psAgentContext.getLocationManager() != null);

        // The context must expose the same values as the PS agent itself
        // (expected value first, actual value second).
        assertEquals(psAgent.getRunningMode(), psAgentContext.getRunningMode());
        assertEquals(psAgent.getIp(), psAgentContext.getIp());
        assertEquals(psAgent.getConf().getInt(AngelConf.ANGEL_STALENESS, AngelConf.DEFAULT_ANGEL_STALENESS), psAgentContext.getStaleness());
        assertEquals(psAgent.getConsistencyController(), psAgentContext.getConsistencyController());
        assertEquals(psAgent.getOpLogCache(), psAgentContext.getMatrixOpLogCache());
        assertEquals(psAgent.getClockCache(), psAgentContext.getClockCache());
        assertEquals(psAgent.getMatricesCache(), psAgentContext.getMatricesCache());
        assertEquals(psAgent.getMatrixStorageManager(), psAgentContext.getMatrixStorageManager());
        assertEquals(psAgent.getMatrixClientAdapter(), psAgentContext.getMatrixClientAdapter());
        assertEquals(psAgent.getExecutor(), psAgentContext.getExecutor());

        // Task contexts 1 and 2 must exist, and both task counts must be 2.
        assertTrue(psAgentContext.getTaskContext(1) != null);
        assertTrue(psAgentContext.getTaskContext(2) != null);
        int taskNum = psAgentContext.getTotalTaskNum();
        assertEquals(2, taskNum);
        int localTaskNum = psAgentContext.getLocalTaskNum();
        assertEquals(2, localTaskNum);
    } catch (Exception x) {
        // Only exceptions are caught here; assertion failures are Errors and
        // propagate to JUnit directly without being logged.
        LOG.error("run testPSAgentContext failed ", x);
        throw x;
    }
}
Also used : WorkerManager(com.tencent.angel.master.worker.WorkerManager) AMTaskManager(com.tencent.angel.master.task.AMTaskManager) AngelApplicationMaster(com.tencent.angel.master.AngelApplicationMaster) Worker(com.tencent.angel.worker.Worker) Test(org.junit.Test)

Example 3 with AMTaskManager

use of com.tencent.angel.master.task.AMTaskManager in project angel by Tencent.

the class PSAgentTest method testTaskContext.

/**
 * Checks the initial state of task contexts 1 and 2 obtained from
 * {@code PSAgentContext}: index, epoch, matrix clock, number of matrix clocks
 * and progress.
 *
 * @throws Exception any exception raised while accessing the local cluster;
 *         it is logged and rethrown so the test fails
 */
@Test
public void testTaskContext() throws Exception {
    try {
        AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
        assertTrue(angelAppMaster != null);
        AMTaskManager taskManager = angelAppMaster.getAppContext().getTaskManager();
        assertTrue(taskManager != null);
        WorkerManager workerManager = angelAppMaster.getAppContext().getWorkerManager();
        assertTrue(workerManager != null);
        Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
        assertTrue(worker != null);
        PSAgent psAgent = worker.getPSAgent();
        assertTrue(psAgent != null);
        PSAgentContext psAgentContext = PSAgentContext.get();
        assertTrue(psAgentContext.getPsAgent() != null);
        TaskContext taskContext1 = psAgentContext.getTaskContext(1);
        TaskContext taskContext2 = psAgentContext.getTaskContext(2);
        assertTrue(taskContext1 != null);
        assertTrue(taskContext2 != null);

        // JUnit convention: expected value first, actual value second.
        assertEquals(1, taskContext1.getIndex());
        assertEquals(2, taskContext2.getIndex());
        assertEquals(0, taskContext1.getEpoch());
        assertEquals(0, taskContext2.getEpoch());
        assertEquals(0, taskContext1.getMatrixClock(1));
        assertEquals(0, taskContext2.getMatrixClock(2));
        assertEquals(1, taskContext1.getMatrixClocks().size());
        assertEquals(1, taskContext2.getMatrixClocks().size());
        assertEquals(0.0, taskContext1.getProgress(), 1e-5);
        assertEquals(0.0, taskContext2.getProgress(), 1e-5);
    } catch (Exception x) {
        // Only exceptions are caught here; assertion failures are Errors and
        // propagate to JUnit directly without being logged.
        LOG.error("run testTaskContext failed ", x);
        throw x;
    }
}
Also used : WorkerManager(com.tencent.angel.master.worker.WorkerManager) AMTaskManager(com.tencent.angel.master.task.AMTaskManager) TaskContext(com.tencent.angel.psagent.task.TaskContext) AngelApplicationMaster(com.tencent.angel.master.AngelApplicationMaster) Worker(com.tencent.angel.worker.Worker) Test(org.junit.Test)

Example 4 with AMTaskManager

use of com.tencent.angel.master.task.AMTaskManager in project angel by Tencent.

the class AppStateStorage method loadTaskMeta.

/**
 * Load task meta from file.
 * <p>
 * Locates the task meta file through
 * {@code findFilePathUsePrefix(taskMetaFileNamePrefix)}, opens it and
 * deserializes its content into a new {@link AMTaskManager}. The whole
 * operation is performed while holding {@code taskMetaLock}.
 *
 * @return the deserialized AMTaskManager, or {@code null} if locating the
 *         file failed (the failure is logged) or no file was found
 * @throws IOException propagated from opening, deserializing or closing the
 *         task meta file
 */
public AMTaskManager loadTaskMeta() throws IOException {
    // Acquire the lock BEFORE entering the try block: if lock() itself fails,
    // the finally clause must not try to release a lock that is not held.
    taskMetaLock.lock();
    try {
        // find task meta file which has max timestamp
        Path taskMetaFilePath = null;
        try {
            taskMetaFilePath = findFilePathUsePrefix(taskMetaFileNamePrefix);
        } catch (Exception x) {
            // Best effort: a failed lookup is reported as "no task meta".
            LOG.error("find task meta file failed.", x);
            return null;
        }
        // if the file does not exist, just return null
        if (taskMetaFilePath == null) {
            return null;
        }
        // read task meta from file and deserialize it
        FSDataInputStream inputStream = fs.open(taskMetaFilePath);
        try {
            AMTaskManager taskManager = new AMTaskManager();
            taskManager.deserialize(inputStream);
            return taskManager;
        } finally {
            // Always release the stream, even when deserialization fails.
            inputStream.close();
        }
    } finally {
        taskMetaLock.unlock();
    }
}
Also used : AMTaskManager(com.tencent.angel.master.task.AMTaskManager) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) InvalidParameterException(com.tencent.angel.exception.InvalidParameterException)

Example 5 with AMTaskManager

use of com.tencent.angel.master.task.AMTaskManager in project angel by Tencent.

the class MasterServiceTest method testMasterService.

/**
 * Exercises the master RPC service from a worker's point of view:
 * <ol>
 *   <li>registers a worker attempt in worker group 1 and expects the master
 *       to answer with {@code W_SHUTDOWN};</li>
 *   <li>sends a worker report for {@code worker0Attempt0Id} carrying two task
 *       reports and two worker-level key/value pairs, and expects
 *       {@code W_SUCCESS} with two active tasks;</li>
 *   <li>verifies that the worker attempt metrics, the task metrics and the
 *       task progress stored on the master match what was reported.</li>
 * </ol>
 *
 * @throws Exception any exception raised while talking to the master;
 *         it is logged and rethrown so the test fails
 */
@Test
public void testMasterService() throws Exception {
    try {
        LOG.info("===========================testMasterService===============================");
        Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
        Location masterLoc = LocalClusterContext.get().getMaster().getAppMaster().getAppContext().getMasterService().getLocation();
        TConnection connection = TConnectionManager.getConnection(worker.getConf());
        MasterProtocol master = connection.getMasterService(masterLoc.getIp(), masterLoc.getPort());

        // worker register
        WorkerAttemptId worker1Attempt0Id = new WorkerAttemptId(new WorkerId(new WorkerGroupId(1), 0), 0);
        WorkerRegisterRequest registeRequest = WorkerRegisterRequest.newBuilder().setWorkerAttemptId(ProtobufUtil.convertToIdProto(worker1Attempt0Id)).setLocation(LocationProto.newBuilder().setIp("10.10.10.10").setPort(10000).build()).build();
        WorkerRegisterResponse registerResponse = master.workerRegister(null, registeRequest);
        assertEquals(WorkerCommandProto.W_SHUTDOWN, registerResponse.getCommand());

        // worker report: two task reports plus two worker-level pairs
        WorkerReportRequest.Builder reportBuilder = WorkerReportRequest.newBuilder();
        Pair.Builder kvBuilder = Pair.newBuilder();
        TaskStateProto.Builder taskBuilder = TaskStateProto.newBuilder();
        reportBuilder.setWorkerAttemptId(ProtobufUtil.convertToIdProto(worker0Attempt0Id));
        taskBuilder.setProgress(0.20f);
        taskBuilder.setState("RUNNING");
        taskBuilder.setTaskId(ProtobufUtil.convertToIdProto(task0Id));
        kvBuilder.setKey("task_key1");
        kvBuilder.setValue("100");
        taskBuilder.addCounters(kvBuilder.build());
        kvBuilder.setKey("task_key2");
        kvBuilder.setValue("200");
        taskBuilder.addCounters(kvBuilder.build());
        reportBuilder.addTaskReports(taskBuilder.build());
        // NOTE(review): taskBuilder is reused without being cleared, so the
        // second task report presumably still carries the counters added for
        // the first task in addition to its own - confirm this is intended.
        taskBuilder.setProgress(0.30f);
        taskBuilder.setState("RUNNING");
        taskBuilder.setTaskId(ProtobufUtil.convertToIdProto(task1Id));
        kvBuilder.setKey("task_key1");
        kvBuilder.setValue("1000");
        taskBuilder.addCounters(kvBuilder.build());
        kvBuilder.setKey("task_key2");
        kvBuilder.setValue("2000");
        taskBuilder.addCounters(kvBuilder.build());
        reportBuilder.addTaskReports(taskBuilder.build());
        kvBuilder.setKey("worker_key1");
        kvBuilder.setValue("100");
        reportBuilder.addPairs(kvBuilder.build());
        kvBuilder.setKey("worker_key2");
        kvBuilder.setValue("200");
        reportBuilder.addPairs(kvBuilder.build());
        WorkerReportResponse reportResponse = master.workerReport(null, reportBuilder.build());
        assertEquals(WorkerCommandProto.W_SUCCESS, reportResponse.getCommand());
        assertEquals(2, reportResponse.getActiveTaskNum());

        // worker-level metrics stored on the master
        AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
        WorkerAttempt worker0Attempt = angelAppMaster.getAppContext().getWorkerManager().getWorker(worker0Attempt0Id.getWorkerId()).getWorkerAttempt(worker0Attempt0Id);
        assertNotNull(worker0Attempt);
        Map<String, String> workerMetrics = worker0Attempt.getMetrics();
        String valueForWorkerKey1 = workerMetrics.get("worker_key1");
        String valueForWorkerKey2 = workerMetrics.get("worker_key2");
        assertNotNull(valueForWorkerKey1);
        assertNotNull(valueForWorkerKey2);
        assertEquals("100", valueForWorkerKey1);
        assertEquals("200", valueForWorkerKey2);

        // task-level metrics and progress stored on the master
        AMTaskManager amTaskManager = angelAppMaster.getAppContext().getTaskManager();
        AMTask task0 = amTaskManager.getTask(task0Id);
        AMTask task1 = amTaskManager.getTask(task1Id);
        assertNotNull(task0);
        assertNotNull(task1);
        Map<String, String> task0Metrics = task0.getMetrics();
        Map<String, String> task1Metrics = task1.getMetrics();
        String valueForTask0Key1 = task0Metrics.get("task_key1");
        String valueForTask0Key2 = task0Metrics.get("task_key2");
        String valueForTask1Key1 = task1Metrics.get("task_key1");
        String valueForTask1Key2 = task1Metrics.get("task_key2");
        assertNotNull(valueForTask0Key1);
        assertNotNull(valueForTask0Key2);
        assertNotNull(valueForTask1Key1);
        assertNotNull(valueForTask1Key2);
        assertEquals("100", valueForTask0Key1);
        assertEquals("200", valueForTask0Key2);
        assertEquals("1000", valueForTask1Key1);
        assertEquals("2000", valueForTask1Key2);
        assertEquals(0.20f, task0.getProgress(), 0.000001);
        assertEquals(0.30f, task1.getProgress(), 0.000001);
    } catch (Exception x) {
        // Only exceptions are caught here; assertion failures are Errors and
        // propagate to JUnit directly without being logged.
        LOG.error("run testMasterService failed ", x);
        throw x;
    }
}
Also used : WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) WorkerId(com.tencent.angel.worker.WorkerId) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId) TConnection(com.tencent.angel.ipc.TConnection) AMTaskManager(com.tencent.angel.master.task.AMTaskManager) Worker(com.tencent.angel.worker.Worker) WorkerAttempt(com.tencent.angel.master.worker.attempt.WorkerAttempt) AMTask(com.tencent.angel.master.task.AMTask) Location(com.tencent.angel.common.location.Location) Pair(com.tencent.angel.protobuf.generated.MLProtos.Pair) Test(org.junit.Test)

Aggregations

AMTaskManager (com.tencent.angel.master.task.AMTaskManager)16 Worker (com.tencent.angel.worker.Worker)13 Test (org.junit.Test)13 WorkerManager (com.tencent.angel.master.worker.WorkerManager)11 AngelApplicationMaster (com.tencent.angel.master.AngelApplicationMaster)9 AMTask (com.tencent.angel.master.task.AMTask)6 Location (com.tencent.angel.common.location.Location)5 MasterClient (com.tencent.angel.psagent.client.MasterClient)5 PSAgentMatrixMetaManager (com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager)4 Matcher (java.util.regex.Matcher)4 Pattern (java.util.regex.Pattern)4 AngelException (com.tencent.angel.exception.AngelException)3 AMWorker (com.tencent.angel.master.worker.worker.AMWorker)3 TaskContext (com.tencent.angel.psagent.task.TaskContext)3 Int2IntOpenHashMap (it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap)3 ServiceException (com.google.protobuf.ServiceException)2 TConnection (com.tencent.angel.ipc.TConnection)2 WorkerAttempt (com.tencent.angel.master.worker.attempt.WorkerAttempt)2 ParameterServerId (com.tencent.angel.ps.ParameterServerId)2 PSAgentLocationManager (com.tencent.angel.psagent.matrix.PSAgentLocationManager)2