Search in sources :

Example 1 with WorkerId

use of com.tencent.angel.worker.WorkerId in project angel by Tencent.

the class PSAgentTest method setup.

/**
 * Class-level fixture. Builds a configuration with the deploy mode set to "LOCAL", obtains
 * an Angel client for it, registers two identically shaped dense double matrices named
 * "w1" and "w2", starts the PS server, runs the application and finally populates the
 * shared worker / task / PS identifier fields used by the tests.
 *
 * @throws Exception any failure during setup; it is logged and then rethrown unchanged
 */
@BeforeClass
public static void setup() throws Exception {
    try {
        Configuration configuration = new Configuration();

        // Basic job settings.
        configuration.setBoolean("mapred.mapper.new-api", true);
        configuration.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
        configuration.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());

        // Local deploy mode with the dummy data splitter.
        configuration.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
        configuration.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
        configuration.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());

        // Output, input and log locations under the temporary directory.
        configuration.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
        configuration.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
        configuration.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");

        // One worker group, one parameter server, two tasks per worker.
        configuration.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
        configuration.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
        configuration.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 2);

        angelClient = AngelClientFactory.get(configuration);

        // "w1" and "w2" share every setting except the name: 1 x 100000, split into
        // blocks of at most 1 row and 50000 columns.
        for (String matrixName : new String[] {"w1", "w2"}) {
            MatrixContext matrix = new MatrixContext();
            matrix.setName(matrixName);
            matrix.setRowNum(1);
            matrix.setColNum(100000);
            matrix.setMaxRowNumInBlock(1);
            matrix.setMaxColNumInBlock(50000);
            matrix.setRowType(RowType.T_DOUBLE_DENSE);
            matrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
            matrix.set(MatrixConf.MATRIX_HOGWILD, "true");
            matrix.set(MatrixConf.MATRIX_AVERAGE, "false");
            matrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_DOUBLE");
            angelClient.addMatrix(matrix);
        }

        angelClient.startPSServer();
        angelClient.run();
        // Fixed 10 second pause after run(); presumably gives the local cluster time to
        // come up before the tests touch it - TODO confirm.
        Thread.sleep(10000);

        // Identifiers of the first worker group / worker / attempt, both tasks and the PS.
        group0Id = new WorkerGroupId(0);
        worker0Id = new WorkerId(group0Id, 0);
        worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
        task0Id = new TaskId(0);
        task1Id = new TaskId(1);
        psId = new ParameterServerId(0);
        psAttempt0Id = new PSAttemptId(psId, 0);
    } catch (Exception failure) {
        LOG.error("setup failed ", failure);
        throw failure;
    }
}
Also used : CombineTextInputFormat(org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat) DummyTask(com.tencent.angel.master.DummyTask) MatrixContext(com.tencent.angel.ml.matrix.MatrixContext) TaskId(com.tencent.angel.worker.task.TaskId) Configuration(org.apache.hadoop.conf.Configuration) PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) WorkerId(com.tencent.angel.worker.WorkerId) ParameterServerId(com.tencent.angel.ps.ParameterServerId) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId) BeforeClass(org.junit.BeforeClass)

Example 2 with WorkerId

use of com.tencent.angel.worker.WorkerId in project angel by Tencent.

the class LocalClusterHelper method setup.

/**
 * Prepares the shared local-cluster state: builds a configuration with the deploy mode
 * set to "LOCAL", obtains an Angel client, registers the matrices "w1" (dense int,
 * 10 x 1000) and "w2" (dense double, 10 x 100), starts the PS server, runs the
 * application and then fills in the worker / task / PS identifier fields.
 *
 * @throws Exception any failure raised by the client calls or by the sleep
 */
public static void setup() throws Exception {
    Configuration configuration = new Configuration();

    // Basic job settings.
    configuration.setBoolean("mapred.mapper.new-api", true);
    configuration.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
    configuration.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());

    // Local deploy mode with the dummy data splitter.
    configuration.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
    configuration.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
    configuration.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());

    // Output, input and log locations under the temporary directory.
    configuration.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
    configuration.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
    configuration.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");

    // One worker group, one parameter server, two tasks per worker.
    configuration.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    configuration.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
    configuration.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 2);

    angelClient = AngelClientFactory.get(configuration);

    // "w1": dense int rows, 10 x 1000, blocks of at most 10 rows x 500 columns.
    MatrixContext intMatrix = new MatrixContext();
    intMatrix.setName("w1");
    intMatrix.setRowNum(10);
    intMatrix.setColNum(1000);
    intMatrix.setMaxRowNumInBlock(10);
    intMatrix.setMaxColNumInBlock(500);
    intMatrix.setRowType(RowType.T_INT_DENSE);
    intMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
    intMatrix.set(MatrixConf.MATRIX_HOGWILD, "true");
    intMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
    intMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_INT");
    angelClient.addMatrix(intMatrix);

    // "w2": dense double rows, 10 x 100, blocks of at most 5 rows x 50 columns.
    MatrixContext doubleMatrix = new MatrixContext();
    doubleMatrix.setName("w2");
    doubleMatrix.setRowNum(10);
    doubleMatrix.setColNum(100);
    doubleMatrix.setMaxRowNumInBlock(5);
    doubleMatrix.setMaxColNumInBlock(50);
    doubleMatrix.setRowType(RowType.T_DOUBLE_DENSE);
    doubleMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
    doubleMatrix.set(MatrixConf.MATRIX_HOGWILD, "false");
    doubleMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
    doubleMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_DOUBLE");
    angelClient.addMatrix(doubleMatrix);

    angelClient.startPSServer();
    angelClient.run();
    // Fixed 2 second pause after run(); presumably lets the local cluster come up
    // before callers use it - TODO confirm.
    Thread.sleep(2 * 1000);

    // Identifiers of the first worker group / worker / attempt, both tasks and the PS.
    group0Id = new WorkerGroupId(0);
    worker0Id = new WorkerId(group0Id, 0);
    worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
    task0Id = new TaskId(0);
    task1Id = new TaskId(1);
    psId = new ParameterServerId(0);
    psAttempt0Id = new PSAttemptId(psId, 0);
}
Also used : CombineTextInputFormat(org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat) MatrixContext(com.tencent.angel.ml.matrix.MatrixContext) TaskId(com.tencent.angel.worker.task.TaskId) Configuration(org.apache.hadoop.conf.Configuration) PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) WorkerId(com.tencent.angel.worker.WorkerId) ParameterServerId(com.tencent.angel.ps.ParameterServerId) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId)

Example 3 with WorkerId

use of com.tencent.angel.worker.WorkerId in project angel by Tencent.

the class WorkerManager method initWorkers.

/**
 * Creates every worker group, worker and task id and registers them in the lookup maps.
 *
 * <p>For each of the {@code workergroupNumber} groups, {@code workersInGroup} workers are
 * created. A worker's global index is {@code group * workersInGroup + positionInGroup}, and
 * it receives up to {@code taskNumberInEachWorker} consecutive task ids starting at
 * {@code globalIndex * taskNumberInEachWorker}; ids at or beyond {@code totalTaskNumber}
 * are not created. The first worker of each group is passed to the group as its leader.
 * Once a group is assembled it is stored in {@code workerGroupMap} and handed an
 * {@code INIT} event.
 */
private void initWorkers() {
    for (int groupIndex = 0; groupIndex < workergroupNumber; groupIndex++) {
        WorkerGroupId groupId = new WorkerGroupId(groupIndex);
        Map<WorkerId, AMWorker> groupWorkers = new HashMap<WorkerId, AMWorker>();
        WorkerId leader = null;

        for (int position = 0; position < workersInGroup; position++) {
            int workerIndex = groupIndex * workersInGroup + position;

            // Consecutive task ids for this worker, capped by the total task count.
            List<TaskId> taskIds = new ArrayList<TaskId>(taskNumberInEachWorker);
            int nextTaskIndex = workerIndex * taskNumberInEachWorker;
            for (int added = 0;
                    added < taskNumberInEachWorker && nextTaskIndex < totalTaskNumber;
                    added++, nextTaskIndex++) {
                taskIds.add(new TaskId(nextTaskIndex));
            }

            WorkerId workerId = new WorkerId(groupId, workerIndex);
            AMWorker worker = new AMWorker(workerId, context, taskIds);
            workersMap.put(workerId, worker);
            groupWorkers.put(workerId, worker);

            // The first worker created in a group is its leader.
            if (position == 0) {
                leader = workerId;
            }
        }

        AMWorkerGroup group = new AMWorkerGroup(groupId, context, groupWorkers, leader, groupIndex);

        // Index each worker by its group and each task by its owning worker.
        for (Map.Entry<WorkerId, AMWorker> entry : groupWorkers.entrySet()) {
            AMWorker worker = entry.getValue();
            findWorkerGroupMap.put(entry.getKey(), group);
            for (TaskId taskId : worker.getTaskIds()) {
                taskIdToWorkerMap.put(taskId, worker);
            }
        }

        workerGroupMap.put(groupId, group);
        group.handle(new AMWorkerGroupEvent(AMWorkerGroupEventType.INIT, groupId));
    }
    LOG.info("to init taskClockManager!");
}
Also used : AMWorkerGroup(com.tencent.angel.master.worker.workergroup.AMWorkerGroup) TaskId(com.tencent.angel.worker.task.TaskId) AMWorkerGroupEvent(com.tencent.angel.master.worker.workergroup.AMWorkerGroupEvent) AMWorker(com.tencent.angel.master.worker.worker.AMWorker) WorkerId(com.tencent.angel.worker.WorkerId) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId)

Example 4 with WorkerId

use of com.tencent.angel.worker.WorkerId in project angel by Tencent.

the class MasterServiceTest method testMasterService.

/**
 * Exercises the master RPC service through a real connection obtained from worker 0's
 * configuration.
 *
 * <ol>
 *   <li>Registers a worker attempt belonging to worker group 1 and expects the master to
 *       answer with {@code W_SHUTDOWN}.</li>
 *   <li>Sends a worker report for worker 0 / attempt 0 carrying two task reports (task 0 and
 *       task 1, each with two counters) plus two worker-level pairs, and expects
 *       {@code W_SUCCESS} with two active tasks.</li>
 *   <li>Checks that the reported worker metrics, task metrics and task progress are visible
 *       through the application master's worker manager and task manager.</li>
 * </ol>
 *
 * @throws Exception any failure during the test; it is logged and then rethrown unchanged
 */
@Test
public void testMasterService() throws Exception {
    try {
        LOG.info("===========================testMasterService===============================");
        Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
        Location masterLoc = LocalClusterContext.get().getMaster().getAppMaster().getAppContext()
            .getMasterService().getLocation();
        TConnection connection = TConnectionManager.getConnection(worker.getConf());
        MasterProtocol master = connection.getMasterService(masterLoc.getIp(), masterLoc.getPort());

        // Worker register: an attempt from worker group 1 is answered with W_SHUTDOWN.
        WorkerAttemptId worker1Attempt0Id =
            new WorkerAttemptId(new WorkerId(new WorkerGroupId(1), 0), 0);
        WorkerRegisterRequest registeRequest = WorkerRegisterRequest.newBuilder()
            .setWorkerAttemptId(ProtobufUtil.convertToIdProto(worker1Attempt0Id))
            .setLocation(LocationProto.newBuilder().setIp("10.10.10.10").setPort(10000).build())
            .build();
        WorkerRegisterResponse registerResponse = master.workerRegister(null, registeRequest);
        assertEquals(WorkerCommandProto.W_SHUTDOWN, registerResponse.getCommand());

        // Worker report for worker 0 / attempt 0.
        WorkerReportRequest.Builder reportBuilder = WorkerReportRequest.newBuilder();
        Pair.Builder kvBuilder = Pair.newBuilder();
        TaskStateProto.Builder taskBuilder = TaskStateProto.newBuilder();
        reportBuilder.setWorkerAttemptId(ProtobufUtil.convertToIdProto(worker0Attempt0Id));

        // Task 0 report: progress 0.20 with counters task_key1=100, task_key2=200.
        taskBuilder.setProgress(0.20f);
        taskBuilder.setState("RUNNING");
        taskBuilder.setTaskId(ProtobufUtil.convertToIdProto(task0Id));
        kvBuilder.setKey("task_key1");
        kvBuilder.setValue("100");
        taskBuilder.addCounters(kvBuilder.build());
        kvBuilder.setKey("task_key2");
        kvBuilder.setValue("200");
        taskBuilder.addCounters(kvBuilder.build());
        reportBuilder.addTaskReports(taskBuilder.build());

        // Task 1 report: progress 0.30 with counters task_key1=1000, task_key2=2000.
        // The builder is reused, so drop task 0's counters first; otherwise task 1's report
        // would carry all four counter entries.
        taskBuilder.clearCounters();
        taskBuilder.setProgress(0.30f);
        taskBuilder.setState("RUNNING");
        taskBuilder.setTaskId(ProtobufUtil.convertToIdProto(task1Id));
        kvBuilder.setKey("task_key1");
        kvBuilder.setValue("1000");
        taskBuilder.addCounters(kvBuilder.build());
        kvBuilder.setKey("task_key2");
        kvBuilder.setValue("2000");
        taskBuilder.addCounters(kvBuilder.build());
        reportBuilder.addTaskReports(taskBuilder.build());

        // Worker-level pairs.
        kvBuilder.setKey("worker_key1");
        kvBuilder.setValue("100");
        reportBuilder.addPairs(kvBuilder.build());
        kvBuilder.setKey("worker_key2");
        kvBuilder.setValue("200");
        reportBuilder.addPairs(kvBuilder.build());

        WorkerReportResponse reportResponse = master.workerReport(null, reportBuilder.build());
        assertEquals(WorkerCommandProto.W_SUCCESS, reportResponse.getCommand());
        assertEquals(2, reportResponse.getActiveTaskNum());

        // The worker-level pairs must be readable from the worker attempt's metrics.
        AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
        WorkerAttempt worker0Attempt = angelAppMaster.getAppContext().getWorkerManager()
            .getWorker(worker0Attempt0Id.getWorkerId()).getWorkerAttempt(worker0Attempt0Id);
        assertNotNull(worker0Attempt);
        Map<String, String> workerMetrics = worker0Attempt.getMetrics();
        String valueForWorkerKey1 = workerMetrics.get("worker_key1");
        String valueForWorkerKey2 = workerMetrics.get("worker_key2");
        assertNotNull(valueForWorkerKey1);
        assertNotNull(valueForWorkerKey2);
        assertEquals("100", valueForWorkerKey1);
        assertEquals("200", valueForWorkerKey2);

        // The task counters and progress must be readable from the task manager.
        AMTaskManager amTaskManager = angelAppMaster.getAppContext().getTaskManager();
        AMTask task0 = amTaskManager.getTask(task0Id);
        AMTask task1 = amTaskManager.getTask(task1Id);
        assertNotNull(task0);
        assertNotNull(task1);
        Map<String, String> task0Metrics = task0.getMetrics();
        Map<String, String> task1Metrics = task1.getMetrics();
        String valueForTask0Key1 = task0Metrics.get("task_key1");
        String valueForTask0Key2 = task0Metrics.get("task_key2");
        String valueForTask1Key1 = task1Metrics.get("task_key1");
        String valueForTask1Key2 = task1Metrics.get("task_key2");
        assertNotNull(valueForTask0Key1);
        assertNotNull(valueForTask0Key2);
        assertNotNull(valueForTask1Key1);
        assertNotNull(valueForTask1Key2);
        assertEquals("100", valueForTask0Key1);
        assertEquals("200", valueForTask0Key2);
        assertEquals("1000", valueForTask1Key1);
        assertEquals("2000", valueForTask1Key2);
        assertEquals(0.20f, task0.getProgress(), 0.000001);
        assertEquals(0.30f, task1.getProgress(), 0.000001);
    } catch (Exception x) {
        LOG.error("run testMasterService failed ", x);
        throw x;
    }
}
Also used : WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) WorkerId(com.tencent.angel.worker.WorkerId) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId) TConnection(com.tencent.angel.ipc.TConnection) AMTaskManager(com.tencent.angel.master.task.AMTaskManager) Worker(com.tencent.angel.worker.Worker) WorkerAttempt(com.tencent.angel.master.worker.attempt.WorkerAttempt) AMTask(com.tencent.angel.master.task.AMTask) Location(com.tencent.angel.common.location.Location) Pair(com.tencent.angel.protobuf.generated.MLProtos.Pair) Test(org.junit.Test)

Example 5 with WorkerId

use of com.tencent.angel.worker.WorkerId in project angel by Tencent.

the class MasterServiceTest method setup.

/**
 * Per-test fixture. Builds a configuration with the deploy mode set to "LOCAL", obtains an
 * Angel client, registers the matrices "w1" (dense int) and "w2" (dense double), both
 * 1 x 100000, starts the PS server, runs the application and then fills in the worker /
 * task / PS identifier fields used by the tests.
 *
 * @throws Exception any failure during setup; it is logged and then rethrown unchanged
 */
@Before
public void setup() throws Exception {
    try {
        Configuration configuration = new Configuration();

        // Basic job settings.
        configuration.setBoolean("mapred.mapper.new-api", true);
        configuration.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
        configuration.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());

        // Local deploy mode with the dummy data splitter.
        configuration.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
        configuration.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
        configuration.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());

        // Output, input and log locations under the temporary directory.
        configuration.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
        configuration.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
        configuration.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");

        // One worker group, one parameter server, two tasks per worker.
        configuration.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
        configuration.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
        configuration.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 2);

        angelClient = AngelClientFactory.get(configuration);

        // "w1": dense int rows, 1 x 100000, blocks of at most 1 row x 50000 columns.
        MatrixContext intMatrix = new MatrixContext();
        intMatrix.setName("w1");
        intMatrix.setRowNum(1);
        intMatrix.setColNum(100000);
        intMatrix.setMaxRowNumInBlock(1);
        intMatrix.setMaxColNumInBlock(50000);
        intMatrix.setRowType(RowType.T_INT_DENSE);
        intMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
        intMatrix.set(MatrixConf.MATRIX_HOGWILD, "true");
        intMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
        intMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_INT");
        angelClient.addMatrix(intMatrix);

        // "w2": dense double rows, same shape and block limits as "w1".
        MatrixContext doubleMatrix = new MatrixContext();
        doubleMatrix.setName("w2");
        doubleMatrix.setRowNum(1);
        doubleMatrix.setColNum(100000);
        doubleMatrix.setMaxRowNumInBlock(1);
        doubleMatrix.setMaxColNumInBlock(50000);
        doubleMatrix.setRowType(RowType.T_DOUBLE_DENSE);
        doubleMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
        doubleMatrix.set(MatrixConf.MATRIX_HOGWILD, "false");
        doubleMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
        doubleMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_DOUBLE");
        angelClient.addMatrix(doubleMatrix);

        angelClient.startPSServer();
        angelClient.run();
        // Fixed 5 second pause after run(); presumably gives the local cluster time to
        // come up before each test - TODO confirm.
        Thread.sleep(5000);

        // Identifiers of the first worker group / worker / attempt, both tasks and the PS.
        group0Id = new WorkerGroupId(0);
        worker0Id = new WorkerId(group0Id, 0);
        worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
        task0Id = new TaskId(0);
        task1Id = new TaskId(1);
        psId = new ParameterServerId(0);
        psAttempt0Id = new PSAttemptId(psId, 0);
    } catch (Exception failure) {
        LOG.error("setup failed ", failure);
        throw failure;
    }
}
Also used : CombineTextInputFormat(org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat) MatrixContext(com.tencent.angel.ml.matrix.MatrixContext) TaskId(com.tencent.angel.worker.task.TaskId) Configuration(org.apache.hadoop.conf.Configuration) PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) WorkerId(com.tencent.angel.worker.WorkerId) ParameterServerId(com.tencent.angel.ps.ParameterServerId) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId) Before(org.junit.Before)

Aggregations

WorkerId (com.tencent.angel.worker.WorkerId)15 WorkerGroupId (com.tencent.angel.worker.WorkerGroupId)14 WorkerAttemptId (com.tencent.angel.worker.WorkerAttemptId)13 PSAttemptId (com.tencent.angel.ps.PSAttemptId)11 ParameterServerId (com.tencent.angel.ps.ParameterServerId)11 Configuration (org.apache.hadoop.conf.Configuration)10 CombineTextInputFormat (org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat)10 MatrixContext (com.tencent.angel.ml.matrix.MatrixContext)9 TaskId (com.tencent.angel.worker.task.TaskId)9 Before (org.junit.Before)5 Test (org.junit.Test)4 AngelException (com.tencent.angel.exception.AngelException)3 Worker (com.tencent.angel.worker.Worker)3 BeforeClass (org.junit.BeforeClass)3 ServiceException (com.google.protobuf.ServiceException)2 Location (com.tencent.angel.common.location.Location)2 DummyTask (com.tencent.angel.master.DummyTask)2 AMWorker (com.tencent.angel.master.worker.worker.AMWorker)2 DenseIntVector (com.tencent.angel.ml.math.vector.DenseIntVector)2 PSLocation (com.tencent.angel.ml.matrix.transport.PSLocation)2