Search in sources :

Example 11 with WorkerAttemptId

use of com.tencent.angel.worker.WorkerAttemptId in project angel by Tencent.

the class GetRowHashTest method setup.

/**
 * Prepares the fixture for each test: builds a LOCAL-mode configuration with
 * one worker group, one PS and one task per worker, registers eight
 * hash-partitioned sparse matrices (int-key and long-key variants of double,
 * float, int and long rows), starts the PS and the application, and finally
 * records the ids of PS 0 / worker 0 and their first attempts.
 *
 * @throws Exception if the client cannot be created or the application fails to start
 */
@Before
public void setup() throws Exception {
    final String tmpRoot = LOCAL_FS + TMP_PATH;

    // ---- job-level configuration ----
    final Configuration jobConf = new Configuration();
    jobConf.setBoolean("mapred.mapper.new-api", true);
    jobConf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
    jobConf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());

    // ---- local deployment with the dummy data splitter ----
    jobConf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
    jobConf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
    jobConf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
    jobConf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, tmpRoot + "/out");
    jobConf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, tmpRoot + "/in");
    jobConf.set(AngelConf.ANGEL_LOG_PATH, tmpRoot + "/log");

    // ---- cluster sizing, heartbeat intervals and retry limits ----
    jobConf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    jobConf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
    jobConf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
    jobConf.setInt(AngelConf.ANGEL_MODEL_PARTITIONER_PARTITION_SIZE, 1000);
    jobConf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10);
    jobConf.setInt(AngelConf.ANGEL_WORKER_HEARTBEAT_INTERVAL_MS, 1000);
    jobConf.setInt(AngelConf.ANGEL_PS_HEARTBEAT_INTERVAL_MS, 1000);
    jobConf.setInt(AngelConf.ANGEL_WORKER_MAX_ATTEMPTS, 1);
    jobConf.setInt(AngelConf.ANGEL_PS_MAX_ATTEMPTS, 1);

    angelClient = AngelClientFactory.get(jobConf);

    // ---- int-key sparse matrices ----
    // double values
    MatrixContext matrix = new MatrixContext();
    matrix.setName(SPARSE_DOUBLE_MAT);
    matrix.setRowNum(1);
    matrix.setRowType(RowType.T_DOUBLE_SPARSE);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // float values
    matrix = new MatrixContext();
    matrix.setName(SPARSE_FLOAT_MAT);
    matrix.setRowNum(1);
    matrix.setRowType(RowType.T_FLOAT_SPARSE);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // int values
    matrix = new MatrixContext();
    matrix.setName(SPARSE_INT_MAT);
    matrix.setRowNum(1);
    matrix.setRowType(RowType.T_INT_SPARSE);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // long values
    matrix = new MatrixContext();
    matrix.setName(SPARSE_LONG_MAT);
    matrix.setRowNum(1);
    matrix.setRowType(RowType.T_LONG_SPARSE);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // ---- long-key sparse matrices ----
    // double values; the only matrix that also sets a max column count per block
    matrix = new MatrixContext();
    matrix.setName(SPARSE_DOUBLE_LONG_MAT);
    matrix.setRowNum(1);
    matrix.setMaxColNumInBlock(colmunSize);
    matrix.setRowType(RowType.T_DOUBLE_SPARSE_LONGKEY);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // float values
    matrix = new MatrixContext();
    matrix.setName(SPARSE_FLOAT_LONG_MAT);
    matrix.setRowNum(1);
    matrix.setRowType(RowType.T_FLOAT_SPARSE_LONGKEY);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // int values
    matrix = new MatrixContext();
    matrix.setName(SPARSE_INT_LONG_MAT);
    matrix.setRowNum(1);
    matrix.setRowType(RowType.T_INT_SPARSE_LONGKEY);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // long values
    matrix = new MatrixContext();
    matrix.setName(SPARSE_LONG_LONG_MAT);
    matrix.setRowNum(1);
    matrix.setRowType(RowType.T_LONG_SPARSE_LONGKEY);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // ---- launch: parameter servers first, then the application ----
    angelClient.startPSServer();
    angelClient.run();
    // fixed pause before the tests proceed (presumably to let the local cluster come up)
    Thread.sleep(5000);

    // ---- ids of the first PS / worker and their first attempts ----
    psId = new ParameterServerId(0);
    psAttempt0Id = new PSAttemptId(psId, 0);
    final WorkerGroupId group0 = new WorkerGroupId(0);
    workerId = new WorkerId(group0, 0);
    workerAttempt0Id = new WorkerAttemptId(workerId, 0);
}
Also used : CombineTextInputFormat(org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat) MatrixContext(com.tencent.angel.ml.matrix.MatrixContext) Configuration(org.apache.hadoop.conf.Configuration) PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) ParameterServerId(com.tencent.angel.ps.ParameterServerId) WorkerId(com.tencent.angel.worker.WorkerId) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId) Before(org.junit.Before)

Example 12 with WorkerAttemptId

use of com.tencent.angel.worker.WorkerAttemptId in project angel by Tencent.

the class GetRowsHashTest method setup.

/**
 * Prepares the fixture for each test: builds a LOCAL-mode configuration with
 * one worker group and two PS instances, registers eight hash-partitioned
 * sparse matrices with {@code rowNum} rows each, starts the PS and the
 * application, and finally records the ids of PS 0 / worker 0 and their
 * first attempts.
 *
 * @throws Exception if the client cannot be created or the application fails to start
 */
@Before
public void setup() throws Exception {
    final String tmpRoot = LOCAL_FS + TMP_PATH;

    // ---- job-level configuration ----
    final Configuration jobConf = new Configuration();
    jobConf.setBoolean("mapred.mapper.new-api", true);
    jobConf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
    jobConf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());

    // ---- local deployment with the dummy data splitter ----
    jobConf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
    jobConf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
    jobConf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
    jobConf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, tmpRoot + "/out");
    jobConf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, tmpRoot + "/in");
    jobConf.set(AngelConf.ANGEL_LOG_PATH, tmpRoot + "/log");

    // ---- cluster sizing, heartbeat intervals and retry limits ----
    jobConf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    jobConf.setInt(AngelConf.ANGEL_PS_NUMBER, 2);
    jobConf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
    jobConf.setInt(AngelConf.ANGEL_MODEL_PARTITIONER_PARTITION_SIZE, 1000);
    jobConf.setBoolean("use.new.split", true);
    jobConf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10);
    jobConf.setInt(AngelConf.ANGEL_WORKER_HEARTBEAT_INTERVAL_MS, 1000);
    jobConf.setInt(AngelConf.ANGEL_PS_HEARTBEAT_INTERVAL_MS, 1000);
    jobConf.setInt(AngelConf.ANGEL_WORKER_MAX_ATTEMPTS, 1);
    jobConf.setInt(AngelConf.ANGEL_PS_MAX_ATTEMPTS, 1);

    angelClient = AngelClientFactory.get(jobConf);

    // ---- int-key sparse matrices ----
    // double values
    MatrixContext matrix = new MatrixContext();
    matrix.setName(SPARSE_DOUBLE_MAT);
    matrix.setRowNum(rowNum);
    matrix.setRowType(RowType.T_DOUBLE_SPARSE);
    matrix.setPartitionNum(partNum);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // float values
    matrix = new MatrixContext();
    matrix.setName(SPARSE_FLOAT_MAT);
    matrix.setRowNum(rowNum);
    matrix.setRowType(RowType.T_FLOAT_SPARSE);
    matrix.setPartitionNum(partNum);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // int values
    matrix = new MatrixContext();
    matrix.setName(SPARSE_INT_MAT);
    matrix.setRowNum(rowNum);
    matrix.setRowType(RowType.T_INT_SPARSE);
    matrix.setPartitionNum(partNum);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // long values
    matrix = new MatrixContext();
    matrix.setName(SPARSE_LONG_MAT);
    matrix.setRowNum(rowNum);
    matrix.setRowType(RowType.T_LONG_SPARSE);
    matrix.setPartitionNum(partNum);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // ---- long-key sparse matrices ----
    // double values
    // NOTE(review): unlike every other matrix here, no partition number is set
    // for this one - confirm that relying on the default is intentional.
    matrix = new MatrixContext();
    matrix.setName(SPARSE_DOUBLE_LONG_MAT);
    matrix.setRowNum(rowNum);
    matrix.setRowType(RowType.T_DOUBLE_SPARSE_LONGKEY);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // float values
    matrix = new MatrixContext();
    matrix.setName(SPARSE_FLOAT_LONG_MAT);
    matrix.setRowNum(rowNum);
    matrix.setRowType(RowType.T_FLOAT_SPARSE_LONGKEY);
    matrix.setPartitionNum(partNum);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // int values
    matrix = new MatrixContext();
    matrix.setName(SPARSE_INT_LONG_MAT);
    matrix.setRowNum(rowNum);
    matrix.setRowType(RowType.T_INT_SPARSE_LONGKEY);
    matrix.setPartitionNum(partNum);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // long values
    matrix = new MatrixContext();
    matrix.setName(SPARSE_LONG_LONG_MAT);
    matrix.setRowNum(rowNum);
    matrix.setRowType(RowType.T_LONG_SPARSE_LONGKEY);
    matrix.setPartitionNum(partNum);
    matrix.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(matrix);

    // ---- launch: parameter servers first, then the application ----
    angelClient.startPSServer();
    angelClient.run();
    // fixed pause before the tests proceed (presumably to let the local cluster come up)
    Thread.sleep(2000);

    // ---- ids of the first PS / worker and their first attempts ----
    psId = new ParameterServerId(0);
    psAttempt0Id = new PSAttemptId(psId, 0);
    final WorkerGroupId group0 = new WorkerGroupId(0);
    workerId = new WorkerId(group0, 0);
    workerAttempt0Id = new WorkerAttemptId(workerId, 0);
}
Also used : CombineTextInputFormat(org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat) MatrixContext(com.tencent.angel.ml.matrix.MatrixContext) Configuration(org.apache.hadoop.conf.Configuration) PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) ParameterServerId(com.tencent.angel.ps.ParameterServerId) WorkerId(com.tencent.angel.worker.WorkerId) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId) Before(org.junit.Before)

Example 13 with WorkerAttemptId

use of com.tencent.angel.worker.WorkerAttemptId in project angel by Tencent.

the class WorkerBlock method render.

@Override
protected void render(Block html) {
    set(TITLE, join("Angel Worker Attempt ", $(WORKER_ATTEMPT_ID)));
    String workerAttemptIdStr = $(WORKER_ATTEMPT_ID);
    if (workerAttemptIdStr == null || workerAttemptIdStr.isEmpty()) {
        html.p()._("Sorry, can't do anything without a WorkerId.")._();
        return;
    }
    WorkerAttemptId workerAttemptId = null;
    try {
        workerAttemptId = new WorkerAttemptId(workerAttemptIdStr);
    } catch (UnvalidIdStrException e) {
        LOG.error("unvalid id string, ", e);
        return;
    }
    AMWorker worker;
    worker = amContext.getWorkerManager().getWorker(workerAttemptId.getWorkerId());
    if (worker == null) {
        html.p()._("Sorry, can't find worker " + workerAttemptId.getWorkerId())._();
        return;
    }
    WorkerAttempt workerAttempt = worker.getWorkerAttempt(workerAttemptId);
    TABLE<DIV<Hamlet>> table = html.div(_INFO_WRAP).table("#job");
    TR<THEAD<TABLE<DIV<Hamlet>>>> headTr = table.thead().tr();
    headTr.th(_TH, "taskid").th(_TH, "state").th(_TH, "current iteration").th(_TH, "current iteration bar").th(_TH, "current progress").th(_TH, "current progress bar").th(_TH, "taskcounters");
    headTr._()._();
    float current_iteration_progress = (float) 0.0;
    float current_clock_progress = (float) 0.0;
    TBODY<TABLE<DIV<Hamlet>>> tbody = table.tbody();
    for (AMTask task : workerAttempt.getTaskMap().values()) {
        if (task.getProgress() >= 0 && task.getProgress() <= 1)
            current_iteration_progress = task.getProgress();
        current_clock_progress = ((float) task.getIteration()) / ((float) amContext.getTotalIterationNum());
        TR<TBODY<TABLE<DIV<Hamlet>>>> tr = tbody.tr();
        tr.td(task.getTaskId().toString()).td(task.getState().toString()).td(String.valueOf(task.getIteration()) + "/" + amContext.getTotalIterationNum()).td().div(_PROGRESSBAR).$title(// tooltip
        join(String.valueOf(current_clock_progress * 100), '%')).div(_PROGRESSBAR_VALUE).$style(join("width:", String.valueOf(current_clock_progress * 100), '%'))._()._()._().td(String.valueOf(current_iteration_progress)).td().div(_PROGRESSBAR).$title(join(String.valueOf(current_iteration_progress * 100), '%')).div(_PROGRESSBAR_VALUE).$style(join("width:", String.valueOf(current_iteration_progress * 100), '%'))._()._()._().td().a(url("angel/taskCountersPage/", task.getTaskId().toString()), "taskcounters")._();
        tr._();
    }
    tbody._()._()._();
}
Also used : Hamlet(org.apache.hadoop.yarn.webapp.hamlet.Hamlet) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) UnvalidIdStrException(com.tencent.angel.exception.UnvalidIdStrException) AMWorker(com.tencent.angel.master.worker.worker.AMWorker) WorkerAttempt(com.tencent.angel.master.worker.attempt.WorkerAttempt) AMTask(com.tencent.angel.master.task.AMTask)

Example 14 with WorkerAttemptId

use of com.tencent.angel.worker.WorkerAttemptId in project angel by Tencent.

the class WorkerThreadStackBlock method render.

/**
 * Renders the thread stack of the worker attempt named by the
 * {@code WORKER_ATTEMPT_ID} request parameter inside a {@code <pre>} element.
 *
 * <p>If the id cannot be parsed or the stack cannot be fetched, the failure is
 * logged and nothing beyond the page title is produced.
 *
 * @param html the block the page content is written to
 */
@Override
protected void render(Block html) {
    set(TITLE, join("Angel WorkerThreadStack ", $(WORKER_ATTEMPT_ID)));
    try {
        final WorkerAttemptId attemptId = new WorkerAttemptId($(WORKER_ATTEMPT_ID));
        LOG.info("start init WorkerClient");
        final WorkerClient client = new WorkerClient(amContext, attemptId);
        // Fetch the dump before opening the <pre> element so a failed call
        // leaves no half-written markup behind.
        final String stackDump = client.getThreadStack();
        html.pre()._(stackDump)._();
    } catch (IOException | UnvalidIdStrException | ServiceException e) {
        LOG.error("get stack for " + $(WORKER_ATTEMPT_ID) + " failed, ", e);
    }
}
Also used : ServiceException(com.google.protobuf.ServiceException) WorkerClient(com.tencent.angel.master.client.WorkerClient) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) IOException(java.io.IOException) UnvalidIdStrException(com.tencent.angel.exception.UnvalidIdStrException)

Example 15 with WorkerAttemptId

use of com.tencent.angel.worker.WorkerAttemptId in project angel by Tencent.

the class LocalClusterHelper method setup.

/**
 * Starts a LOCAL-mode Angel application with one worker group, one PS and two
 * tasks per worker, registers the dense matrices {@code w1} (int) and
 * {@code w2} (double), and stores the ids of the first worker group, worker,
 * worker attempt, the two tasks, the PS and its first attempt in the static
 * fields of this helper.
 *
 * @throws Exception if the client cannot be created or the application fails to start
 */
public static void setup() throws Exception {
    final String tmpRoot = LOCAL_FS + TMP_PATH;

    // ---- job-level configuration ----
    final Configuration jobConf = new Configuration();
    jobConf.setBoolean("mapred.mapper.new-api", true);
    jobConf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
    jobConf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());

    // ---- local deployment with the dummy data splitter ----
    jobConf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
    jobConf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
    jobConf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
    jobConf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, tmpRoot + "/out");
    jobConf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, tmpRoot + "/in");
    jobConf.set(AngelConf.ANGEL_LOG_PATH, tmpRoot + "/log");

    // ---- cluster sizing and heartbeat intervals ----
    jobConf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    jobConf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
    jobConf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 2);
    jobConf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10);
    jobConf.setInt(AngelConf.ANGEL_WORKER_HEARTBEAT_INTERVAL_MS, 1000);
    jobConf.setInt(AngelConf.ANGEL_PS_HEARTBEAT_INTERVAL_MS, 1000);

    angelClient = AngelClientFactory.get(jobConf);

    // ---- matrix "w1": 10 x 1000 dense int, blocks of 10 x 500 ----
    final MatrixContext intMatrix = new MatrixContext();
    intMatrix.setName("w1");
    intMatrix.setRowNum(10);
    intMatrix.setColNum(1000);
    intMatrix.setMaxRowNumInBlock(10);
    intMatrix.setMaxColNumInBlock(500);
    intMatrix.setRowType(RowType.T_INT_DENSE);
    intMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
    intMatrix.set(MatrixConf.MATRIX_HOGWILD, "true");
    intMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
    intMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_INT");
    angelClient.addMatrix(intMatrix);

    // ---- matrix "w2": 10 x 100 dense double, blocks of 5 x 50 ----
    final MatrixContext doubleMatrix = new MatrixContext();
    doubleMatrix.setName("w2");
    doubleMatrix.setRowNum(10);
    doubleMatrix.setColNum(100);
    doubleMatrix.setMaxRowNumInBlock(5);
    doubleMatrix.setMaxColNumInBlock(50);
    doubleMatrix.setRowType(RowType.T_DOUBLE_DENSE);
    doubleMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
    doubleMatrix.set(MatrixConf.MATRIX_HOGWILD, "false");
    doubleMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
    doubleMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_DOUBLE");
    angelClient.addMatrix(doubleMatrix);

    // ---- launch: parameter servers first, then the application ----
    angelClient.startPSServer();
    angelClient.run();
    // fixed two-second pause (presumably to let the local cluster come up)
    Thread.sleep(2000);

    // ---- ids shared with the tests through static fields ----
    group0Id = new WorkerGroupId(0);
    worker0Id = new WorkerId(group0Id, 0);
    worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
    task0Id = new TaskId(0);
    task1Id = new TaskId(1);
    psId = new ParameterServerId(0);
    psAttempt0Id = new PSAttemptId(psId, 0);
}
Also used : CombineTextInputFormat(org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat) MatrixContext(com.tencent.angel.ml.matrix.MatrixContext) TaskId(com.tencent.angel.worker.task.TaskId) Configuration(org.apache.hadoop.conf.Configuration) PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) WorkerId(com.tencent.angel.worker.WorkerId) ParameterServerId(com.tencent.angel.ps.ParameterServerId) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId)

Aggregations

WorkerAttemptId (com.tencent.angel.worker.WorkerAttemptId): 45; PSAttemptId (com.tencent.angel.ps.PSAttemptId): 32; WorkerGroupId (com.tencent.angel.worker.WorkerGroupId): 31; WorkerId (com.tencent.angel.worker.WorkerId): 30; ParameterServerId (com.tencent.angel.ps.ParameterServerId): 28; Configuration (org.apache.hadoop.conf.Configuration): 27; CombineTextInputFormat (org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat): 27; MatrixContext (com.tencent.angel.ml.matrix.MatrixContext): 26; Before (org.junit.Before): 22; TaskId (com.tencent.angel.worker.task.TaskId): 9; UnvalidIdStrException (com.tencent.angel.exception.UnvalidIdStrException): 4; WorkerAttemptEvent (com.tencent.angel.master.worker.attempt.WorkerAttemptEvent): 4; Test (org.junit.Test): 4; ServiceException (com.google.protobuf.ServiceException): 3; Id (com.tencent.angel.common.Id): 3; AngelException (com.tencent.angel.exception.AngelException): 3; WorkerAttempt (com.tencent.angel.master.worker.attempt.WorkerAttempt): 3; WorkerAttemptDiagnosticsUpdateEvent (com.tencent.angel.master.worker.attempt.WorkerAttemptDiagnosticsUpdateEvent): 3; Worker (com.tencent.angel.worker.Worker): 3; BeforeClass (org.junit.BeforeClass): 3