Search in sources :

Example 46 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

The method setup of the class UpdateRowsTest.

/**
 * Prepares the environment for each test: builds a LOCAL-mode configuration
 * (1 worker group, 2 PS, 1 task per worker), registers one matrix per row type
 * (dense/sparse and long-key variants of double, float, int and long), starts the
 * parameter servers and the application, and finally creates the PS / worker ids
 * used by the tests.
 *
 * @throws Exception if the client cannot be created or the application fails to start
 */
@Before
public void setup() throws Exception {
    // set basic configuration keys
    Configuration conf = new Configuration();
    conf.setBoolean("mapred.mapper.new-api", true);
    conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
    conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());
    // use local deploy mode and dummy data splitter
    conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
    conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
    conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
    conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
    conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
    conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
    conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_PS_NUMBER, 2);
    conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_MODEL_PARTITIONER_PARTITION_SIZE, 1000);
    conf.setBoolean("use.new.split", false);
    conf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10);
    conf.setInt(AngelConf.ANGEL_WORKER_HEARTBEAT_INTERVAL_MS, 1000);
    conf.setInt(AngelConf.ANGEL_PS_HEARTBEAT_INTERVAL_MS, 1000);
    conf.setInt(AngelConf.ANGEL_WORKER_MAX_ATTEMPTS, 1);
    conf.setInt(AngelConf.ANGEL_PS_MAX_ATTEMPTS, 1);
    // get an angel client
    angelClient = AngelClientFactory.get(conf);

    // Dense matrices are given an explicit ColumnRangePartitioner; sparse and
    // long-key matrices keep whatever partitioner MatrixContext uses by default.

    // add dense double matrix
    MatrixContext dMat = newMatrixContext(DENSE_DOUBLE_MAT, RowType.T_DOUBLE_DENSE);
    dMat.setPartitionerClass(ColumnRangePartitioner.class);
    angelClient.addMatrix(dMat);
    // add sparse double matrix
    angelClient.addMatrix(newMatrixContext(SPARSE_DOUBLE_MAT, RowType.T_DOUBLE_SPARSE));
    // add dense float matrix
    MatrixContext dfMat = newMatrixContext(DENSE_FLOAT_MAT, RowType.T_FLOAT_DENSE);
    dfMat.setPartitionerClass(ColumnRangePartitioner.class);
    angelClient.addMatrix(dfMat);
    // add sparse float matrix
    angelClient.addMatrix(newMatrixContext(SPARSE_FLOAT_MAT, RowType.T_FLOAT_SPARSE));
    // add dense int matrix
    MatrixContext diMat = newMatrixContext(DENSE_INT_MAT, RowType.T_INT_DENSE);
    diMat.setPartitionerClass(ColumnRangePartitioner.class);
    angelClient.addMatrix(diMat);
    // add sparse int matrix
    angelClient.addMatrix(newMatrixContext(SPARSE_INT_MAT, RowType.T_INT_SPARSE));
    // add dense long matrix
    MatrixContext dlMat = newMatrixContext(DENSE_LONG_MAT, RowType.T_LONG_DENSE);
    dlMat.setPartitionerClass(ColumnRangePartitioner.class);
    angelClient.addMatrix(dlMat);
    // add sparse long matrix
    angelClient.addMatrix(newMatrixContext(SPARSE_LONG_MAT, RowType.T_LONG_SPARSE));
    // add sparse long-key double matrix
    angelClient.addMatrix(newMatrixContext(SPARSE_DOUBLE_LONG_MAT, RowType.T_DOUBLE_SPARSE_LONGKEY));
    // add sparse long-key float matrix
    angelClient.addMatrix(newMatrixContext(SPARSE_FLOAT_LONG_MAT, RowType.T_FLOAT_SPARSE_LONGKEY));
    // add sparse long-key int matrix
    angelClient.addMatrix(newMatrixContext(SPARSE_INT_LONG_MAT, RowType.T_INT_SPARSE_LONGKEY));
    // add sparse long-key long matrix
    angelClient.addMatrix(newMatrixContext(SPARSE_LONG_LONG_MAT, RowType.T_LONG_SPARSE_LONGKEY));

    // Start PS
    angelClient.startPSServer();
    // Start to run application
    angelClient.run();
    // NOTE(review): fixed 5s pause, presumably to let the local cluster finish starting - confirm
    Thread.sleep(5000);
    psId = new ParameterServerId(0);
    psAttempt0Id = new PSAttemptId(psId, 0);
    WorkerGroupId workerGroupId = new WorkerGroupId(0);
    workerId = new WorkerId(workerGroupId, 0);
    workerAttempt0Id = new WorkerAttemptId(workerId, 0);
}

/**
 * Builds a matrix context with the shape shared by every matrix of this test
 * (rowNum x feaNum, split into blockRowNum x blockColNum blocks).
 *
 * @param name matrix name
 * @param rowType row type of the matrix
 * @return the new context; it has not been added to the client yet
 */
private MatrixContext newMatrixContext(String name, RowType rowType) {
    MatrixContext matrix = new MatrixContext();
    matrix.setName(name);
    matrix.setRowNum(rowNum);
    matrix.setColNum(feaNum);
    matrix.setMaxRowNumInBlock(blockRowNum);
    matrix.setMaxColNumInBlock(blockColNum);
    matrix.setRowType(rowType);
    return matrix;
}
Also used : CombineTextInputFormat(org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat) MatrixContext(com.tencent.angel.ml.matrix.MatrixContext) Configuration(org.apache.hadoop.conf.Configuration) PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) ParameterServerId(com.tencent.angel.ps.ParameterServerId) WorkerId(com.tencent.angel.worker.WorkerId) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId) Before(org.junit.Before)

Example 47 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

The method setup of the class MatrixMetaManagerTest.

/**
 * One-time fixture: builds a LOCAL-mode configuration (1 worker group, 2 tasks per
 * worker), registers the matrices "w1", "w2" and "w3", starts the parameter servers
 * and the application, and creates the worker / task / PS ids used by the tests.
 *
 * @throws Exception if any step fails; the failure is logged and rethrown
 */
@BeforeClass
public static void setup() throws Exception {
    try {
        // set basic configuration keys
        Configuration conf = new Configuration();
        conf.setBoolean("mapred.mapper.new-api", true);
        conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
        conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());
        // use local deploy mode and dummy data splitter
        conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
        conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
        conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
        conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
        conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
        conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
        conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
        conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 2);
        conf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10);
        conf.setInt(AngelConf.ANGEL_WORKER_HEARTBEAT_INTERVAL_MS, 1000);
        conf.setInt(AngelConf.ANGEL_PS_HEARTBEAT_INTERVAL_MS, 1000);
        // get an angel client
        angelClient = AngelClientFactory.get(conf);
        // add matrix "w1": dense int, hogwild enabled
        MatrixContext mMatrix = new MatrixContext();
        mMatrix.setName("w1");
        mMatrix.setRowNum(1);
        mMatrix.setColNum(100000);
        mMatrix.setMaxRowNumInBlock(1);
        mMatrix.setMaxColNumInBlock(50000);
        mMatrix.setRowType(RowType.T_INT_DENSE);
        mMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
        mMatrix.set(MatrixConf.MATRIX_HOGWILD, "true");
        mMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
        mMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, RowType.T_DOUBLE_DENSE.name());
        angelClient.addMatrix(mMatrix);
        // add matrix "w2": dense double, hogwild disabled
        MatrixContext mMatrix2 = new MatrixContext();
        mMatrix2.setName("w2");
        mMatrix2.setRowNum(1);
        mMatrix2.setColNum(100000);
        mMatrix2.setMaxRowNumInBlock(1);
        mMatrix2.setMaxColNumInBlock(50000);
        mMatrix2.setRowType(RowType.T_DOUBLE_DENSE);
        mMatrix2.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
        mMatrix2.set(MatrixConf.MATRIX_HOGWILD, "false");
        mMatrix2.set(MatrixConf.MATRIX_AVERAGE, "false");
        mMatrix2.set(MatrixConf.MATRIX_OPLOG_TYPE, RowType.T_DOUBLE_DENSE.name());
        angelClient.addMatrix(mMatrix2);
        // add matrix "w3": sparse float, no extra matrix attributes
        MatrixContext mMatrix3 = new MatrixContext();
        mMatrix3.setName("w3");
        mMatrix3.setRowNum(1);
        mMatrix3.setColNum(100000);
        mMatrix3.setMaxRowNumInBlock(1);
        mMatrix3.setMaxColNumInBlock(50000);
        mMatrix3.setRowType(RowType.T_FLOAT_SPARSE);
        // register the context built just above (not mMatrix2 a second time)
        angelClient.addMatrix(mMatrix3);
        angelClient.startPSServer();
        angelClient.run();
        // NOTE(review): fixed 5s pause, presumably to let the local cluster finish starting - confirm
        Thread.sleep(5000);
        group0Id = new WorkerGroupId(0);
        worker0Id = new WorkerId(group0Id, 0);
        worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
        task0Id = new TaskId(0);
        task1Id = new TaskId(1);
        psId = new ParameterServerId(0);
        psAttempt0Id = new PSAttemptId(psId, 0);
    } catch (Exception x) {
        LOG.error("setup failed ", x);
        throw x;
    }
}
Also used : CombineTextInputFormat(org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat) TaskId(com.tencent.angel.worker.task.TaskId) Configuration(org.apache.hadoop.conf.Configuration) PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) WorkerId(com.tencent.angel.worker.WorkerId) ParameterServerId(com.tencent.angel.ps.ParameterServerId) AngelException(com.tencent.angel.exception.AngelException) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId) BeforeClass(org.junit.BeforeClass)

Example 48 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

The method setup of the class GetRowTest.

/**
 * Prepares the environment for each test: builds a LOCAL-mode configuration
 * (1 worker group, 1 PS, 1 task per worker), registers one single-row matrix per
 * row type (dense/sparse and long-key variants of double, float, int and long),
 * starts the parameter server and the application, and finally creates the
 * PS / worker ids used by the tests.
 *
 * @throws Exception if the client cannot be created or the application fails to start
 */
@Before
public void setup() throws Exception {
    // set basic configuration keys
    Configuration conf = new Configuration();
    conf.setBoolean("mapred.mapper.new-api", true);
    conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
    conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());
    // use local deploy mode and dummy data splitter
    conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
    conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
    conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
    conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
    conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
    conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
    conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_MODEL_PARTITIONER_PARTITION_SIZE, 1000);
    conf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10);
    conf.setInt(AngelConf.ANGEL_WORKER_HEARTBEAT_INTERVAL_MS, 1000);
    conf.setInt(AngelConf.ANGEL_PS_HEARTBEAT_INTERVAL_MS, 1000);
    conf.setInt(AngelConf.ANGEL_WORKER_MAX_ATTEMPTS, 1);
    conf.setInt(AngelConf.ANGEL_PS_MAX_ATTEMPTS, 1);
    // get an angel client
    angelClient = AngelClientFactory.get(conf);

    // Dense matrices are given a ColumnRangePartitioner and the sparse double matrix
    // a HashPartitioner; the remaining matrices keep whatever partitioner
    // MatrixContext uses by default.

    // add dense double matrix
    MatrixContext dMat = newSingleRowMatrixContext(DENSE_DOUBLE_MAT, RowType.T_DOUBLE_DENSE);
    dMat.setPartitionerClass(ColumnRangePartitioner.class);
    angelClient.addMatrix(dMat);
    // add sparse double matrix
    MatrixContext sMat = newSingleRowMatrixContext(SPARSE_DOUBLE_MAT, RowType.T_DOUBLE_SPARSE);
    sMat.setPartitionerClass(HashPartitioner.class);
    angelClient.addMatrix(sMat);
    // add dense float matrix
    MatrixContext dfMat = newSingleRowMatrixContext(DENSE_FLOAT_MAT, RowType.T_FLOAT_DENSE);
    dfMat.setPartitionerClass(ColumnRangePartitioner.class);
    angelClient.addMatrix(dfMat);
    // add sparse float matrix
    angelClient.addMatrix(newSingleRowMatrixContext(SPARSE_FLOAT_MAT, RowType.T_FLOAT_SPARSE));
    // add dense int matrix
    MatrixContext diMat = newSingleRowMatrixContext(DENSE_INT_MAT, RowType.T_INT_DENSE);
    diMat.setPartitionerClass(ColumnRangePartitioner.class);
    angelClient.addMatrix(diMat);
    // add sparse int matrix
    angelClient.addMatrix(newSingleRowMatrixContext(SPARSE_INT_MAT, RowType.T_INT_SPARSE));
    // add dense long matrix
    MatrixContext dlMat = newSingleRowMatrixContext(DENSE_LONG_MAT, RowType.T_LONG_DENSE);
    dlMat.setPartitionerClass(ColumnRangePartitioner.class);
    angelClient.addMatrix(dlMat);
    // add sparse long matrix
    angelClient.addMatrix(newSingleRowMatrixContext(SPARSE_LONG_MAT, RowType.T_LONG_SPARSE));
    // add sparse long-key double matrix
    angelClient.addMatrix(newSingleRowMatrixContext(SPARSE_DOUBLE_LONG_MAT, RowType.T_DOUBLE_SPARSE_LONGKEY));
    // add sparse long-key float matrix
    angelClient.addMatrix(newSingleRowMatrixContext(SPARSE_FLOAT_LONG_MAT, RowType.T_FLOAT_SPARSE_LONGKEY));
    // add sparse long-key int matrix
    angelClient.addMatrix(newSingleRowMatrixContext(SPARSE_INT_LONG_MAT, RowType.T_INT_SPARSE_LONGKEY));
    // add sparse long-key long matrix
    angelClient.addMatrix(newSingleRowMatrixContext(SPARSE_LONG_LONG_MAT, RowType.T_LONG_SPARSE_LONGKEY));

    // Start PS
    angelClient.startPSServer();
    // Start to run application
    angelClient.run();
    // NOTE(review): fixed 5s pause, presumably to let the local cluster finish starting - confirm
    Thread.sleep(5000);
    psId = new ParameterServerId(0);
    psAttempt0Id = new PSAttemptId(psId, 0);
    WorkerGroupId workerGroupId = new WorkerGroupId(0);
    workerId = new WorkerId(workerGroupId, 0);
    workerAttempt0Id = new WorkerAttemptId(workerId, 0);
}

/**
 * Builds a matrix context with the shape shared by every matrix of this test:
 * a single row of feaNum columns with at most colmunSize columns per block.
 *
 * @param name matrix name
 * @param rowType row type of the matrix
 * @return the new context; it has not been added to the client yet
 */
private MatrixContext newSingleRowMatrixContext(String name, RowType rowType) {
    MatrixContext matrix = new MatrixContext();
    matrix.setName(name);
    matrix.setRowNum(1);
    matrix.setColNum(feaNum);
    matrix.setMaxColNumInBlock(colmunSize);
    matrix.setRowType(rowType);
    return matrix;
}
Also used : CombineTextInputFormat(org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat) MatrixContext(com.tencent.angel.ml.matrix.MatrixContext) Configuration(org.apache.hadoop.conf.Configuration) PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) ParameterServerId(com.tencent.angel.ps.ParameterServerId) WorkerId(com.tencent.angel.worker.WorkerId) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId) Before(org.junit.Before)

Example 49 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

The method testPSManager of the class PSManagerTest.

/**
 * Checks the parameter server manager of the local application master: exactly one
 * PS is registered under psId and is RUNNING, and it has exactly one attempt
 * (psAttempt0Id) whose internal state is RUNNING.
 *
 * @throws Exception if any lookup fails; the failure is logged and rethrown
 */
@Test
public void testPSManager() throws Exception {
    try {
        LOG.info("===========================testPSManager===============================");
        AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
        assertTrue(angelAppMaster != null);
        ParameterServerManager psManager = angelAppMaster.getAppContext().getParameterServerManager();
        Map<ParameterServerId, AMParameterServer> psMap = psManager.getParameterServerMap();
        // JUnit's assertEquals takes (expected, actual); keeping that order makes
        // failure messages report the right side as "expected"
        assertEquals(1, psMap.size());
        AMParameterServer ps = psMap.get(psId);
        assertTrue(ps != null);
        assertEquals(psId, ps.getId());
        assertEquals(AMParameterServerState.RUNNING, ps.getState());
        Map<PSAttemptId, PSAttempt> psAttempts = ps.getPSAttempts();
        assertEquals(1, psAttempts.size());
        PSAttempt psAttempt = psAttempts.get(psAttempt0Id);
        // fail with an assertion instead of a NullPointerException when the attempt is missing
        assertTrue(psAttempt != null);
        assertEquals(PSAttemptStateInternal.RUNNING, psAttempt.getInternalState());
    } catch (Exception x) {
        LOG.error("run testPSManager failed ", x);
        throw x;
    }
}
Also used : AMParameterServer(com.tencent.angel.master.ps.ps.AMParameterServer) PSAttemptId(com.tencent.angel.ps.PSAttemptId) PSAttempt(com.tencent.angel.master.ps.attempt.PSAttempt) ParameterServerManager(com.tencent.angel.master.ps.ParameterServerManager) ParameterServerId(com.tencent.angel.ps.ParameterServerId) AngelException(com.tencent.angel.exception.AngelException) Test(org.junit.Test)

Example 50 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

The method combineMatrix of the class AMModelSaver.

/**
 * Combine all output files of a matrix to a combine directory and write the
 * matrix meta file next to them.
 * <p>
 * Failures are not propagated: any error is logged and appended to {@code errorLogs}.
 *
 * @param saveContext model save context; the output files are read from the result
 *        directory under its temporary save path
 * @param matrixContext matrix save context
 * @param errorLogs error logs, a message is appended when combining this matrix fails
 * @param tmpCombinePath temporary combine directory; the files are placed in the
 *        child path named after the matrix
 * @param fs file system used to create and rename the meta file
 */
private void combineMatrix(ModelSaveContext saveContext, MatrixSaveContext matrixContext, Vector<String> errorLogs, Path tmpCombinePath, FileSystem fs) {
    LOG.info("start commit matrix " + matrixContext.getMatrixName());
    // Init matrix files meta
    int matrixId = context.getMatrixMetaManager().getMatrix(matrixContext.getMatrixName()).getId();
    List<ParameterServerId> psIds = new ArrayList<>(context.getMatrixMetaManager().getMasterPsIds(matrixId));
    MatrixMeta meta = context.getMatrixMetaManager().getMatrix(matrixId);
    Map<String, String> kvMap = meta.getAttributes();
    MatrixFilesMeta filesMeta = new MatrixFilesMeta(matrixId, meta.getName(), matrixContext.getFormatClassName(), meta.getRowType().getNumber(), meta.getRowNum(), meta.getColNum(), meta.getBlockRowNum(), meta.getBlockColNum(), kvMap);
    filesMeta.setFeatureIndexStart(meta.getIndexStart());
    filesMeta.setFeatureIndexEnd(meta.getIndexEnd());
    try {
        // Move output files
        Path srcPath = new Path(saveContext.getTmpSavePath(), ModelFilesConstent.resultDirName);
        Path destPath = new Path(tmpCombinePath, meta.getName());
        PSModelCombineOp partCombineOp = new PSModelCombineOp(srcPath, destPath, psIds, errorLogs, filesMeta, 0, psIds.size(), fs);
        fileOpExecutor.execute(partCombineOp);
        // wait for the combine operation before writing the meta file
        partCombineOp.join();
        // Write the meta file: write to a temporary path first, then rename it to the final name
        long startTs = System.currentTimeMillis();
        Path metaFile = new Path(destPath, ModelFilesConstent.modelMetaFileName);
        Path tmpMetaFile = HdfsUtil.toTmpPath(metaFile);
        // try-with-resources closes the stream even when write/flush throws
        try (FSDataOutputStream metaOut = fs.create(tmpMetaFile)) {
            filesMeta.write(metaOut);
            metaOut.flush();
        }
        HdfsUtil.rename(tmpMetaFile, metaFile, fs);
        LOG.info("commit meta file use time=" + (System.currentTimeMillis() - startTs));
    } catch (Throwable x) {
        errorLogs.add("move output files for matrix " + meta.getName() + " failed, error msg = " + x.getMessage());
        LOG.error("move output files for matrix " + meta.getName() + " failed.", x);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixMeta(com.tencent.angel.ml.matrix.MatrixMeta) ArrayList(java.util.ArrayList) PSMatrixFilesMeta(com.tencent.angel.model.output.format.PSMatrixFilesMeta) MatrixFilesMeta(com.tencent.angel.model.output.format.MatrixFilesMeta) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) ParameterServerId(com.tencent.angel.ps.ParameterServerId)

Aggregations

ParameterServerId (com.tencent.angel.ps.ParameterServerId)65 PSAttemptId (com.tencent.angel.ps.PSAttemptId)33 WorkerAttemptId (com.tencent.angel.worker.WorkerAttemptId)28 WorkerGroupId (com.tencent.angel.worker.WorkerGroupId)28 WorkerId (com.tencent.angel.worker.WorkerId)28 Configuration (org.apache.hadoop.conf.Configuration)28 MatrixContext (com.tencent.angel.ml.matrix.MatrixContext)27 CombineTextInputFormat (org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat)27 Before (org.junit.Before)23 TaskId (com.tencent.angel.worker.task.TaskId)9 PSLocation (com.tencent.angel.ps.server.data.PSLocation)6 HashMap (java.util.HashMap)6 Location (com.tencent.angel.common.location.Location)5 MatrixMeta (com.tencent.angel.ml.matrix.MatrixMeta)5 PartitionLocation (com.tencent.angel.ml.matrix.PartitionLocation)5 ArrayList (java.util.ArrayList)5 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)5 Path (org.apache.hadoop.fs.Path)5 Test (org.junit.Test)5 AMParameterServer (com.tencent.angel.master.ps.ps.AMParameterServer)4