Search in sources :

Example 1 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class PSMatrixMetaManager method getPartLocation.

/**
 * Build the location of a partition: one entry per parameter server returned by
 * {@code getPss}, each paired with the location the location manager reports for it.
 * @param partitionKey partition information
 * @return partition location
 * @throws ServiceException declared by this method; presumably raised by the server lookup
 */
public PartitionLocation getPartLocation(PartitionKey partitionKey) throws ServiceException {
    List<ParameterServerId> serverIds = getPss(partitionKey);
    // Pre-size the result: exactly one PSLocation is produced per server id.
    List<PSLocation> locations = new ArrayList<>(serverIds.size());
    for (ParameterServerId serverId : serverIds) {
        locations.add(new PSLocation(serverId, context.getLocationManager().getPsLocation(serverId)));
    }
    return new PartitionLocation(locations);
}
Also used : PSLocation(com.tencent.angel.ml.matrix.transport.PSLocation) ArrayList(java.util.ArrayList) ParameterServerId(com.tencent.angel.ps.ParameterServerId) PartitionLocation(com.tencent.angel.ml.matrix.PartitionLocation)

Example 2 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class ParameterServerServiceTest method setup.

/**
 * JUnit fixture that runs before each test: builds a local-mode Angel configuration,
 * registers two matrices ("w1" and "w2"), starts the PS server and the application,
 * then initializes the parameter server ids used by the tests.
 *
 * @throws Exception any failure during setup; it is logged and rethrown unchanged
 */
@Before
public void setup() throws Exception {
    try {
        // Set basic configuration keys
        conf = new Configuration();
        conf.setBoolean("mapred.mapper.new-api", true);
        conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
        conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());
        // Use local deploy mode and the dummy data splitter
        conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
        conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
        conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
        // Model output, training input and log directories all live under TMP_PATH
        conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
        conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
        conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
        // Topology: 1 worker group, 1 parameter server, 2 tasks per worker
        conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
        conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
        conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 2);
        // Get an Angel client for this configuration
        angelClient = AngelClientFactory.get(conf);
        // Add matrix "w1": 1 x 100000, dense int rows, blocks of at most 1 x 50000,
        // hogwild enabled
        MatrixContext mMatrix = new MatrixContext();
        mMatrix.setName("w1");
        mMatrix.setRowNum(1);
        mMatrix.setColNum(100000);
        mMatrix.setMaxRowNumInBlock(1);
        mMatrix.setMaxColNumInBlock(50000);
        mMatrix.setRowType(RowType.T_INT_DENSE);
        mMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
        mMatrix.set(MatrixConf.MATRIX_HOGWILD, "true");
        mMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
        mMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_INT");
        angelClient.addMatrix(mMatrix);
        // Add matrix "w2": same dimensions, dense double rows, hogwild disabled
        mMatrix = new MatrixContext();
        mMatrix.setName("w2");
        mMatrix.setRowNum(1);
        mMatrix.setColNum(100000);
        mMatrix.setMaxRowNumInBlock(1);
        mMatrix.setMaxColNumInBlock(50000);
        mMatrix.setRowType(RowType.T_DOUBLE_DENSE);
        mMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
        mMatrix.set(MatrixConf.MATRIX_HOGWILD, "false");
        mMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
        mMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_DOUBLE");
        angelClient.addMatrix(mMatrix);
        // Start the parameter server(s) and the application
        angelClient.startPSServer();
        angelClient.run();
        // NOTE(review): fixed 5 s pause, presumably to let the PS and workers finish
        // starting before tests run -- confirm; a readiness check would be less flaky.
        Thread.sleep(5000);
        // Worker/task ids are currently disabled in this fixture:
        // group0Id = new WorkerGroupId(0);
        // worker0Id = new WorkerId(group0Id, 0);
        // worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
        // task0Id = new TaskId(0);
        // task1Id = new TaskId(1);
        // Ids of parameter server 0 and its attempt 0
        psId = new ParameterServerId(0);
        psAttempt0Id = new PSAttemptId(psId, 0);
    } catch (Exception x) {
        // Log for diagnosis, then rethrow so the failure still reaches the test runner
        LOG.error("setup failed ", x);
        throw x;
    }
}
Also used : CombineTextInputFormat(org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat) DummyTask(com.tencent.angel.master.DummyTask) MatrixContext(com.tencent.angel.ml.matrix.MatrixContext) Configuration(org.apache.hadoop.conf.Configuration) PSAttemptId(com.tencent.angel.ps.PSAttemptId) ParameterServerId(com.tencent.angel.ps.ParameterServerId) Before(org.junit.Before)

Example 3 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class TransportTest method setup.

/**
 * JUnit class-level fixture: builds a local-mode Angel configuration, registers five
 * dense matrices (two double, two int, one float), starts the PS server and the
 * application, then initializes the worker, task and parameter server ids shared by
 * the tests.
 *
 * @throws Exception any failure during setup; it is logged and rethrown unchanged
 */
@BeforeClass
public static void setup() throws Exception {
    try {
        // Set basic configuration keys
        Configuration conf = new Configuration();
        conf.setBoolean("mapred.mapper.new-api", true);
        conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
        conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());
        // Use local deploy mode and the dummy data splitter
        conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
        conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
        conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
        // Model output, training input and log directories all live under TMP_PATH
        conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
        conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
        conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
        // Topology: 1 worker group, 1 parameter server, 1 task per worker
        conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
        conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
        conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
        // Create an Angel client
        angelClient = AngelClientFactory.get(conf);
        // Add different types of matrix. Every matrix below limits its block size to
        // half of its row and column counts and disables oplog filtering, hogwild
        // and averaging.
        // Dense double matrix "dense_double_mat" (ddRow x ddCol)
        MatrixContext matrix = new MatrixContext();
        matrix.setName("dense_double_mat");
        matrix.setRowNum(ddRow);
        matrix.setColNum(ddCol);
        matrix.setMaxRowNumInBlock(ddRow / 2);
        matrix.setMaxColNumInBlock(ddCol / 2);
        matrix.setRowType(RowType.T_DOUBLE_DENSE);
        matrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
        matrix.set(MatrixConf.MATRIX_HOGWILD, "false");
        matrix.set(MatrixConf.MATRIX_AVERAGE, "false");
        matrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_DOUBLE");
        angelClient.addMatrix(matrix);
        // Second dense double matrix "dense_double_mat_1", same settings
        matrix = new MatrixContext();
        matrix.setName("dense_double_mat_1");
        matrix.setRowNum(ddRow);
        matrix.setColNum(ddCol);
        matrix.setMaxRowNumInBlock(ddRow / 2);
        matrix.setMaxColNumInBlock(ddCol / 2);
        matrix.setRowType(RowType.T_DOUBLE_DENSE);
        matrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
        matrix.set(MatrixConf.MATRIX_HOGWILD, "false");
        matrix.set(MatrixConf.MATRIX_AVERAGE, "false");
        matrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_DOUBLE");
        angelClient.addMatrix(matrix);
        // Dense int matrix "dense_int_mat" (diRow x diCol)
        matrix = new MatrixContext();
        matrix.setName("dense_int_mat");
        matrix.setRowNum(diRow);
        matrix.setColNum(diCol);
        matrix.setMaxRowNumInBlock(diRow / 2);
        matrix.setMaxColNumInBlock(diCol / 2);
        matrix.setRowType(RowType.T_INT_DENSE);
        matrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
        matrix.set(MatrixConf.MATRIX_HOGWILD, "false");
        matrix.set(MatrixConf.MATRIX_AVERAGE, "false");
        matrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_INT");
        angelClient.addMatrix(matrix);
        // Second dense int matrix "dense_int_mat_1", same settings
        matrix = new MatrixContext();
        matrix.setName("dense_int_mat_1");
        matrix.setRowNum(diRow);
        matrix.setColNum(diCol);
        matrix.setMaxRowNumInBlock(diRow / 2);
        matrix.setMaxColNumInBlock(diCol / 2);
        matrix.setRowType(RowType.T_INT_DENSE);
        matrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
        matrix.set(MatrixConf.MATRIX_HOGWILD, "false");
        matrix.set(MatrixConf.MATRIX_AVERAGE, "false");
        matrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_INT");
        angelClient.addMatrix(matrix);
        // Dense float matrix "dense_float_mat" (dfRow x dfCol)
        matrix = new MatrixContext();
        matrix.setName("dense_float_mat");
        matrix.setRowNum(dfRow);
        matrix.setColNum(dfCol);
        matrix.setMaxRowNumInBlock(dfRow / 2);
        matrix.setMaxColNumInBlock(dfCol / 2);
        matrix.setRowType(RowType.T_FLOAT_DENSE);
        matrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
        matrix.set(MatrixConf.MATRIX_HOGWILD, "false");
        matrix.set(MatrixConf.MATRIX_AVERAGE, "false");
        matrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_FLOAT");
        angelClient.addMatrix(matrix);
        // Start the parameter server(s) and the application
        angelClient.startPSServer();
        angelClient.run();
        // NOTE(review): fixed 10 s pause, presumably to let the PS and workers finish
        // starting before tests run -- confirm; a readiness check would be less flaky.
        Thread.sleep(10000);
        // Ids of worker group 0, its worker 0 and that worker's attempt 0, plus task 0
        group0Id = new WorkerGroupId(0);
        worker0Id = new WorkerId(group0Id, 0);
        worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
        task0Id = new TaskId(0);
        // Ids of parameter server 0 and its attempt 0
        psId = new ParameterServerId(0);
        psAttempt0Id = new PSAttemptId(psId, 0);
    } catch (Exception x) {
        // Log for diagnosis, then rethrow so the failure still reaches the test runner
        LOG.error("setup failed ", x);
        throw x;
    }
}
Also used : CombineTextInputFormat(org.apache.hadoop.mapred.lib.CombineTextInputFormat) DummyTask(com.tencent.angel.master.DummyTask) MatrixContext(com.tencent.angel.ml.matrix.MatrixContext) TaskId(com.tencent.angel.worker.task.TaskId) Configuration(org.apache.hadoop.conf.Configuration) PSAttemptId(com.tencent.angel.ps.PSAttemptId) ParameterServerId(com.tencent.angel.ps.ParameterServerId) IOException(java.io.IOException) BeforeClass(org.junit.BeforeClass)

Example 4 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class MatrixTransportClient method getRowSplit.

/**
 * Request one row split of a partition from the partition's master parameter server.
 * If an equal request is already registered in {@code requestToResultMap}, the future
 * of that earlier request is returned and nothing new is queued.
 *
 * @param partKey partition the row split belongs to
 * @param rowIndex index of the requested row
 * @param clock clock value carried by the request
 * @return future that yields the requested row split
 */
@SuppressWarnings("unchecked")
@Override
public Future<ServerRow> getRowSplit(PartitionKey partKey, int rowIndex, int clock) {
    ParameterServerId masterPs = PSAgentContext.get().getMatrixMetaManager().getMasterPS(partKey);
    GetRowSplitRequest rowRequest = new GetRowSplitRequest(clock, partKey, rowIndex);
    FutureResult<ServerRow> pending = new FutureResult<>();

    // Register the request; a non-null result means an equal request got there first.
    FutureResult<ServerRow> existing = requestToResultMap.putIfAbsent(rowRequest, pending);
    if (existing != null) {
        LOG.debug("same request exist, just return old future");
        return existing;
    }

    // First time this request is seen: queue it for the master PS and trigger the get.
    addToGetQueueForServer(masterPs, rowRequest);
    startGet();
    return pending;
}
Also used : ServerRow(com.tencent.angel.ps.impl.matrix.ServerRow) ParameterServerId(com.tencent.angel.ps.ParameterServerId)

Example 5 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class AMMatrixCommitter method commitMatrix.

/**
 * Combine all output files of a model to a combine directory, then write the model
 * meta file next to them.
 *
 * <p>Failures are not propagated: any {@link Throwable} raised while moving the files
 * or writing the meta file is logged and recorded in {@code errorLogs}.
 *
 * @param matrixId matrix id
 * @param errorLogs error logs; a message is appended here if the commit fails
 */
private void commitMatrix(int matrixId, Vector<String> errorLogs) {
    LOG.info("start commit matrix " + matrixId);
    // Init matrix files meta
    List<ParameterServerId> psIds = new ArrayList<>(context.getMatrixMetaManager().getMasterPsIds(matrixId));
    MatrixMeta meta = context.getMatrixMetaManager().getMatrix(matrixId);
    Map<String, String> kvMap = meta.getAttributes();
    ModelFilesMeta filesMeta = new ModelFilesMeta(matrixId, meta.getName(), meta.getRowType().getNumber(), meta.getRowNum(), meta.getColNum(), meta.getBlockRowNum(), meta.getBlockColNum(), kvMap);
    try {
        // Move output files
        Path srcPath = new Path(tmpOutputPath, ModelFilesConstent.resultDirName);
        Path destPath = new Path(tmpCombinePath, meta.getName());
        PartitionCommitOp partCommitOp = new PartitionCommitOp(srcPath, destPath, psIds, errorLogs, filesMeta, 0, psIds.size());
        fileOpExecutor.execute(partCommitOp);
        partCommitOp.join();
        // Write the meta file to a temporary path first, then rename it into place
        long startTs = System.currentTimeMillis();
        Path metaFile = new Path(destPath, ModelFilesConstent.modelMetaFileName);
        Path tmpMetaFile = HdfsUtil.toTmpPath(metaFile);
        // try-with-resources closes the stream even when write/flush throws, so a
        // failed meta write no longer leaks the open output stream.
        try (FSDataOutputStream metaOut = fs.create(tmpMetaFile, (short) 1)) {
            filesMeta.write(metaOut);
            metaOut.flush();
        }
        // Rename only after the stream has been closed successfully
        HdfsUtil.rename(tmpMetaFile, metaFile, fs);
        LOG.info("commit meta file use time=" + (System.currentTimeMillis() - startTs));
    } catch (Throwable x) {
        errorLogs.add("move output files for matrix " + meta.getName() + " failed, error msg = " + x.getMessage());
        LOG.error("move output files for matrix " + meta.getName() + " failed.", x);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixMeta(com.tencent.angel.ml.matrix.MatrixMeta) ModelFilesMeta(com.tencent.angel.model.output.format.ModelFilesMeta) PSModelFilesMeta(com.tencent.angel.model.output.format.PSModelFilesMeta) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) ParameterServerId(com.tencent.angel.ps.ParameterServerId)

Aggregations

ParameterServerId (com.tencent.angel.ps.ParameterServerId)65 PSAttemptId (com.tencent.angel.ps.PSAttemptId)33 WorkerAttemptId (com.tencent.angel.worker.WorkerAttemptId)28 WorkerGroupId (com.tencent.angel.worker.WorkerGroupId)28 WorkerId (com.tencent.angel.worker.WorkerId)28 Configuration (org.apache.hadoop.conf.Configuration)28 MatrixContext (com.tencent.angel.ml.matrix.MatrixContext)27 CombineTextInputFormat (org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat)27 Before (org.junit.Before)23 TaskId (com.tencent.angel.worker.task.TaskId)9 PSLocation (com.tencent.angel.ps.server.data.PSLocation)6 HashMap (java.util.HashMap)6 Location (com.tencent.angel.common.location.Location)5 MatrixMeta (com.tencent.angel.ml.matrix.MatrixMeta)5 PartitionLocation (com.tencent.angel.ml.matrix.PartitionLocation)5 ArrayList (java.util.ArrayList)5 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)5 Path (org.apache.hadoop.fs.Path)5 Test (org.junit.Test)5 AMParameterServer (com.tencent.angel.master.ps.ps.AMParameterServer)4