Search in sources :

Example 16 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class AMMatrixMetaManager method getMasterPsIds.

/**
 * Collect the master ps of every partition of a matrix.
 *
 * @param matrixId matrix id
 * @return the distinct master ps ids over all partitions of the matrix
 */
public Set<ParameterServerId> getMasterPsIds(int matrixId) {
    Map<Integer, PartitionMeta> partitions = matrixMetaManager.getMatrixMeta(matrixId).getPartitionMetas();
    Set<ParameterServerId> masters = new HashSet<>();
    // A set is used, so a ps that is master of several partitions appears once
    for (Map.Entry<Integer, PartitionMeta> partition : partitions.entrySet()) {
        masters.add(partition.getValue().getMasterPs());
    }
    return masters;
}
Also used : MatrixPartitionMeta(com.tencent.angel.model.output.format.MatrixPartitionMeta) ParameterServerId(com.tencent.angel.ps.ParameterServerId) IntOpenHashSet(it.unimi.dsi.fastutil.ints.IntOpenHashSet)

Example 17 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class ParameterServerManager method init.

/**
 * Create one AMParameterServer for each index in [0, psNumber) and register it in psMap.
 */
public void init() {
    for (int psIndex = 0; psIndex < psNumber; psIndex++) {
        ParameterServerId psId = new ParameterServerId(psIndex);
        // When ips is set, its entry at this index is handed to the server constructor
        AMParameterServer ps = (ips == null)
            ? new AMParameterServer(psId, context)
            : new AMParameterServer(ips[psIndex], psId, context);
        // Apply the recorded next attempt number, if one exists for this ps
        boolean hasAttemptIndex = psIdToAttemptIndexMap != null && psIdToAttemptIndexMap.containsKey(psId);
        if (hasAttemptIndex) {
            ps.setNextAttemptNumber(psIdToAttemptIndexMap.get(psId));
        }
        psMap.put(psId, ps);
    }
}
Also used : AMParameterServer(com.tencent.angel.master.ps.ps.AMParameterServer) ParameterServerId(com.tencent.angel.ps.ParameterServerId)

Example 18 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class PSAgent method initAndStart.

/**
 * Initialize the components of this PS agent and start its services.
 *
 * <p>In order: obtains the control connection, creates the location manager and master client,
 * fetches the agent id, registers with the master, loads the matrix metadata and the ps
 * locations from the master, then creates and starts the matrix transport client and the user
 * request adapter.
 *
 * @throws Exception if any initialization step fails
 */
public void initAndStart() throws Exception {
    // Init control connection manager
    controlConnectManager = TConnectionManager.getConnection(conf);
    // Create the location manager and record the master location
    locationManager = new PSAgentLocationManager(PSAgentContext.get());
    locationManager.setMasterLocation(masterLocation);
    // Build and initialize rpc client to master
    masterClient = new MasterClient();
    masterClient.init();
    // Get psagent id
    id = masterClient.getPSAgentId();
    // Build PS control rpc client manager
    psControlClientManager = new PSControlClientManager();
    // Build local location
    String localIp = NetUtils.getRealLocalIP();
    int port = NetUtils.chooseAListenPort(conf);
    location = new Location(localIp, port);
    register();
    // Initialize matrix meta information
    List<MatrixMeta> matrixMetas = masterClient.getMatrices();
    LOG.info("PSAgent get matrices from master," + matrixMetas.size());
    this.matrixMetaManager = new PSAgentMatrixMetaManager();
    matrixMetaManager.addMatrices(matrixMetas);
    // Get ps locations from master and put them to the location cache, ordered by ps index
    Map<ParameterServerId, Location> psIdToLocMap = masterClient.getPSLocations();
    List<ParameterServerId> psIds = new ArrayList<>(psIdToLocMap.keySet());
    Collections.sort(psIds, new Comparator<ParameterServerId>() {

        @Override
        public int compare(ParameterServerId s1, ParameterServerId s2) {
            // Integer.compare cannot overflow, unlike subtracting the two indices
            return Integer.compare(s1.getIndex(), s2.getIndex());
        }
    });
    locationManager.setPsIds(psIds.toArray(new ParameterServerId[0]));
    // Every id comes from the map's own key set, so no containsKey check is needed
    for (ParameterServerId psId : psIds) {
        locationManager.setPsLocation(psId, psIdToLocMap.get(psId));
    }
    matrixTransClient = new MatrixTransportClient();
    userRequestAdapter = new UserRequestAdapter();
    // The matrix storage manager is only created in ANGEL_PS_WORKER mode
    if (runningMode == RunningMode.ANGEL_PS_WORKER) {
        matrixStorageManager = new MatrixStorageManager();
    }
    psAgentInitFinishedFlag.set(true);
    // Start all services
    matrixTransClient.start();
    userRequestAdapter.start();
}
Also used : MasterClient(com.tencent.angel.psagent.client.MasterClient) MatrixMeta(com.tencent.angel.ml.matrix.MatrixMeta) PSAgentMatrixMetaManager(com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager) PSControlClientManager(com.tencent.angel.psagent.client.PSControlClientManager) MatrixTransportClient(com.tencent.angel.psagent.matrix.transport.MatrixTransportClient) PSAgentLocationManager(com.tencent.angel.psagent.matrix.PSAgentLocationManager) MatrixStorageManager(com.tencent.angel.psagent.matrix.storage.MatrixStorageManager) UserRequestAdapter(com.tencent.angel.psagent.matrix.transport.adapter.UserRequestAdapter) ParameterServerId(com.tencent.angel.ps.ParameterServerId) Location(com.tencent.angel.common.location.Location)

Example 19 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class PSAgentMatrixMetaManager method getPartLocation.

/**
 * Build the location of a partition: the pss returned by getPss for it, each paired with
 * the location the location manager holds for that ps.
 *
 * @param partitionKey partition information
 * @return partition location; it wraps an empty list when getPss returns null
 */
public PartitionLocation getPartLocation(PartitionKey partitionKey) {
    List<ParameterServerId> servers = getPss(partitionKey);
    if (servers == null) {
        return new PartitionLocation(new ArrayList<>());
    }
    List<PSLocation> locations = new ArrayList<>(servers.size());
    for (ParameterServerId server : servers) {
        locations.add(new PSLocation(server, PSAgentContext.get().getLocationManager().getPsLocation(server)));
    }
    return new PartitionLocation(locations);
}
Also used : PSLocation(com.tencent.angel.ps.server.data.PSLocation) ParameterServerId(com.tencent.angel.ps.ParameterServerId) PartitionLocation(com.tencent.angel.ml.matrix.PartitionLocation)

Example 20 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class IndexGetRowsHashTest method setup.

/**
 * Prepare the test fixture: configure a LOCAL-mode job with 2 PS and 1 worker group, register
 * eight sparse matrices (double/float/int/long values, with int keys and with long keys) that
 * use HashPartitioner, start the PS servers, run the application, and create the ps/worker ids.
 *
 * @throws Exception if configuring, starting or running the application fails
 */
@Before
public void setup() throws Exception {
    // set basic configuration keys
    Configuration conf = new Configuration();
    conf.setBoolean("mapred.mapper.new-api", true);
    conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
    conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());
    // use local deploy mode and dummy dataspliter
    conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
    conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
    conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
    conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
    conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
    conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
    conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_PS_NUMBER, 2);
    conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_MODEL_PARTITIONER_PARTITION_SIZE, 1000);
    conf.setBoolean("use.new.split", true);
    conf.setInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 10);
    conf.setInt(AngelConf.ANGEL_WORKER_HEARTBEAT_INTERVAL_MS, 1000);
    conf.setInt(AngelConf.ANGEL_PS_HEARTBEAT_INTERVAL_MS, 1000);
    conf.setInt(AngelConf.ANGEL_WORKER_MAX_ATTEMPTS, 1);
    conf.setInt(AngelConf.ANGEL_PS_MAX_ATTEMPTS, 1);
    // get a angel client
    angelClient = AngelClientFactory.get(conf);
    // add sparse double matrix
    addSparseHashMatrix(SPARSE_DOUBLE_MAT, RowType.T_DOUBLE_SPARSE);
    // add sparse float matrix
    addSparseHashMatrix(SPARSE_FLOAT_MAT, RowType.T_FLOAT_SPARSE);
    // add sparse int matrix
    addSparseHashMatrix(SPARSE_INT_MAT, RowType.T_INT_SPARSE);
    // add sparse long matrix
    addSparseHashMatrix(SPARSE_LONG_MAT, RowType.T_LONG_SPARSE);
    // add sparse long-key double matrix
    addSparseHashMatrix(SPARSE_DOUBLE_LONG_MAT, RowType.T_DOUBLE_SPARSE_LONGKEY);
    // add sparse long-key float matrix
    addSparseHashMatrix(SPARSE_FLOAT_LONG_MAT, RowType.T_FLOAT_SPARSE_LONGKEY);
    // add sparse long-key int matrix
    addSparseHashMatrix(SPARSE_INT_LONG_MAT, RowType.T_INT_SPARSE_LONGKEY);
    // add sparse long-key long matrix
    addSparseHashMatrix(SPARSE_LONG_LONG_MAT, RowType.T_LONG_SPARSE_LONGKEY);
    // Start PS
    angelClient.startPSServer();
    // Start to run application
    angelClient.run();
    // Fixed 5 second wait after run(); presumably lets the application come up - TODO confirm
    Thread.sleep(5000);
    psId = new ParameterServerId(0);
    psAttempt0Id = new PSAttemptId(psId, 0);
    WorkerGroupId workerGroupId = new WorkerGroupId(0);
    workerId = new WorkerId(workerGroupId, 0);
    workerAttempt0Id = new WorkerAttemptId(workerId, 0);
}

/**
 * Register one matrix with the angel client, using rowNum rows, partNum partitions and
 * HashPartitioner.
 *
 * @param name matrix name
 * @param rowType row type of the matrix
 * @throws Exception if the client rejects the matrix
 */
private void addSparseHashMatrix(String name, RowType rowType) throws Exception {
    MatrixContext matrix = new MatrixContext();
    matrix.setName(name);
    matrix.setRowNum(rowNum);
    matrix.setPartitionNum(partNum);
    matrix.setPartitionerClass(HashPartitioner.class);
    matrix.setRowType(rowType);
    angelClient.addMatrix(matrix);
}
Also used : CombineTextInputFormat(org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat) MatrixContext(com.tencent.angel.ml.matrix.MatrixContext) Configuration(org.apache.hadoop.conf.Configuration) PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) ParameterServerId(com.tencent.angel.ps.ParameterServerId) WorkerId(com.tencent.angel.worker.WorkerId) WorkerGroupId(com.tencent.angel.worker.WorkerGroupId) Before(org.junit.Before)

Aggregations

ParameterServerId (com.tencent.angel.ps.ParameterServerId) 65, PSAttemptId (com.tencent.angel.ps.PSAttemptId) 33, WorkerAttemptId (com.tencent.angel.worker.WorkerAttemptId) 28, WorkerGroupId (com.tencent.angel.worker.WorkerGroupId) 28, WorkerId (com.tencent.angel.worker.WorkerId) 28, Configuration (org.apache.hadoop.conf.Configuration) 28, MatrixContext (com.tencent.angel.ml.matrix.MatrixContext) 27, CombineTextInputFormat (org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat) 27, Before (org.junit.Before) 23, TaskId (com.tencent.angel.worker.task.TaskId) 9, PSLocation (com.tencent.angel.ps.server.data.PSLocation) 6, HashMap (java.util.HashMap) 6, Location (com.tencent.angel.common.location.Location) 5, MatrixMeta (com.tencent.angel.ml.matrix.MatrixMeta) 5, PartitionLocation (com.tencent.angel.ml.matrix.PartitionLocation) 5, ArrayList (java.util.ArrayList) 5, ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap) 5, Path (org.apache.hadoop.fs.Path) 5, Test (org.junit.Test) 5, AMParameterServer (com.tencent.angel.master.ps.ps.AMParameterServer) 4