Search in sources :

Example 11 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class AMModelSaver method save.

/**
 * Starts executing a model save request.
 *
 * <p>Under {@code lock}, records the request id as the current one, splits the user request
 * into one sub-request per parameter server, registers an {@code INIT} result placeholder
 * for every sub-request and resets the received-result counter.
 *
 * @param runningContext running context that carries the save request and its request id
 */
private void save(ModelSaveRunningContext runningContext) {
    ModelSaveContext saveContext = runningContext.getSaveContext();
    // Acquire the lock before entering the try block: if acquisition fails, the finally
    // clause must not try to release a lock this thread does not hold.
    lock.lock();
    try {
        currentRequestId = runningContext.getRequestId();
        LOG.info("Start to execute save request " + saveContext + " with request id=" + runningContext.getRequestId());
        // Split the user request to sub-requests to pss
        currentSubSaveContexts = split(currentRequestId, saveContext);
        subResults = new HashMap<>(currentSubSaveContexts.size());
        for (Map.Entry<ParameterServerId, PSMatricesSaveContext> entry : currentSubSaveContexts.entrySet()) {
            PSMatricesSaveContext subContext = entry.getValue();
            subResults.put(entry.getKey(), new PSMatricesSaveResult(subContext.getRequestId(), subContext.getSubRequestId(), SaveState.INIT));
        }
        receivedSubResult = 0;
    } finally {
        lock.unlock();
    }
}
Also used : PSMatricesSaveContext(com.tencent.angel.model.PSMatricesSaveContext) PSMatricesSaveResult(com.tencent.angel.model.PSMatricesSaveResult) ParameterServerId(com.tencent.angel.ps.ParameterServerId) ModelSaveContext(com.tencent.angel.model.ModelSaveContext) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 12 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class AppStateStorage method writePSMeta.

/**
 * Writes the parameter server meta to a file.
 *
 * <p>The data is first written to a temporary file, which is then renamed to the final
 * file; afterwards the previously written final file (if any) is deleted. The layout is:
 * the number of parameter servers, followed by one (ps index, attempt index) pair of ints
 * per parameter server. The attempt index is the index of the running attempt when there
 * is one, otherwise the next attempt number.
 *
 * @param psManager manager that supplies the parameter server map to persist
 * @throws IOException if creating, writing, renaming or deleting a file fails
 */
public void writePSMeta(ParameterServerManager psManager) throws IOException {
    // Acquire the lock before entering the try block so that finally never unlocks a lock
    // this thread failed to acquire.
    psMetaLock.lock();
    try {
        // generate a temporary file
        String psMetaFile = getPsMetaFile();
        String tmpFile = getPSMetaTmpeFile(psMetaFile);
        Path tmpPath = new Path(writeDir, tmpFile);
        // write ps meta to the temporary file first; try-with-resources guarantees the
        // stream is closed even when one of the writes throws.
        try (FSDataOutputStream outputStream = fs.create(tmpPath)) {
            Map<ParameterServerId, AMParameterServer> psMap = psManager.getParameterServerMap();
            outputStream.writeInt(psMap.size());
            for (Entry<ParameterServerId, AMParameterServer> entry : psMap.entrySet()) {
                outputStream.writeInt(entry.getKey().getIndex());
                PSAttemptId attemptId = entry.getValue().getRunningAttemptId();
                int nextAttemptIndex = entry.getValue().getNextAttemptNumber();
                if (attemptId != null) {
                    // a running attempt exists: persist its index instead
                    nextAttemptIndex = attemptId.getIndex();
                }
                outputStream.writeInt(nextAttemptIndex);
            }
        }
        // rename the temporary file to the final file
        Path psMetaFilePath = new Path(writeDir, psMetaFile);
        HdfsUtil.rename(tmpPath, psMetaFilePath, fs);
        // if the old final file exist, just remove it
        if (lastPsMetaFilePath != null) {
            fs.delete(lastPsMetaFilePath, false);
        }
        lastPsMetaFilePath = psMetaFilePath;
    } finally {
        psMetaLock.unlock();
    }
}
Also used : AMParameterServer(com.tencent.angel.master.ps.ps.AMParameterServer) PSAttemptId(com.tencent.angel.ps.PSAttemptId) ParameterServerId(com.tencent.angel.ps.ParameterServerId)

Example 13 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class AMModelSaver method split.

/**
 * Splits a matrix save request into one sub-request per parameter server.
 *
 * <p>When the request names no row indexes, every partition of the matrix is included.
 * Otherwise only the partitions that contain at least one of the requested rows are
 * included. Partitions are grouped by their master parameter server and each group's
 * partition ids are sorted in ascending order.
 *
 * @param matrixSaveContext save request for a single matrix
 * @return the save context to send to each parameter server, keyed by parameter server id
 * @throws IllegalStateException if the matrix is unknown or a selected partition has no
 *     master parameter server
 */
private Map<ParameterServerId, PSMatrixSaveContext> split(MatrixSaveContext matrixSaveContext) {
    AMMatrixMetaManager matrixMetaManager = context.getMatrixMetaManager();
    MatrixMeta meta = matrixMetaManager.getMatrix(matrixSaveContext.getMatrixName());
    if (meta == null) {
        throw new IllegalStateException("Can not find matrix " + matrixSaveContext.getMatrixName());
    }
    Map<Integer, PartitionMeta> partitions = meta.getPartitionMetas();
    List<Integer> rowIndexes = matrixSaveContext.getRowIndexes();
    Map<ParameterServerId, Set<Integer>> psIdToPartIdsMap = new HashMap<>();
    if (rowIndexes == null || rowIndexes.isEmpty()) {
        // No row filter: every partition takes part in the save.
        for (Map.Entry<Integer, PartitionMeta> partEntry : partitions.entrySet()) {
            ParameterServerId psId = partEntry.getValue().getMasterPs();
            if (psId == null) {
                throw new IllegalStateException("Can not get ps for partition " + partEntry.getKey());
            }
            psIdToPartIdsMap.computeIfAbsent(psId, key -> new HashSet<>()).add(partEntry.getKey());
        }
    } else {
        // Row filter: keep only the partitions that hold at least one requested row.
        for (Integer rowIndex : rowIndexes) {
            for (Map.Entry<Integer, PartitionMeta> partEntry : partitions.entrySet()) {
                if (!partEntry.getValue().contain(rowIndex)) {
                    continue;
                }
                ParameterServerId psId = partEntry.getValue().getMasterPs();
                if (psId == null) {
                    throw new IllegalStateException("Can not get ps for partition " + partEntry.getKey());
                }
                // The set removes duplicates when several rows fall into the same partition.
                psIdToPartIdsMap.computeIfAbsent(psId, key -> new HashSet<>()).add(partEntry.getKey());
            }
        }
    }
    int matrixId = meta.getId();
    Map<ParameterServerId, PSMatrixSaveContext> ret = new HashMap<>(psIdToPartIdsMap.size());
    for (Map.Entry<ParameterServerId, Set<Integer>> entry : psIdToPartIdsMap.entrySet()) {
        List<Integer> partIds = new ArrayList<>(entry.getValue());
        // Integer.compare avoids the overflow that subtracting the two ids could cause.
        partIds.sort(Integer::compare);
        PSMatrixSaveContext psMatrixSaveContext = new PSMatrixSaveContext(matrixId, partIds, matrixSaveContext.getRowIndexes(), matrixSaveContext.getFormatClassName(), null, false, true);
        ret.put(entry.getKey(), psMatrixSaveContext);
    }
    return ret;
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) MatrixMeta(com.tencent.angel.ml.matrix.MatrixMeta) PSMatrixSaveContext(com.tencent.angel.model.PSMatrixSaveContext) ArrayList(java.util.ArrayList) PartitionMeta(com.tencent.angel.ml.matrix.PartitionMeta) AMMatrixMetaManager(com.tencent.angel.master.matrixmeta.AMMatrixMetaManager) ParameterServerId(com.tencent.angel.ps.ParameterServerId) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashSet(java.util.HashSet)

Example 14 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class AMMatrixMetaManager method assignPSForPartitions.

/**
 * Assigns a parameter server to each of the given partitions.
 *
 * <p>For every partition the partitioner is asked for a server index based on the partition
 * id; the resulting parameter server id is added to the partition via
 * {@code addReplicationPS} and then passed to {@code makePsToMaster}.
 *
 * @param partitioner partitioner that maps a partition id to a server index
 * @param partitions partitions to update in place
 */
private void assignPSForPartitions(Partitioner partitioner, List<PartitionMeta> partitions) {
    for (PartitionMeta partition : partitions) {
        int serverIndex = partitioner.assignPartToServer(partition.getPartId());
        ParameterServerId assignedPs = new ParameterServerId(serverIndex);
        partition.addReplicationPS(assignedPs);
        partition.makePsToMaster(assignedPs);
    }
}
Also used : ParameterServerId(com.tencent.angel.ps.ParameterServerId)

Example 15 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class AMMatrixMetaManager method buildPSMatrixMeta.

/**
 * Dispatches the partitions of a matrix to the parameter servers that hold them.
 *
 * <p>For every (partition, parameter server) pair, a per-server matrix meta is looked up in
 * {@code matrixPartitionsOnPS}, created on first use, and given a copy of the partition
 * meta. Every parameter server seen is also added to the matrix's entry in
 * {@code matrixIdToPSSetMap}.
 *
 * @param matrixMeta meta of the matrix whose partitions are dispatched
 */
private void buildPSMatrixMeta(MatrixMeta matrixMeta) {
    Map<Integer, PartitionMeta> partMetas = matrixMeta.getPartitionMetas();
    int matrixId = matrixMeta.getId();

    // Set of parameter servers that hold this matrix, created on first use.
    Set<ParameterServerId> holders = matrixIdToPSSetMap.get(matrixId);
    if (holders == null) {
        holders = new HashSet<>();
        matrixIdToPSSetMap.put(matrixId, holders);
    }

    for (Entry<Integer, PartitionMeta> partEntry : partMetas.entrySet()) {
        PartitionMeta partMeta = partEntry.getValue();
        for (ParameterServerId psId : partMeta.getPss()) {
            // Matrix metas stored on this parameter server, keyed by matrix id.
            Map<Integer, MatrixMeta> metasOnPs = matrixPartitionsOnPS.get(psId);
            if (metasOnPs == null) {
                metasOnPs = new HashMap<>();
                matrixPartitionsOnPS.put(psId, metasOnPs);
            }

            MatrixMeta metaOnPs = metasOnPs.get(matrixId);
            if (metaOnPs == null) {
                metaOnPs = new MatrixMeta(partMetas.size(), matrixMeta.getMatrixContext());
                metasOnPs.put(matrixId, metaOnPs);
            }

            // Store a fresh PartitionMeta with its own copy of the server list.
            List<ParameterServerId> pssCopy = new ArrayList<>(partMeta.getPss());
            metaOnPs.addPartitionMeta(partEntry.getKey(), new PartitionMeta(partMeta.getPartitionKey(), pssCopy));
            holders.add(psId);
        }
    }
}
Also used : MatrixPartitionMeta(com.tencent.angel.model.output.format.MatrixPartitionMeta) ParameterServerId(com.tencent.angel.ps.ParameterServerId)

Aggregations

ParameterServerId (com.tencent.angel.ps.ParameterServerId): 65; PSAttemptId (com.tencent.angel.ps.PSAttemptId): 33; WorkerAttemptId (com.tencent.angel.worker.WorkerAttemptId): 28; WorkerGroupId (com.tencent.angel.worker.WorkerGroupId): 28; WorkerId (com.tencent.angel.worker.WorkerId): 28; Configuration (org.apache.hadoop.conf.Configuration): 28; MatrixContext (com.tencent.angel.ml.matrix.MatrixContext): 27; CombineTextInputFormat (org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat): 27; Before (org.junit.Before): 23; TaskId (com.tencent.angel.worker.task.TaskId): 9; PSLocation (com.tencent.angel.ps.server.data.PSLocation): 6; HashMap (java.util.HashMap): 6; Location (com.tencent.angel.common.location.Location): 5; MatrixMeta (com.tencent.angel.ml.matrix.MatrixMeta): 5; PartitionLocation (com.tencent.angel.ml.matrix.PartitionLocation): 5; ArrayList (java.util.ArrayList): 5; ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 5; Path (org.apache.hadoop.fs.Path): 5; Test (org.junit.Test): 5; AMParameterServer (com.tencent.angel.master.ps.ps.AMParameterServer): 4