Search in sources :

Example 51 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class AMMatrixMetaManager method handlePartReport.

/**
 * Handles a single partition report received from a parameter server.
 *
 * Nothing is done when the reporting PS is already the master of the partition,
 * or when no meta exists for the matrix. Otherwise the reporting PS is registered
 * as a replication PS of the partition, and then:
 * - if the partition is INITIALIZING, a recover request for it is queued on the master;
 * - if the partition is READ_AND_WRITE and the reporting PS is recorded as the
 *   partition's master in matrixPartitionsOnPS, the PS is made master in the matrix meta.
 *
 * @param psId       the parameter server that sent the report
 * @param matrixId   the matrix the reported partition belongs to
 * @param partReport the reported partition id and state
 */
private void handlePartReport(ParameterServerId psId, int matrixId, PartReport partReport) {
    ParameterServerId currentMaster = matrixMetaManager.getMasterPs(matrixId, partReport.partId);
    if (psId.equals(currentMaster)) {
        // Reports coming from the master itself need no handling here.
        return;
    }

    MatrixMeta reportedMatrix = matrixMetaManager.getMatrixMeta(matrixId);
    if (reportedMatrix == null) {
        return;
    }

    reportedMatrix.getPartitionMeta(partReport.partId).addReplicationPS(psId);

    if (partReport.state == PartitionState.INITIALIZING) {
        PartitionKey partKey = new PartitionKey(matrixId, partReport.partId);
        PSLocation reporterLocation = new PSLocation(psId, context.getLocationManager().getPsLocation(psId));
        addNeedRecoverPart(currentMaster, new RecoverPartKey(partKey, reporterLocation));
        return;
    }

    if (partReport.state == PartitionState.READ_AND_WRITE) {
        ParameterServerId recordedMaster = matrixPartitionsOnPS.get(psId)
            .get(matrixId)
            .getPartitionMeta(partReport.partId)
            .getMasterPs();
        if (recordedMaster.equals(psId)) {
            matrixMetaManager.getMatrixMeta(matrixId)
                .getPartitionMeta(partReport.partId)
                .makePsToMaster(psId);
        }
    }
}
Also used : RecoverPartKey(com.tencent.angel.ps.ha.RecoverPartKey) PSLocation(com.tencent.angel.ps.server.data.PSLocation) PartitionKey(com.tencent.angel.PartitionKey) ParameterServerId(com.tencent.angel.ps.ParameterServerId)

Example 52 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class AMMatrixMetaManager method isCreated.

/**
 * Checks whether a matrix has been created on every parameter server that holds it.
 *
 * @param matrixId the matrix id
 * @return {@code true} only if the matrix exists in the meta manager, has a
 *         non-empty PS set, and every PS in that set lists the matrix among
 *         its matrix ids; {@code false} otherwise
 */
public boolean isCreated(int matrixId) {
    // Acquire the lock before entering the try block: if lock() itself fails,
    // the finally clause must not attempt to release a lock that was never held.
    readLock.lock();
    try {
        if (!matrixMetaManager.exists(matrixId)) {
            return false;
        }
        Set<ParameterServerId> psIdSet = matrixIdToPSSetMap.get(matrixId);
        if (psIdSet == null || psIdSet.isEmpty()) {
            return false;
        }
        for (ParameterServerId psId : psIdSet) {
            // A single PS that has not registered the matrix is enough to answer "no".
            if (!psIdToMatrixIdsMap.containsKey(psId) || !psIdToMatrixIdsMap.get(psId).contains(matrixId)) {
                return false;
            }
        }
        return true;
    } finally {
        readLock.unlock();
    }
}
Also used : ParameterServerId(com.tencent.angel.ps.ParameterServerId)

Example 53 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class AMModelLoader method split.

/**
 * Splits the load request of one matrix into per-PS load contexts.
 *
 * Reads the matrix files meta from the meta file under the matrix directory,
 * groups the matrix partitions by their master PS, and builds one
 * PSMatrixLoadContext per PS containing that PS's partition ids in ascending order.
 *
 * @param matrixLoadContext load context of the matrix (supplies the matrix name)
 * @param modelLoadContext  load context of the model (supplies the load path)
 * @return map from master PS id to the load context for that PS
 * @throws IOException           if the meta file does not exist or cannot be read
 * @throws IllegalStateException if the matrix is unknown or a partition has no master PS
 */
private Map<ParameterServerId, PSMatrixLoadContext> split(MatrixLoadContext matrixLoadContext, ModelLoadContext modelLoadContext) throws IOException {
    Path matrixPath = new Path(modelLoadContext.getLoadPath(), matrixLoadContext.getMatrixName());
    Path metaFilePath = new Path(matrixPath, ModelFilesConstent.modelMetaFileName);
    MatrixFilesMeta matrixFilesMeta = new MatrixFilesMeta();
    FileSystem fs = metaFilePath.getFileSystem(context.getConf());
    if (!fs.exists(metaFilePath)) {
        throw new IOException("Can not find meta file " + metaFilePath);
    }
    // try-with-resources closes the stream on every path and keeps a close()
    // failure from masking the original read failure (it is attached as suppressed).
    try (FSDataInputStream input = fs.open(metaFilePath)) {
        matrixFilesMeta.read(input);
    } catch (Throwable e) {
        throw new IOException("Read matrix meta failed ", e);
    }

    AMMatrixMetaManager matrixMetaManager = context.getMatrixMetaManager();
    MatrixMeta meta = matrixMetaManager.getMatrix(matrixLoadContext.getMatrixName());
    if (meta == null) {
        throw new IllegalStateException("Can not find matrix " + matrixLoadContext.getMatrixName());
    }

    // Group partition ids by the PS that is master for the partition.
    Map<Integer, PartitionMeta> partitions = meta.getPartitionMetas();
    Map<ParameterServerId, Set<Integer>> psIdToPartIdsMap = new HashMap<>();
    for (Map.Entry<Integer, PartitionMeta> partEntry : partitions.entrySet()) {
        ParameterServerId psId = partEntry.getValue().getMasterPs();
        if (psId == null) {
            throw new IllegalStateException("Can not get ps for partition " + partEntry.getKey());
        }
        psIdToPartIdsMap.computeIfAbsent(psId, key -> new HashSet<>()).add(partEntry.getKey());
    }

    int matrixId = meta.getId();
    Map<ParameterServerId, PSMatrixLoadContext> ret = new HashMap<>(psIdToPartIdsMap.size());
    for (Map.Entry<ParameterServerId, Set<Integer>> entry : psIdToPartIdsMap.entrySet()) {
        List<Integer> partIds = new ArrayList<>(entry.getValue());
        // Integer::compare avoids the overflow that subtraction-based comparison can hit.
        partIds.sort(Integer::compare);
        PSMatrixLoadContext psMatrixLoadContext = new PSMatrixLoadContext(matrixId, matrixPath.toString(), partIds, matrixFilesMeta.getFormatClassName());
        ret.put(entry.getKey(), psMatrixLoadContext);
    }
    return ret;
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) MatrixMeta(com.tencent.angel.ml.matrix.MatrixMeta) AMMatrixMetaManager(com.tencent.angel.master.matrixmeta.AMMatrixMetaManager) FileSystem(org.apache.hadoop.fs.FileSystem) Path(org.apache.hadoop.fs.Path) PartitionMeta(com.tencent.angel.ml.matrix.PartitionMeta) IOException(java.io.IOException) MatrixFilesMeta(com.tencent.angel.model.output.format.MatrixFilesMeta) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ParameterServerId(com.tencent.angel.ps.ParameterServerId) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 54 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class AppStateStorage method loadPSMeta.

/**
 * Load ps meta from file.
 *
 * The file is located via the ps meta file name prefix. Its content is an int
 * entry count followed by that many (ps id, attempt index) int pairs.
 *
 * @return {@code Map<ParameterServerId, Integer>} from ps id to attempt index, or
 *         {@code null} if the ps meta file does not exist or looking it up failed
 * @throws IOException if opening or reading the ps meta file fails
 */
public Map<ParameterServerId, Integer> loadPSMeta() throws IOException {
    // Lock before the try block so the finally clause only ever releases a held lock.
    psMetaLock.lock();
    try {
        // find ps meta file
        Path psMetaFilePath;
        try {
            psMetaFilePath = findFilePathUsePrefix(psMetaFileNamePrefix);
        } catch (Exception x) {
            LOG.error("find ps meta file failed.", x);
            return null;
        }
        // if ps meta file does not exist, just return null
        if (psMetaFilePath == null) {
            return null;
        }
        // read ps meta from file and deserialize it; try-with-resources guarantees
        // the stream is closed even when a read fails part-way through
        try (FSDataInputStream inputStream = fs.open(psMetaFilePath)) {
            int size = inputStream.readInt();
            Map<ParameterServerId, Integer> idToNextAttemptIndexMap = new HashMap<ParameterServerId, Integer>(size);
            for (int i = 0; i < size; i++) {
                idToNextAttemptIndexMap.put(new ParameterServerId(inputStream.readInt()), inputStream.readInt());
            }
            return idToNextAttemptIndexMap;
        }
    } finally {
        psMetaLock.unlock();
    }
}
Also used : HashMap(java.util.HashMap) ParameterServerId(com.tencent.angel.ps.ParameterServerId) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) InvalidParameterException(com.tencent.angel.exception.InvalidParameterException)

Example 55 with ParameterServerId

use of com.tencent.angel.ps.ParameterServerId in project angel by Tencent.

the class ParameterServerManager method startAllPS.

/**
 * Start all PS.
 *
 * When mini-batch mode is enabled, a "resource-requester" thread is started.
 * Otherwise a PS_SCHEDULE event is delivered directly to every PS in psMap.
 */
public void startAllPS() {
    if (!useMiniBatch) {
        for (Map.Entry<ParameterServerId, AMParameterServer> psEntry : psMap.entrySet()) {
            AMParameterServer server = psEntry.getValue();
            ParameterServerId serverId = psEntry.getKey();
            AMParameterServerEvent scheduleEvent =
                new AMParameterServerEvent(AMParameterServerEventType.PS_SCHEDULE, serverId);
            server.handle(scheduleEvent);
        }
        return;
    }

    Thread requester = new RequestThread();
    requester.setName("resource-requester");
    requester.start();
}
Also used : AMParameterServerEvent(com.tencent.angel.master.ps.ps.AMParameterServerEvent) AMParameterServer(com.tencent.angel.master.ps.ps.AMParameterServer) ParameterServerId(com.tencent.angel.ps.ParameterServerId) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Aggregations

ParameterServerId (com.tencent.angel.ps.ParameterServerId)65 PSAttemptId (com.tencent.angel.ps.PSAttemptId)33 WorkerAttemptId (com.tencent.angel.worker.WorkerAttemptId)28 WorkerGroupId (com.tencent.angel.worker.WorkerGroupId)28 WorkerId (com.tencent.angel.worker.WorkerId)28 Configuration (org.apache.hadoop.conf.Configuration)28 MatrixContext (com.tencent.angel.ml.matrix.MatrixContext)27 CombineTextInputFormat (org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat)27 Before (org.junit.Before)23 TaskId (com.tencent.angel.worker.task.TaskId)9 PSLocation (com.tencent.angel.ps.server.data.PSLocation)6 HashMap (java.util.HashMap)6 Location (com.tencent.angel.common.location.Location)5 MatrixMeta (com.tencent.angel.ml.matrix.MatrixMeta)5 PartitionLocation (com.tencent.angel.ml.matrix.PartitionLocation)5 ArrayList (java.util.ArrayList)5 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)5 Path (org.apache.hadoop.fs.Path)5 Test (org.junit.Test)5 AMParameterServer (com.tencent.angel.master.ps.ps.AMParameterServer)4