Search in sources :

Example 1 with ModelSaveContext

use of com.tencent.angel.model.ModelSaveContext in project angel by Tencent.

the class AngelClient method saveModel.

/**
 * Saves the matrices of the given model through the master.
 *
 * <p>Only matrices whose context carries a {@code MatrixConf.MATRIX_SAVE_PATH} attribute are
 * included in the save request. The attribute is checked for presence only; the destination
 * of the whole request is the job output path ({@code AngelConf.ANGEL_JOB_OUTPUT_PATH}).
 *
 * @param model the model whose PS matrices should be saved
 * @throws AngelException if the master has not been started
 */
@SuppressWarnings("rawtypes")
@Override
public void saveModel(MLModel model) throws AngelException {
    if (master == null) {
        throw new AngelException("parameter servers are not started, you must execute startPSServer first!!");
    }

    ModelSaveContext request = new ModelSaveContext();
    Map<String, PSModel> modelsByName = model.getPSModels();
    for (PSModel psModel : modelsByName.values()) {
        MatrixContext matrixContext = psModel.getContext();
        if (matrixContext.getAttributes().get(MatrixConf.MATRIX_SAVE_PATH) == null) {
            // No save path configured for this matrix: leave it out of the request.
            continue;
        }
        String matrixName = matrixContext.getName();
        // Output format class name; falls back to the row-id/col-id/value text format.
        String formatClassName = conf.get("OUT_FORMAT_CLASS", RowIdColIdValueTextRowFormat.class.getName());
        request.addMatrix(new MatrixSaveContext(matrixName, formatClassName));
    }

    String outputPath = conf.get(AngelConf.ANGEL_JOB_OUTPUT_PATH);
    request.setSavePath(outputPath);
    save(request);
    LOG.info("save is finish");
}
Also used : AngelException(com.tencent.angel.exception.AngelException) MatrixContext(com.tencent.angel.ml.matrix.MatrixContext) PSModel(com.tencent.angel.ml.model.PSModel) RowIdColIdValueTextRowFormat(com.tencent.angel.model.output.format.RowIdColIdValueTextRowFormat) MatrixSaveContext(com.tencent.angel.model.MatrixSaveContext) ModelSaveContext(com.tencent.angel.model.ModelSaveContext) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap)

Example 2 with ModelSaveContext

use of com.tencent.angel.model.ModelSaveContext in project angel by Tencent.

the class AMModelSaver method epochUpdate.

/**
 * Triggers a model save when an epoch finishes, if epoch-triggered saving applies.
 *
 * <p>A save is attempted only when epoch-triggered saving is enabled, the epoch index is
 * positive and it is a multiple of the configured save frequency. Failures of the save
 * call are logged and not propagated to the caller.
 *
 * @param epochIndex current epoch index
 */
public void epochUpdate(int epochIndex) {
    if (!epochTrigSave || epochIndex <= 0 || epochIndex % saveModelFrequency != 0) {
        return;
    }

    LOG.info("Epoch " + epochIndex + " over, start to save model");
    Map<Integer, MatrixMeta> matrixMetas = context.getMatrixMetaManager().getMatrixMetas();
    if (matrixMetas.isEmpty()) {
        LOG.info("There are no matrices need save, just return");
        return;
    }

    String outputPath = context.getConf().get(AngelConf.ANGEL_JOB_OUTPUT_PATH);
    ModelSaveContext request = new ModelSaveContext(outputPath);
    for (MatrixMeta matrixMeta : matrixMetas.values()) {
        // Only matrices that declare a save path attribute take part in the save.
        if (matrixMeta.getMatrixContext().getAttributes().get(MatrixConf.MATRIX_SAVE_PATH) == null) {
            continue;
        }
        request.addMatrix(new MatrixSaveContext(matrixMeta.getName()));
    }

    try {
        save(request, SaveTriggerMode.EPOCH_TRIGGER);
    } catch (Throwable x) {
        // Deliberately best-effort: a failed epoch save is logged, not rethrown.
        LOG.error("save model failed for epoch " + epochIndex, x);
    }
}
Also used : MatrixMeta(com.tencent.angel.ml.matrix.MatrixMeta) PSMatrixSaveContext(com.tencent.angel.model.PSMatrixSaveContext) MatrixSaveContext(com.tencent.angel.model.MatrixSaveContext) ModelSaveContext(com.tencent.angel.model.ModelSaveContext)

Example 3 with ModelSaveContext

use of com.tencent.angel.model.ModelSaveContext in project angel by Tencent.

the class AMModelSaver method save.

/**
 * Starts executing a save request: records it as the current request, splits it into
 * per-parameter-server sub-requests and initializes the bookkeeping for their results.
 *
 * <p>All state updates happen while holding {@code lock}.
 *
 * @param runningContext running context that carries the save request and its request id
 */
private void save(ModelSaveRunningContext runningContext) {
    ModelSaveContext saveContext = runningContext.getSaveContext();
    // Acquire the lock BEFORE entering the try block: if acquisition itself fails, the
    // finally clause must not call unlock() on a lock this thread does not hold (that
    // would raise a second exception and mask the original failure).
    lock.lock();
    try {
        currentRequestId = runningContext.getRequestId();
        LOG.info("Start to execute save request " + saveContext + " with request id=" + runningContext.getRequestId());
        // Split the user request to sub-requests to pss
        currentSubSaveContexts = split(currentRequestId, saveContext);
        // One placeholder result per parameter server, all starting in the INIT state.
        subResults = new HashMap<>(currentSubSaveContexts.size());
        for (Map.Entry<ParameterServerId, PSMatricesSaveContext> entry : currentSubSaveContexts.entrySet()) {
            subResults.put(entry.getKey(), new PSMatricesSaveResult(entry.getValue().getRequestId(), entry.getValue().getSubRequestId(), SaveState.INIT));
        }
        // No sub-result has been reported yet for this request.
        receivedSubResult = 0;
    } finally {
        lock.unlock();
    }
}
Also used : PSMatricesSaveContext(com.tencent.angel.model.PSMatricesSaveContext) PSMatricesSaveResult(com.tencent.angel.model.PSMatricesSaveResult) ParameterServerId(com.tencent.angel.ps.ParameterServerId) ModelSaveContext(com.tencent.angel.model.ModelSaveContext) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 4 with ModelSaveContext

use of com.tencent.angel.model.ModelSaveContext in project angel by Tencent.

the class AMModelSaver method psSaveFinish.

/**
 * PS finish save request.
 *
 * <p>Records the sub-result reported by a parameter server. Reports are ignored when no
 * save is in progress ({@code subResults} is null) or when they belong to a request other
 * than the current one. Once the number of received reports reaches the number of
 * expected sub-results, the master either combines the model files (when
 * {@code canCombine()} allows it) or marks the save as failed.
 *
 * @param psId parameter server id
 * @param subResult the result of sub save request
 */
public void psSaveFinish(ParameterServerId psId, PSMatricesSaveResult subResult) {
    // Acquire the lock BEFORE entering the try block: if acquisition itself fails, the
    // finally clause must not call unlock() on a lock this thread does not hold (that
    // would raise a second exception and mask the original failure).
    lock.lock();
    try {
        if (subResults == null || subResult.getRequestId() != currentRequestId) {
            return;
        }
        receivedSubResult++;
        subResults.put(psId, subResult);
        LOG.info("save subrequest, complete number=" + receivedSubResult + ", total number=" + subResults.size());
        if (receivedSubResult >= subResults.size()) {
            ModelSaveResult result = results.get(subResult.getRequestId());
            if (canCombine()) {
                ModelSaveContext saveContext = saveContexts.get(subResult.getRequestId());
                try {
                    result.setState(SaveState.COMBINING);
                    combine(saveContext, result);
                } catch (Throwable e) {
                    // Any failure while combining turns the whole save into a failure.
                    LOG.error("Master combine model files failed ", e);
                    saveFailed(result, StringUtils.stringifyException(e));
                }
            } else {
                String failedMsg = combineFailedLogs();
                LOG.error("PS save model failed. " + failedMsg);
                saveFailed(result, failedMsg);
            }
        }
    } finally {
        lock.unlock();
    }
}
Also used : ModelSaveResult(com.tencent.angel.model.ModelSaveResult) ModelSaveContext(com.tencent.angel.model.ModelSaveContext)

Example 5 with ModelSaveContext

use of com.tencent.angel.model.ModelSaveContext in project angel by Tencent.

the class MasterService method save.

/**
 * Handles a model save RPC.
 *
 * <p>Converts the protobuf save context, verifies that every requested matrix is known to
 * the matrix meta manager, then hands the request to the model saver and returns the id
 * assigned to it.
 *
 * @param controller rpc controller of protobuf
 * @param request save request that contains all matrices need save
 * @return response carrying the id of the submitted save request
 * @throws ServiceException if a requested matrix does not exist or submitting the save fails
 */
@SuppressWarnings("unchecked")
@Override
public SaveResponse save(RpcController controller, SaveRequest request) throws ServiceException {
    ModelSaveContext saveContext = ProtobufUtil.convert(request.getSaveContext());

    // Reject the request up front if it names a matrix the master does not know.
    List<MatrixSaveContext> requestedMatrices = saveContext.getMatricesContext();
    for (MatrixSaveContext matrixSaveContext : requestedMatrices) {
        String matrixName = matrixSaveContext.getMatrixName();
        if (!context.getMatrixMetaManager().exist(matrixName)) {
            throw new ServiceException("matrix " + matrixName + " does not exist");
        }
    }

    int requestId;
    try {
        requestId = context.getModelSaver().save(saveContext);
    } catch (Throwable x) {
        // Surface any failure to the RPC layer with the original cause attached.
        throw new ServiceException(x);
    }

    SaveResponse.Builder response = SaveResponse.newBuilder();
    response.setRequestId(requestId);
    return response.build();
}
Also used : ServiceException(com.google.protobuf.ServiceException) ModelSaveContextProto(com.tencent.angel.protobuf.generated.ClientMasterServiceProtos.ModelSaveContextProto) MatrixSaveContext(com.tencent.angel.model.MatrixSaveContext) ModelSaveContext(com.tencent.angel.model.ModelSaveContext)

Aggregations

ModelSaveContext (com.tencent.angel.model.ModelSaveContext)7 MatrixSaveContext (com.tencent.angel.model.MatrixSaveContext)4 Map (java.util.Map)2 ServiceException (com.google.protobuf.ServiceException)1 AngelException (com.tencent.angel.exception.AngelException)1 MatrixContext (com.tencent.angel.ml.matrix.MatrixContext)1 MatrixMeta (com.tencent.angel.ml.matrix.MatrixMeta)1 PSModel (com.tencent.angel.ml.model.PSModel)1 ModelSaveResult (com.tencent.angel.model.ModelSaveResult)1 PSMatricesSaveContext (com.tencent.angel.model.PSMatricesSaveContext)1 PSMatricesSaveResult (com.tencent.angel.model.PSMatricesSaveResult)1 PSMatrixSaveContext (com.tencent.angel.model.PSMatrixSaveContext)1 RowIdColIdValueTextRowFormat (com.tencent.angel.model.output.format.RowIdColIdValueTextRowFormat)1 MatrixSaveContextProto (com.tencent.angel.protobuf.generated.ClientMasterServiceProtos.MatrixSaveContextProto)1 ModelSaveContextProto (com.tencent.angel.protobuf.generated.ClientMasterServiceProtos.ModelSaveContextProto)1 PSMatrixSaveContextProto (com.tencent.angel.protobuf.generated.PSMasterServiceProtos.PSMatrixSaveContextProto)1 ParameterServerId (com.tencent.angel.ps.ParameterServerId)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1