use of com.tencent.angel.model.ModelSaveContext in project angel by Tencent.
the class AngelClient method saveModel.
/**
 * Saves the matrices of the given model that declare a save path.
 *
 * <p>Each PS model whose matrix context has a {@code MatrixConf.MATRIX_SAVE_PATH}
 * attribute is added to one save context. The output format class name is read from
 * the {@code "OUT_FORMAT_CLASS"} configuration entry, falling back to
 * {@code RowIdColIdValueTextRowFormat}. The save context targets the path configured
 * under {@code AngelConf.ANGEL_JOB_OUTPUT_PATH}.
 *
 * @param model model whose PS matrices should be saved
 * @throws AngelException if {@code master} is null, i.e. the parameter servers were
 *     not started
 */
@SuppressWarnings("rawtypes")
@Override
public void saveModel(MLModel model) throws AngelException {
  if (master == null) {
    throw new AngelException("parameter servers are not started, you must execute startPSServer first!!");
  }

  Map<String, PSModel> modelsByName = model.getPSModels();
  ModelSaveContext modelSaveContext = new ModelSaveContext();
  for (PSModel psModel : modelsByName.values()) {
    MatrixContext matrixContext = psModel.getContext();
    // Matrices without a configured save path are not part of the saved model.
    if (matrixContext.getAttributes().get(MatrixConf.MATRIX_SAVE_PATH) == null) {
      continue;
    }
    String matrixName = matrixContext.getName();
    String formatClassName =
        conf.get("OUT_FORMAT_CLASS", RowIdColIdValueTextRowFormat.class.getName());
    modelSaveContext.addMatrix(new MatrixSaveContext(matrixName, formatClassName));
  }

  modelSaveContext.setSavePath(conf.get(AngelConf.ANGEL_JOB_OUTPUT_PATH));
  save(modelSaveContext);
  LOG.info("save is finish");
}
use of com.tencent.angel.model.ModelSaveContext in project angel by Tencent.
the class AMModelSaver method epochUpdate.
/**
 * Model save trigger.
 *
 * <p>A save is started only when epoch-triggered saving is enabled, the epoch index is
 * positive, and the index is a multiple of {@code saveModelFrequency}. Matrices whose
 * context has no {@code MatrixConf.MATRIX_SAVE_PATH} attribute are left out. A failure
 * while saving is logged and not rethrown.
 *
 * @param epochIndex current epoch index
 */
public void epochUpdate(int epochIndex) {
  boolean saveDue =
      epochTrigSave && (epochIndex > 0) && (epochIndex % saveModelFrequency == 0);
  if (!saveDue) {
    return;
  }

  LOG.info("Epoch " + epochIndex + " over, start to save model");
  Map<Integer, MatrixMeta> matrixMetas = context.getMatrixMetaManager().getMatrixMetas();
  if (matrixMetas.isEmpty()) {
    LOG.info("There are no matrices need save, just return");
    return;
  }

  String outputPath = context.getConf().get(AngelConf.ANGEL_JOB_OUTPUT_PATH);
  ModelSaveContext modelSaveContext = new ModelSaveContext(outputPath);
  for (MatrixMeta matrixMeta : matrixMetas.values()) {
    boolean hasSavePath =
        matrixMeta.getMatrixContext().getAttributes().get(MatrixConf.MATRIX_SAVE_PATH) != null;
    if (hasSavePath) {
      modelSaveContext.addMatrix(new MatrixSaveContext(matrixMeta.getName()));
    }
  }

  try {
    save(modelSaveContext, SaveTriggerMode.EPOCH_TRIGGER);
  } catch (Throwable failure) {
    // Best effort: a failed periodic save is logged and does not propagate.
    LOG.error("save model failed for epoch " + epochIndex, failure);
  }
}
use of com.tencent.angel.model.ModelSaveContext in project angel by Tencent.
the class AMModelSaver method save.
/**
 * Prepares the bookkeeping for a save request.
 *
 * <p>Under the lock, this records the request id as the current one, splits the request
 * into per-parameter-server sub-requests, creates an {@code INIT} result entry for each
 * sub-request, and resets the received-result counter.
 *
 * @param runningContext running context carrying the save context and its request id
 */
private void save(ModelSaveRunningContext runningContext) {
  ModelSaveContext saveContext = runningContext.getSaveContext();
  // Acquire the lock BEFORE the try block. If lock() itself failed inside the try,
  // the finally clause would call unlock() on a lock this thread does not hold and
  // mask the original failure.
  lock.lock();
  try {
    currentRequestId = runningContext.getRequestId();
    LOG.info("Start to execute save request " + saveContext + " with request id=" + runningContext.getRequestId());
    // Split the user request to sub-requests to pss
    currentSubSaveContexts = split(currentRequestId, saveContext);
    subResults = new HashMap<>(currentSubSaveContexts.size());
    for (Map.Entry<ParameterServerId, PSMatricesSaveContext> entry : currentSubSaveContexts.entrySet()) {
      subResults.put(entry.getKey(), new PSMatricesSaveResult(entry.getValue().getRequestId(), entry.getValue().getSubRequestId(), SaveState.INIT));
    }
    receivedSubResult = 0;
  } finally {
    lock.unlock();
  }
}
use of com.tencent.angel.model.ModelSaveContext in project angel by Tencent.
the class AMModelSaver method psSaveFinish.
/**
 * PS finish save request.
 *
 * <p>The sub-result is ignored when no save is being tracked or when it belongs to a
 * request other than the current one. Otherwise it is recorded. Once the number of
 * received sub-results reaches the number of tracked sub-requests, the master either
 * combines the model files (when {@code canCombine()} holds) or marks the save as failed
 * with the combined failure logs.
 *
 * @param psId parameter server id
 * @param subResult the result of sub save request
 */
public void psSaveFinish(ParameterServerId psId, PSMatricesSaveResult subResult) {
  // Acquire the lock BEFORE the try block. If lock() itself failed inside the try,
  // the finally clause would call unlock() on a lock this thread does not hold and
  // mask the original failure.
  lock.lock();
  try {
    if (subResults == null || subResult.getRequestId() != currentRequestId) {
      return;
    }
    receivedSubResult++;
    subResults.put(psId, subResult);
    LOG.info("save subrequest, complete number=" + receivedSubResult + ", total number=" + subResults.size());
    if (receivedSubResult >= subResults.size()) {
      ModelSaveResult result = results.get(subResult.getRequestId());
      if (canCombine()) {
        ModelSaveContext saveContext = saveContexts.get(subResult.getRequestId());
        try {
          result.setState(SaveState.COMBINING);
          combine(saveContext, result);
        } catch (Throwable e) {
          LOG.error("Master combine model files failed ", e);
          saveFailed(result, StringUtils.stringifyException(e));
        }
      } else {
        String failedMsg = combineFailedLogs();
        LOG.error("PS save model failed. " + failedMsg);
        saveFailed(result, failedMsg);
      }
    }
  } finally {
    lock.unlock();
  }
}
use of com.tencent.angel.model.ModelSaveContext in project angel by Tencent.
the class MasterService method save.
/**
 * Save model to files.
 *
 * <p>Every matrix named in the request is first checked against the matrix meta manager.
 * The save context is then handed to the model saver, and the request id it returns is
 * sent back to the caller.
 *
 * @param controller rpc controller of protobuf
 * @param request save request that contains all matrices need save
 * @return response carrying the id assigned to this save request
 * @throws ServiceException if a requested matrix does not exist, or if the model saver
 *     throws while accepting the request
 */
@SuppressWarnings("unchecked")
@Override
public SaveResponse save(RpcController controller, SaveRequest request) throws ServiceException {
  ModelSaveContext saveContext = ProtobufUtil.convert(request.getSaveContext());

  // Reject the whole request as soon as one unknown matrix is found.
  List<MatrixSaveContext> requestedMatrices = saveContext.getMatricesContext();
  for (MatrixSaveContext matrixSaveContext : requestedMatrices) {
    if (context.getMatrixMetaManager().exist(matrixSaveContext.getMatrixName())) {
      continue;
    }
    throw new ServiceException("matrix " + matrixSaveContext.getMatrixName() + " does not exist");
  }

  final int requestId;
  try {
    requestId = context.getModelSaver().save(saveContext);
  } catch (Throwable cause) {
    throw new ServiceException(cause);
  }

  SaveResponse.Builder response = SaveResponse.newBuilder();
  response.setRequestId(requestId);
  return response.build();
}
Aggregations