use of com.tencent.angel.model.MatrixSaveContext in project angel by Tencent.
the class AngelClient method saveModel.
/**
 * Saves the PS matrices of the given model that have a save path configured.
 *
 * <p>A matrix is included only when its context attributes contain an entry for
 * {@code MatrixConf.MATRIX_SAVE_PATH}. The output format class name is read from the
 * {@code "OUT_FORMAT_CLASS"} configuration item, defaulting to the class name of
 * {@code RowIdColIdValueTextRowFormat}. The save path of the whole request is taken from
 * {@code AngelConf.ANGEL_JOB_OUTPUT_PATH}.
 *
 * @param model model whose PS matrices should be saved
 * @throws AngelException if {@code master} is null, i.e. the parameter servers are not started
 */
@SuppressWarnings("rawtypes")
@Override
public void saveModel(MLModel model) throws AngelException {
  if (master == null) {
    throw new AngelException("parameter servers are not started, you must execute startPSServer first!!");
  }

  Map<String, PSModel> nameToModel = model.getPSModels();
  ModelSaveContext modelSaveContext = new ModelSaveContext();

  for (PSModel psModel : nameToModel.values()) {
    MatrixContext matrixContext = psModel.getContext();
    // Matrices without a configured save path are not part of the saved model.
    if (matrixContext.getAttributes().get(MatrixConf.MATRIX_SAVE_PATH) == null) {
      continue;
    }
    String formatClassName =
        conf.get("OUT_FORMAT_CLASS", RowIdColIdValueTextRowFormat.class.getName());
    modelSaveContext.addMatrix(new MatrixSaveContext(matrixContext.getName(), formatClassName));
  }

  modelSaveContext.setSavePath(conf.get(AngelConf.ANGEL_JOB_OUTPUT_PATH));
  save(modelSaveContext);
  LOG.info("save is finish");
}
use of com.tencent.angel.model.MatrixSaveContext in project angel by Tencent.
the class AMModelSaver method epochUpdate.
/**
 * Model save trigger, invoked with the index of the epoch that has just been reported.
 *
 * <p>A save is attempted only when {@code epochTrigSave} is set, {@code epochIndex} is positive
 * and {@code epochIndex} is a multiple of {@code saveModelFrequency}. Only matrices whose
 * context attributes contain {@code MatrixConf.MATRIX_SAVE_PATH} are added to the save
 * context. A {@code Throwable} raised by the save call itself is logged and not propagated.
 *
 * @param epochIndex current epoch index
 */
public void epochUpdate(int epochIndex) {
  boolean saveDue =
      epochTrigSave && (epochIndex > 0) && (epochIndex % saveModelFrequency == 0);
  if (!saveDue) {
    return;
  }

  LOG.info("Epoch " + epochIndex + " over, start to save model");
  Map<Integer, MatrixMeta> matrixMetas = context.getMatrixMetaManager().getMatrixMetas();
  if (matrixMetas.isEmpty()) {
    LOG.info("There are no matrices need save, just return");
    return;
  }

  String outputPath = context.getConf().get(AngelConf.ANGEL_JOB_OUTPUT_PATH);
  ModelSaveContext modelSaveContext = new ModelSaveContext(outputPath);
  for (MatrixMeta matrixMeta : matrixMetas.values()) {
    // Only matrices that declare a save path take part in the epoch-triggered save.
    if (matrixMeta.getMatrixContext().getAttributes().get(MatrixConf.MATRIX_SAVE_PATH) == null) {
      continue;
    }
    modelSaveContext.addMatrix(new MatrixSaveContext(matrixMeta.getName()));
  }

  try {
    save(modelSaveContext, SaveTriggerMode.EPOCH_TRIGGER);
  } catch (Throwable x) {
    LOG.error("save model failed for epoch " + epochIndex, x);
  }
}
use of com.tencent.angel.model.MatrixSaveContext in project angel by Tencent.
the class MasterService method save.
/**
 * Save model to files.
 *
 * <p>Every matrix named in the request must be known to the matrix meta manager; the request
 * is then handed to the model saver and the request id it returns is sent back to the caller.
 *
 * @param controller rpc controller of protobuf
 * @param request save request that contains all matrices need save
 * @return response carrying the request id returned by the model saver
 * @throws ServiceException if a requested matrix does not exist, or if the model saver throws
 *     while handling the save request (the original throwable is wrapped)
 */
@SuppressWarnings("unchecked")
@Override
public SaveResponse save(RpcController controller, SaveRequest request) throws ServiceException {
  ModelSaveContext modelSaveContext = ProtobufUtil.convert(request.getSaveContext());

  // Reject the whole request as soon as one unknown matrix is found.
  List<MatrixSaveContext> matrixContexts = modelSaveContext.getMatricesContext();
  for (MatrixSaveContext matrixContext : matrixContexts) {
    String matrixName = matrixContext.getMatrixName();
    if (!context.getMatrixMetaManager().exist(matrixName)) {
      throw new ServiceException("matrix " + matrixName + " does not exist");
    }
  }

  int saveRequestId;
  try {
    saveRequestId = context.getModelSaver().save(modelSaveContext);
  } catch (Throwable x) {
    throw new ServiceException(x);
  }

  return SaveResponse.newBuilder().setRequestId(saveRequestId).build();
}
use of com.tencent.angel.model.MatrixSaveContext in project angel by Tencent.
the class AngelClient method saveMatrices.
/**
 * Save matrices to files.
 *
 * <p>Builds a save context whose save path comes from {@code AngelConf.ANGEL_JOB_OUTPUT_PATH},
 * adds one matrix save context per given name and submits it through {@code save}.
 *
 * @param matrixNames need save matrix name list
 */
public void saveMatrices(List<String> matrixNames) {
  ModelSaveContext modelSaveContext = new ModelSaveContext();
  String outputPath = conf.get(AngelConf.ANGEL_JOB_OUTPUT_PATH);
  modelSaveContext.setSavePath(outputPath);
  matrixNames.forEach(
      matrixName -> modelSaveContext.addMatrix(new MatrixSaveContext(matrixName)));
  save(modelSaveContext);
}
use of com.tencent.angel.model.MatrixSaveContext in project angel by Tencent.
the class AMModelSaver method recordSaveResult.
/**
 * Records a new save result for every matrix in the given save context and prunes the oldest
 * results once the per-matrix limit is exceeded.
 *
 * <p>Results are kept per matrix id in {@code matrixIdToCheckpointResults} when the context is
 * a checkpoint and in {@code matrixIdToSaveResults} otherwise. The limit is
 * {@code maxCheckpointItem} for checkpoints and {@code maxSaveItem} for normal saves. For each
 * pruned result the matrix path is removed and the model path is passed to
 * {@code HdfsUtil.removeIfEmpty}. A failure to remove old files is logged as a warning and does
 * not abort the bookkeeping.
 *
 * @param saveContext context of the save or checkpoint whose results are recorded
 */
private void recordSaveResult(ModelSaveContext saveContext) {
  // Values that do not depend on the matrix are read once up front.
  boolean checkpoint = saveContext.isCheckpoint();
  String modelPath = saveContext.getSavePath();
  int maxSaveNum = checkpoint ? maxCheckpointItem : maxSaveItem;

  for (MatrixSaveContext matrixContext : saveContext.getMatricesContext()) {
    String matrixName = matrixContext.getMatrixName();
    int matrixId = context.getMatrixMetaManager().getMatrix(matrixName).getId();

    // Get the result list of this matrix, creating it on first use.
    List<SaveResult> results;
    if (checkpoint) {
      results = matrixIdToCheckpointResults.computeIfAbsent(matrixId, id -> new ArrayList<>());
    } else {
      results = matrixIdToSaveResults.computeIfAbsent(matrixId, id -> new ArrayList<>());
    }

    String matrixPath = new Path(modelPath, matrixName).toString();
    results.add(new SaveResult(modelPath, matrixPath, System.currentTimeMillis()));
    LOG.info("Matrix " + matrixName + " new save path = " + matrixPath);
    LOG.info("After this save, total save result number=" + results.size());

    // Drop the oldest entries (front of the list) until the limit is respected.
    while (results.size() > maxSaveNum) {
      SaveResult oldResult = results.remove(0);
      LOG.info("need remove old save results/checkpoint for matrix " + matrixName + " remove path = " + oldResult.getMatrixPath());
      try {
        HdfsUtil.remove(context.getConf(), oldResult.getMatrixPath());
        HdfsUtil.removeIfEmpty(context.getConf(), oldResult.getModelPath());
      } catch (IOException e) {
        // Best effort: report the path that could not be removed, with the cause, and go on.
        LOG.warn("remove old save result/checkpoint " + oldResult.getMatrixPath() + " for matrix " + matrixName + " failed ", e);
      }
    }
  }
}
Aggregations