Example use of com.tencent.angel.model.ModelSaveResult in the Angel project by Tencent.
Class AMModelSaver, method psSaveFinish.
/**
 * Records the save result reported by one parameter server and, once enough
 * reports have arrived, finishes the current save request.
 *
 * <p>The report is ignored when no sub-results are being tracked or when its
 * request id differs from the current request id. When the number of received
 * reports reaches the number of tracked sub-results, the master either runs
 * the combine step (if {@code canCombine()} allows it) or marks the request
 * as failed with the collected failure logs.
 *
 * @param psId parameter server id
 * @param subResult the result of the sub save request
 */
public void psSaveFinish(ParameterServerId psId, PSMatricesSaveResult subResult) {
  // Acquire the lock before entering the try block: if acquisition itself
  // fails, the finally clause must not call unlock() on a lock we do not hold.
  lock.lock();
  try {
    if (subResults == null || subResult.getRequestId() != currentRequestId) {
      return;
    }

    receivedSubResult++;
    subResults.put(psId, subResult);
    LOG.info("save subrequest, complete number=" + receivedSubResult + ", total number=" + subResults.size());

    // Still waiting for reports from other parameter servers.
    if (receivedSubResult < subResults.size()) {
      return;
    }

    ModelSaveResult result = results.get(subResult.getRequestId());
    if (!canCombine()) {
      String failedMsg = combineFailedLogs();
      LOG.error("PS save model failed. " + failedMsg);
      saveFailed(result, failedMsg);
      return;
    }

    ModelSaveContext saveContext = saveContexts.get(subResult.getRequestId());
    try {
      result.setState(SaveState.COMBINING);
      combine(saveContext, result);
    } catch (Throwable e) {
      // Any failure during combining is reported on the request, not rethrown.
      LOG.error("Master combine model files failed ", e);
      saveFailed(result, StringUtils.stringifyException(e));
    }
  } finally {
    lock.unlock();
  }
}
Example use of com.tencent.angel.model.ModelSaveResult in the Angel project by Tencent.
Class MasterService, method checkModelSaved.
/**
 * Reports the current state of a model save request.
 *
 * <p>Looks up the save result by the request id carried in {@code request}
 * and returns its state id, plus its message as the log field when a message
 * is present.
 *
 * @param controller rpc controller (not used by this method)
 * @param request request carrying the save request id to look up
 * @return response holding the state id and, if available, the message
 * @throws ServiceException if no save result exists for the request id
 */
@Override
public CheckModelSavedResponse checkModelSaved(RpcController controller, CheckModelSavedRequest request) throws ServiceException {
  LOG.info("check model saved=" + request);

  ModelSaveResult saveResult = context.getModelSaver().getModelSaveResult(request.getRequestId());
  if (saveResult == null) {
    throw new ServiceException("can not find save request " + request.getRequestId());
  }

  CheckModelSavedResponse.Builder response = CheckModelSavedResponse.newBuilder();
  response.setStatus(saveResult.getState().getStateId());
  // The log field is only set when the result carries a message.
  if (saveResult.getMessage() != null) {
    response.setLog(saveResult.getMessage());
  }
  return response.build();
}
Example use of com.tencent.angel.model.ModelSaveResult in the Angel project by Tencent.
Class AMModelSaver, method save.
/**
 * Registers a model save request and, unless it is filtered out, queues it
 * for execution.
 *
 * <p>A request id is always allocated, a temporary save path is set on
 * {@code saveContext}, and the context plus a result in state
 * {@code SaveState.INIT} are recorded under that id. The request is appended
 * to the waiting tasks unless it is epoch-triggered and another
 * epoch-triggered request is already waiting.
 *
 * @param saveContext save model context; its temporary save path is set here
 * @param triggerMode what triggered this save request
 * @return save request id
 */
public int save(ModelSaveContext saveContext, SaveTriggerMode triggerMode) {
  // Acquire the lock before entering the try block: if acquisition itself
  // fails, the finally clause must not call unlock() on a lock we do not hold.
  lock.lock();
  try {
    int requestId = saveRequestIdGen++;
    saveContext.setTmpSavePath(HdfsUtil.generateTmpDirectory(context.getConf(),
        context.getApplicationId().toString(), new Path(saveContext.getSavePath())).toString());
    saveContexts.put(requestId, saveContext);

    // Initialise the result fully before making it visible through the map.
    ModelSaveResult result = new ModelSaveResult(requestId);
    result.setState(SaveState.INIT);
    results.put(requestId, result);

    // Filter old epoch trigger first: at most one epoch-triggered request
    // may be waiting at a time.
    boolean needAdd = true;
    if (triggerMode == SaveTriggerMode.EPOCH_TRIGGER) {
      int size = waitingTasks.size();
      for (int i = 0; i < size; i++) {
        if (waitingTasks.get(i).getTriggerMode() == SaveTriggerMode.EPOCH_TRIGGER) {
          LOG.info("there is another epoch trigger model save request waiting, just exit");
          needAdd = false;
          break;
        }
      }
    }

    // NOTE(review): when the request is skipped, its id is still returned and
    // its INIT result stays registered, but nothing here queues it; confirm
    // that callers polling this id handle that case.
    if (needAdd) {
      waitingTasks.add(new ModelSaveRunningContext(requestId, triggerMode, saveContext));
    }
    return requestId;
  } finally {
    lock.unlock();
  }
}
Aggregations