Use of com.tencent.angel.ml.matrix.MatrixMeta in project angel by Tencent.
Class MasterService, method psReport.
/**
 * Handles a heartbeat report from a parameter server attempt and builds the reply.
 *
 * <p>If the parameter server manager does not consider the attempt alive, the reply only carries
 * {@code PSCOMMAND_SHUTDOWN}. Otherwise the reply carries {@code PSCOMMAND_OK}, the attempt's
 * heartbeat is refreshed, a state update event with the reported metrics is dispatched, and the
 * reply is filled with any pending save/load request plus the matrices the parameter server
 * should create, release or recover.
 *
 * @param controller rpc controller of protobuf
 * @param request heartbeat request
 * @return the heartbeat response for the reporting parameter server attempt
 */
@SuppressWarnings("unchecked")
@Override
public PSReportResponse psReport(RpcController controller, PSReportRequest request) throws ServiceException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("receive ps heartbeat request. request=" + request);
  }

  // Flatten the reported counters into a key -> value map.
  Map<String, String> metrics = new HashMap<String, String>();
  for (Pair metric : request.getMetricsList()) {
    metrics.put(metric.getKey(), metric.getValue());
  }

  PSAttemptId psAttemptId = ProtobufUtil.convertToId(request.getPsAttemptId());
  PSReportResponse.Builder resBuilder = PSReportResponse.newBuilder();

  if (!context.getParameterServerManager().isAlive(psAttemptId)) {
    // The attempt is not being monitored: tell it to shut down and stop here.
    LOG.error("ps attempt " + psAttemptId + " is not in running ps attempt set");
    resBuilder.setPsCommand(PSCommandProto.PSCOMMAND_SHUTDOWN);
    return resBuilder.build();
  }

  resBuilder.setPsCommand(PSCommandProto.PSCOMMAND_OK);

  // Refresh the last heartbeat timestamp.
  context.getParameterServerManager().alive(psAttemptId);

  // Forward the reported metrics to the PSAttempt as a state update event.
  context.getEventHandler().handle(new PSAttemptStateUpdateEvent(psAttemptId, metrics));

  // Pending save request: attach it while the matching result is still INIT or SAVING.
  PSMatricesSaveContext subSaveContext = context.getModelSaver().getSaveContext(psAttemptId.getPsId());
  PSMatricesSaveResult subSaveResult = context.getModelSaver().getSaveResult(psAttemptId.getPsId());
  if (subSaveContext != null && subSaveResult != null
      && subSaveContext.getRequestId() == subSaveResult.getRequestId()) {
    SaveState saveState = subSaveResult.getState();
    if (saveState == SaveState.INIT || saveState == SaveState.SAVING) {
      resBuilder.setNeedSaveMatrices(ProtobufUtil.convert(subSaveContext));
    }
  }

  // Pending load request: attach it while the matching result is still INIT or LOADING.
  PSMatricesLoadContext subLoadContext = context.getModelLoader().getLoadContext(psAttemptId.getPsId());
  PSMatricesLoadResult subLoadResult = context.getModelLoader().getLoadResult(psAttemptId.getPsId());
  if (subLoadContext != null && subLoadResult != null
      && subLoadContext.getRequestId() == subLoadResult.getRequestId()) {
    LoadState loadState = subLoadResult.getState();
    if (loadState == LoadState.INIT || loadState == LoadState.LOADING) {
      resBuilder.setNeedLoadMatrices(ProtobufUtil.convert(subLoadContext));
    }
  }

  // Reconcile matrix metadata between master and parameter server:
  //  - matrices known to the master but missing on the ps must be created on the ps;
  //  - matrices present on the ps but unknown to the master must be released by the ps.
  List<MatrixReport> matrixReports = ProtobufUtil.convertToMatrixReports(request.getMatrixReportsList());
  List<Integer> needReleaseMatrices = new ArrayList<>();
  List<MatrixMeta> needCreateMatrices = new ArrayList<>();
  List<RecoverPartKey> needRecoverParts = new ArrayList<>();
  context.getMatrixMetaManager().syncMatrixInfos(matrixReports, needCreateMatrices, needReleaseMatrices,
      needRecoverParts, psAttemptId.getPsId());

  for (MatrixMeta matrixToCreate : needCreateMatrices) {
    resBuilder.addNeedCreateMatrices(ProtobufUtil.convertToMatrixMetaProto(matrixToCreate));
  }
  for (Integer matrixIdToRelease : needReleaseMatrices) {
    resBuilder.addNeedReleaseMatrixIds(matrixIdToRelease);
  }
  for (RecoverPartKey partToRecover : needRecoverParts) {
    resBuilder.addNeedRecoverParts(ProtobufUtil.convert(partToRecover));
  }

  return resBuilder.build();
}
Use of com.tencent.angel.ml.matrix.MatrixMeta in project angel by Tencent.
Class AMModelSaver, method epochUpdate.
/**
 * Triggers a model save at the end of an epoch when epoch-triggered saving applies.
 *
 * <p>Nothing happens unless epoch-triggered saving is enabled, {@code epochIndex} is positive and
 * {@code epochIndex} is a multiple of the configured save frequency. Only matrices whose
 * attributes contain a {@code MatrixConf.MATRIX_SAVE_PATH} entry are added to the save request.
 * A failure while saving is logged and not rethrown.
 *
 * @param epochIndex current epoch index
 */
public void epochUpdate(int epochIndex) {
  // Guard: skip epochs that do not trigger a save.
  if (!epochTrigSave || epochIndex <= 0 || epochIndex % saveModelFrequency != 0) {
    return;
  }

  LOG.info("Epoch " + epochIndex + " over, start to save model");

  Map<Integer, MatrixMeta> matrixMetas = context.getMatrixMetaManager().getMatrixMetas();
  if (matrixMetas.isEmpty()) {
    LOG.info("There are no matrices need save, just return");
    return;
  }

  ModelSaveContext saveContext =
      new ModelSaveContext(context.getConf().get(AngelConf.ANGEL_JOB_OUTPUT_PATH));
  for (MatrixMeta matrixMeta : matrixMetas.values()) {
    // Only matrices that declare a save path take part in the epoch save.
    boolean hasSavePath =
        matrixMeta.getMatrixContext().getAttributes().get(MatrixConf.MATRIX_SAVE_PATH) != null;
    if (hasSavePath) {
      saveContext.addMatrix(new MatrixSaveContext(matrixMeta.getName()));
    }
  }

  try {
    save(saveContext, SaveTriggerMode.EPOCH_TRIGGER);
  } catch (Throwable x) {
    // Best effort: a failed save is logged and otherwise ignored here.
    LOG.error("save model failed for epoch " + epochIndex, x);
  }
}
Use of com.tencent.angel.ml.matrix.MatrixMeta in project angel by Tencent.
Class AMModelSaver, method split.
/**
 * Splits a matrix save request into one save context per parameter server.
 *
 * <p>Each partition of the matrix is assigned to the parameter server returned by its
 * {@code getMasterPs()}. When the request carries no row indexes (null or empty) every partition
 * is included; otherwise only partitions that contain at least one requested row index are
 * included. The partition ids handed to each parameter server are sorted in ascending order.
 *
 * @param matrixSaveContext the save request for a single matrix
 * @return a map from parameter server id to the save context for that parameter server
 * @throws IllegalStateException if the matrix cannot be found, or if a selected partition has no
 *     master parameter server
 */
private Map<ParameterServerId, PSMatrixSaveContext> split(MatrixSaveContext matrixSaveContext) {
  AMMatrixMetaManager matrixMetaManager = context.getMatrixMetaManager();
  MatrixMeta meta = matrixMetaManager.getMatrix(matrixSaveContext.getMatrixName());
  if (meta == null) {
    throw new IllegalStateException("Can not find matrix " + matrixSaveContext.getMatrixName());
  }

  Map<Integer, PartitionMeta> partitions = meta.getPartitionMetas();
  List<Integer> rowIndexes = matrixSaveContext.getRowIndexes();
  Map<ParameterServerId, Set<Integer>> psIdToPartIdsMap = new HashMap<>();

  if (rowIndexes == null || rowIndexes.isEmpty()) {
    // No row filter: every partition is saved.
    for (Map.Entry<Integer, PartitionMeta> partEntry : partitions.entrySet()) {
      addPartitionToMasterPs(psIdToPartIdsMap, partEntry);
    }
  } else {
    // Row filter: keep only partitions that contain a requested row. The per-PS sets
    // de-duplicate partitions that are hit by more than one row index.
    for (Integer rowIndex : rowIndexes) {
      for (Map.Entry<Integer, PartitionMeta> partEntry : partitions.entrySet()) {
        if (partEntry.getValue().contain(rowIndex)) {
          addPartitionToMasterPs(psIdToPartIdsMap, partEntry);
        }
      }
    }
  }

  int matrixId = meta.getId();
  Map<ParameterServerId, PSMatrixSaveContext> ret = new HashMap<>(psIdToPartIdsMap.size());
  for (Map.Entry<ParameterServerId, Set<Integer>> entry : psIdToPartIdsMap.entrySet()) {
    List<Integer> partIds = new ArrayList<>(entry.getValue());
    // Natural ordering instead of "id1 - id2": subtraction can overflow and misorder ids.
    partIds.sort(Comparator.naturalOrder());
    PSMatrixSaveContext psMatrixSaveContext = new PSMatrixSaveContext(matrixId, partIds,
        matrixSaveContext.getRowIndexes(), matrixSaveContext.getFormatClassName(), null, false, true);
    ret.put(entry.getKey(), psMatrixSaveContext);
  }
  return ret;
}

/**
 * Records a partition under the parameter server returned by the partition's {@code getMasterPs()}.
 *
 * @param psIdToPartIdsMap accumulator mapping parameter server id to its partition ids
 * @param partEntry partition id and its metadata
 * @throws IllegalStateException if the partition has no master parameter server
 */
private static void addPartitionToMasterPs(Map<ParameterServerId, Set<Integer>> psIdToPartIdsMap,
    Map.Entry<Integer, PartitionMeta> partEntry) {
  ParameterServerId psId = partEntry.getValue().getMasterPs();
  if (psId == null) {
    throw new IllegalStateException("Can not get ps for partition " + partEntry.getKey());
  }
  psIdToPartIdsMap.computeIfAbsent(psId, id -> new HashSet<>()).add(partEntry.getKey());
}
Use of com.tencent.angel.ml.matrix.MatrixMeta in project angel by Tencent.
Class ValuesCombineUtils, method mergeSparseLongCompVector.
/**
 * Combines per-partition index-get results into a single {@code CompIntLongVector}.
 *
 * <p>One sparse sub-vector is built for every partition key returned by
 * {@code getSortedPartKeys}. For a key present in the request's partition-to-indexes map, the
 * sub-vector is built from that partition's result values and the request's indexes for the key
 * after they have been passed to {@code transformIndices}; for any other key a sub-vector is
 * created via {@code VFactory.sparseLongVector(subDim, 0)}.
 *
 * @param param the index get request
 * @param partResults the partition results to merge
 * @return the combined vector, tagged with the request's matrix id and row id
 */
public static CompIntLongVector mergeSparseLongCompVector(IndexGetParam param, List<PartitionGetResult> partResults) {
  Map<PartitionKey, PartitionGetResult> resultsByPartKey = mapPartKeyToResult(partResults);
  List<PartitionKey> sortedPartKeys = getSortedPartKeys(param.matrixId, param.getRowId());
  MatrixMeta matrixMeta = PSAgentContext.get().getMatrixMetaManager().getMatrixMeta(param.matrixId);

  // Column counts are narrowed to int because this vector is int-indexed.
  int dim = (int) matrixMeta.getColNum();
  int subDim = (int) matrixMeta.getBlockColNum();

  int partCount = sortedPartKeys.size();
  IntLongVector[] splitVecs = new IntLongVector[partCount];
  for (int partIdx = 0; partIdx < partCount; partIdx++) {
    PartitionKey partKey = sortedPartKeys.get(partIdx);
    if (!param.getPartKeyToIndexesMap().containsKey(partKey)) {
      // No indexes were requested from this partition.
      splitVecs[partIdx] = VFactory.sparseLongVector(subDim, 0);
      continue;
    }
    IndexPartGetLongResult partResult = (IndexPartGetLongResult) resultsByPartKey.get(partKey);
    long[] values = partResult.getValues();
    int[] indices = param.getPartKeyToIndexesMap().get(partKey);
    // NOTE(review): receives the request's own array, so it may be modified in place — confirm.
    transformIndices(indices, partKey);
    splitVecs[partIdx] = VFactory.sparseLongVector(subDim, indices, values);
  }

  CompIntLongVector merged = VFactory.compIntLongVector(dim, splitVecs, subDim);
  merged.setMatrixId(param.getMatrixId());
  merged.setRowId(param.getRowId());
  return merged;
}
Use of com.tencent.angel.ml.matrix.MatrixMeta in project angel by Tencent.
Class ValuesCombineUtils, method mergeSparseIntCompVector.
/**
 * Combines per-partition index-get results into a single {@code CompLongIntVector}.
 *
 * <p>One sparse sub-vector is built for every partition key returned by
 * {@code getSortedPartKeys}. For a key present in the request's partition-to-indexes map, the
 * sub-vector is built from that partition's result values and the request's indexes for the key
 * after they have been passed to {@code transformIndices}; for any other key a sub-vector is
 * created via {@code VFactory.sparseLongKeyIntVector(subDim, 0)}.
 *
 * @param param the long-key index get request
 * @param partResults the partition results to merge
 * @return the combined vector, tagged with the request's matrix id and row id
 */
public static CompLongIntVector mergeSparseIntCompVector(LongIndexGetParam param, List<PartitionGetResult> partResults) {
  Map<PartitionKey, PartitionGetResult> resultsByPartKey = mapPartKeyToResult(partResults);
  List<PartitionKey> sortedPartKeys = getSortedPartKeys(param.matrixId, param.getRowId());
  MatrixMeta matrixMeta = PSAgentContext.get().getMatrixMetaManager().getMatrixMeta(param.matrixId);

  long dim = matrixMeta.getColNum();
  long subDim = matrixMeta.getBlockColNum();

  int partCount = sortedPartKeys.size();
  LongIntVector[] splitVecs = new LongIntVector[partCount];
  for (int partIdx = 0; partIdx < partCount; partIdx++) {
    PartitionKey partKey = sortedPartKeys.get(partIdx);
    if (!param.getPartKeyToIndexesMap().containsKey(partKey)) {
      // No indexes were requested from this partition.
      splitVecs[partIdx] = VFactory.sparseLongKeyIntVector(subDim, 0);
      continue;
    }
    IndexPartGetIntResult partResult = (IndexPartGetIntResult) resultsByPartKey.get(partKey);
    int[] values = partResult.getValues();
    long[] indices = param.getPartKeyToIndexesMap().get(partKey);
    // NOTE(review): receives the request's own array, so it may be modified in place — confirm.
    transformIndices(indices, partKey);
    splitVecs[partIdx] = VFactory.sparseLongKeyIntVector(subDim, indices, values);
  }

  CompLongIntVector merged = VFactory.compLongIntVector(dim, splitVecs, subDim);
  merged.setMatrixId(param.getMatrixId());
  merged.setRowId(param.getRowId());
  return merged;
}
Aggregations