use of com.tencent.angel.common.location.Location in project angel by Tencent.
the class MasterService method workerRegister.
/**
 * Handles the registration request a worker attempt sends to the master.
 *
 * <p>If the worker manager does not report the attempt as alive, the response carries
 * {@code W_SHUTDOWN}. Otherwise the worker manager's {@code alive} method is invoked for the
 * attempt, a {@link WorkerAttemptRegisterEvent} with the reported location is dispatched to
 * the event handler, and the response carries {@code W_SUCCESS}.
 *
 * @param controller rpc controller of protobuf (not used by this method)
 * @param request contains worker attempt id, worker location and psagent id
 * @return response whose command tells the worker to continue or to shut down
 * @throws ServiceException declared by the RPC method signature
 */
@SuppressWarnings("unchecked")
@Override
public WorkerRegisterResponse workerRegister(RpcController controller, WorkerRegisterRequest request) throws ServiceException {
  final WorkerAttemptId attemptId = ProtobufUtil.convertToId(request.getWorkerAttemptId());
  LOG.info("Worker " + attemptId + " register, location=" + request.getLocation() + ", psagent id=" + request.getPsAgentId());

  final WorkerRegisterResponse.Builder response = WorkerRegisterResponse.newBuilder();

  // An attempt the worker manager does not consider alive is told to shut down.
  if (!context.getWorkerManager().isAlive(attemptId)) {
    LOG.error("worker attempt " + attemptId + " is not in running worker attempt set now, shutdown it");
    return response.setCommand(WorkerCommandProto.W_SHUTDOWN).build();
  }

  context.getWorkerManager().alive(attemptId);

  // Convert the location carried by the request and publish the registration event.
  final String workerIp = request.getLocation().getIp();
  final int workerPort = request.getLocation().getPort();
  context.getEventHandler().handle(new WorkerAttemptRegisterEvent(attemptId, new Location(workerIp, workerPort)));

  response.setCommand(WorkerCommandProto.W_SUCCESS);
  LOG.info("worker attempt " + attemptId + " register finished!");
  return response.build();
}
use of com.tencent.angel.common.location.Location in project angel by Tencent.
the class ProtobufUtil method buildWorkerMetaProto.
/**
 * Builds the protobuf meta information of a worker from its running attempt.
 *
 * <p>The result contains the attempt id, the attempt location (only when one is set) and one
 * task meta entry per task of the attempt, each with the task id, its iteration and its
 * matrix clocks.
 *
 * @param worker worker whose running attempt is serialized; the running attempt is
 *     dereferenced without a null check
 * @return the worker meta information message
 */
private static WorkerMetaInfoProto buildWorkerMetaProto(AMWorker worker) {
  WorkerMetaInfoProto.Builder builder = WorkerMetaInfoProto.newBuilder();
  WorkerAttempt attempt = worker.getRunningAttempt();

  // Worker location: the attempt id is always set, the address only when it is known.
  WorkerLocationProto.Builder locBuilder = WorkerLocationProto.newBuilder();
  locBuilder.setWorkerAttemptId(convertToIdProto(attempt.getId()));
  Location location = attempt.getLocation();
  if (location != null) {
    locBuilder.setLocation(buildLocation(location));
  }
  builder.setWorkerLocation(locBuilder.build());

  // Both fields of the clock builder are overwritten before every build(), so it can be shared.
  MatrixClock.Builder clockBuilder = MatrixClock.newBuilder();
  for (Entry<TaskId, AMTask> taskEntry : attempt.getTaskMap().entrySet()) {
    AMTask task = taskEntry.getValue();

    // Use a fresh builder for every task: addMatrixClock appends to a repeated field, so a
    // builder shared across iterations would leak the clocks of earlier tasks into later ones.
    TaskMetaInfoProto.Builder taskMetaBuilder = TaskMetaInfoProto.newBuilder();
    taskMetaBuilder.setTaskId(convertToIdProto(taskEntry.getKey()));
    taskMetaBuilder.setIteration(task.getIteration());

    Int2IntOpenHashMap matrixClocks = task.getMatrixClocks();
    for (it.unimi.dsi.fastutil.ints.Int2IntMap.Entry clockEntry : matrixClocks.int2IntEntrySet()) {
      taskMetaBuilder.addMatrixClock(clockBuilder.setMatrixId(clockEntry.getIntKey()).setClock(clockEntry.getIntValue()).build());
    }

    // Build once and reuse the message for both the result and the debug log.
    TaskMetaInfoProto taskMeta = taskMetaBuilder.build();
    builder.addTasks(taskMeta);
    LOG.debug("task meta=" + taskMeta);
  }
  return builder.build();
}
use of com.tencent.angel.common.location.Location in project angel by Tencent.
the class PSAgent method initAndStart.
/**
 * Initializes the PSAgent components and starts its services.
 *
 * <p>Steps, in order: obtain the control connection manager, create the location manager and
 * set the master location, create and initialize the master client, fetch the psagent id,
 * build the local location and register, load matrix metas from the master, load the PS
 * locations from the master into the location manager, create the transport client and the
 * user request adapter (plus the matrix storage manager in {@code ANGEL_PS_WORKER} mode),
 * set the init-finished flag, and finally start the transport client and request adapter.
 *
 * @throws Exception if any of the initialization steps fails
 */
public void initAndStart() throws Exception {
  // Init control connection manager
  controlConnectManager = TConnectionManager.getConnection(conf);

  // Build the location manager and tell it where the master is
  locationManager = new PSAgentLocationManager(PSAgentContext.get());
  locationManager.setMasterLocation(masterLocation);

  // Build and initialize rpc client to master
  masterClient = new MasterClient();
  masterClient.init();

  // Get psagent id
  id = masterClient.getPSAgentId();

  // Build PS control rpc client manager
  psControlClientManager = new PSControlClientManager();

  // Build local location
  String localIp = NetUtils.getRealLocalIP();
  int port = NetUtils.chooseAListenPort(conf);
  location = new Location(localIp, port);
  register();

  // Initialize matrix meta information
  List<MatrixMeta> matrixMetas = masterClient.getMatrices();
  LOG.info("PSAgent get matrices from master," + matrixMetas.size());
  this.matrixMetaManager = new PSAgentMatrixMetaManager();
  matrixMetaManager.addMatrices(matrixMetas);

  // Get ps locations from master and put them to the location cache, ordered by ps index
  Map<ParameterServerId, Location> psIdToLocMap = masterClient.getPSLocations();
  List<ParameterServerId> psIds = new ArrayList<>(psIdToLocMap.keySet());
  Collections.sort(psIds, new Comparator<ParameterServerId>() {
    @Override
    public int compare(ParameterServerId s1, ParameterServerId s2) {
      // Integer.compare cannot overflow, unlike subtracting the two indexes
      return Integer.compare(s1.getIndex(), s2.getIndex());
    }
  });
  locationManager.setPsIds(psIds.toArray(new ParameterServerId[0]));
  for (ParameterServerId psId : psIds) {
    if (psIdToLocMap.containsKey(psId)) {
      locationManager.setPsLocation(psId, psIdToLocMap.get(psId));
    }
  }

  matrixTransClient = new MatrixTransportClient();
  userRequestAdapter = new UserRequestAdapter();
  if (runningMode == RunningMode.ANGEL_PS_WORKER) {
    matrixStorageManager = new MatrixStorageManager();
  }
  psAgentInitFinishedFlag.set(true);

  // Start all services
  matrixTransClient.start();
  userRequestAdapter.start();
}
use of com.tencent.angel.common.location.Location in project angel by Tencent.
the class WorkerService method start.
/**
 * Starts the worker RPC service.
 *
 * <p>Chooses a listen port from the worker configuration, resolves the local host address,
 * records both as this service's {@code location}, then creates the RPC server for
 * {@link WorkerProtocol} on that address and opens it.
 *
 * @throws IOException declared by the method; raised by the port/host lookup or server
 *     calls it performs
 */
public void start() throws IOException {
  final int listenPort = NetUtils.chooseAListenPort(WorkerContext.get().getConf());
  final String bindHost = InetAddress.getLocalHost().getHostAddress();
  location = new Location(bindHost, listenPort);

  // The only protocol served by this RPC endpoint.
  final Class<?>[] protocols = new Class<?>[] { WorkerProtocol.class };
  rpcServer = MLRPC.getServer(WorkerService.class, this, protocols, bindHost, listenPort, WorkerContext.get().getConf());

  LOG.info("Starting workerserver service at " + bindHost + ":" + listenPort);
  rpcServer.openServer();
}
use of com.tencent.angel.common.location.Location in project angel by Tencent.
the class MasterServiceTest method testMasterService.
/**
 * Exercises the master RPC service through a real connection to the local cluster's master.
 *
 * <p>The test first registers a worker attempt id it constructs itself (group 1, worker 0,
 * attempt 0) and expects a {@code W_SHUTDOWN} command. It then sends a worker report for
 * {@code worker0Attempt0Id} containing two task reports and two worker-level pairs, and checks
 * that the master answers {@code W_SUCCESS} with two active tasks and that the reported
 * metrics and progress values are visible on the master's worker attempt and task objects.
 *
 * @throws Exception any failure is logged and rethrown so the test fails
 */
@Test
public void testMasterService() throws Exception {
  try {
    LOG.info("===========================testMasterService===============================");
    Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
    Location masterLoc = LocalClusterContext.get().getMaster().getAppMaster().getAppContext().getMasterService().getLocation();
    TConnection connection = TConnectionManager.getConnection(worker.getConf());
    MasterProtocol master = connection.getMasterService(masterLoc.getIp(), masterLoc.getPort());
    int psAgentId = master.getPSAgentId(null, PSAgentMasterServiceProtos.GetPSAgentIdRequest.getDefaultInstance()).getPsAgentId();

    // worker register
    WorkerAttemptId worker1Attempt0Id = new WorkerAttemptId(new WorkerId(new WorkerGroupId(1), 0), 0);
    WorkerRegisterRequest registeRequest = WorkerRegisterRequest.newBuilder()
        .setPsAgentId(psAgentId)
        .setWorkerAttemptId(ProtobufUtil.convertToIdProto(worker1Attempt0Id))
        .setLocation(LocationProto.newBuilder().setIp("0.0.0.0").setPort(10000).build())
        .build();
    WorkerRegisterResponse registerResponse = master.workerRegister(null, registeRequest);
    assertEquals(WorkerCommandProto.W_SHUTDOWN, registerResponse.getCommand());

    // worker report: first task report (task0)
    WorkerReportRequest.Builder reportBuilder = WorkerReportRequest.newBuilder();
    Pair.Builder kvBuilder = Pair.newBuilder();
    TaskStateProto.Builder taskBuilder = TaskStateProto.newBuilder();
    reportBuilder.setWorkerAttemptId(ProtobufUtil.convertToIdProto(worker0Attempt0Id));
    taskBuilder.setProgress(0.20f);
    taskBuilder.setState("RUNNING");
    taskBuilder.setTaskId(ProtobufUtil.convertToIdProto(task0Id));
    kvBuilder.setKey("task_key1");
    kvBuilder.setValue("100");
    taskBuilder.addCounters(kvBuilder.build());
    kvBuilder.setKey("task_key2");
    kvBuilder.setValue("200");
    taskBuilder.addCounters(kvBuilder.build());
    reportBuilder.addTaskReports(taskBuilder.build());

    // Second task report (task1). NOTE(review): taskBuilder is reused without clearing its
    // counters, so this report still carries the 100/200 counters added above in front of the
    // 1000/2000 ones; the assertions below expect the later values for task1.
    taskBuilder.setProgress(0.30f);
    taskBuilder.setState("RUNNING");
    taskBuilder.setTaskId(ProtobufUtil.convertToIdProto(task1Id));
    kvBuilder.setKey("task_key1");
    kvBuilder.setValue("1000");
    taskBuilder.addCounters(kvBuilder.build());
    kvBuilder.setKey("task_key2");
    kvBuilder.setValue("2000");
    taskBuilder.addCounters(kvBuilder.build());
    reportBuilder.addTaskReports(taskBuilder.build());

    // Worker-level metrics
    kvBuilder.setKey("worker_key1");
    kvBuilder.setValue("100");
    reportBuilder.addPairs(kvBuilder.build());
    kvBuilder.setKey("worker_key2");
    kvBuilder.setValue("200");
    reportBuilder.addPairs(kvBuilder.build());

    WorkerReportResponse reportResponse = master.workerReport(null, reportBuilder.build());
    assertEquals(WorkerCommandProto.W_SUCCESS, reportResponse.getCommand());
    assertEquals(2, reportResponse.getActiveTaskNum());

    // The worker-level pairs must be stored on the master's worker attempt
    AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
    WorkerAttempt worker0Attempt = angelAppMaster.getAppContext().getWorkerManager().getWorker(worker0Attempt0Id.getWorkerId()).getWorkerAttempt(worker0Attempt0Id);
    assertNotNull(worker0Attempt);
    Map<String, String> workerMetrics = worker0Attempt.getMetrics();
    String valueForWorkerKey1 = workerMetrics.get("worker_key1");
    String valueForWorkerKey2 = workerMetrics.get("worker_key2");
    assertNotNull(valueForWorkerKey1);
    assertNotNull(valueForWorkerKey2);
    assertEquals("100", valueForWorkerKey1);
    assertEquals("200", valueForWorkerKey2);

    // The task counters and progress must be stored on the master's task objects
    AMTaskManager amTaskManager = angelAppMaster.getAppContext().getTaskManager();
    AMTask task0 = amTaskManager.getTask(task0Id);
    AMTask task1 = amTaskManager.getTask(task1Id);
    assertNotNull(task0);
    assertNotNull(task1);
    Map<String, String> task0Metrics = task0.getMetrics();
    Map<String, String> task1Metrics = task1.getMetrics();
    String valueForTask0Key1 = task0Metrics.get("task_key1");
    String valueForTask0Key2 = task0Metrics.get("task_key2");
    String valueForTask1Key1 = task1Metrics.get("task_key1");
    String valueForTask1Key2 = task1Metrics.get("task_key2");
    assertNotNull(valueForTask0Key1);
    assertNotNull(valueForTask0Key2);
    assertNotNull(valueForTask1Key1);
    assertNotNull(valueForTask1Key2);
    assertEquals("100", valueForTask0Key1);
    assertEquals("200", valueForTask0Key2);
    assertEquals("1000", valueForTask1Key1);
    assertEquals("2000", valueForTask1Key2);
    assertEquals(0.20f, task0.getProgress(), 0.000001);
    assertEquals(0.30f, task1.getProgress(), 0.000001);
  } catch (Exception x) {
    LOG.error("run testMasterService failed ", x);
    throw x;
  }
}
Aggregations