use of com.tencent.angel.master.worker.WorkerManager in project angel by Tencent.
the class PSAgentTest method testClockCache.
/**
 * Checks that the clock cache of worker 0's PS agent is reachable from the local cluster
 * and that the clock fetched with {@code getClock(1, 0)} is 0.
 *
 * @throws Exception any failure is logged and then rethrown so the test is reported as failed
 */
@Test
public void testClockCache() throws Exception {
  try {
    // Walk from the local cluster down to the PS agent, failing fast on any missing link.
    AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
    assertTrue(angelAppMaster != null);
    AMTaskManager taskManager = angelAppMaster.getAppContext().getTaskManager();
    assertTrue(taskManager != null);
    WorkerManager workerManager = angelAppMaster.getAppContext().getWorkerManager();
    assertTrue(workerManager != null);
    Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
    assertTrue(worker != null);
    PSAgent psAgent = worker.getPSAgent();
    assertTrue(psAgent != null);

    ClockCache clockCache = psAgent.getClockCache();
    assertTrue(clockCache != null);

    // NOTE(review): arguments are presumably (matrixId, rowIndex) -- confirm against ClockCache.
    int rowClock = clockCache.getClock(1, 0);
    // JUnit convention: expected value first, actual value second.
    assertEquals(0, rowClock);
  } catch (Exception x) {
    LOG.error("run testClockCache failed ", x);
    throw x;
  }
}
use of com.tencent.angel.master.worker.WorkerManager in project angel by Tencent.
the class PSAgentTest method testPSAgentContext.
/**
 * Checks that the {@code PSAgentContext} singleton exposes non-null components and that the
 * values it returns agree with those returned directly by worker 0's {@code PSAgent}.
 *
 * @throws Exception any failure is logged and then rethrown so the test is reported as failed
 */
@Test
public void testPSAgentContext() throws Exception {
  try {
    // Walk from the local cluster down to the PS agent, failing fast on any missing link.
    AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
    assertTrue(angelAppMaster != null);
    AMTaskManager taskManager = angelAppMaster.getAppContext().getTaskManager();
    assertTrue(taskManager != null);
    WorkerManager workerManager = angelAppMaster.getAppContext().getWorkerManager();
    assertTrue(workerManager != null);
    Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
    assertTrue(worker != null);
    PSAgent psAgent = worker.getPSAgent();
    assertTrue(psAgent != null);

    // Every component exposed by the context must be initialized.
    PSAgentContext psAgentContext = PSAgentContext.get();
    assertTrue(psAgentContext.getPsAgent() != null);
    assertTrue(psAgentContext.getConf() != null);
    assertTrue(psAgentContext.getMetrics() != null);
    assertTrue(psAgentContext.getMasterClient() != null);
    assertTrue(psAgentContext.getIdProto() != null);
    assertTrue(psAgentContext.getOpLogCache() != null);
    assertTrue(psAgentContext.getMatrixTransportClient() != null);
    assertTrue(psAgentContext.getMatrixMetaManager() != null);
    assertTrue(psAgentContext.getLocationManager() != null);

    // The context's accessors must agree with the PS agent's own accessors.
    assertEquals(psAgentContext.getRunningMode(), psAgent.getRunningMode());
    assertEquals(psAgentContext.getIp(), psAgent.getIp());
    assertEquals(psAgentContext.getStaleness(),
        psAgent.getConf().getInt(AngelConf.ANGEL_STALENESS, AngelConf.DEFAULT_ANGEL_STALENESS));
    assertEquals(psAgentContext.getConsistencyController(), psAgent.getConsistencyController());
    assertEquals(psAgentContext.getMatrixOpLogCache(), psAgent.getOpLogCache());
    assertEquals(psAgentContext.getClockCache(), psAgent.getClockCache());
    assertEquals(psAgentContext.getMatricesCache(), psAgent.getMatricesCache());
    assertEquals(psAgentContext.getMatrixStorageManager(), psAgent.getMatrixStorageManager());
    assertEquals(psAgentContext.getMatrixClientAdapter(), psAgent.getMatrixClientAdapter());
    assertEquals(psAgentContext.getExecutor(), psAgent.getExecutor());

    // Task contexts are looked up for ids 1 and 2; both task counts are expected to be 2.
    assertTrue(psAgentContext.getTaskContext(1) != null);
    assertTrue(psAgentContext.getTaskContext(2) != null);
    int taskNum = psAgentContext.getTotalTaskNum();
    // JUnit convention: expected value first, actual value second.
    assertEquals(2, taskNum);
    int localTaskNum = psAgentContext.getLocalTaskNum();
    assertEquals(2, localTaskNum);
  } catch (Exception x) {
    LOG.error("run testPSAgentContext failed ", x);
    throw x;
  }
}
use of com.tencent.angel.master.worker.WorkerManager in project angel by Tencent.
the class PSAgentTest method testTaskContext.
/**
 * Checks the initial state of the task contexts obtained with ids 1 and 2 from the
 * {@code PSAgentContext}: index, epoch, matrix clock, number of matrix clocks and progress.
 *
 * @throws Exception any failure is logged and then rethrown so the test is reported as failed
 */
@Test
public void testTaskContext() throws Exception {
  try {
    // Walk from the local cluster down to the PS agent, failing fast on any missing link.
    AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
    assertTrue(angelAppMaster != null);
    AMTaskManager taskManager = angelAppMaster.getAppContext().getTaskManager();
    assertTrue(taskManager != null);
    WorkerManager workerManager = angelAppMaster.getAppContext().getWorkerManager();
    assertTrue(workerManager != null);
    Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
    assertTrue(worker != null);
    PSAgent psAgent = worker.getPSAgent();
    assertTrue(psAgent != null);

    PSAgentContext psAgentContext = PSAgentContext.get();
    assertTrue(psAgentContext.getPsAgent() != null);

    TaskContext taskContext1 = psAgentContext.getTaskContext(1);
    TaskContext taskContext2 = psAgentContext.getTaskContext(2);
    assertTrue(taskContext1 != null);
    assertTrue(taskContext2 != null);

    // JUnit convention throughout: expected value first, actual value second.
    assertEquals(1, taskContext1.getIndex());
    assertEquals(2, taskContext2.getIndex());
    assertEquals(0, taskContext1.getEpoch());
    assertEquals(0, taskContext2.getEpoch());
    assertEquals(0, taskContext1.getMatrixClock(1));
    assertEquals(0, taskContext2.getMatrixClock(2));
    assertEquals(1, taskContext1.getMatrixClocks().size());
    assertEquals(1, taskContext2.getMatrixClocks().size());
    // Progress is compared with a tolerance of 1e-5.
    assertEquals(0.0, taskContext1.getProgress(), 1e-5);
    assertEquals(0.0, taskContext2.getProgress(), 1e-5);
  } catch (Exception x) {
    LOG.error("run testTaskContext failed ", x);
    throw x;
  }
}
use of com.tencent.angel.master.worker.WorkerManager in project angel by Tencent.
the class AngelApplicationMaster method initAndStart.
/**
 * Init and start all service modules for the angel application master.
 *
 * <p>The steps run in this order: app state storage, event dispatcher, location manager,
 * container allocator/launcher (local or YARN, chosen by deploy mode), master RPC service,
 * recovery of matrix meta and PS attempt indexes, parameter server manager, task state
 * recovery, the running-mode specific manager (PS agent manager or worker manager), slow
 * checker and metrics service. After that the services are initialized and started, all
 * parameter servers are launched, and for application attempts after the first one the
 * method waits for PS registration and metrics initialization before resuming execution.
 *
 * @throws Exception if any of the initialization, recovery or start steps fails
 */
public void initAndStart() throws Exception {
  addIfService(angelApp);

  // init app state storage
  String tmpOutPath = conf.get(AngelConf.ANGEL_JOB_TMP_OUTPUT_PATH);
  Path appStatePath = new Path(tmpOutPath, "app");
  LOG.info("app state output path = " + appStatePath.toUri().toString());
  FileSystem fs = appStatePath.getFileSystem(conf);
  appStateStorage = new AppStateStorage(appContext, appStatePath.toUri().toString(), fs);
  addIfService(appStateStorage);
  LOG.info("build app state storage success");

  // init event dispatcher
  dispatcher = new AsyncDispatcher();
  addIfService(dispatcher);
  LOG.info("build event dispacher");

  // init location manager
  locationManager = new LocationManager();

  // init container allocator and launcher; the implementation depends on the deploy mode
  AngelDeployMode deployMode = appContext.getDeployMode();
  LOG.info("deploy mode=" + deployMode);
  if (deployMode == AngelDeployMode.LOCAL) {
    containerAllocator = new LocalContainerAllocator(appContext);
    containerLauncher = new LocalContainerLauncher(appContext);
  } else {
    containerAllocator = new YarnContainerAllocator(appContext);
    containerLauncher = new YarnContainerLauncher(appContext);
  }
  addIfService(containerAllocator);
  dispatcher.register(ContainerAllocatorEventType.class, containerAllocator);
  LOG.info("build containerAllocator success");
  addIfService(containerLauncher);
  dispatcher.register(ContainerLauncherEventType.class, containerLauncher);
  LOG.info("build containerLauncher success");

  // init a rpc service
  masterService = new MasterService(appContext);
  LOG.info("build master service success");

  // recover matrix meta if needed
  recoverMatrixMeta();

  // recover ps attempt information if needed
  Map<ParameterServerId, Integer> psIdToAttemptIndexMap = recoverPSAttemptIndex();
  if (psIdToAttemptIndexMap == null) {
    LOG.info("recoverPSAttemptIndex return is null");
  } else {
    for (Entry<ParameterServerId, Integer> entry : psIdToAttemptIndexMap.entrySet()) {
      LOG.info("psId=" + entry.getKey() + ",attemptIndex=" + entry.getValue());
    }
  }

  // init parameter server manager
  psManager = new ParameterServerManager(appContext, psIdToAttemptIndexMap);
  addIfService(psManager);
  psManager.init();

  // hand the PS ids to the location manager sorted by ascending index
  List<ParameterServerId> psIds = new ArrayList<>(psManager.getParameterServerMap().keySet());
  Collections.sort(psIds, new Comparator<ParameterServerId>() {
    @Override
    public int compare(ParameterServerId s1, ParameterServerId s2) {
      // Integer.compare cannot overflow, unlike subtracting the two indexes.
      return Integer.compare(s1.getIndex(), s2.getIndex());
    }
  });
  locationManager.setPsIds(psIds.toArray(new ParameterServerId[0]));
  dispatcher.register(ParameterServerManagerEventType.class, psManager);
  dispatcher.register(AMParameterServerEventType.class, new ParameterServerEventHandler());
  dispatcher.register(PSAttemptEventType.class, new PSAttemptEventDispatcher());
  LOG.info("build PSManager success");

  // recover task information if needed
  recoverTaskState();

  RunningMode mode = appContext.getRunningMode();
  LOG.info("running mode=" + mode);
  switch (mode) {
    case ANGEL_PS_PSAGENT: {
      // init psagent manager and register psagent manager event
      psAgentManager = new PSAgentManager(appContext);
      addIfService(psAgentManager);
      dispatcher.register(PSAgentManagerEventType.class, psAgentManager);
      dispatcher.register(AMPSAgentEventType.class, new PSAgentEventHandler());
      dispatcher.register(PSAgentAttemptEventType.class, new PSAgentAttemptEventHandler());
      LOG.info("build PSAgentManager success");
      break;
    }
    case ANGEL_PS_WORKER: {
      // a dummy data spliter is just for test now
      boolean useDummyDataSpliter = conf.getBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER,
          AngelConf.DEFAULT_ANGEL_AM_USE_DUMMY_DATASPLITER);
      if (useDummyDataSpliter) {
        dataSpliter = new DummyDataSpliter(appContext);
      } else {
        // recover data splits information if needed
        // NOTE(review): dataSpliter is dereferenced below; presumably recoveryDataSplits()
        // assigns it -- confirm, otherwise this path throws a NullPointerException.
        recoveryDataSplits();
      }

      // init worker manager and register worker manager event
      workerManager = new WorkerManager(appContext);
      workerManager.adjustTaskNumber(dataSpliter.getSplitNum());
      addIfService(workerManager);
      dispatcher.register(WorkerManagerEventType.class, workerManager);
      dispatcher.register(AMWorkerGroupEventType.class, new WorkerGroupEventHandler());
      dispatcher.register(AMWorkerEventType.class, new WorkerEventHandler());
      dispatcher.register(WorkerAttemptEventType.class, new WorkerAttemptEventHandler());
      LOG.info("build WorkerManager success");
      break;
    }
    case ANGEL_PS:
      // no additional manager is created for this mode
      break;
  }

  // register slow worker/ps checker
  addIfService(new SlowChecker(appContext));
  algoMetricsService = new MetricsService(appContext);
  addIfService(algoMetricsService);
  dispatcher.register(MetricsEventType.class, algoMetricsService);

  // register app manager event and finish event
  dispatcher.register(AppEventType.class, angelApp);
  dispatcher.register(AppFinishEventType.class, new AppFinishEventHandler());

  masterService.init(conf);
  super.init(conf);

  // start a web service if use yarn deploy mode
  if (deployMode == AngelDeployMode.YARN) {
    try {
      webApp = WebApps.$for("angel", AMContext.class, appContext).with(conf)
          .start(new AngelWebApp());
      LOG.info("start webapp server success");
      LOG.info("webApp.port()=" + webApp.port());
    } catch (Exception e) {
      // deliberate best effort: a web UI failure must not abort the master
      LOG.error("Webapps failed to start. Ignoring for now:", e);
    }
  }

  // the master location is only available once the RPC service has been started
  masterService.start();
  locationManager.setMasterLocation(masterService.getLocation());
  super.serviceStart();
  psManager.startAllPS();
  AngelServiceLoader.startServiceIfNeed(this, getConfig());

  LOG.info("appAttemptId.getAttemptId()=" + appAttemptId.getAttemptId());
  // an attempt id above 1 means this is not the first attempt of the application: wait for
  // the parameter servers and metrics, then start execution from here
  if (appAttemptId.getAttemptId() > 1) {
    waitForAllPsRegisted();
    waitForAllMetricsInited();
    angelApp.startExecute();
  }
}
use of com.tencent.angel.master.worker.WorkerManager in project angel by Tencent.
the class PSAgentTest method testPSClient.
/**
 * Checks the basic properties exposed by worker 0's {@code PSAgent}: configuration, master
 * location, attempt id and agent id, connection, master client, ip and own location.
 *
 * @throws Exception any failure is logged and then rethrown so the test is reported as failed
 */
@Test
public void testPSClient() throws Exception {
  try {
    // Walk from the local cluster down to the PS agent, failing fast on any missing link.
    AngelApplicationMaster angelAppMaster = LocalClusterContext.get().getMaster().getAppMaster();
    assertTrue(angelAppMaster != null);
    AMTaskManager taskManager = angelAppMaster.getAppContext().getTaskManager();
    assertTrue(taskManager != null);
    WorkerManager workerManager = angelAppMaster.getAppContext().getWorkerManager();
    assertTrue(workerManager != null);
    Worker worker = LocalClusterContext.get().getWorker(worker0Attempt0Id).getWorker();
    assertTrue(worker != null);
    PSAgent psAgent = worker.getPSAgent();
    assertTrue(psAgent != null);

    // test conf (JUnit convention throughout: expected value first, actual value second)
    Configuration conf = psAgent.getConf();
    assertTrue(conf != null);
    assertEquals("LOCAL", conf.get(AngelConf.ANGEL_DEPLOY_MODE));

    // test master location: dotted-quad IPv4 address and a port in [1, 65535]
    Location masterLoc = psAgent.getMasterLocation();
    // fail with an assertion instead of a NullPointerException if the location is missing
    assertTrue(masterLoc != null);
    String ipRegex = "(2[5][0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})\\.(25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2})";
    Pattern pattern = Pattern.compile(ipRegex);
    assertTrue(pattern.matcher(masterLoc.getIp()).matches());
    assertTrue(masterLoc.getPort() >= 1 && masterLoc.getPort() <= 65535);

    // test app id and user: the getters are only exercised, no value is asserted
    ApplicationId appId = psAgent.getAppId();
    String user = psAgent.getUser();

    // test ps agent attempt id
    PSAgentAttemptId psAgentAttemptId = psAgent.getId();
    assertEquals("PSAgentAttempt_0_0", psAgentAttemptId.toString());
    assertEquals(0, psAgentAttemptId.getIndex());

    // test ps agent id
    PSAgentId psAgentId = psAgentAttemptId.getPsAgentId();
    assertEquals("PSAgent_0", psAgentId.toString());
    assertEquals(0, psAgentId.getIndex());

    // test connection
    TConnection conn = psAgent.getConnection();
    assertTrue(conn != null);

    // test master client
    MasterClient masterClient = psAgent.getMasterClient();
    assertTrue(masterClient != null);

    // test ip
    String ip = psAgent.getIp();
    assertTrue(pattern.matcher(ip).matches());

    // test loc
    Location loc = psAgent.getLocation();
    assertTrue(loc != null);
    assertTrue(pattern.matcher(loc.getIp()).matches());
    assertTrue(loc.getPort() >= 1 && loc.getPort() <= 65535);
  } catch (Exception x) {
    LOG.error("run testPSClient failed ", x);
    throw x;
  }
}
Aggregations