use of com.tencent.angel.common.location.Location in project angel by Tencent.
the class MasterClient method workerRegister.
/**
 * Registers this worker with the master.
 *
 * <p>The request carries the worker attempt id, the ip and port of the location held by
 * {@link WorkerContext}, and the id of the worker's PS agent.
 *
 * @return the master's response to the register request
 * @throws ServiceException if the rpc fails
 */
public WorkerRegisterResponse workerRegister() throws ServiceException {
  Location workerLocation = WorkerContext.get().getLocation();

  // Listening address of this worker, as reported to the master.
  LocationProto listenAddress = LocationProto.newBuilder()
      .setIp(workerLocation.getIp())
      .setPort(workerLocation.getPort())
      .build();

  WorkerRegisterRequest registerRequest = WorkerRegisterRequest.newBuilder()
      .setWorkerAttemptId(WorkerContext.get().getWorkerAttemptIdProto())
      .setLocation(listenAddress)
      .setPsAgentId(WorkerContext.get().getPSAgent().getId())
      .build();

  // The first argument of the rpc stub is not used here; null is passed as in every call.
  return master.workerRegister(null, registerRequest);
}
use of com.tencent.angel.common.location.Location in project angel by Tencent.
the class PSAgentLocationManager method getPsLocation.
/**
 * Looks up the location of a parameter server.
 *
 * @param psId ps id
 * @param sync {@code true} to ask the master and store the answer via
 *     {@code setPsLocation}; {@code false} to return whatever the local location manager holds
 * @return ps location
 * @throws ServiceException if the request to the master fails
 */
public Location getPsLocation(ParameterServerId psId, boolean sync) throws ServiceException {
  if (sync) {
    // Fetch from the master and refresh the local entry before returning it.
    Location fromMaster = context.getMasterClient().getPSLocation(psId);
    setPsLocation(psId, fromMaster);
    return fromMaster;
  }
  return locationManager.getPsLocation(psId);
}
use of com.tencent.angel.common.location.Location in project angel by Tencent.
the class KubernetesWorkerApp method main.
/**
 * Starts a worker from the Kubernetes launcher.
 *
 * <p>Loads the configuration from the resource at {@code Constants.ANGEL_CONF_PATH()},
 * reads the executor id, attempt id, worker-group/task numbers, user task class and the
 * master bind address/port from environment variables, then creates a {@link Worker}
 * and calls {@code initAndStart()} on it. The executor id is used both as the
 * worker-group index and as the worker index.
 *
 * @param args command line arguments (not read)
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
  LOG.info("Starting worker...");

  // Load the Angel configuration resource.
  Configuration conf = new Configuration();
  ConfUtils.addResourceProperties(conf, Constants.ANGEL_CONF_PATH());

  // The application id is rebuilt from the two values stored in the configuration.
  ApplicationId appId = ApplicationId.newInstance(
      Long.parseLong(conf.get(AngelConf.ANGEL_KUBERNETES_APP_CLUSTERTIMESTAMP)),
      Integer.parseInt(conf.get(AngelConf.ANGEL_KUBERNETES_APP_RANDOMID)));
  String user = System.getenv(ApplicationConstants.Environment.USER.name());

  // The executor id serves as worker-group index and as worker index.
  int executorIndex = Integer.parseInt(System.getenv(Constants.ENV_EXECUTOR_ID()));
  int attemptIndex = Integer.parseInt(System.getenv(Constants.ENV_EXECUTOR_ATTEMPT_ID()));
  WorkerAttemptId workerAttemptId = new WorkerAttemptId(
      new WorkerId(new WorkerGroupId(executorIndex), executorIndex), attemptIndex);

  // Copy the actual group/task numbers and the user task class from the environment.
  conf.set(AngelConf.ANGEL_WORKERGROUP_ACTUAL_NUM,
      System.getenv(Constants.ENV_ANGEL_WORKERGROUP_NUMBER()));
  conf.set(AngelConf.ANGEL_TASK_ACTUAL_NUM,
      System.getenv(Constants.ENV_ANGEL_TASK_NUMBER()));
  conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS,
      System.getenv(Constants.ENV_ANGEL_USER_TASK()));
  LOG.info("actual workergroup number:" + conf.get(AngelConf.ANGEL_WORKERGROUP_ACTUAL_NUM));
  LOG.info("actual task number:" + conf.get(AngelConf.ANGEL_TASK_ACTUAL_NUM));

  // Master location comes from the bind address/port environment variables.
  String appMasterHost = System.getenv(Constants.ENV_MASTER_BIND_ADDRESS());
  int appMasterPort = Integer.parseInt(System.getenv(Constants.ENV_MASTER_BIND_PORT()));
  Location masterLocation = new Location(appMasterHost, appMasterPort);
  LOG.info("appMasterHost is " + appMasterHost + ", appMasterPort is " + appMasterPort);

  conf.setBoolean("mapred.mapper.new-api", true);

  Worker worker =
      new Worker(AngelConf.clone(conf), appId, user, workerAttemptId, masterLocation, 0, false);
  try {
    worker.initAndStart();
  } catch (Exception e) {
    // Log the failure and hand its message to the worker's error handler.
    LOG.fatal("Failed to start worker.", e);
    worker.error(e.getMessage());
  }
}
use of com.tencent.angel.common.location.Location in project angel by Tencent.
the class Worker method main.
/**
 * Starts a worker inside a container.
 *
 * <p>Loads {@code AngelConf.ANGEL_JOB_CONF_FILE}, derives the application id from the
 * {@code CONTAINER_ID} environment variable, reads the worker ids, worker-group/task
 * numbers, user task class, master listen address/port and initial clock from the
 * environment, then creates a {@link Worker} and calls {@code initAndStart()} on it.
 *
 * @param args command line arguments (not read)
 */
public static void main(String[] args) {
  // Load the job configuration file.
  Configuration conf = new Configuration();
  conf.addResource(AngelConf.ANGEL_JOB_CONF_FILE);

  // Container id -> application attempt id -> application id.
  ContainerId containerId =
      ConverterUtils.toContainerId(System.getenv(Environment.CONTAINER_ID.name()));
  ApplicationId appId = containerId.getApplicationAttemptId().getApplicationId();
  String user = System.getenv(Environment.USER.name());

  // Local directories are taken from the LOCAL_DIRS environment variable.
  String[] localDirs =
      StringUtils.getTrimmedStrings(System.getenv(Environment.LOCAL_DIRS.name()));
  conf.setStrings(AngelConf.LOCAL_DIR, localDirs);
  LOG.info(AngelConf.LOCAL_DIR + " for child: " + conf.get(AngelConf.LOCAL_DIR));

  // Worker identity: group index, worker index and attempt index.
  int groupIndex = Integer.parseInt(System.getenv(AngelEnvironment.WORKER_GROUP_ID.name()));
  int workerIndex = Integer.parseInt(System.getenv(AngelEnvironment.WORKER_ID.name()));
  int attemptIndex = Integer.parseInt(System.getenv(AngelEnvironment.WORKER_ATTEMPT_ID.name()));
  WorkerAttemptId workerAttemptId = new WorkerAttemptId(
      new WorkerId(new WorkerGroupId(groupIndex), workerIndex), attemptIndex);

  // Copy the actual group/task numbers and the user task class from the environment.
  conf.set(AngelConf.ANGEL_WORKERGROUP_ACTUAL_NUM,
      System.getenv(AngelEnvironment.WORKERGROUP_NUMBER.name()));
  conf.set(AngelConf.ANGEL_TASK_ACTUAL_NUM,
      System.getenv(AngelEnvironment.TASK_NUMBER.name()));
  conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS,
      System.getenv(AngelEnvironment.ANGEL_USER_TASK.name()));
  LOG.info("actual workergroup number:" + conf.get(AngelConf.ANGEL_WORKERGROUP_ACTUAL_NUM));
  LOG.info("actual task number:" + conf.get(AngelConf.ANGEL_TASK_ACTUAL_NUM));

  // Master location comes from the listen address/port environment variables.
  String masterHost = System.getenv(AngelEnvironment.LISTEN_ADDR.name());
  String masterPort = System.getenv(AngelEnvironment.LISTEN_PORT.name());
  Location masterLocation = new Location(masterHost, Integer.parseInt(masterPort));

  String initialClock = System.getenv(AngelEnvironment.INIT_MIN_CLOCK.name());
  Worker worker = new Worker(
      AngelConf.clone(conf),
      appId,
      user,
      workerAttemptId,
      masterLocation,
      Integer.parseInt(initialClock),
      false);
  try {
    worker.initAndStart();
  } catch (Exception e) {
    // Log the failure and hand its message to the worker's error handler.
    LOG.fatal("Failed to start worker.", e);
    worker.error(e.getMessage());
  }
}
use of com.tencent.angel.common.location.Location in project angel by Tencent.
the class InitNeighborTest method testCSR.
/**
 * Pushes node neighbors to the PS in three batches, finishes the initialization with
 * {@code InitNeighborOver}, then samples and logs the neighbors of nodes 1..8.
 * Afterwards it writes a checkpoint, stops the PS, reports a PS error to the master,
 * sleeps for ten seconds and samples/logs the neighbors once more.
 *
 * <p>The test contains no assertions; it only fails if one of the calls throws.
 * NOTE(review): the sleep presumably gives the master time to bring the PS back before
 * the second sample — confirm.
 */
@Test
public void testCSR() throws Exception {
  Worker worker = LocalClusterContext.get().getWorker(workerAttempt0Id).getWorker();
  MatrixClient matrixClient = worker.getPSAgent().getMatrixClient(SPARSE_INT_MAT, 0);
  int matrixId = matrixClient.getMatrixId();
  ParameterServer ps = LocalClusterContext.get().getPS(psAttempt0Id).getPS();

  // Open a direct connection to the master so that a PS error can be reported later.
  Location masterLoc = LocalClusterContext.get()
      .getMaster()
      .getAppMaster()
      .getAppContext()
      .getMasterService()
      .getLocation();
  TConnection connection = TConnectionManager.getConnection(ps.getConf());
  MasterProtocol master = connection.getMasterService(masterLoc.getIp(), masterLoc.getPort());

  // Init node neighbors: first batch
  Int2ObjectOpenHashMap<int[]> neighborBatch = new Int2ObjectOpenHashMap<>();
  neighborBatch.put(1, new int[] { 2, 3 });
  neighborBatch.put(2, new int[] { 4 });
  InitNeighbor initFunc = new InitNeighbor(new InitNeighborParam(matrixId, neighborBatch));
  matrixClient.asyncUpdate(initFunc).get();
  neighborBatch.clear();

  // Second batch
  neighborBatch.put(1, new int[] { 4, 5, 6 });
  neighborBatch.put(2, new int[] { 5 });
  neighborBatch.put(4, new int[] { 5, 6 });
  initFunc = new InitNeighbor(new InitNeighborParam(matrixId, neighborBatch));
  matrixClient.asyncUpdate(initFunc).get();
  neighborBatch.clear();

  // Third batch
  neighborBatch.put(3, new int[] { 4, 5, 6 });
  neighborBatch.put(5, new int[] { 6 });
  neighborBatch.put(8, new int[] { 3, 4 });
  initFunc = new InitNeighbor(new InitNeighborParam(matrixId, neighborBatch));
  matrixClient.asyncUpdate(initFunc).get();
  neighborBatch.clear();

  // Signal that all neighbors have been pushed
  matrixClient.asyncUpdate(new InitNeighborOver(new InitNeighborOverParam(matrixId))).get();

  // Sample the neighbors
  int[] nodeIds = new int[] { 1, 2, 3, 4, 5, 6, 7, 8 };
  SampleNeighborParam sampleParam = new SampleNeighborParam(matrixId, nodeIds, -1);
  Int2ObjectOpenHashMap<int[]> sampled =
      ((SampleNeighborResult) (matrixClient.get(new SampleNeighbor(sampleParam))))
          .getNodeIdToNeighbors();
  LOG.info("==============================sample neighbors result============================");
  for (ObjectIterator<Entry<int[]>> it = sampled.int2ObjectEntrySet().fastIterator();
      it.hasNext();) {
    Entry<int[]> nodeEntry = it.next();
    LOG.info("node id = " + nodeEntry.getIntKey() + ", neighbors = "
        + Arrays.toString(nodeEntry.getValue()));
  }

  // Checkpoint, stop the PS and report the failure to the master
  matrixClient.checkpoint(0);
  ps.stop(-1);
  PSErrorRequest errorRequest = PSErrorRequest.newBuilder()
      .setPsAttemptId(ProtobufUtil.convertToIdProto(psAttempt0Id))
      .setMsg("out of memory")
      .build();
  master.psError(null, errorRequest);
  Thread.sleep(10000);

  // Sample again after the PS error was reported
  sampled = ((SampleNeighborResult) (matrixClient.get(new SampleNeighbor(sampleParam))))
      .getNodeIdToNeighbors();
  LOG.info("==============================sample neighbors result============================");
  for (ObjectIterator<Entry<int[]>> it = sampled.int2ObjectEntrySet().fastIterator();
      it.hasNext();) {
    Entry<int[]> nodeEntry = it.next();
    LOG.info("node id = " + nodeEntry.getIntKey() + ", neighbors = "
        + Arrays.toString(nodeEntry.getValue()));
  }
}
Aggregations