use of edu.iu.dsc.tws.common.zk.ZKJobMasterRegistrar in project twister2 by DSC-SPIDAL.
the class ZKJobMasterRegistrarExample method main.
/**
 * Registers a Job Master address on a ZooKeeper server.
 * <p>
 * We assume that we have the Job Master IP address and the port number.
 * We register this pair of information on a ZooKeeper server.
 * Workers will discover the Job Master address by querying this ZooKeeper server.
 * <p>
 * If the first registration attempt fails and there is already a znode on the
 * ZooKeeper with the same name, we delete that znode and try once more.
 * It must be from a previous registration session.
 * If the Job Master still can not be registered, the method logs the failure
 * and exits without waiting.
 * <p>
 * After a successful registration the method waits 30 seconds, closes the
 * registrar and exits.
 * <p>
 * This class is used together with ZKJobMasterFinderExample.java.
 * This class registers the Job Master and that class discovers it.
 *
 * @param args the only parameter is the ZooKeeper server address;
 *             with any other number of arguments the usage is printed and the method returns
 */
public static void main(String[] args) {
  if (args.length != 1) {
    printUsage();
    return;
  }

  String zkAddress = args[0];
  String jobID = "test-job";
  Config cnfg = buildConfig(zkAddress);

  // placeholder address used by this example
  String jobMasterIP = "x.y.z.t";
  // get the default port
  int jobMasterPort = JobMasterContext.jobMasterPort(cnfg);

  ZKJobMasterRegistrar registrar =
      new ZKJobMasterRegistrar(cnfg, jobMasterIP, jobMasterPort, jobID);

  boolean interrupted = false;
  try {
    boolean initialized = registrar.initialize();
    if (!initialized && registrar.sameZNodeExist()) {
      // a znode with the same name is in the way: remove it and retry once
      registrar.deleteJobMasterZNode();
      initialized = registrar.initialize();
    }

    if (!initialized) {
      // nothing was registered, so there is nothing for the workers to discover
      LOG.severe("Could not register the Job Master on the ZooKeeper server: " + zkAddress);
      return;
    }

    long waitDuration = 30;
    LOG.info("Waiting " + waitDuration + " seconds. Will exit afterwards...");
    Thread.sleep(waitDuration * 1000);
  } catch (InterruptedException e) {
    interrupted = true;
    LOG.warning("Interrupted while waiting. Exiting early...");
  } finally {
    // always release the registrar, also on the early-return and interrupt paths
    registrar.close();
    if (interrupted) {
      // restore the interrupt status only after close(),
      // so that the cleanup itself does not run with the flag set
      Thread.currentThread().interrupt();
    }
  }

  LOG.info("Done, exiting ...");
}
use of edu.iu.dsc.tws.common.zk.ZKJobMasterRegistrar in project twister2 by DSC-SPIDAL.
the class MesosJobMasterStarter method main.
/**
 * Starts the Job Master process for a Mesos job.
 * <p>
 * The method reads WORKER_ID, JOB_NAME, JOB_ID and FRAMEWORK_ID from the environment,
 * loads the "mesos" configuration from the "twister2-job" directory under the current
 * working directory, and reads the job description from "twister2-job/JOB_NAME.job".
 * It then registers the local host address on ZooKeeper through a
 * {@link ZKJobMasterRegistrar} and, unless the configuration says that the Job Master
 * runs in the client, starts a {@link JobMaster} in blocking mode.
 * Finally it calls waitIndefinitely() and afterwards deletes the Job Master znode
 * and closes the registrar.
 *
 * @param args not used
 * @throws IllegalStateException if the local host address can not be resolved
 */
public static void main(String[] args) {
  // workerId is not used below, but WORKER_ID is still parsed so that
  // a missing or malformed value fails at startup as it did before
  int workerId = Integer.parseInt(System.getenv("WORKER_ID"));
  String jobName = System.getenv("JOB_NAME");
  String jobId = System.getenv("JOB_ID");
  String frameworkId = System.getenv("FRAMEWORK_ID");

  String twister2Home = Paths.get("").toAbsolutePath().toString();
  String configDir = "twister2-job";
  Config config = ConfigLoader.loadConfig(twister2Home, configDir, "mesos");
  Config.Builder builder = Config.newBuilder().putAll(config);
  builder.put(Context.JOB_ID, jobId);
  config = builder.build();

  JobTerminator terminator = new JobTerminator(config, frameworkId);
  MesosWorkerLogger logger = new MesosWorkerLogger(config, "/persistent-volume/logs", "master");
  logger.initLogging();

  edu.iu.dsc.tws.rsched.schedulers.mesos.MesosController controller =
      new edu.iu.dsc.tws.rsched.schedulers.mesos.MesosController(config);
  JobAPI.Job job = JobUtils.readJobFile("twister2-job/" + jobName + ".job");

  // resolve the local address once; both the registrar and the Job Master need it
  String hostAddress;
  try {
    hostAddress = Inet4Address.getLocalHost().getHostAddress();
  } catch (UnknownHostException e) {
    // without an address no registrar can be created; fail with the real cause
    // instead of dereferencing a null registrar later on
    LOG.log(Level.SEVERE, "JobMaster CAN NOT BE REGISTERED:", e);
    throw new IllegalStateException("Can not resolve the local host address", e);
  }

  // this block is for ZKjobmaster register
  ZKJobMasterRegistrar registrar =
      new ZKJobMasterRegistrar(config, hostAddress, 11011, job.getJobId());
  boolean initialized = registrar.initialize();
  if (!initialized) {
    LOG.info("CAN NOT INITIALIZE");
    if (registrar.sameZNodeExist()) {
      // a znode with the same name is in the way: remove it and retry once
      registrar.deleteJobMasterZNode();
      initialized = registrar.initialize();
    }
  }

  if (initialized) {
    LOG.info("JobMaster REGISTERED..:" + hostAddress);
  } else {
    // keep going as before, but make the failed registration visible in the log
    LOG.severe("JobMaster CAN NOT BE REGISTERED:");
  }

  if (!JobMasterContext.jobMasterRunsInClient(config)) {
    try {
      JobMasterAPI.NodeInfo jobMasterNodeInfo = MesosContext.getNodeInfo(config, hostAddress);
      IScalerPerCluster clusterScaler = new NullScaler();
      // NOTE(review): mesosScaler is configured here but the NullScaler is the one
      // handed to the JobMaster -- confirm whether that is intended
      MesosScaler mesosScaler = new MesosScaler(config, job, controller);
      mesosScaler.setFrameWorkId(frameworkId);
      JobMasterAPI.JobMasterState initialState = JobMasterAPI.JobMasterState.JM_STARTED;

      JobMaster jobMaster = new JobMaster(config, hostAddress, terminator, job,
          jobMasterNodeInfo, clusterScaler, initialState);
      LOG.info("JobMaster host address...:" + hostAddress);
      jobMaster.startJobMasterBlocking();
    } catch (Exception e) {
      LOG.log(Level.SEVERE, "Exception when starting the job master: ", e);
    }
  }

  waitIndefinitely();
  registrar.deleteJobMasterZNode();
  registrar.close();
}
use of edu.iu.dsc.tws.common.zk.ZKJobMasterRegistrar in project twister2 by DSC-SPIDAL.
the class NomadJobMasterStarter method launch.
/**
 * launch the job master
 * <p>
 * Reads NOMAD_ALLOC_INDEX and NOMAD_ALLOC_ID from the environment, registers the
 * local host address and the configured Job Master port on ZooKeeper, starts the
 * Job Master in blocking mode, and calls waitIndefinitely(). Afterwards it deletes
 * the Job Master znode, closes the registrar and starts the controller with the job.
 *
 * @return false if setup fails, i.e. the local host address can not be determined
 * or the ZooKeeper registrar can not be created; otherwise the value returned by
 * controller.start(job)
 * @throws NumberFormatException if NOMAD_ALLOC_INDEX is not set or is not an integer
 */
public boolean launch() {
  String indexEnv = System.getenv("NOMAD_ALLOC_INDEX");
  String idEnv = System.getenv("NOMAD_ALLOC_ID");
  int workerID = Integer.parseInt(indexEnv);

  MPIWorkerStarter.initJMLogger(config);
  LOG.log(Level.INFO, String.format("Worker id = %s and index = %d", idEnv, workerID));

  int port = JobMasterContext.jobMasterPort(config);

  String hostAddress;
  try {
    hostAddress = Inet4Address.getLocalHost().getHostAddress();
  } catch (UnknownHostException e) {
    // a null address can neither be registered nor used to start the Job Master
    LOG.log(Level.SEVERE, "Can not get the local host address. "
        + "JobMaster CAN NOT BE REGISTERED:", e);
    return false;
  }

  ZKJobMasterRegistrar registrar;
  try {
    registrar = new ZKJobMasterRegistrar(config, hostAddress, port, job.getJobId());
  } catch (Exception e) {
    // report setup failure instead of dereferencing a null registrar below
    LOG.log(Level.SEVERE, "JobMaster CAN NOT BE REGISTERED:", e);
    return false;
  }

  boolean initialized = registrar.initialize();
  if (!initialized) {
    LOG.info("CAN NOT INITIALIZE");
    if (registrar.sameZNodeExist()) {
      // a znode with the same name is in the way: remove it and retry once
      registrar.deleteJobMasterZNode();
      initialized = registrar.initialize();
    }
  }

  if (initialized) {
    LOG.info("JobMaster REGISTERED..:" + hostAddress);
  } else {
    // keep going as before, but make the failed registration visible in the log
    LOG.severe("JobMaster CAN NOT BE REGISTERED:");
  }

  // start the Job Master locally
  JobMasterAPI.NodeInfo jobMasterNodeInfo = NomadContext.getNodeInfo(config, hostAddress);
  IScalerPerCluster clusterScaler = new NullScaler();

  int workerCount = job.getNumberOfWorkers();
  LOG.info("Worker Count..: " + workerCount);

  LOG.log(Level.INFO, String.format("Starting the Job Master: %s:%d", hostAddress, port));
  JobMasterAPI.JobMasterState initialState = JobMasterAPI.JobMasterState.JM_STARTED;
  NullTerminator nt = new NullTerminator();

  JobMaster jobMaster = new JobMaster(config, hostAddress, nt, job,
      jobMasterNodeInfo, clusterScaler, initialState);
  jobMaster.addShutdownHook(true);
  try {
    jobMaster.startJobMasterBlocking();
  } catch (Twister2Exception e) {
    LOG.log(Level.SEVERE, e.getMessage(), e);
  }

  waitIndefinitely();
  registrar.deleteJobMasterZNode();
  registrar.close();

  return controller.start(job);
}
Aggregations