use of edu.iu.dsc.tws.common.net.tcp.Progress in project twister2 by DSC-SPIDAL.
the class JobMaster method init.
/**
 * Initialize the Job Master.
 * <p>
 * Performs the following steps, in this order:
 * registers the job with the Dashboard (when a dashboard client is present),
 * creates the request/response server, initializes the driver,
 * creates the worker monitor and worker handler, wires the barrier monitor
 * to either the ZooKeeper based or the Job Master based barrier handler,
 * initializes the ZooKeeper master controller, optionally initializes the
 * checkpoint manager, and finally starts the server and calls the looper.
 *
 * @throws Twister2Exception propagated unchanged from the initialization steps
 * (for example {@code initZKMasterController})
 */
private void init() throws Twister2Exception {
  looper = new Progress();

  // if Dashboard is used, register this job with it
  if (dashClient != null) {
    boolean registered = dashClient.registerJob(job, nodeInfo);
    if (!registered) {
      LOG.warning("Not using Dashboard since it can not register with it.");
      // drop the client so that the components created below receive null
      dashClient = null;
    }
  }

  ServerConnectHandler connectHandler = new ServerConnectHandler();
  // half of the worker count, capped at MAX_BACK_LOG
  int backLog = Math.min(job.getNumberOfWorkers() / 2, MAX_BACK_LOG);
  rrServer = new RRServer(
      config, jmAddress, masterPort, looper, JOB_MASTER_ID, connectHandler, backLog);

  // init Driver if it exists
  // this has to be done before WorkerMonitor initialization
  initDriver();

  JobFailureWatcher jobFailureWatcher = new JobFailureWatcher();
  workerMonitor = new WorkerMonitor(
      this, rrServer, dashClient, zkJobUpdater, job, driver, jobFailureWatcher);

  // read the ZooKeeper usage flag once, so that every decision below agrees
  boolean zkServerUsed = ZKContext.isZooKeeperServerUsed(config);

  workerHandler = new JMWorkerHandler(workerMonitor, rrServer, zkServerUsed);
  if (!zkServerUsed) {
    workerMonitor.setWorkerEventSender(workerHandler);
  }

  // initialize BarrierMonitor
  barrierMonitor = new BarrierMonitor(workerMonitor, jobFailureWatcher);
  if (zkServerUsed) {
    zkBarrierHandler = new ZKBarrierHandler(
        barrierMonitor, config, job.getJobId(), job.getNumberOfWorkers());
    barrierMonitor.setBarrierResponder(zkBarrierHandler);
    zkBarrierHandler.initialize(initialState);
  } else {
    JMBarrierHandler jmBarrierHandler = new JMBarrierHandler(rrServer, barrierMonitor);
    barrierMonitor.setBarrierResponder(jmBarrierHandler);
  }
  jobFailureWatcher.addJobFaultListener(barrierMonitor);

  // if ZooKeeper server is used for this job, initialize that
  // any Twister2Exception thrown here goes straight to the caller
  initZKMasterController(workerMonitor);

  // initialize checkpoint manager
  if (CheckpointingContext.isCheckpointingEnabled(config)) {
    StateStore stateStore = CheckpointUtils.getStateStore(config);
    stateStore.init(config, "checkpoint-manager");
    this.checkpointManager = new CheckpointManager(this.rrServer, stateStore, job.getJobId());
    jobFailureWatcher.addJobFaultListener(this.checkpointManager);
    LOG.info("Checkpoint manager initialized");
    this.checkpointManager.init();
  }
  // done initializing checkpoint manager

  rrServer.start();
  looper.loop();
}
use of edu.iu.dsc.tws.common.net.tcp.Progress in project twister2 by DSC-SPIDAL.
the class JMWorkerAgent method init.
/**
 * Initialize JMWorkerAgent.
 * <p>
 * Creates the client side components (request/response client, driver agent,
 * status updater, checkpointing client and worker controller), registers the
 * response handlers and then tries to connect to the Job Master.
 * <p>
 * This method does not return a status value: if the client is still not
 * connected after the connection attempts, a {@link RuntimeException} is thrown.
 *
 * @throws RuntimeException if the client is not connected to the Job Master
 * after {@code tryUntilConnected} returns
 */
private void init() {
  looper = new Progress();

  ClientConnectHandler onConnect = new ClientConnectHandler();
  // the same worker id is handed to every client side component
  // NOTE(review): assumes getWorkerID() returns int - confirm against WorkerInfo
  int workerId = thisWorker.getWorkerID();

  rrClient = new RRClient(jmAddress, jmPort, null, looper, workerId, onConnect);
  driverAgent = new JMDriverAgent(rrClient, workerId);
  statusUpdater = new JMWorkerStatusUpdater(rrClient, workerId, config);

  // register one shared handler for the protocol buffer message types below
  ResponseMessageHandler responseHandler = new ResponseMessageHandler();
  rrClient.registerResponseHandler(
      JobMasterAPI.RegisterWorker.newBuilder(), responseHandler);
  rrClient.registerResponseHandler(
      JobMasterAPI.RegisterWorkerResponse.newBuilder(), responseHandler);
  rrClient.registerResponseHandler(
      JobMasterAPI.JobScaled.newBuilder(), responseHandler);
  rrClient.registerResponseHandler(
      JobMasterAPI.AllJoined.newBuilder(), responseHandler);

  // the checkpointing client is created here, but initialized only after connecting
  this.checkpointClient = new CheckpointingClientImpl(
      rrClient, CheckpointingContext.getRequestTimeout(config));

  workerController = new JMWorkerController(
      config, thisWorker, numberOfWorkers, restartCount, rrClient, this.checkpointClient);

  // attempt the connection to the Job Master; give up if still not connected afterwards
  tryUntilConnected(CONNECTION_TRY_TIME_LIMIT);
  if (!rrClient.isConnected()) {
    throw new RuntimeException("JMWorkerAgent can not connect to Job Master. Exiting .....");
  }

  // connected: now the checkpointing client can be initialized
  this.checkpointClient.init();
}
Aggregations