Use of edu.iu.dsc.tws.common.zk.WorkerWithState in project twister2 by DSC-SPIDAL.
From class JMWorkerHandler, method registerWorkerMessageReceived.
/**
 * Handles a RegisterWorker request received from a worker.
 * <p>
 * When ZooKeeper is in use the request is only acknowledged: a success response is sent
 * and workerMonitor is asked to inform the driver about the all-joined event.
 * Otherwise the worker is handed to workerMonitor, either as a restarted worker
 * (restart count greater than zero) or as a newly started one. A newly started worker
 * whose ID is already known to workerMonitor gets a failure response and is not recorded.
 * If this registration is the one that makes all workers joined, the waiting
 * ListWorkers requests are answered and allJoined() is invoked.
 *
 * @param id the ID of the request, used when sending the response
 * @param message the RegisterWorker message sent by the worker
 */
private void registerWorkerMessageReceived(RequestID id, JobMasterAPI.RegisterWorker message) {
  // mark the all-connected condition first, if it holds
  handleAllConnected();

  final int workerID = message.getWorkerInfo().getWorkerID();

  if (zkUsed) {
    LOG.fine("Since ZooKeeper is used, ignoring RegisterWorker message for worker: " + workerID);
    sendRegisterWorkerResponse(id, workerID, true, null);

    // with ZooKeeper, the all-joined event is published to the driver (if one exists) here;
    // without ZooKeeper, workerMonitor takes care of it
    workerMonitor.informDriverForAllJoined();
    return;
  }

  LOG.fine("RegisterWorker message received: \n" + message);

  // remember whether everybody had joined before this worker is processed
  final boolean allJoinedBefore = workerMonitor.isAllJoined();

  final int restarts = message.getRestartCount();
  final boolean comingFromFailure = restarts > 0;

  JobMasterAPI.WorkerState firstState;
  if (comingFromFailure) {
    firstState = JobMasterAPI.WorkerState.RESTARTED;
  } else {
    firstState = JobMasterAPI.WorkerState.STARTED;
  }

  WorkerWithState worker = new WorkerWithState(message.getWorkerInfo(), firstState, restarts);

  if (comingFromFailure) {
    // a restarted worker is always accepted
    sendRegisterWorkerResponse(id, workerID, true, null);
    workerMonitor.restarted(worker);
  } else if (workerMonitor.existWorker(worker.getWorkerID())) {
    // a fresh worker must not reuse the ID of an already registered worker
    String failMessage =
        "There is an already registered worker with workerID: " + worker.getWorkerID();
    LOG.severe(failMessage);
    sendRegisterWorkerResponse(id, workerID, false, failMessage);
    return;
  } else {
    // a fresh worker with an unused ID: acknowledge first, then record it
    sendRegisterWorkerResponse(id, workerID, true, null);
    workerMonitor.started(worker);
  }

  // nothing more to do unless this very registration completed the set of workers
  if (allJoinedBefore || !workerMonitor.isAllJoined()) {
    return;
  }

  LOG.info("All workers joined the job. Worker IDs: " + workerMonitor.getWorkerIDs());
  sendListWorkersResponseToWaitList();
  allJoined();
}
Use of edu.iu.dsc.tws.common.zk.WorkerWithState in project twister2 by DSC-SPIDAL.
From class ZKMasterController, method childZnodeUpdated.
/**
 * Reacts to a status change written to a worker's persistent znode.
 * <p>
 * The worker ID is extracted from the znode path and the new state is decoded from the
 * znode data. workerMonitor is notified only when the new state is COMPLETED or
 * FULLY_FAILED; any other state is just logged.
 *
 * @param event the cache event that carries the path and the data of the updated znode
 */
private void childZnodeUpdated(PathChildrenCacheEvent event) {
  int workerID = ZKUtils.getWorkerIDFromPersPath(event.getData().getPath());
  JobMasterAPI.WorkerState newState =
      WorkerWithState.decode(event.getData().getData()).getState();

  LOG.fine(String.format("Worker[%s] status changed to: %s ", workerID, newState));

  // only the two terminal states are forwarded to workerMonitor
  if (newState == JobMasterAPI.WorkerState.COMPLETED) {
    workerMonitor.completed(workerID);
  } else if (newState == JobMasterAPI.WorkerState.FULLY_FAILED) {
    workerMonitor.fullyFailed(workerID);
  }
}
Use of edu.iu.dsc.tws.common.zk.WorkerWithState in project twister2 by DSC-SPIDAL.
From class ZKJobLister, method listJob.
/**
 * Reads a single job and its workers from the ZooKeeper server and logs them as a table.
 * <p>
 * If the job or its workers can not be read, the failure is logged at SEVERE level and
 * nothing else is printed. If the job has no workers, only a "0 workers" line is logged.
 * Otherwise the job ID, job state and number of workers are logged, followed by one row
 * per worker with its ID, IP and state.
 *
 * @param jobID the ID of the job to list
 */
public static void listJob(String jobID) {
  CuratorFramework client = ZKUtils.connectToServer(ZKContext.serverAddresses(config));
  String rootPath = ZKContext.rootNode(config);

  JobWithState job;
  List<WorkerWithState> workers;
  try {
    job = JobZNodeManager.readJobZNode(client, rootPath, jobID);
    workers = ZKPersStateManager.getWorkers(client, rootPath, jobID);
  } catch (Exception e) {
    LOG.log(Level.SEVERE, "Could not get the job from zookeeper: " + jobID, e);
    return;
  }

  if (workers.isEmpty()) {
    LOG.info("\nNumber of workers in the job: 0");
    return;
  }

  // the IP column has to be wide enough for the longest worker IP
  int longestIP = workers.stream()
      .map(w -> w.getInfo().getWorkerIP())
      .mapToInt(String::length)
      .max()
      .orElseThrow(() -> new RuntimeException("No valid workerIP in WorkerInfo"));

  final String idHeader = "WorkerID";
  final String ipHeader = "WorkerIP";
  final String stateHeader = "Worker State";

  // first two columns are left-justified and padded with three extra spaces
  int idColumnWidth = idHeader.length() + 3;
  int ipColumnWidth = longestIP + 3;
  int tableWidth = idColumnWidth + ipColumnWidth + stateHeader.length();
  String rowFormat = "%-" + idColumnWidth + "s%-" + ipColumnWidth + "s%s\n";

  StringBuilder report = new StringBuilder();
  Formatter out = new Formatter(report);

  // job summary
  out.format("\n\n%s", "JobID: " + job.getJob().getJobId())
      .format("\n%s", "Job State: " + job.getState())
      .format("\n%s", "Number of Workers: " + job.getJob().getNumberOfWorkers())
      .format("\n%s", "")
      .format("\n%s", "List of Workers: \n");

  // table header followed by a separator line spanning the whole table
  out.format(rowFormat, idHeader, ipHeader, stateHeader)
      .format("%s\n", StringUtils.repeat('=', tableWidth));

  // one row per worker
  workers.forEach(worker -> out.format(rowFormat,
      String.valueOf(worker.getWorkerID()),
      worker.getInfo().getWorkerIP(),
      worker.getState().toString()));

  LOG.info(report.toString());
}
Aggregations