use of edu.iu.dsc.tws.api.exceptions.Twister2Exception in project twister2 by DSC-SPIDAL.
the class ZKEventsManager method getNumberOfPastEvents.
/**
 * Counts the child znodes that currently exist under the events directory of a job.
 *
 * @param client   Curator client used to query ZooKeeper
 * @param rootPath root path handed to {@code ZKUtils.eventsDir}
 * @param jobID    job identifier handed to {@code ZKUtils.eventsDir}
 * @return the number of children found under the events directory
 * @throws Twister2Exception if the children of the events directory cannot be listed;
 *                           the underlying exception is preserved as the cause
 */
public static int getNumberOfPastEvents(CuratorFramework client,
                                        String rootPath,
                                        String jobID) throws Twister2Exception {
  final String eventsDir = ZKUtils.eventsDir(rootPath, jobID);
  int pastEventCount;
  try {
    // Curator's forPath declares "throws Exception", hence the broad catch below.
    pastEventCount = client.getChildren().forPath(eventsDir).size();
    LOG.info("Number of past events: " + pastEventCount);
  } catch (Exception e) {
    throw new Twister2Exception("Could not get children of events directory: " + eventsDir, e);
  }
  return pastEventCount;
}
use of edu.iu.dsc.tws.api.exceptions.Twister2Exception in project twister2 by DSC-SPIDAL.
the class JobMasterExample method main.
/**
* Starts a JobMaster locally. This main method is for local testing only.
* A JobMaster instance is started locally on the default port:
* edu.iu.dsc.tws.master.JobMasterContext.JOB_MASTER_PORT_DEFAULT = 11011
* <p>
* numberOfWorkers to join is expected as the single command line parameter
* <p>
* When all workers joined and all have sent completed messages,
* this server also completes and exits
* <p>
* An example usage of JobMaster can be seen in:
* edu.iu.dsc.tws.rsched.schedulers.k8s.master.JobMasterStarter
* <p>
* NOTE(review): args[0] is used in two incompatible ways below. It is first parsed as an
* integer (numberOfWorkers) and, when ZooKeeper is used, it is then compared against
* "start"/"restart". A numeric argument can never match those words, and a non-numeric
* argument makes Integer.parseInt throw NumberFormatException first. Confirm the intended
* command line and reconcile the two usage messages.
*
* @param args exactly one element is required; otherwise a usage message is logged
* and the method returns
*/
public static void main(String[] args) {
// exactly one command line argument is accepted
if (args.length != 1) {
LOG.info("usage: java JobMasterExample numberOfWorkers");
return;
}
// throws NumberFormatException when args[0] is not an integer
int numberOfWorkers = Integer.parseInt(args[0]);
String host = "0.0.0.0";
// we assume that the twister2Home is the current directory
// String configDir = "../twister2/config/src/yaml/";
String configDir = "";
// an empty path made absolute resolves to the current working directory
String twister2Home = Paths.get(configDir).toAbsolutePath().toString();
Config config = ConfigLoader.loadConfig(twister2Home, "conf", "kubernetes");
// NOTE(review): the same config object is passed twice to updateConfig; confirm this is intended
config = JobMasterClientExample.updateConfig(config, config, host);
LOG.info("Loaded: " + config.size() + " configuration parameters.");
// Twister2Job twister2Job = Twister2Job.loadTwister2Job(config, null);
// build a HelloWorld job in code instead of loading it from a job file
// presumably the compute resource arguments are (cpu, memory, instances) -- TODO confirm
Twister2Job twister2Job = Twister2Job.newBuilder().setJobName("hello-world-job").setWorkerClass(HelloWorld.class).addComputeResource(.2, 128, numberOfWorkers).build();
twister2Job.setUserName(System.getProperty("user.name"));
JobAPI.Job job = twister2Job.serialize();
LOG.info("JobID: " + job.getJobId());
JobMasterAPI.JobMasterState initialState = JobMasterAPI.JobMasterState.JM_STARTED;
// publish the job through the static field of JobMasterStarter before ZooKeeper is initialized
JobMasterStarter.job = job;
if (ZKContext.isZooKeeperServerUsed(config)) {
// NOTE(review): args[0] was already parsed as an integer above, so neither the "start" nor
// the "restart" branch can be reached with an argument that survived Integer.parseInt;
// with ZooKeeper enabled this falls through to the usage message and returns
if ("start".equalsIgnoreCase(args[0])) {
JobMasterStarter.initializeZooKeeper(config, job.getJobId(), host, initialState);
} else if ("restart".equalsIgnoreCase(args[0])) {
initialState = JobMasterAPI.JobMasterState.JM_RESTARTED;
JobMasterStarter.initializeZooKeeper(config, job.getJobId(), host, initialState);
// re-read the job from JobMasterStarter; presumably initializeZooKeeper may replace it
// on restart -- verify against JobMasterStarter
job = JobMasterStarter.job;
} else {
LOG.info("usage: java JobMasterExample start/restart");
return;
}
}
// write jobID to file
String dir = System.getProperty("user.home") + "/.twister2";
if (!FileUtils.isDirectoryExists(dir)) {
FileUtils.createDirectory(dir);
}
String filename = dir + "/last-job-id.txt";
// getBytes() without a charset argument uses the JVM default charset
// NOTE(review): the meaning of the boolean flag passed to writeToFile is not visible here
FileUtils.writeToFile(filename, (job.getJobId() + "").getBytes(), true);
LOG.info("Written jobID to file: " + job.getJobId());
// resolve the address of the local host; give up when it cannot be determined
String ip = null;
try {
ip = Inet4Address.getLocalHost().getHostAddress();
} catch (UnknownHostException e) {
LOG.log(Level.SEVERE, e.getMessage(), e);
return;
}
// only the ip is supplied for the job master node; the other two arguments are left null
JobMasterAPI.NodeInfo jobMasterNode = NodeInfoUtils.createNodeInfo(ip, null, null);
// presumably "default" is the Kubernetes namespace -- TODO confirm
KubernetesController controller = KubernetesController.init("default");
K8sScaler k8sScaler = new K8sScaler(config, job, controller);
// a NullTerminator is used as the job terminator for this local run
IJobTerminator jobTerminator = new NullTerminator();
JobMaster jobMaster = new JobMaster(config, host, jobTerminator, job, jobMasterNode, k8sScaler, initialState);
try {
// jobMaster.startJobMasterThreaded();
jobMaster.startJobMasterBlocking();
} catch (Twister2Exception e) {
// log the failure and abort main with an unchecked exception that keeps the cause
LOG.log(Level.SEVERE, "Exception when starting Job master: ", e);
throw new RuntimeException(e);
}
// NOTE(review): the message says "Threaded" although the blocking start is used above;
// presumably this line is reached only after the blocking call returns -- confirm
LOG.info("Threaded Job Master started:" + "\nnumberOfWorkers: " + job.getNumberOfWorkers() + "\njobID: " + job.getJobId());
}
use of edu.iu.dsc.tws.api.exceptions.Twister2Exception in project twister2 by DSC-SPIDAL.
the class ZKPersStateManager method removeScaledDownZNodes.
/**
 * Deletes the persistent-state znodes of workers removed by a scale-down.
 * Worker IDs in the half-open range [minID, maxID) are processed in ascending order:
 * minID is inclusive, maxID is exclusive.
 *
 * @param client   Curator client used to talk to ZooKeeper
 * @param rootPath root path handed to {@code ZKUtils.persDir}
 * @param jobID    job identifier handed to {@code ZKUtils.persDir}
 * @param minID    first worker ID whose znode is removed (inclusive)
 * @param maxID    end of the worker ID range (exclusive)
 * @throws Twister2Exception on the first znode that cannot be checked or deleted;
 *                           znodes of the remaining worker IDs are not processed
 */
public static void removeScaledDownZNodes(CuratorFramework client,
                                          String rootPath,
                                          String jobID,
                                          int minID,
                                          int maxID) throws Twister2Exception {
  final String persDir = ZKUtils.persDir(rootPath, jobID);

  int workerID = minID;
  while (workerID < maxID) {
    final String znodePath = ZKUtils.workerPath(persDir, workerID);
    try {
      // a null result from checkExists means there is no znode at this path
      boolean znodeMissing = client.checkExists().forPath(znodePath) == null;
      if (!znodeMissing) {
        client.delete().forPath(znodePath);
        LOG.info("Worker PersStateDir deleted: " + znodePath);
      }
    } catch (Exception e) {
      throw new Twister2Exception("Worker PersStateDir cannot be deleted: " + znodePath, e);
    }
    workerID++;
  }
}
use of edu.iu.dsc.tws.api.exceptions.Twister2Exception in project twister2 by DSC-SPIDAL.
the class ZKPersStateManager method updateWorkerStatus.
/**
 * Overwrites the data of a worker's persistent znode with the worker info,
 * the new status and the restart count.
 *
 * @param client       Curator client used to talk to ZooKeeper
 * @param rootPath     root path handed to {@code ZKUtils.persDir}
 * @param jobID        job identifier handed to {@code ZKUtils.persDir}
 * @param workerInfo   worker whose znode is updated; its worker ID selects the znode
 * @param restartCount restart count stored together with the status
 * @param newStatus    status to store
 * @return always {@code true}; a failure is reported by the exception instead
 * @throws Twister2Exception if the znode data cannot be written
 */
public static boolean updateWorkerStatus(CuratorFramework client,
                                         String rootPath,
                                         String jobID,
                                         WorkerInfo workerInfo,
                                         int restartCount,
                                         WorkerState newStatus) throws Twister2Exception {
  final String persDir = ZKUtils.persDir(rootPath, jobID);
  final String znodePath = ZKUtils.workerPath(persDir, workerInfo.getWorkerID());
  final WorkerWithState updated = new WorkerWithState(workerInfo, newStatus, restartCount);

  try {
    client.setData().forPath(znodePath, updated.toByteArray());
    LOG.info("Worker status changed to: " + newStatus);
  } catch (Exception e) {
    throw new Twister2Exception(
        "Could not update worker status in znode: " + workerInfo.getWorkerID(), e);
  }
  return true;
}
use of edu.iu.dsc.tws.api.exceptions.Twister2Exception in project twister2 by DSC-SPIDAL.
the class ZKPersStateManager method getWorkers.
/**
 * Return all registered workers.
 * Every child znode of the job's persistent-state directory is read and decoded
 * into a {@code WorkerWithState}, in the order ZooKeeper returns the children.
 *
 * @param client   Curator client used to talk to ZooKeeper
 * @param rootPath root path handed to {@code ZKUtils.persDir}
 * @param jobID    job identifier handed to {@code ZKUtils.persDir}
 * @return the decoded workers; empty when the directory has no children
 * @throws Twister2Exception if the children cannot be listed or a child znode cannot be
 *                           read or decoded; the underlying exception is kept as the cause
 */
public static LinkedList<WorkerWithState> getWorkers(CuratorFramework client,
                                                     String rootPath,
                                                     String jobID) throws Twister2Exception {
  String workersPersDir = ZKUtils.persDir(rootPath, jobID);
  try {
    List<String> children = client.getChildren().forPath(workersPersDir);
    // diamond operator instead of the raw LinkedList type: avoids the unchecked warning
    LinkedList<WorkerWithState> workers = new LinkedList<>();
    for (String childName : children) {
      String childPath = workersPersDir + "/" + childName;
      byte[] workerNodeBody = client.getData().forPath(childPath);
      WorkerWithState workerWithState = WorkerWithState.decode(workerNodeBody);
      workers.add(workerWithState);
    }
    return workers;
  } catch (Exception e) {
    throw new Twister2Exception(
        "Could not get persistent worker znode data: " + workersPersDir, e);
  }
}
Aggregations