use of edu.iu.dsc.tws.api.scheduler.Twister2JobState in project twister2 by DSC-SPIDAL.
the class BatchTsetExample method submitJob.
/**
 * Builds a {@code Twister2Job} for the given worker class and submits it.
 *
 * <p>The job name is the simple class name, i.e. everything after the last
 * {@code '.'} in {@code clazz}; when {@code clazz} contains no dot the whole
 * string is used.
 *
 * @param config the configuration handed to {@code Twister2Submitter.submitJob}
 * @param containers third argument of {@code addComputeResource}
 *     (presumably the number of worker instances; confirm against the builder)
 * @param jobConfig the job-level configuration attached to the job
 * @param clazz fully qualified name of the worker class
 */
public static void submitJob(Config config, int containers, JobConfig jobConfig, String clazz) {
  // lastIndexOf returns -1 when there is no '.', so "+ 1" yields index 0
  String jobName = clazz.substring(clazz.lastIndexOf(".") + 1);
  Twister2Job twister2Job = Twister2Job.newBuilder()
      .setJobName(jobName)
      .setWorkerClass(clazz)
      .addComputeResource(1, 512, containers)
      .setConfig(jobConfig)
      .build();
  // now submit the job; the returned job state is not used by this example
  Twister2Submitter.submitJob(twister2Job, config);
}
use of edu.iu.dsc.tws.api.scheduler.Twister2JobState in project twister2 by DSC-SPIDAL.
the class MPILauncher method launch.
/**
 * Launches the job through the MPI controller and waits for it to finish.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>validate the configuration via {@code configsOK()};</li>
 *   <li>distribute the job files (non shared file system) or set up the
 *       working directory (shared file system);</li>
 *   <li>if the job master is used and runs in the client, start it on a free
 *       port in its own thread;</li>
 *   <li>run {@code MPIController} in a dedicated thread and join it;</li>
 *   <li>join the job master thread, if one was started, and copy the driver's
 *       state and messages into the returned job state.</li>
 * </ol>
 *
 * <p>If the calling thread is interrupted while waiting, waiting for that
 * thread stops (as before) and the interrupt status is restored before this
 * method returns so callers can observe it.
 *
 * @param job the job to launch
 * @return the job state; {@code isRequestGranted()} reflects the value
 *     returned by the controller's {@code start} call
 * @throws RuntimeException if distributing job files fails, the working
 *     directory cannot be set up, or the job master cannot be started
 */
@Override
public Twister2JobState launch(JobAPI.Job job) {
  LOG.log(Level.INFO, "Launching job for cluster {0}", MPIContext.clusterType(config));
  Twister2JobState state = new Twister2JobState(false);
  if (!configsOK()) {
    return state;
  }
  // distributing bundle if not running in shared file system
  if (!MPIContext.isSharedFs(config)) {
    LOG.info("Configured as NON SHARED file system. " + "Running bootstrap procedure to distribute files...");
    try {
      this.distributeJobFiles(job);
    } catch (IOException e) {
      LOG.log(Level.SEVERE, "Error in distributing job files", e);
      // keep the original IOException as the cause so callers see the full trace
      throw new RuntimeException("Error in distributing job files", e);
    }
  } else {
    LOG.info("Configured as SHARED file system. " + "Skipping bootstrap procedure & setting up working directory");
    if (!setupWorkingDirectory(job.getJobId())) {
      throw new RuntimeException("Failed to setup the directory");
    }
  }
  config = Config.newBuilder().putAll(config).put(SchedulerContext.WORKING_DIRECTORY, jobWorkingDirectory).build();
  JobMaster jobMaster = null;
  Thread jmThread = null;
  if (JobMasterContext.isJobMasterUsed(config) && JobMasterContext.jobMasterRunsInClient(config)) {
    // Since the job master is running on client we can collect job information
    state.setDetached(false);
    try {
      int port = NetworkUtils.getFreePort();
      String hostAddress = JobMasterContext.jobMasterIP(config);
      if (hostAddress == null) {
        // no job master IP configured, fall back to the resolved host IP
        hostAddress = ResourceSchedulerUtils.getHostIP(config);
      }
      // add the port and ip to config
      config = Config.newBuilder().putAll(config).put("__job_master_port__", port).put("__job_master_ip__", hostAddress).build();
      LOG.log(Level.INFO, String.format("Starting the job master: %s:%d", hostAddress, port));
      JobMasterAPI.NodeInfo jobMasterNodeInfo = NodeInfoUtils.createNodeInfo(hostAddress, "default", "default");
      IScalerPerCluster nullScaler = new NullScaler();
      JobMasterAPI.JobMasterState initialState = JobMasterAPI.JobMasterState.JM_STARTED;
      NullTerminator nt = new NullTerminator();
      jobMaster = new JobMaster(config, "0.0.0.0", port, nt, job, jobMasterNodeInfo, nullScaler, initialState);
      jobMaster.addShutdownHook(true);
      jmThread = jobMaster.startJobMasterThreaded();
    } catch (Twister2Exception e) {
      LOG.log(Level.SEVERE, "Exception when starting Job master: ", e);
      throw new RuntimeException(e);
    }
  }
  // single-element array so the lambda below can publish the controller result
  final boolean[] start = { false };
  // now start the controller, which will get the resources and start
  Thread controllerThread = new Thread(() -> {
    IController controller = new MPIController(true);
    controller.initialize(config);
    start[0] = controller.start(job);
  });
  controllerThread.setName("MPIController");
  controllerThread.start();
  // remember an interrupt instead of dropping it; the flag is restored at the end
  // so that the second join below still waits exactly as it did before
  boolean interrupted = false;
  // wait until the controller finishes
  try {
    controllerThread.join();
  } catch (InterruptedException e) {
    interrupted = true;
  }
  // now lets wait on client
  if (jmThread != null && JobMasterContext.isJobMasterUsed(config) && JobMasterContext.jobMasterRunsInClient(config)) {
    try {
      jmThread.join();
    } catch (InterruptedException e) {
      interrupted = true;
    }
  }
  if (jobMaster != null && jobMaster.getDriver() != null) {
    // any driver state other than FAILED is reported as COMPLETED
    if (jobMaster.getDriver().getState() != DriverJobState.FAILED) {
      state.setJobstate(DriverJobState.COMPLETED);
    } else {
      state.setJobstate(jobMaster.getDriver().getState());
    }
    state.setFinalMessages(jobMaster.getDriver().getMessages());
  }
  state.setRequestGranted(start[0]);
  if (interrupted) {
    // hand the interruption back to the caller rather than swallowing it
    Thread.currentThread().interrupt();
  }
  return state;
}
use of edu.iu.dsc.tws.api.scheduler.Twister2JobState in project twister2 by DSC-SPIDAL.
the class MesosLauncher method launch.
/**
 * Launches the job on Mesos by running the framework against the Mesos
 * master URI read from the configuration.
 *
 * @param job the job handed to {@code runFramework}
 * @return a job state constructed with {@code false} and returned unmodified
 */
@Override
public Twister2JobState launch(JobAPI.Job job) {
  final Twister2JobState jobState = new Twister2JobState(false);

  runFramework(
      MesosContext.getMesosMasterUri(config),
      job);

  // TODO when to return true?
  return jobState;
}
use of edu.iu.dsc.tws.api.scheduler.Twister2JobState in project twister2 by DSC-SPIDAL.
the class ResourceAllocator method submitJob.
/**
 * Submit the job to the cluster.
 *
 * <p>Prepares the job files, uploads the job package, launches the job and
 * then finalizes the upload. The temporary job files are removed on every
 * exit path, including when uploading, launching or the local copy throws.
 *
 * <ul>
 *   <li>If the launch request is not granted, the launcher is closed and the
 *       upload is undone unless the local file system uploader is in use.</li>
 *   <li>If {@code uploader.complete()} fails, the job is killed, the request
 *       is marked as not granted and the launcher is closed.</li>
 *   <li>On success the launcher is closed; with checkpointing enabled and a
 *       non-local uploader the package is additionally saved through a
 *       {@code LocalFileSystemUploader}.</li>
 * </ul>
 *
 * @return the job state returned by the launcher, with
 *     {@code isRequestGranted()} set to {@code false} when the package
 *     transfer failed
 */
public Twister2JobState submitJob() {
  // check whether uploader and launcher classes are specified
  checkUploaderAndLauncherClasses();
  String jobDirectory = prepareJobFiles();
  try {
    // upload the job package
    IUploader uploader = uploadJobPackage();
    // initialize the launcher and launch the job
    ILauncher launcher = initializeLauncher();
    Twister2JobState launchState = launcher.launch(job);
    // clear job files and return
    if (!launchState.isRequestGranted()) {
      launcher.close();
      if (!SchedulerContext.isLocalFileSystemUploader(config)) {
        uploader.undo();
      }
      return launchState;
    }
    // clear job resources and return
    if (!uploader.complete()) {
      LOG.log(Level.SEVERE, "Transferring the job package failed." + "\n++++++++++++++++++ Aborting submission ++++++++++++++++++");
      launcher.killJob(job.getJobId());
      launchState.setRequestGranted(false);
      launcher.close();
      return launchState;
    }
    // job is submitted successfully
    // close the launcher
    launcher.close();
    // copy the job package to the local repository
    if (CheckpointingContext.isCheckpointingEnabled(config) && !SchedulerContext.isLocalFileSystemUploader(config)) {
      IUploader localUploader = new LocalFileSystemUploader();
      localUploader.initialize(config, job.getJobId());
      URI savedPackage = localUploader.uploadPackage(jobDirectory);
      LOG.info("Saved Job Package to Directory: " + savedPackage.getPath());
    }
    // constant-first equals: a missing cluster type must not raise a NullPointerException
    if (!CheckpointingContext.isCheckpointingEnabled(config) && "standalone".equals(SchedulerContext.clusterType(config)) && SchedulerContext.isLocalFileSystemUploader(config)) {
      uploader.undo();
    }
    return launchState;
  } finally {
    // clear temporary twister2 files, also when one of the steps above throws
    clearTemporaryFiles(jobDirectory);
  }
}
use of edu.iu.dsc.tws.api.scheduler.Twister2JobState in project twister2 by DSC-SPIDAL.
the class ResourceAllocator method resubmitJob.
/**
 * Resubmit the job to the cluster.
 *
 * <p>The job package is uploaded again only when the uploader is not the
 * local file system uploader; otherwise no uploader is involved. The job is
 * then launched and, if an upload took place, the upload is finalized.
 *
 * @return the job state returned by the launcher; its request-granted flag is
 *     set to {@code false} when finalizing the package transfer fails
 */
public Twister2JobState resubmitJob() {
  // make sure uploader and launcher classes are configured
  checkUploaderAndLauncherClasses();

  // a null uploader means the local uploader is in use and nothing is re-uploaded
  IUploader packageUploader =
      SchedulerContext.isLocalFileSystemUploader(config) ? null : uploadJobPackage();

  // bring up the launcher and hand it the job
  ILauncher jobLauncher = initializeLauncher();
  Twister2JobState result = jobLauncher.launch(job);

  if (result.isRequestGranted()) {
    boolean transferFailed = packageUploader != null && !packageUploader.complete();
    if (transferFailed) {
      // the package did not arrive: kill the job and report the request as denied
      LOG.log(Level.SEVERE, "Transferring the job package failed." + "\n++++++++++++++++++ Aborting submission ++++++++++++++++++");
      jobLauncher.killJob(job.getJobId());
      result.setRequestGranted(false);
    }
    // the launcher is closed whether or not the transfer succeeded
    jobLauncher.close();
    return result;
  }

  // launch was refused: release the launcher and roll back any upload
  jobLauncher.close();
  if (packageUploader != null) {
    packageUploader.undo();
  }
  return result;
}
Aggregations