Use of edu.iu.dsc.tws.api.scheduler.Twister2JobState in project twister2 by DSC-SPIDAL.
The class AuroraLauncher, method launch.
/**
 * Launch the processes according to the resource plan.
 *
 * @return the job state; its request-granted flag is true if the submission succeeded
 */
@Override
public Twister2JobState launch(JobAPI.Job job) {
  String jobName = job.getJobName();

  // construct the controller to submit the job to the Aurora scheduler
  String cluster = AuroraContext.auroraClusterName(config);
  String role = AuroraContext.role(config);
  String env = AuroraContext.environment(config);
  AuroraClientController controller =
      new AuroraClientController(cluster, role, env, jobName, true);

  // get the Aurora file name to execute when submitting the job
  String auroraFilename = AuroraContext.auroraScript(config);

  // get environment variables from config
  Map<AuroraField, String> bindings = constructEnvVariables(config, job);

  // convert the requested resources to environment variables,
  // overriding previous values from config
  JobAPI.ComputeResource computeResource = job.getComputeResource(0);
  bindings.put(AuroraField.JOB_NAME, jobName);
  bindings.put(AuroraField.AURORA_WORKER_CLASS, AuroraContext.auroraWorkerClass(config));
  bindings.put(AuroraField.CPUS_PER_WORKER, computeResource.getCpu() + "");
  bindings.put(AuroraField.RAM_PER_WORKER, ComputeResourceUtils.getRamInBytes(computeResource) + "");
  bindings.put(AuroraField.DISK_PER_WORKER, ComputeResourceUtils.getDiskInBytes(computeResource) + "");
  bindings.put(AuroraField.NUMBER_OF_WORKERS, job.getNumberOfWorkers() + "");
  logEnvVariables(bindings);

  return new Twister2JobState(controller.createJob(bindings, auroraFilename));
}
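For context, a minimal caller-side sketch of how the returned state might be consumed. The initialize(...) lifecycle call and the isRequestGranted() accessor are assumptions here, mirroring the setRequestGranted(true) call visible in KubernetesLauncher below; they are not confirmed by this excerpt.

// Hypothetical caller sketch; initialize(...) and isRequestGranted() are
// assumed, not confirmed from the excerpt.
AuroraLauncher launcher = new AuroraLauncher();
launcher.initialize(config);  // assumed launcher setup method taking a Config
Twister2JobState state = launcher.launch(job);
if (!state.isRequestGranted()) {
  LOG.severe("Aurora rejected the submission for job: " + job.getJobName());
}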
Use of edu.iu.dsc.tws.api.scheduler.Twister2JobState in project twister2 by DSC-SPIDAL.
The class Twister2Submitter, method restartJob.
/**
* Restart a Twister2 job
*/
public static Twister2JobState restartJob(String jobID, Config config) {
  // job package file name from the failed submission
  String prevJobDir = FsContext.uploaderJobDirectory(config) + File.separator + jobID;
  String jobPackage = prevJobDir + File.separator + SchedulerContext.jobPackageFileName(config);
  Path jobPackageFile = Paths.get(jobPackage);
  if (Files.notExists(jobPackageFile)) {
    LOG.severe("Job package file does not exist: " + jobPackage);
    return new Twister2JobState(false);
  }

  // unpack the previous job package to a temp directory
  Path tempDirPath;
  try {
    tempDirPath = Files.createTempDirectory(jobID);
  } catch (IOException e) {
    throw new Twister2RuntimeException("Cannot create temp directory", e);
  }
  // todo: we could exclude the user job file from being unpacked;
  // it is usually the largest file, so skipping it would be more efficient
  TarGzipPacker.unpack(jobPackageFile, tempDirPath);

  // load the Job object
  String unpackedJobDir = tempDirPath + File.separator + Context.JOB_ARCHIVE_DIRECTORY;
  String jobFile = unpackedJobDir + File.separator
      + SchedulerContext.createJobDescriptionFileName(jobID);
  JobAPI.Job job = JobUtils.readJobFile(jobFile);

  // load the previous configuration
  Config prevConfig = ConfigLoader.loadConfig(
      Context.twister2Home(config), unpackedJobDir, Context.clusterType(config));

  // delete the temp directory
  try {
    Files.delete(tempDirPath);
    LOG.info("Unpacked job directory deleted: " + tempDirPath);
  } catch (IOException e) {
    LOG.warning("Exception when deleting temp directory: " + tempDirPath);
  }

  URI packageURI = null;
  try {
    packageURI = new URI(prevJobDir);
  } catch (URISyntaxException e) {
    throw new Twister2RuntimeException("Cannot create URI for directory: " + prevJobDir, e);
  }

  // add the restore parameter and the local package path
  prevConfig = Config.newBuilder()
      .putAll(prevConfig)
      .put(CheckpointingContext.CHECKPOINTING_RESTORE_JOB, true)
      .put(SchedulerContext.TEMPORARY_PACKAGES_PATH, prevJobDir)
      .put(SchedulerContext.USER_JOB_FILE, job.getJobFormat().getJobFile())
      .put(SchedulerContext.JOB_PACKAGE_URI, packageURI)
      .put(Context.TWISTER2_HOME.getKey(), Context.twister2Home(config))
      .put(Context.JOB_ID, jobID)
      .put(Context.TWISTER2_CLUSTER_TYPE, Context.clusterType(config))
      .build();

  writeJobIdToFile(jobID);
  printJobInfo(job, prevConfig);

  // launch the launcher
  ResourceAllocator resourceAllocator = new ResourceAllocator(prevConfig, job);
  return resourceAllocator.resubmitJob();
}
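A hedged usage sketch of the restart path. The job ID is a placeholder, and building the base Config through ResourceAllocator.loadConfig is an assumption borrowed from the Beam examples below; a real deployment would load its cluster configuration here.

// Illustrative only: restart a previously submitted job by its ID.
// How the base Config is obtained is an assumption.
Config config = ResourceAllocator.loadConfig(new HashMap<>());
Twister2JobState state = Twister2Submitter.restartJob("my-job-id", config);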
Use of edu.iu.dsc.tws.api.scheduler.Twister2JobState in project beam by apache.
The class Twister2Runner, method run.
@Override
public PipelineResult run(Pipeline pipeline) {
  // create the execution environment, pass in the pipeline, and do the translation
  Twister2PipelineExecutionEnvironment env = new Twister2PipelineExecutionEnvironment(options);
  LOG.info("Translating pipeline to Twister2 program.");
  pipeline.replaceAll(getDefaultOverrides());

  // TODO(BEAM-10670): Use SDF read as default when we address performance issue.
  if (!ExperimentalOptions.hasExperiment(pipeline.getOptions(), "beam_fn_api")) {
    SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary(pipeline);
  }
  env.translate(pipeline);
  setupSystem(options);

  Map<String, Object> configMap = new HashMap<>();
  JobConfig jobConfig = new JobConfig();
  if (isLocalMode(options)) {
    options.setParallelism(1);
    configMap.put(SIDEINPUTS, extractNames(env.getSideInputs()));
    configMap.put(LEAVES, extractNames(env.getLeaves()));
    configMap.put(GRAPH, env.getTSetGraph());
    configMap.put("twister2.network.buffer.size", 32000);
    configMap.put("twister2.network.sendBuffer.count", options.getParallelism());
    LOG.warning("Twister2 local mode currently supports only a single worker");
  } else {
    jobConfig.put(SIDEINPUTS, extractNames(env.getSideInputs()));
    jobConfig.put(LEAVES, extractNames(env.getLeaves()));
    jobConfig.put(GRAPH, env.getTSetGraph());
  }

  Config config = ResourceAllocator.loadConfig(configMap);
  int workers = options.getParallelism();
  Twister2Job twister2Job = Twister2Job.newBuilder()
      .setJobName(options.getJobName())
      .setWorkerClass(BeamBatchWorker.class)
      .addComputeResource(options.getWorkerCPUs(), options.getRamMegaBytes(), workers)
      .setConfig(jobConfig)
      .build();

  Twister2JobState jobState;
  if (isLocalMode(options)) {
    jobState = LocalSubmitter.submitJob(twister2Job, config);
  } else {
    jobState = Twister2Submitter.submitJob(twister2Job, config);
  }
  return new Twister2PipelineResult(jobState);
}
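For context, a hedged sketch of driving this runner from a Beam program. Twister2PipelineOptions as the options interface, the parallelism value, and waitUntilFinish() support are assumptions; the excerpt only confirms setParallelism/getParallelism on the options and that run(...) returns a Twister2PipelineResult.

// Illustrative Beam driver: select the Twister2 runner via pipeline options.
Twister2PipelineOptions options =
    PipelineOptionsFactory.as(Twister2PipelineOptions.class);  // options type assumed
options.setRunner(Twister2Runner.class);
options.setParallelism(2);  // placeholder worker parallelism
Pipeline pipeline = Pipeline.create(options);
// ... apply transforms ...
PipelineResult result = pipeline.run();
result.waitUntilFinish();  // blocking-wait support on Twister2 is assumed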
Use of edu.iu.dsc.tws.api.scheduler.Twister2JobState in project beam by apache.
The class Twister2Runner, method runTest.
public PipelineResult runTest(Pipeline pipeline) {
  // create the execution environment, pass in the pipeline, and do the translation
  Twister2PipelineExecutionEnvironment env = new Twister2PipelineExecutionEnvironment(options);
  LOG.info("Translating pipeline to Twister2 program.");
  pipeline.replaceAll(getDefaultOverrides());

  // TODO(BEAM-10670): Use SDF read as default when we address performance issue.
  if (!ExperimentalOptions.hasExperiment(pipeline.getOptions(), "beam_fn_api")) {
    SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary(pipeline);
  }
  env.translate(pipeline);
  setupSystemTest(options);

  Map<String, Object> configMap = new HashMap<>();
  configMap.put(SIDEINPUTS, extractNames(env.getSideInputs()));
  configMap.put(LEAVES, extractNames(env.getLeaves()));
  configMap.put(GRAPH, env.getTSetGraph());
  configMap.put("twister2.network.buffer.size", 32000);
  configMap.put("twister2.network.sendBuffer.count", options.getParallelism());
  Config config = ResourceAllocator.loadConfig(configMap);

  JobConfig jobConfig = new JobConfig();
  int workers = options.getParallelism();
  Twister2Job twister2Job = Twister2Job.newBuilder()
      .setJobName(options.getJobName())
      .setWorkerClass(BeamBatchWorker.class)
      .addComputeResource(options.getWorkerCPUs(), options.getRamMegaBytes(), workers)
      .setConfig(jobConfig)
      .build();

  Twister2JobState jobState = LocalSubmitter.submitJob(twister2Job, config);
  Twister2PipelineResult result = new Twister2PipelineResult(jobState);
  // TODO: Need to fix the check for "RUNNING" once fix for this is done on Twister2 end.
  if (result.state == PipelineResult.State.FAILED) {
    throw new RuntimeException("Pipeline execution failed", jobState.getCause());
  }
  return result;
}
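Unlike run, runTest always submits through LocalSubmitter and fails fast by throwing when the resulting state is FAILED, so a test harness needs no extra state check. A hedged sketch; the fromOptions(...) factory follows the usual Beam runner convention and is an assumption here.

// Illustrative test-harness usage; fromOptions(...) is assumed, not confirmed.
Twister2Runner runner = Twister2Runner.fromOptions(options);
PipelineResult result = runner.runTest(pipeline);  // throws RuntimeException on a FAILED state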
Use of edu.iu.dsc.tws.api.scheduler.Twister2JobState in project twister2 by DSC-SPIDAL.
The class KubernetesLauncher, method launch.
/**
 * Launch the processes according to the resource plan.
 *
 * @return the job state; its request-granted flag is true if the submission succeeded
 */
@Override
public Twister2JobState launch(JobAPI.Job job) {
  Twister2JobState state = new Twister2JobState(false);
  if (!configParametersOK(job)) {
    return state;
  }

  String jobID = job.getJobId();
  String jobPackageFile = SchedulerContext.temporaryPackagesPath(config) + "/"
      + SchedulerContext.jobPackageFileName(config);
  File jobFile = new File(jobPackageFile);
  if (!jobFile.exists()) {
    LOG.log(Level.SEVERE, "Cannot access job package file: " + jobPackageFile
        + "\n++++++++++++++++++ Aborting submission ++++++++++++++++++");
    return state;
  }
  long jobFileSize = jobFile.length();

  // check all relevant entities on the Kubernetes master
  boolean allEntitiesOK = checkEntitiesForJob(job);
  if (!allEntitiesOK) {
    return state;
  }

  long jobSubmitTime = System.currentTimeMillis();
  String encodedNodeList = getNodeInfoList();
  RequestObjectBuilder.init(config, job.getJobId(), jobFileSize, jobSubmitTime, encodedNodeList);
  JobMasterRequestObject.init(config, job.getJobId());

  // initialize the services on the Kubernetes master
  boolean servicesCreated = initServices(jobID);
  if (!servicesCreated) {
    clearupWhenSubmissionFails(jobID);
    return state;
  }

  // create the ConfigMap
  V1ConfigMap configMap = RequestObjectBuilder.createConfigMap(job);
  boolean cmCreated = controller.createConfigMap(configMap);
  if (cmCreated) {
    jobSubmissionStatus.setConfigMapCreated(true);
  } else {
    LOG.severe("ConfigMap could not be created: " + configMap.getMetadata().getName()
        + "\n++++++++++++++++++ Aborting submission ++++++++++++++++++");
    clearupWhenSubmissionFails(jobID);
    return state;
  }

  // if a persistent volume is requested, create a persistent volume claim
  if (SchedulerContext.persistentVolumeRequested(config)) {
    if (!CheckpointingContext.startingFromACheckpoint(config)
        || !CheckpointingContext.isNfsUsed(config)) {
      boolean volumesSetup = initPersistentVolumeClaim(job);
      if (!volumesSetup) {
        clearupWhenSubmissionFails(jobID);
        return state;
      }
    }
  }

  // initialize the StatefulSets for this job
  boolean statefulSetInitialized = initStatefulSets(job);
  if (!statefulSetInitialized) {
    clearupWhenSubmissionFails(jobID);
    return state;
  }

  // start the Job Master locally if requested
  if (JobMasterContext.jobMasterRunsInClient(config)) {
    boolean jobMasterCompleted = startJobMasterOnClient(job);
    if (!jobMasterCompleted) {
      LOG.log(Level.SEVERE, "JobMaster cannot be started."
          + "\n++++++++++++++++++ Aborting submission ++++++++++++++++++");
      clearupWhenSubmissionFails(jobID);
      return state;
    }
  }

  if (KubernetesContext.logInClient(config)) {
    jobLogger = new JobLogger(namespace, job);
    jobLogger.start();
  }

  state.setRequestGranted(true);
  return state;
}
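The method follows a guard-and-cleanup pattern: each submission step either succeeds or triggers clearupWhenSubmissionFails to undo previously created Kubernetes objects before the ungranted state is returned. Distilled into a sketch; SubmissionStep and the helper method are hypothetical, not project code.

// Pattern sketch only: every failed step undoes prior Kubernetes state.
interface SubmissionStep {
  boolean run();  // returns false on failure
}

Twister2JobState submitAll(List<SubmissionStep> steps, String jobID) {
  Twister2JobState state = new Twister2JobState(false);
  for (SubmissionStep step : steps) {
    if (!step.run()) {
      clearupWhenSubmissionFails(jobID);  // undo services, ConfigMap, PVCs, StatefulSets
      return state;
    }
  }
  state.setRequestGranted(true);
  return state;
}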