Search in sources :

Example 6 with Twister2JobState

use of edu.iu.dsc.tws.api.scheduler.Twister2JobState in project twister2 by DSC-SPIDAL.

the class AuroraLauncher method launch.

/**
 * Submits the given job to the Aurora scheduler.
 *
 * <p>An {@code AuroraClientController} is built from the cluster name, role and environment
 * read from the configuration. The Aurora bindings are seeded from the configuration and then
 * overridden with job-specific values before the controller is asked to create the job.
 *
 * @param job the job to launch; only its first compute resource (index 0) is used for the
 *            per-worker CPU, RAM and disk bindings
 * @return a {@code Twister2JobState} constructed from the value returned by
 *         {@code controller.createJob}
 */
@Override
public Twister2JobState launch(JobAPI.Job job) {
    final String jobName = job.getJobName();

    // controller used to submit the job to the Aurora scheduler
    AuroraClientController controller = new AuroraClientController(
        AuroraContext.auroraClusterName(config),
        AuroraContext.role(config),
        AuroraContext.environment(config),
        jobName,
        true);

    // aurora script that is executed when the job is submitted
    final String auroraScript = AuroraContext.auroraScript(config);

    // start from the config-derived variables ...
    Map<AuroraField, String> envVars = constructEnvVariables(config, job);

    // ... then let the job's requested resources override whatever the config supplied
    JobAPI.ComputeResource resource = job.getComputeResource(0);
    envVars.put(AuroraField.JOB_NAME, jobName);
    envVars.put(AuroraField.AURORA_WORKER_CLASS, AuroraContext.auroraWorkerClass(config));
    envVars.put(AuroraField.CPUS_PER_WORKER, String.valueOf(resource.getCpu()));
    envVars.put(AuroraField.RAM_PER_WORKER,
        String.valueOf(ComputeResourceUtils.getRamInBytes(resource)));
    envVars.put(AuroraField.DISK_PER_WORKER,
        String.valueOf(ComputeResourceUtils.getDiskInBytes(resource)));
    envVars.put(AuroraField.NUMBER_OF_WORKERS, String.valueOf(job.getNumberOfWorkers()));
    logEnvVariables(envVars);

    return new Twister2JobState(controller.createJob(envVars, auroraScript));
}
Also used : Twister2JobState(edu.iu.dsc.tws.api.scheduler.Twister2JobState) JobAPI(edu.iu.dsc.tws.proto.system.job.JobAPI)

Example 7 with Twister2JobState

use of edu.iu.dsc.tws.api.scheduler.Twister2JobState in project twister2 by DSC-SPIDAL.

the class Twister2Submitter method restartJob.

/**
 * Restart a Twister2 job from the job package left behind by a previous submission.
 *
 * <p>The previous job package is unpacked into a temporary directory, the job description and
 * the previous configuration are loaded from it, and the temporary directory is then removed
 * (recursively, and also when loading fails). The previous configuration is extended with the
 * restore flag and the package locations before the job is resubmitted.
 *
 * @param jobID  ID of the job to restart; also the name of its directory under the uploader
 *               job directory
 * @param config current configuration, used to locate the previous job package and to supply
 *               the Twister2 home and cluster type
 * @return the state returned by {@code ResourceAllocator.resubmitJob()}, or a state created
 *         with {@code false} when the previous job package file does not exist
 * @throws Twister2RuntimeException if the temporary directory can not be created or the
 *                                  previous job directory can not be turned into a URI
 */
public static Twister2JobState restartJob(String jobID, Config config) {
    // job package filename from failed submission
    String prevJobDir = FsContext.uploaderJobDirectory(config) + File.separator + jobID;
    String jobPackage = prevJobDir + File.separator + SchedulerContext.jobPackageFileName(config);
    Path jobPackageFile = Paths.get(jobPackage);
    if (Files.notExists(jobPackageFile)) {
        LOG.severe("Job Package File does not exist: " + jobPackage);
        return new Twister2JobState(false);
    }
    // unpack the previous job package to a temp directory
    Path tempDirPath;
    try {
        tempDirPath = Files.createTempDirectory(jobID);
    } catch (IOException e) {
        throw new Twister2RuntimeException("Can not create temp directory", e);
    }
    JobAPI.Job job;
    Config prevConfig;
    try {
        // todo: we can exclude user-job-file from being unpacked to be more efficient
        TarGzipPacker.unpack(jobPackageFile, tempDirPath);
        // load Job object
        String unpackedJobDir = tempDirPath + File.separator + Context.JOB_ARCHIVE_DIRECTORY;
        String jobFile = unpackedJobDir + File.separator + SchedulerContext.createJobDescriptionFileName(jobID);
        job = JobUtils.readJobFile(jobFile);
        // load previous configurations
        prevConfig = ConfigLoader.loadConfig(Context.twister2Home(config), unpackedJobDir, Context.clusterType(config));
    } finally {
        // the unpacked files are only needed while loading; remove them even if loading failed
        deleteTempDirectory(tempDirPath);
    }
    URI packageURI = null;
    try {
        packageURI = new URI(prevJobDir);
    } catch (URISyntaxException e) {
        throw new Twister2RuntimeException("Can not ceate URI for directory: " + prevJobDir, e);
    }
    // add restore parameter
    // local packages path
    prevConfig = Config.newBuilder().putAll(prevConfig).put(CheckpointingContext.CHECKPOINTING_RESTORE_JOB, true).put(SchedulerContext.TEMPORARY_PACKAGES_PATH, prevJobDir).put(SchedulerContext.USER_JOB_FILE, job.getJobFormat().getJobFile()).put(SchedulerContext.JOB_PACKAGE_URI, packageURI).put(Context.TWISTER2_HOME.getKey(), Context.twister2Home(config)).put(Context.JOB_ID, jobID).put(Context.TWISTER2_CLUSTER_TYPE, Context.clusterType(config)).build();
    writeJobIdToFile(jobID);
    printJobInfo(job, prevConfig);
    // launch the launcher
    ResourceAllocator resourceAllocator = new ResourceAllocator(prevConfig, job);
    return resourceAllocator.resubmitJob();
}

/**
 * Best-effort recursive removal of the temporary unpack directory.
 *
 * <p>{@code Files.delete} refuses to remove a non-empty directory, so the tree is walked and
 * entries are deleted bottom-up. A failure is logged (with its cause) and not propagated.
 *
 * @param dir root of the temporary directory tree to remove
 */
private static void deleteTempDirectory(Path dir) {
    try {
        Files.walkFileTree(dir, new java.nio.file.SimpleFileVisitor<Path>() {
            @Override
            public java.nio.file.FileVisitResult visitFile(
                    Path file, java.nio.file.attribute.BasicFileAttributes attrs) throws IOException {
                Files.delete(file);
                return java.nio.file.FileVisitResult.CONTINUE;
            }

            @Override
            public java.nio.file.FileVisitResult postVisitDirectory(
                    Path visitedDir, IOException exc) throws IOException {
                if (exc != null) {
                    throw exc;
                }
                // all children are gone at this point, so the directory itself can be deleted
                Files.delete(visitedDir);
                return java.nio.file.FileVisitResult.CONTINUE;
            }
        });
        LOG.info("Unpacked job directory deleted: " + tempDirPath(dir));
    } catch (IOException e) {
        LOG.log(java.util.logging.Level.WARNING,
            "Exception when deleting temp directory: " + dir, e);
    }
}

/**
 * Returns the given path unchanged; exists only to keep the log statement above readable.
 *
 * @param dir the directory that was deleted
 * @return {@code dir}
 */
private static Path tempDirPath(Path dir) {
    return dir;
}
Also used : Path(java.nio.file.Path) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) Twister2JobState(edu.iu.dsc.tws.api.scheduler.Twister2JobState) Config(edu.iu.dsc.tws.api.config.Config) ResourceAllocator(edu.iu.dsc.tws.rsched.core.ResourceAllocator) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) JobAPI(edu.iu.dsc.tws.proto.system.job.JobAPI) URI(java.net.URI)

Example 8 with Twister2JobState

use of edu.iu.dsc.tws.api.scheduler.Twister2JobState in project beam by apache.

the class Twister2Runner method run.

/**
 * Translates the given pipeline into a Twister2 program and submits it as a Twister2 job.
 *
 * <p>In local mode the parallelism is forced to 1 and the side inputs, leaves, graph and
 * network buffer settings are passed through the submitter configuration; the job goes to
 * {@code LocalSubmitter}. Otherwise the side inputs, leaves and graph are carried in the
 * {@code JobConfig} and the job goes to {@code Twister2Submitter}.
 *
 * @param pipeline the pipeline to execute
 * @return a {@code Twister2PipelineResult} wrapping the job state returned by the submitter
 */
@Override
public PipelineResult run(Pipeline pipeline) {
    // the execution environment receives the pipeline and performs the translation
    Twister2PipelineExecutionEnvironment executionEnv =
        new Twister2PipelineExecutionEnvironment(options);
    LOG.info("Translating pipeline to Twister2 program.");
    pipeline.replaceAll(getDefaultOverrides());

    // TODO(BEAM-10670): Use SDF read as default when we address performance issue.
    boolean fnApiEnabled = ExperimentalOptions.hasExperiment(pipeline.getOptions(), "beam_fn_api");
    if (!fnApiEnabled) {
        SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary(pipeline);
    }

    executionEnv.translate(pipeline);
    setupSystem(options);

    Map submitterSettings = new HashMap();
    JobConfig jobSettings = new JobConfig();
    if (!isLocalMode(options)) {
        // cluster mode: ship the translated pipeline pieces inside the job config
        jobSettings.put(SIDEINPUTS, extractNames(executionEnv.getSideInputs()));
        jobSettings.put(LEAVES, extractNames(executionEnv.getLeaves()));
        jobSettings.put(GRAPH, executionEnv.getTSetGraph());
    } else {
        // local mode: single worker, everything goes through the submitter configuration
        options.setParallelism(1);
        submitterSettings.put(SIDEINPUTS, extractNames(executionEnv.getSideInputs()));
        submitterSettings.put(LEAVES, extractNames(executionEnv.getLeaves()));
        submitterSettings.put(GRAPH, executionEnv.getTSetGraph());
        submitterSettings.put("twister2.network.buffer.size", 32000);
        submitterSettings.put("twister2.network.sendBuffer.count", options.getParallelism());
        LOG.warning("Twister2 Local Mode currently only supports single worker");
    }

    Config submitterConfig = ResourceAllocator.loadConfig(submitterSettings);
    int workerCount = options.getParallelism();
    Twister2Job jobToSubmit = Twister2Job.newBuilder()
        .setJobName(options.getJobName())
        .setWorkerClass(BeamBatchWorker.class)
        .addComputeResource(options.getWorkerCPUs(), options.getRamMegaBytes(), workerCount)
        .setConfig(jobSettings)
        .build();

    Twister2JobState submittedState = isLocalMode(options)
        ? LocalSubmitter.submitJob(jobToSubmit, submitterConfig)
        : Twister2Submitter.submitJob(jobToSubmit, submitterConfig);
    return new Twister2PipelineResult(submittedState);
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Twister2JobState(edu.iu.dsc.tws.api.scheduler.Twister2JobState) Config(edu.iu.dsc.tws.api.config.Config) JobConfig(edu.iu.dsc.tws.api.JobConfig) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) JobConfig(edu.iu.dsc.tws.api.JobConfig) Twister2Job(edu.iu.dsc.tws.api.Twister2Job) PTransformOverride(org.apache.beam.sdk.runners.PTransformOverride)

Example 9 with Twister2JobState

use of edu.iu.dsc.tws.api.scheduler.Twister2JobState in project beam by apache.

the class Twister2Runner method runTest.

/**
 * Translates the given pipeline and runs it through {@code LocalSubmitter}, failing loudly
 * when the resulting pipeline state is {@code FAILED}.
 *
 * @param pipeline the pipeline to execute
 * @return a {@code Twister2PipelineResult} wrapping the job state returned by the submitter
 * @throws RuntimeException if the result state is {@code PipelineResult.State.FAILED}; the
 *                          cause is taken from the job state
 */
public PipelineResult runTest(Pipeline pipeline) {
    // the execution environment receives the pipeline and performs the translation
    Twister2PipelineExecutionEnvironment executionEnv =
        new Twister2PipelineExecutionEnvironment(options);
    LOG.info("Translating pipeline to Twister2 program.");
    pipeline.replaceAll(getDefaultOverrides());

    // TODO(BEAM-10670): Use SDF read as default when we address performance issue.
    boolean fnApiEnabled = ExperimentalOptions.hasExperiment(pipeline.getOptions(), "beam_fn_api");
    if (!fnApiEnabled) {
        SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary(pipeline);
    }

    executionEnv.translate(pipeline);
    setupSystemTest(options);

    // the translated pipeline pieces and network settings go through the submitter configuration
    Map submitterSettings = new HashMap();
    submitterSettings.put(SIDEINPUTS, extractNames(executionEnv.getSideInputs()));
    submitterSettings.put(LEAVES, extractNames(executionEnv.getLeaves()));
    submitterSettings.put(GRAPH, executionEnv.getTSetGraph());
    submitterSettings.put("twister2.network.buffer.size", 32000);
    submitterSettings.put("twister2.network.sendBuffer.count", options.getParallelism());
    Config submitterConfig = ResourceAllocator.loadConfig(submitterSettings);

    JobConfig jobSettings = new JobConfig();
    int workerCount = options.getParallelism();
    Twister2Job jobToSubmit = Twister2Job.newBuilder()
        .setJobName(options.getJobName())
        .setWorkerClass(BeamBatchWorker.class)
        .addComputeResource(options.getWorkerCPUs(), options.getRamMegaBytes(), workerCount)
        .setConfig(jobSettings)
        .build();

    Twister2JobState submittedState = LocalSubmitter.submitJob(jobToSubmit, submitterConfig);
    Twister2PipelineResult pipelineResult = new Twister2PipelineResult(submittedState);

    // TODO: Need to fix the check for "RUNNING" once fix for this is done on Twister2 end.
    if (PipelineResult.State.FAILED == pipelineResult.state) {
        throw new RuntimeException("Pipeline execution failed", submittedState.getCause());
    }
    return pipelineResult;
}
Also used : Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Twister2JobState(edu.iu.dsc.tws.api.scheduler.Twister2JobState) Config(edu.iu.dsc.tws.api.config.Config) JobConfig(edu.iu.dsc.tws.api.JobConfig) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) JobConfig(edu.iu.dsc.tws.api.JobConfig) Twister2Job(edu.iu.dsc.tws.api.Twister2Job)

Example 10 with Twister2JobState

use of edu.iu.dsc.tws.api.scheduler.Twister2JobState in project twister2 by DSC-SPIDAL.

the class KubernetesLauncher method launch.

/**
 * Launches the given job on Kubernetes.
 *
 * <p>The steps run in a fixed order: parameter check, job package file check, entity check,
 * request-object initialization, services, ConfigMap, optional persistent volume claim,
 * StatefulSets, optional client-side Job Master and optional client-side logging. From the
 * services step onwards, a failing step triggers {@code clearupWhenSubmissionFails} before
 * returning.
 *
 * @param job the job to submit
 * @return a {@code Twister2JobState} that was created with {@code false}; its request-granted
 *         flag is set to {@code true} only when every step above succeeded
 */
@Override
public Twister2JobState launch(JobAPI.Job job) {
    Twister2JobState jobState = new Twister2JobState(false);
    if (!configParametersOK(job)) {
        return jobState;
    }

    final String jobID = job.getJobId();

    // the job package must be readable locally before anything is created on the cluster
    String packagePath = SchedulerContext.temporaryPackagesPath(config) + "/" + SchedulerContext.jobPackageFileName(config);
    File packageFile = new File(packagePath);
    if (!packageFile.exists()) {
        LOG.log(Level.SEVERE, "Can not access job package file: " + packagePath + "\n++++++++++++++++++ Aborting submission ++++++++++++++++++");
        return jobState;
    }
    long packageSize = packageFile.length();

    // check all relevant entities on Kubernetes master
    if (!checkEntitiesForJob(job)) {
        return jobState;
    }

    long submitTime = System.currentTimeMillis();
    String nodeInfoList = getNodeInfoList();
    RequestObjectBuilder.init(config, jobID, packageSize, submitTime, nodeInfoList);
    JobMasterRequestObject.init(config, jobID);

    // initialize the service in Kubernetes master
    if (!initServices(jobID)) {
        clearupWhenSubmissionFails(jobID);
        return jobState;
    }

    // create the ConfigMap
    V1ConfigMap jobConfigMap = RequestObjectBuilder.createConfigMap(job);
    if (!controller.createConfigMap(jobConfigMap)) {
        LOG.severe("Following ConfigMap could not be created: " + jobConfigMap.getMetadata().getName() + "\n++++++++++++++++++ Aborting submission ++++++++++++++++++");
        clearupWhenSubmissionFails(jobID);
        return jobState;
    }
    jobSubmissionStatus.setConfigMapCreated(true);

    // if persistent volume is requested, create a persistent volume claim,
    // except when starting from a checkpoint with NFS in use
    if (SchedulerContext.persistentVolumeRequested(config)
        && !(CheckpointingContext.startingFromACheckpoint(config)
            && CheckpointingContext.isNfsUsed(config))) {
        if (!initPersistentVolumeClaim(job)) {
            clearupWhenSubmissionFails(jobID);
            return jobState;
        }
    }

    // initialize StatefulSets for this job
    if (!initStatefulSets(job)) {
        clearupWhenSubmissionFails(jobID);
        return jobState;
    }

    // start the Job Master locally if requested
    if (JobMasterContext.jobMasterRunsInClient(config) && !startJobMasterOnClient(job)) {
        LOG.log(Level.SEVERE, "JobMaster can not be started. " + "\n++++++++++++++++++ Aborting submission ++++++++++++++++++");
        clearupWhenSubmissionFails(jobID);
        return jobState;
    }

    // optionally start the job logger in the client
    if (KubernetesContext.logInClient(config)) {
        jobLogger = new JobLogger(namespace, job);
        jobLogger.start();
    }

    jobState.setRequestGranted(true);
    return jobState;
}
Also used : Twister2JobState(edu.iu.dsc.tws.api.scheduler.Twister2JobState) JobLogger(edu.iu.dsc.tws.rsched.schedulers.k8s.logger.JobLogger) File(java.io.File) V1ConfigMap(io.kubernetes.client.openapi.models.V1ConfigMap)

Aggregations

Twister2JobState (edu.iu.dsc.tws.api.scheduler.Twister2JobState): 12; Config (edu.iu.dsc.tws.api.config.Config): 4; Twister2Job (edu.iu.dsc.tws.api.Twister2Job): 3; Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException): 3; IOException (java.io.IOException): 3; JobConfig (edu.iu.dsc.tws.api.JobConfig): 2; ILauncher (edu.iu.dsc.tws.api.scheduler.ILauncher): 2; IUploader (edu.iu.dsc.tws.api.scheduler.IUploader): 2; JobAPI (edu.iu.dsc.tws.proto.system.job.JobAPI): 2; URI (java.net.URI): 2; HashMap (java.util.HashMap): 2; LinkedHashMap (java.util.LinkedHashMap): 2; Map (java.util.Map): 2; IScalerPerCluster (edu.iu.dsc.tws.api.driver.IScalerPerCluster): 1; NullScaler (edu.iu.dsc.tws.api.driver.NullScaler): 1; Twister2Exception (edu.iu.dsc.tws.api.exceptions.Twister2Exception): 1; IController (edu.iu.dsc.tws.api.scheduler.IController): 1; JobMaster (edu.iu.dsc.tws.master.server.JobMaster): 1; JobMasterAPI (edu.iu.dsc.tws.proto.jobmaster.JobMasterAPI): 1; ResourceAllocator (edu.iu.dsc.tws.rsched.core.ResourceAllocator): 1