Use of io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration in project hopsworks by logicalclocks.
The class JobController, method getConfiguration:
@TransactionAttribute(TransactionAttributeType.NEVER)
public JobConfiguration getConfiguration(Project project, JobType jobType, boolean useDefaultConfig) {
  Optional<DefaultJobConfiguration> defaultConfig;
  if (jobType.equals(JobType.SPARK) || jobType.equals(JobType.PYSPARK)) {
    /*
     * Spark and PySpark share a single DefaultJobConfiguration entry in the database, stored under the
     * PYSPARK type. The JobType is normally inferred from whether the application path ends in .jar or .py,
     * but the JobType must still be set when creating a DefaultJobConfiguration because it is part of the
     * primary key. For now, Spark and PySpark therefore share the same default configuration.
     */
    defaultConfig = project.getDefaultJobConfigurationCollection().stream()
      .filter(conf -> conf.getDefaultJobConfigurationPK().getType().equals(JobType.PYSPARK))
      .findFirst();
    defaultConfig.ifPresent(defaultJobConfiguration ->
      ((SparkJobConfiguration) defaultJobConfiguration.getJobConfig()).setMainClass(null));
  } else {
    defaultConfig = project.getDefaultJobConfigurationCollection().stream()
      .filter(conf -> conf.getDefaultJobConfigurationPK().getType().equals(jobType))
      .findFirst();
  }
  if (defaultConfig.isPresent()) {
    return defaultConfig.get().getJobConfig();
  } else if (useDefaultConfig) {
    switch (jobType) {
      case SPARK:
      case PYSPARK:
        return new SparkJobConfiguration();
      case FLINK:
        return new FlinkJobConfiguration();
      default:
        throw new IllegalArgumentException("Job type not supported: " + jobType);
    }
  } else {
    return null;
  }
}
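For context, a minimal sketch of how a caller might use this method; the injected jobController handle and the project lookup are assumptions for illustration:

// Hypothetical caller; "jobController" and "project" are assumed to be available.
JobConfiguration sparkConf = jobController.getConfiguration(project, JobType.SPARK, true);
JobConfiguration pySparkConf = jobController.getConfiguration(project, JobType.PYSPARK, true);
// Both calls resolve against the single PYSPARK-typed DefaultJobConfiguration entry, so if the
// project has a stored default, sparkConf and pySparkConf reflect the same stored settings.
// With useDefaultConfig == false and no stored entry, the method returns null instead of a
// freshly constructed SparkJobConfiguration.
JobConfiguration maybeNull = jobController.getConfiguration(project, JobType.SPARK, false);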
Use of io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration in project hopsworks by logicalclocks.
The class JobController, method inspectProgram:
@TransactionAttribute(TransactionAttributeType.NEVER)
public JobConfiguration inspectProgram(String path, Project project, Users user, JobType jobType)
  throws JobException {
  DistributedFileSystemOps udfso = null;
  try {
    String username = hdfsUsersBean.getHdfsUserName(project, user);
    udfso = dfs.getDfsOps(username);
    LOGGER.log(Level.FINE, "Inspecting executable job program by {0} at path: {1}",
      new Object[] {username, path});
    JobConfiguration jobConf = getConfiguration(project, jobType, true);
    switch (jobType) {
      case SPARK:
      case PYSPARK:
        if (Strings.isNullOrEmpty(path)
            || !(path.endsWith(".jar") || path.endsWith(".py") || path.endsWith(".ipynb"))) {
          throw new IllegalArgumentException("Path does not point to a .jar, .py or .ipynb file.");
        }
        return sparkController.inspectProgram((SparkJobConfiguration) jobConf, path, udfso);
      case FLINK:
        return jobConf;
      default:
        throw new IllegalArgumentException("Job type not supported: " + jobType);
    }
  } finally {
    if (udfso != null) {
      dfs.closeDfsClient(udfso);
    }
  }
}
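A hedged usage sketch; the HDFS path, project, and user values are assumptions for illustration:

// Hypothetical call; "project" and "user" are assumed to be resolved beforehand.
String appPath = "/Projects/demo/Resources/app.py";
JobConfiguration inferred = jobController.inspectProgram(appPath, project, user, JobType.PYSPARK);
// For SPARK/PYSPARK the path must end in .jar, .py, or .ipynb; otherwise an
// IllegalArgumentException is thrown before sparkController.inspectProgram is ever reached.
// The filesystem handle opened for the user is closed in the finally block either way.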
Use of io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration in project hopsworks by logicalclocks.
The class JobController, method putJob:
public Jobs putJob(Users user, Project project, Jobs job, JobConfiguration config) throws JobException {
  try {
    if (config.getJobType() == JobType.SPARK || config.getJobType() == JobType.PYSPARK) {
      SparkConfigurationUtil sparkConfigurationUtil = new SparkConfigurationUtil();
      SparkJobConfiguration sparkJobConfiguration = (SparkJobConfiguration) config;
      sparkConfigurationUtil.validateExecutorMemory(sparkJobConfiguration.getExecutorMemory(), settings);
    }
    job = jobFacade.put(user, project, config, job);
  } catch (IllegalStateException ise) {
    if (ise.getCause() instanceof JAXBException) {
      throw new JobException(RESTCodes.JobErrorCode.JOB_CONFIGURATION_CONVERT_TO_JSON_ERROR, Level.FINE,
        "Unable to create json from JobConfiguration", ise.getMessage(), ise);
    } else {
      throw ise;
    }
  }
  if (config.getSchedule() != null) {
    scheduler.scheduleJobPeriodic(job);
  }
  activityFacade.persistActivity(ActivityFacade.CREATED_JOB + getJobNameForActivity(job.getName()),
    project, user, ActivityFlag.JOB);
  return job;
}
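A hedged sketch of creating a new Spark job through this method; the app name, path, and the setExecutorMemory setter are assumptions for illustration (only setAppName, setMainClass, setAppPath, and setDefaultArgs appear elsewhere in this page):

// Hypothetical job creation; field values are illustrative, not taken from the source.
SparkJobConfiguration config = new SparkJobConfiguration();
config.setAppName("sales-aggregation");
config.setAppPath("/Projects/demo/Resources/aggregate.jar");
config.setExecutorMemory(2048); // setter name assumed; executor memory is validated before persisting
// Passing null for the Jobs argument creates a new job rather than updating an existing one,
// mirroring how configureJob below calls putJob.
Jobs created = jobController.putJob(user, project, null, config);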
Use of io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration in project hopsworks by logicalclocks.
The class FsJobManagerController, method configureJob:
private Jobs configureJob(Users user, Project project, SparkJobConfiguration sparkJobConfiguration,
  String jobName, String defaultArgs) throws JobException {
  if (sparkJobConfiguration == null) {
    // fall back to the default Spark job configuration, including default job sizing
    sparkJobConfiguration = new SparkJobConfiguration();
  }
  sparkJobConfiguration.setAppName(jobName);
  sparkJobConfiguration.setMainClass(Settings.SPARK_PY_MAINCLASS);
  sparkJobConfiguration.setAppPath(settings.getFSJobUtilPath());
  sparkJobConfiguration.setDefaultArgs(defaultArgs);
  return jobController.putJob(user, project, null, sparkJobConfiguration);
}
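A hedged sketch of an internal call to this private helper from within FsJobManagerController; the job name and arguments are assumptions for illustration:

// Hypothetical internal call; the name and defaultArgs string are illustrative only.
Jobs ingestionJob = configureJob(user, project, null, "fs_ingestion_job", "-op ingest");
// Passing null for sparkJobConfiguration makes the helper start from a fresh
// SparkJobConfiguration and point it at the feature-store utility application
// (settings.getFSJobUtilPath()) with the PySpark main class.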
Use of io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration in project hopsworks by logicalclocks.
The class ModelsController, method versionProgram:
public String versionProgram(Accessor accessor, String jobName, String kernelId, String modelName,
  int modelVersion) throws JobException, ServiceException {
  if (!Strings.isNullOrEmpty(jobName)) {
    // model trained in a job
    Jobs experimentJob = jobController.getJob(accessor.experimentProject, jobName);
    switch (experimentJob.getJobType()) {
      case SPARK:
      case PYSPARK: {
        SparkJobConfiguration sparkJobConf = (SparkJobConfiguration) experimentJob.getJobConfig();
        String suffix = sparkJobConf.getAppPath().substring(sparkJobConf.getAppPath().lastIndexOf("."));
        String relativePath =
          Settings.HOPS_MODELS_DATASET + "/" + modelName + "/" + modelVersion + "/program" + suffix;
        Path path = new Path(Utils.getProjectPath(accessor.modelProject.getName()) + relativePath);
        jobController.versionProgram(sparkJobConf.getAppPath(), accessor.udfso, path);
        return relativePath;
      }
      case PYTHON: {
        throw new IllegalArgumentException("Python jobs are unavailable in the community edition");
      }
      default:
        throw new IllegalArgumentException("Cannot version program for job type: " + experimentJob.getJobType());
    }
  } else {
    // model trained in Jupyter
    String relativePath = Settings.HOPS_MODELS_DATASET + "/" + modelName + "/" + modelVersion + "/program.ipynb";
    Path path = new Path(Utils.getProjectPath(accessor.modelProject.getName()) + relativePath);
    jupyterController.versionProgram(accessor.hdfsUser, kernelId, path, accessor.udfso);
    return relativePath;
  }
}
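A hedged sketch of the two call shapes; the accessor wiring, job name, and model names are assumptions for illustration:

// Hypothetical calls; "modelsController" and "accessor" are assumed to be wired up.
// Job-based run: the program suffix is taken from the job's appPath (.py here), so this
// returns something like Settings.HOPS_MODELS_DATASET + "/fraud_detector/3/program.py".
String fromJob = modelsController.versionProgram(accessor, "training_job", null, "fraud_detector", 3);
// Jupyter-based run: jobName is empty and kernelId identifies the notebook kernel, so the
// snapshot is always written as program.ipynb under the same model version directory.
String fromNotebook = modelsController.versionProgram(accessor, null, "kernel-1234", "fraud_detector", 3);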