use of io.hops.hopsworks.exceptions.JobException in project hopsworks by logicalclocks.
the class HopsUtils method cleanupJobDatasetResources.
public static void cleanupJobDatasetResources(Jobs job, String hdfsUsername, DistributedFsService dfs)
    throws JobException {
  String outPath = "hdfs://" + Utils.getProjectPath(job.getProject().getName()) + Settings.PROJECT_STAGING_DIR;
  String pyJobPath = outPath + "/jobs/" + job.getName();
  try {
    removeFiles(pyJobPath, hdfsUsername, dfs);
  } catch (DatasetException e) {
    String msg = "failed to cleanup job dataset resources";
    throw new JobException(RESTCodes.JobErrorCode.JOB_DELETION_ERROR, Level.INFO, msg, msg, e);
  }
}
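A hypothetical caller sketch, not taken from the Hopsworks source: it shows how this static helper might be invoked when a job is deleted. The deleteJob wrapper method is an assumption for illustration; the hdfsUsersBean and dfs beans mirror the injected beans used in the SparkController snippet further down, and imports are omitted to match the snippet style of this page.

// Hypothetical wrapper (assumption): remove the job's staging files under
// Resources/.../jobs/<jobName> when the job itself is deleted.
public void deleteJob(Jobs job, Users user) throws JobException {
  String hdfsUsername = hdfsUsersBean.getHdfsUserName(job.getProject(), user);
  HopsUtils.cleanupJobDatasetResources(job, hdfsUsername, dfs);
}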
use of io.hops.hopsworks.exceptions.JobException in project hopsworks by logicalclocks.
the class HopsUtils method cleanupExecutionDatasetResources.
public static void cleanupExecutionDatasetResources(Execution execution, String hdfsUsername,
    DistributedFsService dfs) throws JobException {
  String outPath = "hdfs://" + Utils.getProjectPath(execution.getJob().getProject().getName())
      + Settings.PROJECT_STAGING_DIR;
  String pyJobPath = outPath + "/jobs/" + execution.getJob().getName();
  String pyAppPath = pyJobPath + "/" + execution.getId() + ".py";
  try {
    removeFiles(pyAppPath, hdfsUsername, dfs);
  } catch (DatasetException e) {
    String msg = "failed to cleanup execution dataset resources";
    throw new JobException(RESTCodes.JobErrorCode.JOB_DELETION_ERROR, Level.INFO, msg, msg, e);
  }
}
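Another hypothetical caller sketch, again not from the Hopsworks source: unlike the job-level helper, this one removes only the per-execution generated .py file. The deleteExecution wrapper and the injected beans are assumptions; only HopsUtils.cleanupExecutionDatasetResources comes from the snippet above.

// Hypothetical wrapper (assumption): remove Resources/.../jobs/<jobName>/<executionId>.py
// when an execution record is deleted, leaving the rest of the job's staging dir intact.
public void deleteExecution(Execution execution, Users user) throws JobException {
  String hdfsUsername = hdfsUsersBean.getHdfsUserName(execution.getJob().getProject(), user);
  HopsUtils.cleanupExecutionDatasetResources(execution, hdfsUsername, dfs);
}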
use of io.hops.hopsworks.exceptions.JobException in project hopsworks by logicalclocks.
the class SparkController method createSparkJob.
@TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
private SparkJob createSparkJob(String username, Jobs job, Users user)
    throws JobException, GenericException, ServiceException {
  SparkJob sparkjob = null;
  try {
    // Set Hopsworks consul service domain, don't use the address, use the name
    String hopsworksRestEndpoint = "https://"
        + serviceDiscoveryController.constructServiceFQDNWithPort(
            ServiceDiscoveryController.HopsworksService.HOPSWORKS_APP);
    UserGroupInformation proxyUser = ugiService.getProxyUser(username);
    try {
      sparkjob = proxyUser.doAs((PrivilegedExceptionAction<SparkJob>) () ->
          new SparkJob(job, submitter, user, settings.getHadoopSymbolicLinkDir(),
              hdfsUsersBean.getHdfsUserName(job.getProject(), user), settings,
              kafkaBrokers.getKafkaBrokersString(), hopsworksRestEndpoint, servingConfig,
              serviceDiscoveryController));
    } catch (InterruptedException ex) {
      LOGGER.log(Level.SEVERE, null, ex);
    }
  } catch (IOException ex) {
    throw new JobException(RESTCodes.JobErrorCode.PROXY_ERROR, Level.SEVERE,
        "job: " + job.getId() + ", user:" + user.getUsername(), ex.getMessage(), ex);
  } catch (ServiceDiscoveryException ex) {
    throw new ServiceException(RESTCodes.ServiceErrorCode.SERVICE_NOT_FOUND, Level.SEVERE,
        "job: " + job.getId() + ", user:" + user.getUsername(), ex.getMessage(), ex);
  }
  if (sparkjob == null) {
    throw new GenericException(RESTCodes.GenericErrorCode.UNKNOWN_ERROR, Level.WARNING,
        "Could not instantiate job with name: " + job.getName() + " and id: " + job.getId(),
        "sparkjob object was null");
  }
  return sparkjob;
}
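The core of this method is the Hadoop proxy-user pattern: the SparkJob is constructed inside doAs so it runs with the project user's HDFS identity. Below is a minimal standalone sketch of that pattern using the plain Hadoop UserGroupInformation API, not the Hopsworks ugiService wrapper; the ProxyUserExample class and its String return value are assumptions for illustration.

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.security.UserGroupInformation;

// Sketch (assumption): impersonate `username` via a proxy UGI and run an action as that user.
// In SparkController the action builds a new SparkJob instead of returning a string.
public final class ProxyUserExample {
  public static String runAsProxy(String username) throws IOException, InterruptedException {
    UserGroupInformation proxyUser =
        UserGroupInformation.createProxyUser(username, UserGroupInformation.getLoginUser());
    return proxyUser.doAs((PrivilegedExceptionAction<String>) () ->
        UserGroupInformation.getCurrentUser().getUserName());
  }
}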
use of io.hops.hopsworks.exceptions.JobException in project hopsworks by logicalclocks.
the class SparkController method startJob.
/**
 * Start the Spark job as the given user.
 * <p/>
 * @param job the job to start
 * @param args runtime arguments passed to the application
 * @param user the user starting the job
 * @return the Execution created for this run
 * @throws IllegalStateException if Spark is not set up properly
 * @throws IOException if starting the Spark job fails
 */
public Execution startJob(final Jobs job, String args, final Users user)
    throws ServiceException, GenericException, JobException, ProjectException {
  // First: some parameter checking.
  sanityCheck(job, user);
  String username = hdfsUsersBean.getHdfsUserName(job.getProject(), user);
  SparkJobConfiguration sparkConfig = (SparkJobConfiguration) job.getJobConfig();
  String appPath = sparkConfig.getAppPath();
  if (job.getJobType().equals(JobType.PYSPARK)) {
    if (job.getProject().getPythonEnvironment() == null) {
      // Throw error in Hopsworks UI to notify user to enable Anaconda
      throw new JobException(RESTCodes.JobErrorCode.JOB_START_FAILED, Level.SEVERE,
          "PySpark job needs to have Python Anaconda environment enabled");
    }
  }
  SparkJob sparkjob = createSparkJob(username, job, user);
  Execution exec = sparkjob.requestExecutionId(args);
  if (job.getJobType().equals(JobType.PYSPARK) && appPath.endsWith(".ipynb")) {
    submitter.getExecutionFacade().updateState(exec, JobState.CONVERTING_NOTEBOOK);
    String pyAppPath = HopsUtils.prepJupyterNotebookConversion(exec, username, dfs);
    sparkConfig.setAppPath(pyAppPath);
    jupyterController.convertIPythonNotebook(username, appPath, job.getProject(), pyAppPath,
        JupyterController.NotebookConversion.PY);
  }
  submitter.startExecution(sparkjob, args);
  activityFacade.persistActivity(ActivityFacade.RAN_JOB + job.getName(), job.getProject(),
      user.asUser(), ActivityFlag.JOB);
  return exec;
}
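A hypothetical caller sketch, not from the Hopsworks source: a service-layer method that starts the job and hands the resulting Execution back to a REST resource. The runJob wrapper and the injected sparkController field are assumptions; startJob itself is the method shown above.

// Hypothetical wrapper (assumption): start the job and return the Execution to the client.
public Execution runJob(Jobs job, String args, Users user)
    throws ServiceException, GenericException, JobException, ProjectException {
  Execution exec = sparkController.startJob(job, args, user);
  // For .ipynb PySpark apps the execution passes through CONVERTING_NOTEBOOK
  // before the submitter starts it; other jobs go straight to submission.
  return exec;
}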
use of io.hops.hopsworks.exceptions.JobException in project hopsworks by logicalclocks.
the class AbstractExecutionController method stopExecution.
public Execution stopExecution(Execution execution) throws JobException {
  // An execution that is still initializing might not yet have an appId in Hopsworks
  if (execution.getAppId() != null && JobState.getRunningStates().contains(execution.getState())) {
    YarnClientWrapper yarnClientWrapper = null;
    try {
      yarnClientWrapper = ycs.getYarnClientSuper(settings.getConfiguration());
      yarnClientWrapper.getYarnClient().killApplication(ApplicationId.fromString(execution.getAppId()));
      yarnExecutionFinalizer.removeAllNecessary(execution);
      return executionFacade.findById(execution.getId())
          .orElseThrow(() -> new JobException(RESTCodes.JobErrorCode.JOB_EXECUTION_NOT_FOUND, FINE,
              "Execution: " + execution.getId()));
    } catch (IOException | YarnException ex) {
      LOGGER.log(Level.SEVERE, "Could not kill job for job: " + execution.getJob().getName()
          + " with appId: " + execution.getAppId(), ex);
      throw new JobException(RESTCodes.JobErrorCode.JOB_STOP_FAILED, Level.WARNING, ex.getMessage(), null, ex);
    } finally {
      ycs.closeYarnClient(yarnClientWrapper);
    }
  }
  return execution;
}
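The kill itself is a plain YARN client call. Below is a minimal standalone sketch of that call using the stock Hadoop YarnClient API directly; the KillAppExample class is an assumption for illustration, since ycs and YarnClientWrapper are Hopsworks-specific wrappers around the same client.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.exceptions.YarnException;

// Sketch (assumption): ask the ResourceManager to kill the given application,
// mirroring killApplication(...) in stopExecution above.
public final class KillAppExample {
  public static void kill(Configuration conf, String appId) throws IOException, YarnException {
    YarnClient yarnClient = YarnClient.createYarnClient();
    try {
      yarnClient.init(conf);
      yarnClient.start();
      yarnClient.killApplication(ApplicationId.fromString(appId));
    } finally {
      yarnClient.stop();  // mirrors ycs.closeYarnClient(yarnClientWrapper)
    }
  }
}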