Use of io.hops.hopsworks.exceptions.JobException in project hopsworks by logicalclocks.
In class AbstractExecutionController, the method getLog:
// ====================================================================================================================
// Execution logs
// ====================================================================================================================
@Override
public JobLogDTO getLog(Execution execution, JobLogDTO.LogType type) throws JobException {
  if (!execution.getState().isFinalState()) {
    throw new JobException(RESTCodes.JobErrorCode.JOB_EXECUTION_INVALID_STATE, Level.FINE, "Job still running.");
  }
  JobLogDTO dto = new JobLogDTO(type);
  DistributedFileSystemOps dfso = null;
  try {
    dfso = dfs.getDfsOps();
    String message;
    String stdPath;
    String path = (dto.getType() == JobLogDTO.LogType.OUT ? execution.getStdoutPath() : execution.getStderrPath());
    JobLogDTO.Retriable retriable = (dto.getType() == JobLogDTO.LogType.OUT
      ? JobLogDTO.Retriable.RETRIEABLE_OUT : JobLogDTO.Retriable.RETRIABLE_ERR);
    boolean status = (dto.getType() != JobLogDTO.LogType.OUT
      || execution.getFinalStatus().equals(JobFinalStatus.SUCCEEDED));
    String hdfsPath = REMOTE_PROTOCOL + path;
    if (!Strings.isNullOrEmpty(path) && dfso.exists(hdfsPath)) {
      Project project = execution.getJob().getProject();
      stdPath = path.split(project.getName())[1];
      int fileIndex = stdPath.lastIndexOf('/');
      String stdDirPath = stdPath.substring(0, fileIndex);
      dto.setPath(Settings.DIR_ROOT + File.separator + project.getName() + stdDirPath + File.separator
        + "std" + dto.getType().getName().toLowerCase() + ".log");
      if (dfso.listStatus(new org.apache.hadoop.fs.Path(hdfsPath))[0].getLen() > settings.getJobLogsDisplaySize()) {
        dto.setLog("Log is too big to display in browser. Click on the download button to get the log file.");
      } else {
        try (InputStream input = dfso.open(hdfsPath)) {
          message = IOUtils.toString(input, "UTF-8");
        }
        dto.setLog(message.isEmpty() ? "No information." : message);
        if (message.isEmpty() && execution.getState().isFinalState() && execution.getAppId() != null && status) {
          dto.setRetriable(retriable);
        }
      }
    } else {
      String logMsg = "No log available.";
      if (execution.getJob().getJobType() == JobType.PYTHON) {
        logMsg += " If job failed instantaneously, please check again later or try running the job again."
          + " Log aggregation can take a few minutes to complete.";
        dto.setLog(logMsg);
      }
      if (execution.getState().isFinalState() && execution.getAppId() != null && status) {
        dto.setRetriable(retriable);
      }
    }
  } catch (IOException ex) {
    LOGGER.log(Level.SEVERE, null, ex);
  } finally {
    if (dfso != null) {
      dfso.close();
    }
  }
  return dto;
}
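For illustration only, a minimal caller sketch (not part of the hopsworks codebase): it injects a controller backing AbstractExecutionController, requests the stdout log, and handles the JobException thrown while the execution has not reached a final state. The ExecutionController type name, the JobLogDTO getter, and the constructor injection are assumptions; only the JobException import is confirmed by the snippet above.

// Hypothetical caller of getLog(); hopsworks types other than JobException are
// referenced as they appear in the snippet above (imports elided/assumed).
import io.hops.hopsworks.exceptions.JobException;

public class ExecutionLogClientSketch {

  // Stand-in for the interface implemented by AbstractExecutionController; normally injected with @EJB.
  private final ExecutionController executionController;

  public ExecutionLogClientSketch(ExecutionController executionController) {
    this.executionController = executionController;
  }

  /** Fetch stdout for a finished execution, or return a readable message otherwise. */
  public String fetchStdout(Execution execution) {
    try {
      JobLogDTO log = executionController.getLog(execution, JobLogDTO.LogType.OUT);
      return log.getLog(); // assumed getter matching the setLog() calls above
    } catch (JobException e) {
      // Thrown with JOB_EXECUTION_INVALID_STATE while the execution is still running.
      return "Logs not available yet: " + e.getMessage();
    }
  }
}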
Use of io.hops.hopsworks.exceptions.JobException in project hopsworks by logicalclocks.
In class AbstractExecutionController, the method start:
@Override
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public Execution start(Jobs job, String args, Users user)
    throws JobException, GenericException, ServiceException, ProjectException {
  // If the limit for the number of executions for this job has been reached, return an error
  checkExecutionLimit(job);
  // A user should not be able to start a job if the project is prepaid and it doesn't have quota.
  if (job.getProject().getPaymentType().equals(PaymentType.PREPAID)) {
    YarnProjectsQuota projectQuota = yarnProjectsQuotaFacade.findByProjectName(job.getProject().getName());
    if (projectQuota == null || projectQuota.getQuotaRemaining() <= 0) {
      throw new ProjectException(RESTCodes.ProjectErrorCode.PROJECT_QUOTA_ERROR, Level.FINE);
    }
  }
  // If enabled and all nodemanagers are offline, throw a JobException
  if (settings.isCheckingForNodemanagerStatusEnabled() && job.getJobType() != JobType.PYTHON) {
    hostServicesFacade.findServices("nodemanager").stream()
      .filter(s -> s.getStatus() == ServiceStatus.Started)
      .findFirst()
      .orElseThrow(() -> new JobException(RESTCodes.JobErrorCode.NODEMANAGERS_OFFLINE, Level.SEVERE));
  }
  Execution exec;
  switch (job.getJobType()) {
    case FLINK:
      // Materialize certs
      return flinkController.startJob(job, user);
    case SPARK:
      exec = sparkController.startJob(job, args, user);
      if (exec == null) {
        throw new IllegalArgumentException("Problem getting execution object for: " + job.getJobType());
      }
      SparkJobConfiguration config = (SparkJobConfiguration) job.getJobConfig();
      String path = config.getAppPath();
      String pathOfInode;
      try {
        pathOfInode = Utils.prepPath(path);
      } catch (UnsupportedEncodingException ex) {
        throw new JobException(RESTCodes.JobErrorCode.JOB_START_FAILED, Level.FINE,
          "Job name: " + job.getName(), ex.getMessage(), ex);
      }
      Inode inode = inodeController.getInodeAtPath(pathOfInode);
      String inodeName = inode.getInodePK().getName();
      activityFacade.persistActivity(ActivityFacade.EXECUTED_JOB + inodeName, job.getProject(), user, ActivityFlag.JOB);
      break;
    case PYSPARK:
      if (job.getProject().getPythonEnvironment() == null) {
        throw new ProjectException(RESTCodes.ProjectErrorCode.ANACONDA_NOT_ENABLED, Level.FINEST);
      }
      exec = sparkController.startJob(job, args, user);
      if (exec == null) {
        throw new IllegalArgumentException("Error while getting execution object for: " + job.getJobType());
      }
      break;
    default:
      throw new GenericException(RESTCodes.GenericErrorCode.UNKNOWN_ACTION, Level.FINE,
        "Unsupported job type: " + job.getJobType());
  }
  return exec;
}
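A hypothetical invocation of start(), showing how a caller might handle the exceptions declared above. The controller type, the package of the sibling exception classes (assumed to live alongside JobException), and passing null as args are assumptions made for this sketch.

// Hypothetical caller of start(); exception types are those declared by the method above,
// with import paths assumed to mirror io.hops.hopsworks.exceptions.JobException.
import io.hops.hopsworks.exceptions.GenericException;
import io.hops.hopsworks.exceptions.JobException;
import io.hops.hopsworks.exceptions.ProjectException;
import io.hops.hopsworks.exceptions.ServiceException;

public class JobStartSketch {

  private final ExecutionController executionController; // placeholder for the concrete controller

  public JobStartSketch(ExecutionController executionController) {
    this.executionController = executionController;
  }

  public Execution run(Jobs job, Users user) throws GenericException, ServiceException {
    try {
      // null args is assumed to fall back to the arguments stored in the job configuration
      return executionController.start(job, null, user);
    } catch (ProjectException e) {
      // e.g. PROJECT_QUOTA_ERROR (prepaid project out of quota) or ANACONDA_NOT_ENABLED (PySpark without an environment)
      throw new IllegalStateException("Project cannot run jobs: " + e.getMessage(), e);
    } catch (JobException e) {
      // e.g. NODEMANAGERS_OFFLINE, JOB_START_FAILED, or the per-job execution limit being reached
      throw new IllegalStateException("Job could not be started: " + e.getMessage(), e);
    }
  }
}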
Use of io.hops.hopsworks.exceptions.JobException in project hopsworks by logicalclocks.
In class FlinkController, the method startJob:
public Execution startJob(final Jobs job, final Users user)
    throws GenericException, JobException, ServiceException {
  // First: some parameter checking.
  if (job == null) {
    throw new NullPointerException("Cannot run a null job.");
  } else if (user == null) {
    throw new NullPointerException("Cannot run a job as a null user.");
  } else if (job.getJobType() != JobType.FLINK) {
    throw new IllegalArgumentException("Job configuration is not a Flink job configuration.");
  }
  // Set Hopsworks consul service domain, don't use the address, use the name
  String username = hdfsUsersBean.getHdfsUserName(job.getProject(), user);
  FlinkJob flinkjob = null;
  try {
    String hopsworksRestEndpoint = "https://"
      + serviceDiscoveryController.constructServiceFQDNWithPort(ServiceDiscoveryController.HopsworksService.HOPSWORKS_APP);
    UserGroupInformation proxyUser = ugiService.getProxyUser(username);
    try {
      flinkjob = proxyUser.doAs((PrivilegedExceptionAction<FlinkJob>) () ->
        new FlinkJob(job, submitter, user, hdfsUsersBean.getHdfsUserName(job.getProject(), job.getCreator()),
          settings, kafkaBrokers.getKafkaBrokersString(), hopsworksRestEndpoint, servingConfig,
          serviceDiscoveryController));
    } catch (InterruptedException ex) {
      LOGGER.log(Level.SEVERE, null, ex);
    }
  } catch (IOException ex) {
    throw new JobException(RESTCodes.JobErrorCode.PROXY_ERROR, Level.SEVERE,
      "job: " + job.getId() + ", user:" + user.getUsername(), ex.getMessage(), ex);
  } catch (ServiceDiscoveryException ex) {
    throw new ServiceException(RESTCodes.ServiceErrorCode.SERVICE_NOT_FOUND, Level.SEVERE,
      "job: " + job.getId() + ", user:" + user.getUsername(), ex.getMessage(), ex);
  }
  if (flinkjob == null) {
    throw new GenericException(RESTCodes.GenericErrorCode.UNKNOWN_ERROR, Level.WARNING,
      "Could not instantiate job with name: " + job.getName() + " and id: " + job.getId(), "sparkjob object was null");
  }
  Execution execution = flinkjob.requestExecutionId();
  submitter.startExecution(flinkjob);
  activityFacade.persistActivity(ActivityFacade.RAN_JOB, job.getProject(), user.asUser(), ActivityFlag.JOB);
  return execution;
}
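The doAs call above relies on Hadoop's proxy-user mechanism. A standalone sketch of that pattern follows, using the public UserGroupInformation API directly instead of the project's ugiService; the example username is invented, and proxying only works if the cluster's hadoop.proxyuser settings allow the service user to impersonate.

// Standalone sketch of the proxy-user pattern used above (Hadoop's UserGroupInformation).
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.security.UserGroupInformation;

public class ProxyUserSketch {

  /** Run an action as the given HDFS user, proxied through the currently logged-in service user. */
  public static <T> T runAs(String hdfsUsername, PrivilegedExceptionAction<T> action)
      throws IOException, InterruptedException {
    UserGroupInformation proxyUser =
        UserGroupInformation.createProxyUser(hdfsUsername, UserGroupInformation.getLoginUser());
    return proxyUser.doAs(action);
  }

  public static void main(String[] args) throws Exception {
    // Example (invented username in the project__user format): report which user the action sees.
    String current = runAs("demo__meb10000",
        () -> UserGroupInformation.getCurrentUser().getUserName());
    System.out.println("Action executed as: " + current);
  }
}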
Use of io.hops.hopsworks.exceptions.JobException in project hopsworks by logicalclocks.
In class JobController, the method putJob:
public Jobs putJob(Users user, Project project, Jobs job, JobConfiguration config) throws JobException {
  try {
    if (config.getJobType() == JobType.SPARK || config.getJobType() == JobType.PYSPARK) {
      SparkConfigurationUtil sparkConfigurationUtil = new SparkConfigurationUtil();
      SparkJobConfiguration sparkJobConfiguration = (SparkJobConfiguration) config;
      sparkConfigurationUtil.validateExecutorMemory(sparkJobConfiguration.getExecutorMemory(), settings);
    }
    job = jobFacade.put(user, project, config, job);
  } catch (IllegalStateException ise) {
    if (ise.getCause() instanceof JAXBException) {
      throw new JobException(RESTCodes.JobErrorCode.JOB_CONFIGURATION_CONVERT_TO_JSON_ERROR, Level.FINE,
        "Unable to create json from JobConfiguration", ise.getMessage(), ise);
    } else {
      throw ise;
    }
  }
  if (config.getSchedule() != null) {
    scheduler.scheduleJobPeriodic(job);
  }
  activityFacade.persistActivity(ActivityFacade.CREATED_JOB + getJobNameForActivity(job.getName()), project, user, ActivityFlag.JOB);
  return job;
}
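A hypothetical caller of putJob() creating a new Spark job: the setter names mirror the getters used elsewhere on this page, the executor-memory unit and the idea that a null Jobs argument creates a new entry are assumptions, and the path and name values are placeholders.

// Hypothetical caller of putJob(); concrete configuration values are invented for illustration.
import io.hops.hopsworks.exceptions.JobException;

public class CreateJobSketch {

  private final JobController jobController; // placeholder for the injected controller

  public CreateJobSketch(JobController jobController) {
    this.jobController = jobController;
  }

  public Jobs createSparkJob(Users user, Project project) throws JobException {
    SparkJobConfiguration config = new SparkJobConfiguration();
    // Setters assumed to match the getAppPath()/getExecutorMemory() getters used above.
    config.setAppPath("/Projects/demo/Resources/app.jar"); // placeholder path
    config.setExecutorMemory(2048);                        // assumed to be MB; validated by putJob against settings
    // Passing null for the Jobs entity is assumed to create a new job rather than update an existing one.
    return jobController.putJob(user, project, null, config);
  }
}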
Use of io.hops.hopsworks.exceptions.JobException in project hopsworks by logicalclocks.
In class FeaturegroupService, the method deleteFeaturegroupContents:
/**
 * Endpoint for deleting the contents of the featuregroup.
 * As HopsHive does not support ACID transactions, the way to delete the contents of a table is to drop the table and
 * re-create it, which will also drop the featuregroup metadata due to the ON DELETE CASCADE foreign key rule.
 * This method stores the metadata of the featuregroup before deleting it and then re-creates the featuregroup with
 * the same metadata.
 * <p>
 * This endpoint is typically used when the user wants to insert data into a featuregroup with the write mode
 * 'overwrite' instead of the default mode 'append'.
 *
 * @param featuregroupId the id of the featuregroup
 * @throws FeaturestoreException
 * @throws HopsSecurityException
 */
@POST
@Path("/{featuregroupId}/clear")
@Produces(MediaType.APPLICATION_JSON)
@AllowedProjectRoles({ AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST })
@JWTRequired(acceptedTokens = { Audience.API, Audience.JOB }, allowedUserRoles = { "HOPS_ADMIN", "HOPS_USER", "HOPS_SERVICE_USER" })
@ApiKeyRequired(acceptedScopes = { ApiScope.FEATURESTORE }, allowedUserRoles = { "HOPS_ADMIN", "HOPS_USER", "HOPS_SERVICE_USER" })
@ApiOperation(value = "Delete featuregroup contents")
public Response deleteFeaturegroupContents(@Context SecurityContext sc, @Context HttpServletRequest req,
    @ApiParam(value = "Id of the featuregroup", required = true) @PathParam("featuregroupId") Integer featuregroupId)
    throws FeaturestoreException, ServiceException, KafkaException, SchemaException, ProjectException, UserException {
  verifyIdProvided(featuregroupId);
  Users user = jWTHelper.getUserPrincipal(sc);
  // Verify that the user has the data-owner role or is the creator of the featuregroup
  Featuregroup featuregroup = featuregroupController.getFeaturegroupById(featurestore, featuregroupId);
  try {
    FeaturegroupDTO newFeatureGroup = featuregroupController.clearFeaturegroup(featuregroup, project, user);
    return Response.ok().entity(newFeatureGroup).build();
  } catch (SQLException | IOException | ProvenanceException | HopsSecurityException | JobException e) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.COULD_NOT_CLEAR_FEATUREGROUP, Level.SEVERE,
      "project: " + project.getName() + ", featurestoreId: " + featurestore.getId()
      + ", featuregroupId: " + featuregroupId, e.getMessage(), e);
  }
}
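Since this is a plain JAX-RS endpoint, a client-side sketch of calling it could look like the following. The base URL, project/featurestore/featuregroup ids, API-key handling, and the exact resource path are placeholders assumed from the usual Hopsworks REST layout, not taken from the snippet.

// Hypothetical REST call to the clear endpoint above using the standard JAX-RS client API.
import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.Entity;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;

public class ClearFeaturegroupSketch {

  public static void main(String[] args) {
    Client client = ClientBuilder.newClient();
    try {
      Response response = client
          .target("https://hopsworks.example.com/hopsworks-api/api")          // placeholder base URL
          .path("project/119/featurestores/67/featuregroups/13/clear")        // assumed path layout, placeholder ids
          .request(MediaType.APPLICATION_JSON)
          .header("Authorization", "ApiKey " + System.getenv("HOPSWORKS_API_KEY"))
          .post(Entity.json(""));
      // Expect 200 with the re-created featuregroup on success; an error DTO when FeaturestoreException is thrown.
      System.out.println("Status: " + response.getStatus());
    } finally {
      client.close();
    }
  }
}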