
Example 6 with JobException

use of io.hops.hopsworks.exceptions.JobException in project hopsworks by logicalclocks.

the class AbstractExecutionController method getLog.

// ====================================================================================================================
// Execution logs
// ====================================================================================================================
@Override
public JobLogDTO getLog(Execution execution, JobLogDTO.LogType type) throws JobException {
    if (!execution.getState().isFinalState()) {
        throw new JobException(RESTCodes.JobErrorCode.JOB_EXECUTION_INVALID_STATE, Level.FINE, "Job still running.");
    }
    JobLogDTO dto = new JobLogDTO(type);
    DistributedFileSystemOps dfso = null;
    try {
        dfso = dfs.getDfsOps();
        String message;
        String stdPath;
        String path = (dto.getType() == JobLogDTO.LogType.OUT ? execution.getStdoutPath() : execution.getStderrPath());
        JobLogDTO.Retriable retriable = (dto.getType() == JobLogDTO.LogType.OUT ? JobLogDTO.Retriable.RETRIEABLE_OUT : JobLogDTO.Retriable.RETRIABLE_ERR);
        boolean status = (dto.getType() != JobLogDTO.LogType.OUT || execution.getFinalStatus().equals(JobFinalStatus.SUCCEEDED));
        String hdfsPath = REMOTE_PROTOCOL + path;
        if (!Strings.isNullOrEmpty(path) && dfso.exists(hdfsPath)) {
            Project project = execution.getJob().getProject();
            stdPath = path.split(project.getName())[1];
            int fileIndex = stdPath.lastIndexOf('/');
            String stdDirPath = stdPath.substring(0, fileIndex);
            dto.setPath(Settings.DIR_ROOT + File.separator + project.getName() + stdDirPath + File.separator + "std" + dto.getType().getName().toLowerCase() + ".log");
            if (dfso.listStatus(new org.apache.hadoop.fs.Path(hdfsPath))[0].getLen() > settings.getJobLogsDisplaySize()) {
                dto.setLog("Log is too big to display in browser. Click on the download button to get the log file.");
            } else {
                try (InputStream input = dfso.open(hdfsPath)) {
                    message = IOUtils.toString(input, "UTF-8");
                }
                dto.setLog(message.isEmpty() ? "No information." : message);
                if (message.isEmpty() && execution.getState().isFinalState() && execution.getAppId() != null && status) {
                    dto.setRetriable(retriable);
                }
            }
        } else {
            String logMsg = "No log available.";
            if (execution.getJob().getJobType() == JobType.PYTHON) {
                logMsg += " If the job failed instantaneously, please check again later or try running the job again. Log aggregation can take a few minutes to complete.";
                dto.setLog(logMsg);
            }
            if (execution.getState().isFinalState() && execution.getAppId() != null && status) {
                dto.setRetriable(retriable);
            }
        }
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, null, ex);
    } finally {
        if (dfso != null) {
            dfso.close();
        }
    }
    return dto;
}
Also used : FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) InputStream(java.io.InputStream) DistributedFileSystemOps(io.hops.hopsworks.common.hdfs.DistributedFileSystemOps) IOException(java.io.IOException) JobException(io.hops.hopsworks.exceptions.JobException) JobLogDTO(io.hops.hopsworks.common.jobs.JobLogDTO) Project(io.hops.hopsworks.persistence.entity.project.Project)
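
A minimal caller sketch for getLog, assuming an injected controller ("executionController") and an Execution entity already looked up elsewhere; LogType.ERR and getRetriable() are inferred from the code above, not confirmed API names:

// Hypothetical caller: fetch stdout and stderr of a finished execution.
// "executionController", "execution" and LOGGER are assumed to exist in the surrounding class.
JobLogDTO stdout = executionController.getLog(execution, JobLogDTO.LogType.OUT);
JobLogDTO stderr = executionController.getLog(execution, JobLogDTO.LogType.ERR);
if (stdout.getRetriable() != null) {
    // An empty log on a finished application can sometimes be re-fetched once YARN log aggregation completes.
    LOGGER.log(Level.INFO, "stdout for app {0} is marked retriable", execution.getAppId());
}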

Example 7 with JobException

use of io.hops.hopsworks.exceptions.JobException in project hopsworks by logicalclocks.

the class AbstractExecutionController method start.

@Override
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public Execution start(Jobs job, String args, Users user) throws JobException, GenericException, ServiceException, ProjectException {
    // If the limit for the number of executions for this job has been reached, return an error
    checkExecutionLimit(job);
    // A user should not be able to start a job if the project is prepaid and it doesn't have quota.
    if (job.getProject().getPaymentType().equals(PaymentType.PREPAID)) {
        YarnProjectsQuota projectQuota = yarnProjectsQuotaFacade.findByProjectName(job.getProject().getName());
        if (projectQuota == null || projectQuota.getQuotaRemaining() <= 0) {
            throw new ProjectException(RESTCodes.ProjectErrorCode.PROJECT_QUOTA_ERROR, Level.FINE);
        }
    }
    // If the check is enabled and all nodemanagers are offline, throw a JobException
    if (settings.isCheckingForNodemanagerStatusEnabled() && job.getJobType() != JobType.PYTHON) {
        hostServicesFacade.findServices("nodemanager").stream().filter(s -> s.getStatus() == ServiceStatus.Started).findFirst().orElseThrow(() -> new JobException(RESTCodes.JobErrorCode.NODEMANAGERS_OFFLINE, Level.SEVERE));
    }
    Execution exec;
    switch(job.getJobType()) {
        case FLINK:
            // Materialize certs
            return flinkController.startJob(job, user);
        case SPARK:
            exec = sparkController.startJob(job, args, user);
            if (exec == null) {
                throw new IllegalArgumentException("Problem getting execution object for: " + job.getJobType());
            }
            SparkJobConfiguration config = (SparkJobConfiguration) job.getJobConfig();
            String path = config.getAppPath();
            String pathOfInode;
            try {
                pathOfInode = Utils.prepPath(path);
            } catch (UnsupportedEncodingException ex) {
                throw new JobException(RESTCodes.JobErrorCode.JOB_START_FAILED, Level.FINE, "Job name: " + job.getName(), ex.getMessage(), ex);
            }
            Inode inode = inodeController.getInodeAtPath(pathOfInode);
            String inodeName = inode.getInodePK().getName();
            activityFacade.persistActivity(ActivityFacade.EXECUTED_JOB + inodeName, job.getProject(), user, ActivityFlag.JOB);
            break;
        case PYSPARK:
            if (job.getProject().getPythonEnvironment() == null) {
                throw new ProjectException(RESTCodes.ProjectErrorCode.ANACONDA_NOT_ENABLED, Level.FINEST);
            }
            exec = sparkController.startJob(job, args, user);
            if (exec == null) {
                throw new IllegalArgumentException("Error while getting execution object for: " + job.getJobType());
            }
            break;
        default:
            throw new GenericException(RESTCodes.GenericErrorCode.UNKNOWN_ACTION, Level.FINE, "Unsupported job type: " + job.getJobType());
    }
    return exec;
}
Also used : ProjectException(io.hops.hopsworks.exceptions.ProjectException) JobException(io.hops.hopsworks.exceptions.JobException) Execution(io.hops.hopsworks.persistence.entity.jobs.history.Execution) Inode(io.hops.hopsworks.persistence.entity.hdfs.inode.Inode) SparkJobConfiguration(io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration) UnsupportedEncodingException(java.io.UnsupportedEncodingException) GenericException(io.hops.hopsworks.exceptions.GenericException) YarnProjectsQuota(io.hops.hopsworks.persistence.entity.jobs.quota.YarnProjectsQuota) TransactionAttribute(javax.ejb.TransactionAttribute)
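
A hedged caller sketch for start(); the way the job is obtained and the use of null as "default arguments" are assumptions for illustration:

// Hypothetical caller: start a job with its stored default arguments.
// "executionController", "job", "user" and LOGGER are assumed to be resolved elsewhere
// (injected controller, job fetched via its facade, user taken from the request's JWT).
Execution exec = executionController.start(job, null, user); // null args: assumed to fall back to the job's defaults
LOGGER.log(Level.INFO, "Started execution {0} of job {1}", new Object[]{exec.getId(), job.getName()});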

Example 8 with JobException

use of io.hops.hopsworks.exceptions.JobException in project hopsworks by logicalclocks.

the class FlinkController method startJob.

public Execution startJob(final Jobs job, final Users user) throws GenericException, JobException, ServiceException {
    // First: some parameter checking.
    if (job == null) {
        throw new NullPointerException("Cannot run a null job.");
    } else if (user == null) {
        throw new NullPointerException("Cannot run a job as a null user.");
    } else if (job.getJobType() != JobType.FLINK) {
        throw new IllegalArgumentException("Job configuration is not a Flink job configuration.");
    }
    // Use the Hopsworks Consul service domain (the service name), not the address
    String username = hdfsUsersBean.getHdfsUserName(job.getProject(), user);
    FlinkJob flinkjob = null;
    try {
        String hopsworksRestEndpoint = "https://" + serviceDiscoveryController.constructServiceFQDNWithPort(ServiceDiscoveryController.HopsworksService.HOPSWORKS_APP);
        UserGroupInformation proxyUser = ugiService.getProxyUser(username);
        try {
            flinkjob = proxyUser.doAs((PrivilegedExceptionAction<FlinkJob>) () -> new FlinkJob(job, submitter, user, hdfsUsersBean.getHdfsUserName(job.getProject(), job.getCreator()), settings, kafkaBrokers.getKafkaBrokersString(), hopsworksRestEndpoint, servingConfig, serviceDiscoveryController));
        } catch (InterruptedException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    } catch (IOException ex) {
        throw new JobException(RESTCodes.JobErrorCode.PROXY_ERROR, Level.SEVERE, "job: " + job.getId() + ", user:" + user.getUsername(), ex.getMessage(), ex);
    } catch (ServiceDiscoveryException ex) {
        throw new ServiceException(RESTCodes.ServiceErrorCode.SERVICE_NOT_FOUND, Level.SEVERE, "job: " + job.getId() + ", user:" + user.getUsername(), ex.getMessage(), ex);
    }
    if (flinkjob == null) {
        throw new GenericException(RESTCodes.GenericErrorCode.UNKNOWN_ERROR, Level.WARNING, "Could not instantiate job with name: " + job.getName() + " and id: " + job.getId(), "flinkjob object was null");
    }
    Execution execution = flinkjob.requestExecutionId();
    submitter.startExecution(flinkjob);
    activityFacade.persistActivity(ActivityFacade.RAN_JOB, job.getProject(), user.asUser(), ActivityFlag.JOB);
    return execution;
}
Also used : PrivilegedExceptionAction(java.security.PrivilegedExceptionAction) IOException(java.io.IOException) GenericException(io.hops.hopsworks.exceptions.GenericException) JobException(io.hops.hopsworks.exceptions.JobException) Execution(io.hops.hopsworks.persistence.entity.jobs.history.Execution) ServiceException(io.hops.hopsworks.exceptions.ServiceException) ServiceDiscoveryException(com.logicalclocks.servicediscoverclient.exceptions.ServiceDiscoveryException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)
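
The interesting piece above is the proxy-user pattern. Here is a minimal sketch of it in isolation; apart from ugiService.getProxyUser, hdfsUsersBean.getHdfsUserName and Hadoop's doAs API, everything (including the action body) is illustrative:

// Run an action with the project-specific HDFS user's credentials via Hadoop's UserGroupInformation.
// "ugiService", "hdfsUsersBean", "job", "user" and LOGGER mirror the fields used in startJob above.
UserGroupInformation proxyUser = ugiService.getProxyUser(hdfsUsersBean.getHdfsUserName(job.getProject(), user));
String effectiveUser = proxyUser.doAs((PrivilegedExceptionAction<String>) () ->
    // Anything executed inside doAs runs as the proxy user.
    UserGroupInformation.getCurrentUser().getShortUserName());
LOGGER.log(Level.FINE, "doAs executed as {0}", effectiveUser);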

Example 9 with JobException

use of io.hops.hopsworks.exceptions.JobException in project hopsworks by logicalclocks.

the class JobController method putJob.

public Jobs putJob(Users user, Project project, Jobs job, JobConfiguration config) throws JobException {
    try {
        if (config.getJobType() == JobType.SPARK || config.getJobType() == JobType.PYSPARK) {
            SparkConfigurationUtil sparkConfigurationUtil = new SparkConfigurationUtil();
            SparkJobConfiguration sparkJobConfiguration = (SparkJobConfiguration) config;
            sparkConfigurationUtil.validateExecutorMemory(sparkJobConfiguration.getExecutorMemory(), settings);
        }
        job = jobFacade.put(user, project, config, job);
    } catch (IllegalStateException ise) {
        if (ise.getCause() instanceof JAXBException) {
            throw new JobException(RESTCodes.JobErrorCode.JOB_CONFIGURATION_CONVERT_TO_JSON_ERROR, Level.FINE, "Unable to create json from JobConfiguration", ise.getMessage(), ise);
        } else {
            throw ise;
        }
    }
    if (config.getSchedule() != null) {
        scheduler.scheduleJobPeriodic(job);
    }
    activityFacade.persistActivity(ActivityFacade.CREATED_JOB + getJobNameForActivity(job.getName()), project, user, ActivityFlag.JOB);
    return job;
}
Also used : JobException(io.hops.hopsworks.exceptions.JobException) SparkJobConfiguration(io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration) JAXBException(javax.xml.bind.JAXBException) SparkConfigurationUtil(io.hops.hopsworks.common.util.SparkConfigurationUtil)
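
A hedged update sketch around putJob: getJobConfig() appears in the start() example above, while setExecutorMemory(...) and the MB unit are assumptions mirrored from the getExecutorMemory() call inside putJob:

// Hypothetical flow: bump an existing Spark job's executor memory and persist the change.
// "jobController", "user", "project" and "job" are assumed to be resolved elsewhere.
SparkJobConfiguration config = (SparkJobConfiguration) job.getJobConfig();
config.setExecutorMemory(4096); // assumption: memory is configured in MB and validated by putJob
job = jobController.putJob(user, project, job, config);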

Example 10 with JobException

use of io.hops.hopsworks.exceptions.JobException in project hopsworks by logicalclocks.

the class FeaturegroupService method deleteFeaturegroupContents.

/**
 * Endpoint for deleting the contents of a featuregroup.
 * As HopsHive does not support ACID transactions, the way to delete the contents of a table is to drop the table
 * and re-create it, which also drops the featuregroup metadata due to the ON DELETE CASCADE foreign key rule.
 * This method therefore stores the metadata of the featuregroup before dropping it and then re-creates the
 * featuregroup with the same metadata.
 * <p>
 * This endpoint is typically used when the user wants to insert data into a featuregroup with the write-mode
 * 'overwrite' instead of the default mode 'append'.
 *
 * @param featuregroupId the id of the featuregroup
 * @throws FeaturestoreException
 * @throws ServiceException
 * @throws KafkaException
 * @throws SchemaException
 * @throws ProjectException
 * @throws UserException
 */
@POST
@Path("/{featuregroupId}/clear")
@Produces(MediaType.APPLICATION_JSON)
@AllowedProjectRoles({ AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST })
@JWTRequired(acceptedTokens = { Audience.API, Audience.JOB }, allowedUserRoles = { "HOPS_ADMIN", "HOPS_USER", "HOPS_SERVICE_USER" })
@ApiKeyRequired(acceptedScopes = { ApiScope.FEATURESTORE }, allowedUserRoles = { "HOPS_ADMIN", "HOPS_USER", "HOPS_SERVICE_USER" })
@ApiOperation(value = "Delete featuregroup contents")
public Response deleteFeaturegroupContents(@Context SecurityContext sc, @Context HttpServletRequest req, @ApiParam(value = "Id of the featuregroup", required = true) @PathParam("featuregroupId") Integer featuregroupId) throws FeaturestoreException, ServiceException, KafkaException, SchemaException, ProjectException, UserException {
    verifyIdProvided(featuregroupId);
    Users user = jWTHelper.getUserPrincipal(sc);
    // Verify that the user has the data-owner role or is the creator of the featuregroup
    Featuregroup featuregroup = featuregroupController.getFeaturegroupById(featurestore, featuregroupId);
    try {
        FeaturegroupDTO newFeatureGroup = featuregroupController.clearFeaturegroup(featuregroup, project, user);
        return Response.ok().entity(newFeatureGroup).build();
    } catch (SQLException | IOException | ProvenanceException | HopsSecurityException | JobException e) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.COULD_NOT_CLEAR_FEATUREGROUP, Level.SEVERE, "project: " + project.getName() + ", featurestoreId: " + featurestore.getId() + ", featuregroupId: " + featuregroupId, e.getMessage(), e);
    }
}
Also used : JobException(io.hops.hopsworks.exceptions.JobException) ProvenanceException(io.hops.hopsworks.exceptions.ProvenanceException) SQLException(java.sql.SQLException) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) Users(io.hops.hopsworks.persistence.entity.user.Users) IOException(java.io.IOException) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) FeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO) HopsSecurityException(io.hops.hopsworks.exceptions.HopsSecurityException) Path(javax.ws.rs.Path) DatasetPath(io.hops.hopsworks.common.dataset.util.DatasetPath) POST(javax.ws.rs.POST) Produces(javax.ws.rs.Produces) JWTRequired(io.hops.hopsworks.jwt.annotation.JWTRequired) ApiOperation(io.swagger.annotations.ApiOperation) ApiKeyRequired(io.hops.hopsworks.api.filter.apiKey.ApiKeyRequired) AllowedProjectRoles(io.hops.hopsworks.api.filter.AllowedProjectRoles)
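
A client-side sketch of calling this endpoint with the standard JAX-RS client API; the base URL, resource path layout, ids and the ApiKey authorization scheme are assumptions for illustration only:

// Hypothetical REST call that clears a feature group's contents (placeholders throughout).
// Uses javax.ws.rs.client.Client/ClientBuilder/Entity and javax.ws.rs.core.MediaType/Response.
Client client = ClientBuilder.newClient();
Response response = client
    .target("https://hopsworks.example.com/hopsworks-api/api") // placeholder host and base path
    .path("project/{projectId}/featurestores/{fsId}/featuregroups/{fgId}/clear") // assumed path layout
    .resolveTemplate("projectId", 119)
    .resolveTemplate("fsId", 67)
    .resolveTemplate("fgId", 13)
    .request(MediaType.APPLICATION_JSON)
    .header("Authorization", "ApiKey " + apiKey) // assumption: API-key authentication; apiKey is a placeholder
    .post(Entity.json(""));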

Aggregations

JobException (io.hops.hopsworks.exceptions.JobException) 27
IOException (java.io.IOException) 14
Produces (javax.ws.rs.Produces) 12
AllowedProjectRoles (io.hops.hopsworks.api.filter.AllowedProjectRoles) 11
JWTRequired (io.hops.hopsworks.jwt.annotation.JWTRequired) 11
Path (javax.ws.rs.Path) 11
Users (io.hops.hopsworks.persistence.entity.user.Users) 9
ApiOperation (io.swagger.annotations.ApiOperation) 8
ApiKeyRequired (io.hops.hopsworks.api.filter.apiKey.ApiKeyRequired) 7
GenericException (io.hops.hopsworks.exceptions.GenericException) 6
ServiceException (io.hops.hopsworks.exceptions.ServiceException) 6
DistributedFileSystemOps (io.hops.hopsworks.common.hdfs.DistributedFileSystemOps) 5
FeaturestoreException (io.hops.hopsworks.exceptions.FeaturestoreException) 4
HopsSecurityException (io.hops.hopsworks.exceptions.HopsSecurityException) 4
ProjectException (io.hops.hopsworks.exceptions.ProjectException) 4
SparkJobConfiguration (io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration) 4
Execution (io.hops.hopsworks.persistence.entity.jobs.history.Execution) 4
TransactionAttribute (javax.ejb.TransactionAttribute) 4
ResourceRequest (io.hops.hopsworks.common.api.ResourceRequest) 3
YarnAppUrlsDTO (io.hops.hopsworks.common.dao.jobs.description.YarnAppUrlsDTO) 3