
Example 1 with JobType

use of io.hops.hopsworks.persistence.entity.jobs.configuration.JobType in project hopsworks by logicalclocks.

In the class JobFacade, the method setFilterQuery:

private void setFilterQuery(AbstractFacade.FilterBy filterBy, Query q) {
    switch(Filters.valueOf(filterBy.getValue())) {
        case JOBTYPE:
        case JOBTYPE_NEQ:
            Set<JobType> jobTypes = new HashSet<>(getJobTypes(filterBy.getField(), filterBy.getParam()));
            q.setParameter(filterBy.getField(), jobTypes);
            break;
        case DATE_CREATED:
        case DATE_CREATED_GT:
        case DATE_CREATED_LT:
            Date date = getDate(filterBy.getField(), filterBy.getParam());
            q.setParameter(filterBy.getField(), date);
            break;
        case NAME:
            q.setParameter(filterBy.getField(), filterBy.getParam());
            break;
        case CREATOR:
        case LATEST_EXECUTION:
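            // Bind both the raw value and an upper-cased copy ("searchUpper") so the query can match case-insensitively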
            q.setParameter(filterBy.getField(), filterBy.getParam());
            q.setParameter("searchUpper", filterBy.getParam().toUpperCase());
            break;
        case LATEST_EXECUTION_STATE:
            Set<JobState> jobStates = new HashSet<>(getJobStates(filterBy.getField(), filterBy.getParam()));
            q.setParameter(filterBy.getField(), jobStates);
            break;
        default:
            break;
    }
}
Also used : JobType(io.hops.hopsworks.persistence.entity.jobs.configuration.JobType) JobState(io.hops.hopsworks.persistence.entity.jobs.configuration.history.JobState) Date(java.util.Date) HashSet(java.util.HashSet)
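
The JOBTYPE and LATEST_EXECUTION_STATE branches bind a whole Set as a single named parameter, so the underlying JPQL can use an IN clause. The sketch below is not Hopsworks code: JobType is reproduced as a local stand-in enum (with only the constants that appear in these examples) and the filtering happens in memory, purely to illustrate how the parsed Set behaves like an IN (:jobtype) predicate.

import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class JobTypeFilterSketch {

    // Stand-in for io.hops.hopsworks.persistence.entity.jobs.configuration.JobType;
    // only the constants used elsewhere in these examples are reproduced.
    enum JobType { SPARK, PYSPARK, FLINK }

    // Minimal stand-in for a persisted job row.
    record Job(String name, JobType type) { }

    public static void main(String[] args) {
        // The comma-separated filter value handled by the JOBTYPE branch above.
        Set<JobType> wanted = Arrays.stream("SPARK,PYSPARK".split(","))
                .map(v -> JobType.valueOf(v.trim().toUpperCase()))
                .collect(Collectors.toCollection(() -> EnumSet.noneOf(JobType.class)));

        List<Job> jobs = List.of(
                new Job("daily-etl", JobType.SPARK),
                new Job("notebook-run", JobType.PYSPARK),
                new Job("stream-join", JobType.FLINK));

        // In-memory equivalent of binding the Set to a JPQL "j.type IN :jobtype" clause.
        List<Job> matched = jobs.stream()
                .filter(j -> wanted.contains(j.type()))
                .collect(Collectors.toList());

        matched.forEach(j -> System.out.println(j.name() + " -> " + j.type()));
    }
}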

Example 2 with JobType

use of io.hops.hopsworks.persistence.entity.jobs.configuration.JobType in project hopsworks by logicalclocks.

In the class JobFacade, the method getJobTypes:

public static Set<JobType> getJobTypes(String field, String values) {
    String[] jobTypesArr = values.split(",");
    Set<JobType> jobTypes = new HashSet<>();
    for (String jobType : jobTypesArr) {
        try {
            jobTypes.add(JobType.valueOf(jobType.trim().toUpperCase()));
        } catch (IllegalArgumentException ie) {
            throw new InvalidQueryException("Filter value for " + field + " needs to set a valid " + field + ", but found: " + jobType);
        }
    }
    if (jobTypes.isEmpty()) {
        throw new InvalidQueryException("Filter value for " + field + " needs to set valid job types, but found: " + values);
    }
    return jobTypes;
}
Also used : JobType(io.hops.hopsworks.persistence.entity.jobs.configuration.JobType) InvalidQueryException(io.hops.hopsworks.exceptions.InvalidQueryException) HashSet(java.util.HashSet)
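
Since getJobTypes is static, it can be exercised directly. The snippet below is only a usage sketch: it assumes the relevant Hopsworks modules are on the classpath, and the JobFacade package shown in the import is an assumption; the behaviour it demonstrates (case-insensitive parsing, InvalidQueryException on an unknown token) follows from the method body above.

import io.hops.hopsworks.common.dao.jobs.description.JobFacade;
import io.hops.hopsworks.exceptions.InvalidQueryException;
import io.hops.hopsworks.persistence.entity.jobs.configuration.JobType;

import java.util.Set;

public class GetJobTypesDemo {
    public static void main(String[] args) {
        // Parsing is case-insensitive and tolerates surrounding whitespace.
        Set<JobType> types = JobFacade.getJobTypes("jobtype", "spark, PySpark");
        System.out.println(types); // e.g. [PYSPARK, SPARK]; a HashSet has no defined order

        // An unrecognised token is rejected with an InvalidQueryException naming the field.
        try {
            JobFacade.getJobTypes("jobtype", "SPARK,NOT_A_JOB_TYPE");
        } catch (InvalidQueryException e) {
            System.out.println(e.getMessage());
        }
    }
}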

Example 3 with JobType

use of io.hops.hopsworks.persistence.entity.jobs.configuration.JobType in project hopsworks by logicalclocks.

In the class SparkYarnRunnerBuilder, the method getYarnRunner:

/**
 * Get a YarnRunner instance that will launch a Spark job.
 *
 * @param project the project that owns the job
 * @param jobUser the user name the job runs as on YARN/HDFS
 * @param services the asynchronous job execution services
 * @param dfsClient the distributed file system client
 * @param yarnClient the YARN client used to submit the application
 * @param settings the Hopsworks settings
 * @return The YarnRunner instance to launch the Spark job on Yarn.
 * @throws IOException If creation failed.
 */
public YarnRunner getYarnRunner(Project project, String jobUser, Users hopsworksUser, AsynchronousJobExecutor services, final DistributedFileSystemOps dfsClient, final YarnClient yarnClient, Settings settings, String kafkaBrokersString, String hopsworksRestEndpoint, ServingConfig servingConfig, ServiceDiscoveryController serviceDiscoveryController) throws IOException, ServiceDiscoveryException, JobException, ApiKeyException {
    Map<String, ConfigProperty> jobHopsworksProps = new HashMap<>();
    JobType jobType = job.getJobConfig().getJobType();
    String appPath = ((SparkJobConfiguration) job.getJobConfig()).getAppPath();
    // Create a builder
    YarnRunner.Builder builder = new YarnRunner.Builder(Settings.SPARK_AM_MAIN);
    builder.setJobType(jobType);
    builder.setYarnClient(yarnClient);
    builder.setDfsClient(dfsClient);
    // 1. Set stagingPath
    String stagingPath = "/Projects/" + project.getName() + "/" + Settings.PROJECT_STAGING_DIR + "/.sparkjobstaging-" + YarnRunner.APPID_PLACEHOLDER;
    builder.localResourcesBasePath(stagingPath);
    // 2. Set job local resources, i.e. project certificates, job jar etc.
    // Add hdfs prefix so the monitor knows it should find it there
    builder.addFileToRemove("hdfs://" + stagingPath);
    // Add app file
    String appExecName = null;
    if (jobType == JobType.SPARK) {
        appExecName = Settings.SPARK_LOCRSC_APP_JAR;
    } else if (jobType == JobType.PYSPARK) {
        appExecName = appPath.substring(appPath.lastIndexOf(File.separator) + 1);
    }
    builder.addLocalResource(new LocalResourceDTO(appExecName, appPath, LocalResourceVisibility.APPLICATION.toString(), LocalResourceType.FILE.toString(), null), dfsClient);
    builder.addToAppMasterEnvironment(YarnRunner.KEY_CLASSPATH, Settings.SPARK_LOCRSC_APP_JAR);
    // Set executor extraJavaOptions to make parameters available to executors
    Map<String, String> extraJavaOptions = new HashMap<>();
    // These properties are set so that spark history server picks them up
    jobHopsworksProps.put(Settings.SPARK_DRIVER_STAGINGDIR_ENV, new ConfigProperty(Settings.SPARK_DRIVER_STAGINGDIR_ENV, HopsUtils.IGNORE, stagingPath));
    jobHopsworksProps.put(Settings.HOPSWORKS_APPID_PROPERTY, new ConfigProperty(Settings.HOPSWORKS_APPID_PROPERTY, HopsUtils.IGNORE, YarnRunner.APPID_PLACEHOLDER));
    extraJavaOptions.put(Settings.HOPSWORKS_APPID_PROPERTY, YarnRunner.APPID_PLACEHOLDER);
    extraJavaOptions.put(Settings.LOGSTASH_JOB_INFO, project.getName().toLowerCase() + "," + jobName + "," + job.getId() + "," + YarnRunner.APPID_PLACEHOLDER);
    // Set up command
    StringBuilder amargs = new StringBuilder("--class ");
    amargs.append(((SparkJobConfiguration) job.getJobConfig()).getMainClass());
    if (jobType == JobType.PYSPARK) {
        amargs.append(" --primary-py-file ").append(appExecName);
    }
    Map<String, String> finalJobProps = new HashMap<>();
    finalJobProps.putAll(sparkConfigurationUtil.setFrameworkProperties(project, job.getJobConfig(), settings, jobUser, hopsworksUser, extraJavaOptions, kafkaBrokersString, hopsworksRestEndpoint, servingConfig, serviceDiscoveryController));
    finalJobProps.put(Settings.SPARK_YARN_APPMASTER_SPARK_USER, jobUser);
    finalJobProps.put(Settings.SPARK_EXECUTOR_SPARK_USER, jobUser);
    finalJobProps.put(Settings.SPARK_YARN_APPMASTER_YARN_MODE, "true");
    finalJobProps.put(Settings.SPARK_YARN_APPMASTER_YARN_STAGING_DIR, stagingPath);
    // Parse properties from Spark config file
    Properties sparkProperties = new Properties();
    try (InputStream is = new FileInputStream(settings.getSparkDir() + "/" + Settings.SPARK_CONFIG_FILE)) {
        sparkProperties.load(is);
        // For every property that is in the spark configuration file but is not already set, create a system property.
        for (String property : sparkProperties.stringPropertyNames()) {
            if (!finalJobProps.containsKey(property)) {
                finalJobProps.put(property, sparkProperties.getProperty(property).trim());
            }
        }
    }
    for (String jvmOption : finalJobProps.get(Settings.SPARK_DRIVER_EXTRA_JAVA_OPTIONS).split(" +")) {
        builder.addJavaOption(jvmOption);
    }
    for (String key : finalJobProps.keySet()) {
        if (key.startsWith("spark.yarn.appMasterEnv.")) {
            builder.addToAppMasterEnvironment(key.replace("spark.yarn.appMasterEnv.", ""), finalJobProps.get(key));
        }
        addSystemProperty(key, finalJobProps.get(key));
    }
    builder.addToAppMasterEnvironment("CLASSPATH", finalJobProps.get(Settings.SPARK_DRIVER_EXTRACLASSPATH));
    for (String s : sysProps.keySet()) {
        String option = YarnRunner.escapeForShell("-D" + s + "=" + sysProps.get(s));
        builder.addJavaOption(option);
    }
    for (String s : jobArgs) {
        amargs.append(" --arg '").append(s).append("'");
    }
    amargs.append(" --dist-cache-conf 'distcache.conf'");
    builder.amArgs(amargs.toString());
    // Set up Yarn properties
    builder.amMemory(sparkJobConfiguration.getAmMemory());
    builder.amVCores(sparkJobConfiguration.getAmVCores());
    builder.amQueue(sparkJobConfiguration.getAmQueue());
    // pyfiles, jars and files are distributed as spark.yarn.dist.files
    String hopsFiles = finalJobProps.get("spark.yarn.dist.files");
    if (!Strings.isNullOrEmpty(hopsFiles)) {
        for (String filePath : hopsFiles.split(",")) {
            String fileName = filePath.substring(filePath.lastIndexOf("/") + 1);
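            // A "path#alias" entry localizes the file under the alias instead of its original name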
            if (filePath.contains("#")) {
                fileName = filePath.split("#")[1];
                filePath = filePath.substring(0, filePath.indexOf("#"));
            }
            builder.addLocalResource(new LocalResourceDTO(fileName, filePath, LocalResourceVisibility.APPLICATION.toString(), LocalResourceType.FILE.toString(), null), dfsClient);
        }
    }
    String archives = finalJobProps.get("spark.yarn.dist.archives");
    if (!Strings.isNullOrEmpty(archives)) {
        for (String archivePath : archives.split(",")) {
            String fileName = archivePath.substring(archivePath.lastIndexOf("/") + 1);
            if (archivePath.contains("#")) {
                fileName = archivePath.split("#")[1];
                archivePath = archivePath.substring(0, archivePath.indexOf("#"));
            }
            builder.addLocalResource(new LocalResourceDTO(fileName, archivePath, LocalResourceVisibility.APPLICATION.toString(), LocalResourceType.ARCHIVE.toString(), null), dfsClient);
        }
    }
    // Set app name
    builder.appName(jobName);
    return builder.build(settings.getSparkDir(), JobType.SPARK, services);
}
Also used : HashMap(java.util.HashMap) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) SparkJobConfiguration(io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration) Properties(java.util.Properties) LocalResourceDTO(io.hops.hopsworks.persistence.entity.jobs.configuration.yarn.LocalResourceDTO) FileInputStream(java.io.FileInputStream) YarnRunner(io.hops.hopsworks.common.jobs.yarn.YarnRunner) JobType(io.hops.hopsworks.persistence.entity.jobs.configuration.JobType) ConfigProperty(io.hops.hopsworks.common.util.templates.ConfigProperty)
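
A detail that is easy to miss in the body above is the precedence when the Spark config file is read: properties already chosen for the job are kept, and values from spark-defaults only fill the gaps before everything is turned into system properties. Below is a minimal, self-contained sketch of that merge; the spark-defaults.conf path is a placeholder (in Hopsworks it comes from settings.getSparkDir() and Settings.SPARK_CONFIG_FILE).

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

public class SparkDefaultsMergeSketch {
    public static void main(String[] args) throws IOException {
        // Properties already decided by the job configuration; these must not be overridden.
        Map<String, String> finalJobProps = new HashMap<>();
        finalJobProps.put("spark.executor.memory", "4g");

        // Placeholder path; in Hopsworks it is settings.getSparkDir() + "/" + Settings.SPARK_CONFIG_FILE.
        Properties sparkProperties = new Properties();
        try (InputStream is = new FileInputStream("/srv/hops/spark/conf/spark-defaults.conf")) {
            sparkProperties.load(is);
        }

        // Only copy defaults for keys the job has not set itself ("first writer wins").
        for (String property : sparkProperties.stringPropertyNames()) {
            finalJobProps.putIfAbsent(property, sparkProperties.getProperty(property).trim());
        }

        finalJobProps.forEach((k, v) -> System.out.println(k + " = " + v));
    }
}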

Example 4 with JobType

use of io.hops.hopsworks.persistence.entity.jobs.configuration.JobType in project hopsworks by logicalclocks.

In the class ProjectController, the method createProjectLogResources:

/**
 * Project default datasets Logs and Resources need to be created in a
 * separate transaction after the project creation
 * is complete.
 *
 * @param user
 * @param project
 * @param dfso
 * @throws java.io.IOException
 */
public void createProjectLogResources(Users user, Project project, DistributedFileSystemOps dfso) throws IOException, DatasetException, HopsSecurityException {
    for (Settings.BaseDataset ds : Settings.BaseDataset.values()) {
        datasetController.createDataset(user, project, ds.getName(), ds.getDescription(), Provenance.Type.DISABLED.dto, false, DatasetAccessPermission.EDITABLE, dfso);
        Path dsPath = new Path(Utils.getProjectPath(project.getName()) + ds.getName());
        FileStatus fstatus = dfso.getFileStatus(dsPath);
        // create subdirectories for the resource dataset
        if (ds.equals(Settings.BaseDataset.RESOURCES)) {
            String[] subResources = settings.getResourceDirs().split(";");
            for (String sub : subResources) {
                Path subDirPath = new Path(dsPath, sub);
                datasetController.createSubDirectory(project, subDirPath, dfso);
                dfso.setOwner(subDirPath, fstatus.getOwner(), fstatus.getGroup());
            }
        } else if (ds.equals(Settings.BaseDataset.LOGS)) {
            dfso.setStoragePolicy(dsPath, settings.getHdfsLogStoragePolicy());
            JobType[] jobTypes = new JobType[] { JobType.SPARK, JobType.PYSPARK, JobType.FLINK };
            for (JobType jobType : jobTypes) {
                Path subDirPath = new Path(dsPath, jobType.getName());
                datasetController.createSubDirectory(project, subDirPath, dfso);
                dfso.setOwner(subDirPath, fstatus.getOwner(), fstatus.getGroup());
            }
        }
        // Persist README.md to hdfs for Default Datasets
        datasetController.generateReadme(dfso, ds.getName(), ds.getDescription(), project.getName());
        Path readmePath = new Path(dsPath, Settings.README_FILE);
        dfso.setOwner(readmePath, fstatus.getOwner(), fstatus.getGroup());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) JobType(io.hops.hopsworks.persistence.entity.jobs.configuration.JobType) Settings(io.hops.hopsworks.common.util.Settings)
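
For the Logs dataset, one subdirectory per supported framework is created, named after JobType.getName(). The sketch below only illustrates the resulting layout; it uses a local stand-in enum, and the display names passed to it ("Spark", "PySpark", "Flink") are assumptions, since only the enum constants themselves appear in the example.

import java.nio.file.Path;
import java.nio.file.Paths;

public class LogsLayoutSketch {

    // Stand-in for the Hopsworks JobType enum; the display names are assumptions.
    enum JobType {
        SPARK("Spark"), PYSPARK("PySpark"), FLINK("Flink");

        private final String name;
        JobType(String name) { this.name = name; }
        String getName() { return name; }
    }

    public static void main(String[] args) {
        // The project path follows the /Projects/<project>/ pattern seen in Example 3;
        // the dataset name "Logs" corresponds to Settings.BaseDataset.LOGS (name assumed here).
        Path logsDataset = Paths.get("/Projects", "demo_project", "Logs");
        for (JobType jobType : new JobType[] { JobType.SPARK, JobType.PYSPARK, JobType.FLINK }) {
            // Mirrors datasetController.createSubDirectory(project, new Path(dsPath, jobType.getName()), dfso)
            System.out.println(logsDataset.resolve(jobType.getName()));
        }
    }
}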

Aggregations

JobType (io.hops.hopsworks.persistence.entity.jobs.configuration.JobType) 4
HashSet (java.util.HashSet) 2
YarnRunner (io.hops.hopsworks.common.jobs.yarn.YarnRunner) 1
Settings (io.hops.hopsworks.common.util.Settings) 1
ConfigProperty (io.hops.hopsworks.common.util.templates.ConfigProperty) 1
InvalidQueryException (io.hops.hopsworks.exceptions.InvalidQueryException) 1
JobState (io.hops.hopsworks.persistence.entity.jobs.configuration.history.JobState) 1
SparkJobConfiguration (io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration) 1
LocalResourceDTO (io.hops.hopsworks.persistence.entity.jobs.configuration.yarn.LocalResourceDTO) 1
FileInputStream (java.io.FileInputStream) 1
InputStream (java.io.InputStream) 1
Date (java.util.Date) 1
HashMap (java.util.HashMap) 1
Properties (java.util.Properties) 1
FileStatus (org.apache.hadoop.fs.FileStatus) 1
Path (org.apache.hadoop.fs.Path) 1