
Example 11 with SparkJobConfiguration

Use of io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration in project hopsworks by logicalclocks.

The class JupyterConfigFilesGenerator, method createSparkMagicConfig.

public void createSparkMagicConfig(Writer out, Project project, JupyterSettings js, String hdfsUser, Users hopsworksUser, String confDirPath) throws IOException, ServiceDiscoveryException, JobException, ApiKeyException {
    SparkJobConfiguration sparkJobConfiguration = (SparkJobConfiguration) js.getJobConfig();
    // If user selected Python we should use the default spark configuration for Spark/PySpark kernels
    if (js.isPythonKernel()) {
        sparkJobConfiguration = (SparkJobConfiguration) jobController.getConfiguration(project, JobType.SPARK, true);
    }
    SparkConfigurationUtil sparkConfigurationUtil = new SparkConfigurationUtil();
    Map<String, String> extraJavaOptions = new HashMap<>();
    extraJavaOptions.put(Settings.LOGSTASH_JOB_INFO, project.getName().toLowerCase() + ",jupyter,notebook,?");
    HashMap<String, String> finalSparkConfiguration = new HashMap<>();
    finalSparkConfiguration.put(Settings.SPARK_DRIVER_STAGINGDIR_ENV, "hdfs:///Projects/" + project.getName() + "/Resources/.sparkStaging");
    // Set Hopsworks consul service domain, don't use the address, use the name
    String hopsworksRestEndpoint = "https://" + serviceDiscoveryController.constructServiceFQDNWithPort(ServiceDiscoveryController.HopsworksService.HOPSWORKS_APP);
    finalSparkConfiguration.putAll(sparkConfigurationUtil.setFrameworkProperties(project, sparkJobConfiguration, settings, hdfsUser, hopsworksUser, extraJavaOptions, kafkaBrokers.getKafkaBrokersString(), hopsworksRestEndpoint, servingConfig, serviceDiscoveryController));
    StringBuilder sparkConfBuilder = new StringBuilder();
    ArrayList<String> keys = new ArrayList<>(finalSparkConfiguration.keySet());
    Collections.sort(keys);
    for (String configKey : keys) {
        sparkConfBuilder.append("\t\"" + configKey + "\":\"" + finalSparkConfiguration.get(configKey) + "\"," + "\n");
    }
    sparkConfBuilder.deleteCharAt(sparkConfBuilder.lastIndexOf(","));
    try {
        Service livyService = serviceDiscoveryController.getAnyAddressOfServiceWithDNS(ServiceDiscoveryController.HopsworksService.LIVY);
        SparkMagicConfigTemplateBuilder templateBuilder = SparkMagicConfigTemplateBuilder.newBuilder().setLivyIp(livyService.getAddress()).setJupyterHome(confDirPath).setDriverCores(Integer.parseInt(finalSparkConfiguration.get(Settings.SPARK_DRIVER_CORES_ENV))).setDriverMemory(finalSparkConfiguration.get(Settings.SPARK_DRIVER_MEMORY_ENV)).setLivyStartupTimeout(settings.getLivyStartupTimeout());
        if (sparkJobConfiguration.isDynamicAllocationEnabled() || sparkJobConfiguration.getExperimentType() != null) {
            templateBuilder.setNumExecutors(1);
        } else {
            templateBuilder.setNumExecutors(Integer.parseInt(finalSparkConfiguration.get(Settings.SPARK_NUMBER_EXECUTORS_ENV)));
        }
        templateBuilder.setExecutorCores(Integer.parseInt(finalSparkConfiguration.get(Settings.SPARK_EXECUTOR_CORES_ENV))).setExecutorMemory(finalSparkConfiguration.get(Settings.SPARK_EXECUTOR_MEMORY_ENV)).setHdfsUser(hdfsUser).setYarnQueue(sparkJobConfiguration.getAmQueue()).setHadoopHome(settings.getHadoopSymbolicLinkDir()).setHadoopVersion(settings.getHadoopVersion()).setSparkConfiguration(sparkConfBuilder.toString());
        Map<String, Object> dataModel = new HashMap<>(1);
        dataModel.put("conf", templateBuilder.build());
        templateEngine.template(SparkMagicConfigTemplate.TEMPLATE_NAME, dataModel, out);
    } catch (TemplateException | ServiceDiscoveryException ex) {
        throw new IOException(ex);
    }
}
Also used : HashMap(java.util.HashMap) TemplateException(freemarker.template.TemplateException) SparkJobConfiguration(io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration) SparkMagicConfigTemplateBuilder(io.hops.hopsworks.common.util.templates.jupyter.SparkMagicConfigTemplateBuilder) ArrayList(java.util.ArrayList) Service(com.logicalclocks.servicediscoverclient.service.Service) IOException(java.io.IOException) SparkConfigurationUtil(io.hops.hopsworks.common.util.SparkConfigurationUtil) ServiceDiscoveryException(com.logicalclocks.servicediscoverclient.exceptions.ServiceDiscoveryException)
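
For reference, a caller might use this generator roughly as follows; the target file name config.json and the surrounding wiring are illustrative assumptions, not Hopsworks code.

import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

// Hypothetical caller: write the generated sparkmagic configuration into the
// user's Jupyter configuration directory (file name assumed for illustration).
try (Writer out = Files.newBufferedWriter(
        Paths.get(confDirPath, "config.json"), StandardCharsets.UTF_8)) {
    jupyterConfigFilesGenerator.createSparkMagicConfig(
        out, project, jupyterSettings, hdfsUser, hopsworksUser, confDirPath);
}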

Example 12 with SparkJobConfiguration

Use of io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration in project hopsworks by logicalclocks.

The class DefaultJobConfigurationFacade, method createOrUpdate.

public DefaultJobConfiguration createOrUpdate(Project project, JobConfiguration jobConfiguration, JobType jobType, DefaultJobConfiguration currentConfig) throws ProjectException {
    if (jobConfiguration instanceof SparkJobConfiguration) {
        ((SparkJobConfiguration) jobConfiguration).setMainClass(Settings.SPARK_PY_MAINCLASS);
        jobType = JobType.PYSPARK;
    }
    // create
    if (currentConfig == null) {
        currentConfig = new DefaultJobConfiguration();
        DefaultJobConfigurationPK pk = new DefaultJobConfigurationPK();
        pk.setProjectId(project.getId());
        pk.setType(jobType);
        currentConfig.setDefaultJobConfigurationPK(pk);
        currentConfig.setJobConfig(jobConfiguration);
        project.getDefaultJobConfigurationCollection().add(currentConfig);
        em.merge(project);
        return currentConfig;
    // update
    } else {
        for (DefaultJobConfiguration dc : project.getDefaultJobConfigurationCollection()) {
            if (dc.getDefaultJobConfigurationPK().getType().equals(jobType)) {
                dc.setJobConfig(jobConfiguration);
                em.merge(project);
                return dc;
            }
        }
        throw new ProjectException(RESTCodes.ProjectErrorCode.PROJECT_DEFAULT_JOB_CONFIG_NOT_FOUND, Level.FINEST);
    }
}
Also used : ProjectException(io.hops.hopsworks.exceptions.ProjectException) SparkJobConfiguration(io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration) DefaultJobConfiguration(io.hops.hopsworks.persistence.entity.project.jobs.DefaultJobConfiguration) DefaultJobConfigurationPK(io.hops.hopsworks.persistence.entity.jobs.configuration.DefaultJobConfigurationPK)
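
A minimal sketch of how this facade might be invoked to store a project-wide default Spark/PySpark configuration; the facade and project references are assumed to be in scope, and the resource values are illustrative only.

// Hypothetical usage: persist a default configuration for PySpark jobs in a project.
// Note that createOrUpdate forces the type to PYSPARK for SparkJobConfiguration instances.
SparkJobConfiguration defaultConfig = new SparkJobConfiguration();
defaultConfig.setAmQueue("default");
defaultConfig.setExecutorCores(1);
defaultConfig.setExecutorMemory(2048);
// Passing null as the current configuration creates a new default entry.
DefaultJobConfiguration stored =
    defaultJobConfigurationFacade.createOrUpdate(project, defaultConfig, JobType.PYSPARK, null);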

Example 13 with SparkJobConfiguration

Use of io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration in project hopsworks by logicalclocks.

The class SparkController, method startJob.

/**
 * Start the Spark job as the given user.
 * <p/>
 * @param job
 * @param args
 * @param user
 * @return The Execution of the started Spark job.
 * @throws IllegalStateException If Spark is not set up properly.
 * @throws IOException If starting the job fails.
 */
public Execution startJob(final Jobs job, String args, final Users user) throws ServiceException, GenericException, JobException, ProjectException {
    // First: some parameter checking.
    sanityCheck(job, user);
    String username = hdfsUsersBean.getHdfsUserName(job.getProject(), user);
    SparkJobConfiguration sparkConfig = (SparkJobConfiguration) job.getJobConfig();
    String appPath = sparkConfig.getAppPath();
    if (job.getJobType().equals(JobType.PYSPARK)) {
        if (job.getProject().getPythonEnvironment() == null) {
            // Throw error in Hopsworks UI to notify user to enable Anaconda
            throw new JobException(RESTCodes.JobErrorCode.JOB_START_FAILED, Level.SEVERE, "PySpark job needs to have Python Anaconda environment enabled");
        }
    }
    SparkJob sparkjob = createSparkJob(username, job, user);
    Execution exec = sparkjob.requestExecutionId(args);
    if (job.getJobType().equals(JobType.PYSPARK) && appPath.endsWith(".ipynb")) {
        submitter.getExecutionFacade().updateState(exec, JobState.CONVERTING_NOTEBOOK);
        String pyAppPath = HopsUtils.prepJupyterNotebookConversion(exec, username, dfs);
        sparkConfig.setAppPath(pyAppPath);
        jupyterController.convertIPythonNotebook(username, appPath, job.getProject(), pyAppPath, jupyterController.getNotebookConversionType(appPath, user, job.getProject()));
    }
    submitter.startExecution(sparkjob, args);
    activityFacade.persistActivity(ActivityFacade.RAN_JOB + job.getName(), job.getProject(), user.asUser(), ActivityFlag.JOB);
    return exec;
}
Also used : JobException(io.hops.hopsworks.exceptions.JobException) Execution(io.hops.hopsworks.persistence.entity.jobs.history.Execution) SparkJobConfiguration(io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration)
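
A hedged sketch of a caller; the controller, job, and user references are assumed to be in scope, and the argument string is illustrative.

// Hypothetical invocation: start a previously persisted Spark job with runtime arguments.
// For PySpark .ipynb apps the returned Execution first passes through CONVERTING_NOTEBOOK.
Execution execution = sparkController.startJob(job, "--input Resources/data.csv", user);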

Example 14 with SparkJobConfiguration

Use of io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration in project hopsworks by logicalclocks.

The class SparkYarnRunnerBuilder, method getYarnRunner.

/**
 * Get a YarnRunner instance that will launch a Spark job.
 *
 * @param project name of the project
 * @param jobUser
 * @param services
 * @param dfsClient
 * @param yarnClient
 * @param settings
 * @return The YarnRunner instance to launch the Spark job on Yarn.
 * @throws IOException If creation failed.
 */
public YarnRunner getYarnRunner(Project project, String jobUser, Users hopsworksUser, AsynchronousJobExecutor services, final DistributedFileSystemOps dfsClient, final YarnClient yarnClient, Settings settings, String kafkaBrokersString, String hopsworksRestEndpoint, ServingConfig servingConfig, ServiceDiscoveryController serviceDiscoveryController) throws IOException, ServiceDiscoveryException, JobException, ApiKeyException {
    Map<String, ConfigProperty> jobHopsworksProps = new HashMap<>();
    JobType jobType = job.getJobConfig().getJobType();
    String appPath = ((SparkJobConfiguration) job.getJobConfig()).getAppPath();
    // Create a builder
    YarnRunner.Builder builder = new YarnRunner.Builder(Settings.SPARK_AM_MAIN);
    builder.setJobType(jobType);
    builder.setYarnClient(yarnClient);
    builder.setDfsClient(dfsClient);
    /*
     * 1. Set stagingPath
     */
    String stagingPath = "/Projects/" + project.getName() + "/" + Settings.PROJECT_STAGING_DIR + "/.sparkjobstaging-" + YarnRunner.APPID_PLACEHOLDER;
    builder.localResourcesBasePath(stagingPath);
    // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    /*
     * 2. Set job local resources, i.e. project certificates, job jar etc.
     */
    // Add hdfs prefix so the monitor knows it should find it there
    builder.addFileToRemove("hdfs://" + stagingPath);
    // Add app file
    String appExecName = null;
    if (jobType == JobType.SPARK) {
        appExecName = Settings.SPARK_LOCRSC_APP_JAR;
    } else if (jobType == JobType.PYSPARK) {
        appExecName = appPath.substring(appPath.lastIndexOf(File.separator) + 1);
    }
    builder.addLocalResource(new LocalResourceDTO(appExecName, appPath, LocalResourceVisibility.APPLICATION.toString(), LocalResourceType.FILE.toString(), null), dfsClient);
    builder.addToAppMasterEnvironment(YarnRunner.KEY_CLASSPATH, Settings.SPARK_LOCRSC_APP_JAR);
    // Set executor extraJavaOptions to make parameters available to executors
    Map<String, String> extraJavaOptions = new HashMap<>();
    // These properties are set so that spark history server picks them up
    jobHopsworksProps.put(Settings.SPARK_DRIVER_STAGINGDIR_ENV, new ConfigProperty(Settings.SPARK_DRIVER_STAGINGDIR_ENV, HopsUtils.IGNORE, stagingPath));
    jobHopsworksProps.put(Settings.HOPSWORKS_APPID_PROPERTY, new ConfigProperty(Settings.HOPSWORKS_APPID_PROPERTY, HopsUtils.IGNORE, YarnRunner.APPID_PLACEHOLDER));
    extraJavaOptions.put(Settings.HOPSWORKS_APPID_PROPERTY, YarnRunner.APPID_PLACEHOLDER);
    extraJavaOptions.put(Settings.LOGSTASH_JOB_INFO, project.getName().toLowerCase() + "," + jobName + "," + job.getId() + "," + YarnRunner.APPID_PLACEHOLDER);
    // Set up command
    StringBuilder amargs = new StringBuilder("--class ");
    amargs.append(((SparkJobConfiguration) job.getJobConfig()).getMainClass());
    if (jobType == JobType.PYSPARK) {
        amargs.append(" --primary-py-file ").append(appExecName);
    }
    Map<String, String> finalJobProps = new HashMap<>();
    finalJobProps.putAll(sparkConfigurationUtil.setFrameworkProperties(project, job.getJobConfig(), settings, jobUser, hopsworksUser, extraJavaOptions, kafkaBrokersString, hopsworksRestEndpoint, servingConfig, serviceDiscoveryController));
    finalJobProps.put(Settings.SPARK_YARN_APPMASTER_SPARK_USER, jobUser);
    finalJobProps.put(Settings.SPARK_EXECUTOR_SPARK_USER, jobUser);
    finalJobProps.put(Settings.SPARK_YARN_APPMASTER_YARN_MODE, "true");
    finalJobProps.put(Settings.SPARK_YARN_APPMASTER_YARN_STAGING_DIR, stagingPath);
    // Parse properties from Spark config file
    Properties sparkProperties = new Properties();
    try (InputStream is = new FileInputStream(settings.getSparkDir() + "/" + Settings.SPARK_CONFIG_FILE)) {
        sparkProperties.load(is);
        // For every property that is in the spark configuration file but is not already set, create a system property.
        for (String property : sparkProperties.stringPropertyNames()) {
            if (!finalJobProps.containsKey(property)) {
                finalJobProps.put(property, sparkProperties.getProperty(property).trim());
            }
        }
    }
    for (String jvmOption : finalJobProps.get(Settings.SPARK_DRIVER_EXTRA_JAVA_OPTIONS).split(" +")) {
        builder.addJavaOption(jvmOption);
    }
    for (String key : finalJobProps.keySet()) {
        if (key.startsWith("spark.yarn.appMasterEnv.")) {
            builder.addToAppMasterEnvironment(key.replace("spark.yarn.appMasterEnv.", ""), finalJobProps.get(key));
        }
        addSystemProperty(key, finalJobProps.get(key));
    }
    builder.addToAppMasterEnvironment("CLASSPATH", finalJobProps.get(Settings.SPARK_DRIVER_EXTRACLASSPATH));
    for (String s : sysProps.keySet()) {
        String option = YarnRunner.escapeForShell("-D" + s + "=" + sysProps.get(s));
        builder.addJavaOption(option);
    }
    for (String s : jobArgs) {
        amargs.append(" --arg '").append(s).append("'");
    }
    amargs.append(" --dist-cache-conf 'distcache.conf'");
    builder.amArgs(amargs.toString());
    // Set up Yarn properties
    builder.amMemory(sparkJobConfiguration.getAmMemory());
    builder.amVCores(sparkJobConfiguration.getAmVCores());
    builder.amQueue(sparkJobConfiguration.getAmQueue());
    // pyfiles, jars and files are distributed as spark.yarn.dist.files
    String hopsFiles = finalJobProps.get("spark.yarn.dist.files");
    if (!Strings.isNullOrEmpty(hopsFiles)) {
        for (String filePath : hopsFiles.split(",")) {
            String fileName = filePath.substring(filePath.lastIndexOf("/") + 1);
            if (filePath.contains("#")) {
                fileName = filePath.split("#")[1];
                filePath = filePath.substring(0, filePath.indexOf("#"));
            }
            builder.addLocalResource(new LocalResourceDTO(fileName, filePath, LocalResourceVisibility.APPLICATION.toString(), LocalResourceType.FILE.toString(), null), dfsClient);
        }
    }
    String archives = finalJobProps.get("spark.yarn.dist.archives");
    if (!Strings.isNullOrEmpty(archives)) {
        for (String archivePath : archives.split(",")) {
            String fileName = archivePath.substring(archivePath.lastIndexOf("/") + 1);
            if (archivePath.contains("#")) {
                fileName = archivePath.split("#")[1];
                archivePath = archivePath.substring(0, archivePath.indexOf("#"));
            }
            builder.addLocalResource(new LocalResourceDTO(fileName, archivePath, LocalResourceVisibility.APPLICATION.toString(), LocalResourceType.ARCHIVE.toString(), null), dfsClient);
        }
    }
    // Set app name
    builder.appName(jobName);
    return builder.build(settings.getSparkDir(), JobType.SPARK, services);
}
Also used : HashMap(java.util.HashMap) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) SparkJobConfiguration(io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration) Properties(java.util.Properties) LocalResourceDTO(io.hops.hopsworks.persistence.entity.jobs.configuration.yarn.LocalResourceDTO) FileInputStream(java.io.FileInputStream) YarnRunner(io.hops.hopsworks.common.jobs.yarn.YarnRunner) JobType(io.hops.hopsworks.persistence.entity.jobs.configuration.JobType) ConfigProperty(io.hops.hopsworks.common.util.templates.ConfigProperty)
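
The spark.yarn.dist.files and spark.yarn.dist.archives loops above resolve an optional "#alias" suffix into the local resource name. A standalone illustration of that parsing, with an assumed path value:

// Illustrative only: how a "path#alias" entry is split into resource name and path.
String filePath = "hdfs:///Projects/demo/Resources/lookup.csv#lookup";
String fileName = filePath.substring(filePath.lastIndexOf("/") + 1); // "lookup.csv#lookup"
if (filePath.contains("#")) {
    fileName = filePath.split("#")[1];                               // "lookup"
    filePath = filePath.substring(0, filePath.indexOf("#"));         // ".../lookup.csv"
}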

Example 15 with SparkJobConfiguration

Use of io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration in project hopsworks by logicalclocks.

The class ProjectController, method addTourFilesToProject.

public String addTourFilesToProject(String username, Project project, DistributedFileSystemOps dfso, DistributedFileSystemOps udfso, TourProjectType projectType, ProvTypeDTO projectProvCore) throws DatasetException, HopsSecurityException, ProjectException, JobException, GenericException, ServiceException {
    String tourFilesDataset = Settings.HOPS_TOUR_DATASET;
    Users user = userFacade.findByEmail(username);
    if (null != projectType) {
        String projectPath = Utils.getProjectPath(project.getName());
        switch(projectType) {
            case SPARK:
                datasetController.createDataset(user, project, tourFilesDataset, "files for guide projects", Provenance.getDatasetProvCore(projectProvCore, Provenance.MLType.DATASET), false, DatasetAccessPermission.EDITABLE, dfso);
                String exampleDir = settings.getSparkDir() + Settings.SPARK_EXAMPLES_DIR + "/";
                try {
                    File dir = new File(exampleDir);
                    File[] file = dir.listFiles((File dir1, String name) -> name.matches("spark-examples(.*).jar"));
                    if (file.length == 0) {
                        throw new IllegalStateException("No spark-examples*.jar was found in " + dir.getAbsolutePath());
                    }
                    if (file.length > 1) {
                        LOGGER.log(Level.WARNING, "More than one spark-examples*.jar found in {0}.", dir.getAbsolutePath());
                    }
                    String hdfsJarPath = projectPath + tourFilesDataset + "/spark-examples.jar";
                    udfso.copyToHDFSFromLocal(false, file[0].getAbsolutePath(), hdfsJarPath);
                    String datasetGroup = hdfsUsersController.getHdfsGroupName(project, tourFilesDataset);
                    String userHdfsName = hdfsUsersController.getHdfsUserName(project, user);
                    udfso.setPermission(new Path(hdfsJarPath), udfso.getParentPermission(new Path(hdfsJarPath)));
                    udfso.setOwner(new Path(projectPath + tourFilesDataset + "/spark-examples.jar"), userHdfsName, datasetGroup);
                } catch (IOException ex) {
                    throw new ProjectException(RESTCodes.ProjectErrorCode.PROJECT_TOUR_FILES_ERROR, Level.SEVERE, "project: " + project.getName(), ex.getMessage(), ex);
                }
                break;
            case KAFKA:
                datasetController.createDataset(user, project, tourFilesDataset, "files for guide projects", Provenance.getDatasetProvCore(projectProvCore, Provenance.MLType.DATASET), false, DatasetAccessPermission.EDITABLE, dfso);
                // Get the JAR from /user/<super user>
                String kafkaExampleSrc = "/user/" + settings.getSparkUser() + "/" + settings.getHopsExamplesSparkFilename();
                String kafkaExampleDst = projectPath + tourFilesDataset + "/" + settings.getHopsExamplesSparkFilename();
                try {
                    udfso.copyInHdfs(new Path(kafkaExampleSrc), new Path(kafkaExampleDst));
                    String datasetGroup = hdfsUsersController.getHdfsGroupName(project, tourFilesDataset);
                    String userHdfsName = hdfsUsersController.getHdfsUserName(project, user);
                    udfso.setPermission(new Path(kafkaExampleDst), udfso.getParentPermission(new Path(kafkaExampleDst)));
                    udfso.setOwner(new Path(kafkaExampleDst), userHdfsName, datasetGroup);
                } catch (IOException ex) {
                    throw new ProjectException(RESTCodes.ProjectErrorCode.PROJECT_TOUR_FILES_ERROR, Level.SEVERE, "project: " + project.getName(), ex.getMessage(), ex);
                }
                break;
            case ML:
                tourFilesDataset = Settings.HOPS_DL_TOUR_DATASET;
                datasetController.createDataset(user, project, tourFilesDataset, "sample training data for notebooks", Provenance.getDatasetProvCore(projectProvCore, Provenance.MLType.DATASET), false, DatasetAccessPermission.EDITABLE, dfso);
                String DLDataSrc = "/user/" + settings.getHdfsSuperUser() + "/" + Settings.HOPS_DEEP_LEARNING_TOUR_DATA + "/*";
                String DLDataDst = projectPath + Settings.HOPS_DL_TOUR_DATASET;
                String DLNotebooksSrc = "/user/" + settings.getHdfsSuperUser() + "/" + Settings.HOPS_DEEP_LEARNING_TOUR_NOTEBOOKS;
                String DLNotebooksDst = projectPath + Settings.HOPS_TOUR_DATASET_JUPYTER;
                try {
                    udfso.copyInHdfs(new Path(DLDataSrc), new Path(DLDataDst));
                    String datasetGroup = hdfsUsersController.getHdfsGroupName(project, Settings.HOPS_DL_TOUR_DATASET);
                    String userHdfsName = hdfsUsersController.getHdfsUserName(project, user);
                    Inode tourDs = inodeController.getInodeAtPath(DLDataDst);
                    datasetController.recChangeOwnershipAndPermission(new Path(DLDataDst), FsPermission.createImmutable(tourDs.getPermission()), userHdfsName, datasetGroup, dfso, udfso);
                    udfso.copyInHdfs(new Path(DLNotebooksSrc + "/*"), new Path(DLNotebooksDst));
                    datasetGroup = hdfsUsersController.getHdfsGroupName(project, Settings.HOPS_TOUR_DATASET_JUPYTER);
                    Inode jupyterDS = inodeController.getInodeAtPath(DLNotebooksDst);
                    datasetController.recChangeOwnershipAndPermission(new Path(DLNotebooksDst), FsPermission.createImmutable(jupyterDS.getPermission()), userHdfsName, datasetGroup, dfso, udfso);
                } catch (IOException ex) {
                    throw new ProjectException(RESTCodes.ProjectErrorCode.PROJECT_TOUR_FILES_ERROR, Level.SEVERE, "project: " + project.getName(), ex.getMessage(), ex);
                }
                break;
            case FS:
                datasetController.createDataset(user, project, tourFilesDataset, "files for guide projects", Provenance.getDatasetProvCore(projectProvCore, Provenance.MLType.DATASET), false, DatasetAccessPermission.EDITABLE, dfso);
                // Get the JAR from /user/<super user>
                String featurestoreExampleJarSrc = "/user/" + settings.getSparkUser() + "/" + settings.getHopsExamplesFeaturestoreTourFilename();
                String featurestoreExampleJarDst = projectPath + tourFilesDataset + "/" + settings.getHopsExamplesFeaturestoreTourFilename();
                // Get the sample data and notebooks from /user/<super user>/featurestore_demo/
                String featurestoreExampleDataSrc = "/user/" + settings.getHdfsSuperUser() + "/" + Settings.HOPS_FEATURESTORE_TOUR_DATA + "/data";
                String featurestoreExampleDataDst = projectPath + tourFilesDataset;
                try {
                    // Move example .jar file to HDFS
                    udfso.copyInHdfs(new Path(featurestoreExampleJarSrc), new Path(featurestoreExampleJarDst));
                    String datasetGroup = hdfsUsersController.getHdfsGroupName(project, tourFilesDataset);
                    String userHdfsName = hdfsUsersController.getHdfsUserName(project, user);
                    udfso.setPermission(new Path(featurestoreExampleJarDst), udfso.getParentPermission(new Path(featurestoreExampleJarDst)));
                    udfso.setOwner(new Path(featurestoreExampleJarDst), userHdfsName, datasetGroup);
                    // Move example data and notebooks to HDFS
                    udfso.copyInHdfs(new Path(featurestoreExampleDataSrc), new Path(featurestoreExampleDataDst));
                    datasetGroup = hdfsUsersController.getHdfsGroupName(project, tourFilesDataset);
                    userHdfsName = hdfsUsersController.getHdfsUserName(project, user);
                    Inode featurestoreDataDst = inodeController.getInodeAtPath(featurestoreExampleDataDst);
                    datasetController.recChangeOwnershipAndPermission(new Path(featurestoreExampleDataDst), FsPermission.createImmutable(featurestoreDataDst.getPermission()), userHdfsName, datasetGroup, dfso, udfso);
                    // Move example notebooks to Jupyter dataset
                    String featurestoreExampleNotebooksSrc = "/user/" + settings.getHdfsSuperUser() + "/" + Settings.HOPS_FEATURESTORE_TOUR_DATA + "/notebooks";
                    String featurestoreExampleNotebooksDst = projectPath + Settings.HOPS_TOUR_DATASET_JUPYTER;
                    udfso.copyInHdfs(new Path(featurestoreExampleNotebooksSrc + "/*"), new Path(featurestoreExampleNotebooksDst));
                    datasetGroup = hdfsUsersController.getHdfsGroupName(project, Settings.HOPS_TOUR_DATASET_JUPYTER);
                    Inode featurestoreNotebooksDst = inodeController.getInodeAtPath(featurestoreExampleNotebooksDst);
                    datasetController.recChangeOwnershipAndPermission(new Path(featurestoreExampleNotebooksDst), FsPermission.createImmutable(featurestoreNotebooksDst.getPermission()), userHdfsName, datasetGroup, dfso, udfso);
                } catch (IOException ex) {
                    throw new ProjectException(RESTCodes.ProjectErrorCode.PROJECT_TOUR_FILES_ERROR, Level.SEVERE, "project: " + project.getName(), ex.getMessage(), ex);
                }
                SparkJobConfiguration sparkJobConfiguration = new SparkJobConfiguration();
                sparkJobConfiguration.setAmQueue("default");
                sparkJobConfiguration.setAmMemory(1024);
                sparkJobConfiguration.setAmVCores(1);
                sparkJobConfiguration.setAppPath("hdfs://" + featurestoreExampleJarDst);
                sparkJobConfiguration.setMainClass(Settings.HOPS_FEATURESTORE_TOUR_JOB_CLASS);
                sparkJobConfiguration.setDefaultArgs("--input TestJob/data");
                sparkJobConfiguration.setExecutorInstances(1);
                sparkJobConfiguration.setExecutorCores(1);
                sparkJobConfiguration.setExecutorMemory(2024);
                sparkJobConfiguration.setExecutorGpus(0);
                sparkJobConfiguration.setDynamicAllocationEnabled(true);
                sparkJobConfiguration.setDynamicAllocationMinExecutors(1);
                sparkJobConfiguration.setDynamicAllocationMaxExecutors(3);
                sparkJobConfiguration.setDynamicAllocationInitialExecutors(1);
                sparkJobConfiguration.setAppName(Settings.HOPS_FEATURESTORE_TOUR_JOB_NAME);
                sparkJobConfiguration.setLocalResources(new LocalResourceDTO[0]);
                Jobs job = jobController.putJob(user, project, null, sparkJobConfiguration);
                activityFacade.persistActivity(ActivityFacade.CREATED_JOB + job.getName(), project, user, ActivityFlag.SERVICE);
                executionController.start(job, Settings.HOPS_FEATURESTORE_TOUR_JOB_INPUT_PARAM + tourFilesDataset + "/data", user);
                activityFacade.persistActivity(ActivityFacade.RAN_JOB + job.getName(), project, user, ActivityFlag.SERVICE);
                break;
            default:
                break;
        }
    }
    return tourFilesDataset;
}
Also used : Path(org.apache.hadoop.fs.Path) ProjectException(io.hops.hopsworks.exceptions.ProjectException) Inode(io.hops.hopsworks.persistence.entity.hdfs.inode.Inode) Jobs(io.hops.hopsworks.persistence.entity.jobs.description.Jobs) SparkJobConfiguration(io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration) HdfsUsers(io.hops.hopsworks.persistence.entity.hdfs.user.HdfsUsers) Users(io.hops.hopsworks.persistence.entity.user.Users) IOException(java.io.IOException) File(java.io.File)
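
The FS tour setup above also shows the general pattern for defining and running a Spark job programmatically. A distilled, hedged sketch of that pattern follows; the application path, main class, and resource values are illustrative assumptions, and the controllers are assumed to be injected as in the example.

// Illustrative job definition based on the pattern above (values are assumptions).
SparkJobConfiguration conf = new SparkJobConfiguration();
conf.setAppName("my-spark-job");
conf.setAmQueue("default");
conf.setAmMemory(1024);
conf.setAmVCores(1);
conf.setAppPath("hdfs:///Projects/myproject/Resources/my-app.jar");
conf.setMainClass("com.example.MyApp");
conf.setExecutorCores(1);
conf.setExecutorMemory(2048);
conf.setDynamicAllocationEnabled(true);
conf.setDynamicAllocationMinExecutors(1);
conf.setDynamicAllocationMaxExecutors(3);
conf.setLocalResources(new LocalResourceDTO[0]);
// Persist the job and start an execution, mirroring the tour flow.
Jobs job = jobController.putJob(user, project, null, conf);
executionController.start(job, "--input Resources/data", user);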

Aggregations

SparkJobConfiguration (io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration): 18
JobException (io.hops.hopsworks.exceptions.JobException): 4
IOException (java.io.IOException): 4
ProjectException (io.hops.hopsworks.exceptions.ProjectException): 3
Jobs (io.hops.hopsworks.persistence.entity.jobs.description.Jobs): 3
DefaultJobConfiguration (io.hops.hopsworks.persistence.entity.project.jobs.DefaultJobConfiguration): 3
HashMap (java.util.HashMap): 3
TransactionAttribute (javax.ejb.TransactionAttribute): 3
Path (org.apache.hadoop.fs.Path): 3
DatasetPath (io.hops.hopsworks.common.dataset.util.DatasetPath): 2
SparkConfigurationUtil (io.hops.hopsworks.common.util.SparkConfigurationUtil): 2
ConfigProperty (io.hops.hopsworks.common.util.templates.ConfigProperty): 2
Inode (io.hops.hopsworks.persistence.entity.hdfs.inode.Inode): 2
FlinkJobConfiguration (io.hops.hopsworks.persistence.entity.jobs.configuration.flink.FlinkJobConfiguration): 2
Execution (io.hops.hopsworks.persistence.entity.jobs.history.Execution): 2
JAXBException (javax.xml.bind.JAXBException): 2
ServiceDiscoveryException (com.logicalclocks.servicediscoverclient.exceptions.ServiceDiscoveryException): 1
Service (com.logicalclocks.servicediscoverclient.service.Service): 1
TemplateException (freemarker.template.TemplateException): 1
DistributedFileSystemOps (io.hops.hopsworks.common.hdfs.DistributedFileSystemOps): 1