Search in sources :

Example 21 with DataSet

use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSet in project kylo by Teradata.

the class DefaultSparkJobService method create.

/**
 * Creates and registers a Spark job context for the specified request.
 *
 * <p>Steps performed, in order:</p>
 * <ol>
 * <li>If the request names a parent job, the parent id is looked up in {@code jobs} and replaced <b>in the request itself</b> with the parent's Spark job id.</li>
 * <li>A Scala script is assembled that registers the requested data sets and high water marks with {@code KyloCatalog}, followed by the request's own script and a trailer that
 * produces a {@code SparkJobResult}.</li>
 * <li>A Spark Shell process is selected based on the request mode.</li>
 * <li>A {@link BatchJobSupplier} task is created, wrapped in a {@link DefaultSparkJobContext}, and stored in {@code jobs} under the context id.</li>
 * </ol>
 *
 * @param request the job request; its parent id is mutated when a parent is present
 * @return the newly created job context
 * @throws SparkException if the parent job is no longer present in {@code jobs}, if the mode is neither {@code BATCH} nor {@code INTERACTIVE}, or if the thread is interrupted while
 *                        obtaining the Spark Shell process
 */
@Nonnull
@Override
public SparkJobContext create(@Nonnull final SparkJobRequest request) {
    // Replace parent id with Spark's id
    if (request.getParent() != null && request.getParent().getId() != null) {
        final DefaultSparkJobContext parent = jobs.getIfPresent(request.getParent().getId());
        if (parent != null) {
            request.getParent().setId(parent.getSparkJobId());
        } else {
            throw new SparkException("job.parentExpired");
        }
    }

    // Generate script
    // Every value spliced into a Scala string literal goes through escapeJava so quotes and backslashes cannot terminate the literal early.
    final StringBuilder script = new StringBuilder().append("import com.thinkbiganalytics.kylo.catalog.KyloCatalog\n");
    if (request.getResources() != null) {
        final SparkJobResources resources = request.getResources();
        script.append("KyloCatalog.builder\n");

        if (resources.getDataSets() != null) {
            resources.getDataSets().forEach(dataSetReference -> {
                final DataSet dataSet = findDataSet(dataSetReference);
                final DataSetTemplate template = DataSetUtil.mergeTemplates(dataSet);
                script.append(".addDataSet(\"").append(StringEscapeUtils.escapeJava(dataSet.getId())).append("\")");

                if (template.getFiles() != null) {
                    template.getFiles().forEach(file -> script.append(".addFile(\"").append(StringEscapeUtils.escapeJava(file)).append("\")"));
                }
                if (template.getFormat() != null) {
                    // Close the string literal before the parenthesis; previously only ')' was appended, leaving the literal unterminated.
                    script.append(".format(\"").append(StringEscapeUtils.escapeJava(template.getFormat())).append("\")");
                }
                if (template.getJars() != null && !template.getJars().isEmpty()) {
                    script.append(".addJars(Seq(").append(template.getJars().stream().map(StringEscapeUtils::escapeJava).collect(Collectors.joining("\", \"", "\"", "\""))).append("))");
                }
                if (template.getOptions() != null) {
                    template.getOptions().forEach((name, value) -> script.append(".option(\"").append(StringEscapeUtils.escapeJava(name)).append("\", \"")
                        .append(StringEscapeUtils.escapeJava(value)).append("\")"));
                }
                if (template.getPaths() != null) {
                    // NOTE(review): unlike jars, an empty paths list is not skipped and renders as Seq("") - confirm whether that is intended.
                    script.append(".paths(Seq(").append(template.getPaths().stream().map(StringEscapeUtils::escapeJava).collect(Collectors.joining("\", \"", "\"", "\""))).append("))");
                }
                script.append('\n');
            });
        }

        if (resources.getHighWaterMarks() != null) {
            // A single ')' closes the call; the previous "))" produced an unbalanced parenthesis in the builder chain.
            resources.getHighWaterMarks().forEach((name, value) -> script.append(".setHighWaterMark(\"").append(StringEscapeUtils.escapeJava(name)).append("\", \"")
                .append(StringEscapeUtils.escapeJava(value)).append("\")\n"));
        }

        script.append(".build\n\n");
    }

    // The import needs its own line; without the newline it ran into the following 'val' statement.
    script.append(request.getScript())
        .append("\n\n")
        .append("import com.thinkbiganalytics.spark.rest.model.job.SparkJobResult\n")
        .append("val sparkJobResult = new SparkJobResult()\n")
        .append("sparkJobResult.setHighWaterMarks(KyloCatalog.builder.build.getHighWaterMarks)\n")
        .append("sparkJobResult\n");
    // NOTE(review): the assembled script is not passed to anything below; the task is built from the original request.
    // Confirm whether the request's script is supposed to be replaced with this one before the task is created.

    // Find Spark process
    final SparkShellProcess process;
    try {
        if (request.getMode() == SparkJobRequest.Mode.BATCH) {
            process = processManager.getSystemProcess();
        } else if (request.getMode() == SparkJobRequest.Mode.INTERACTIVE) {
            process = processManager.getProcessForUser(SecurityContextHolder.getContext().getAuthentication().getName());
        } else {
            throw new SparkException("job.invalid-mode");
        }
    } catch (final InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
        throw new SparkException("job.cancelled", e);
    }

    // Create task
    final BatchJobSupplier task = new BatchJobSupplier(request, process, restClient);
    task.setPollInterval(pollInterval, TimeUnit.MILLISECONDS);

    // Create context
    final DefaultSparkJobContext context = DefaultSparkJobContext.create(task, cache, executor);
    jobs.put(context.getId(), context);
    return context;
}
Also used : SparkShellProcess(com.thinkbiganalytics.spark.shell.SparkShellProcess) SparkException(com.thinkbiganalytics.kylo.spark.SparkException) DataSet(com.thinkbiganalytics.kylo.catalog.rest.model.DataSet) DataSetTemplate(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate) SparkJobResources(com.thinkbiganalytics.kylo.spark.rest.model.job.SparkJobResources) BatchJobSupplier(com.thinkbiganalytics.kylo.spark.job.tasks.BatchJobSupplier) Nonnull(javax.annotation.Nonnull)

Aggregations

DataSet (com.thinkbiganalytics.kylo.catalog.rest.model.DataSet)21 ApiOperation (io.swagger.annotations.ApiOperation)9 DataSource (com.thinkbiganalytics.kylo.catalog.rest.model.DataSource)8 ApiResponses (io.swagger.annotations.ApiResponses)8 Path (javax.ws.rs.Path)8 CatalogException (com.thinkbiganalytics.kylo.catalog.CatalogException)7 DataSetFile (com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile)6 BadRequestException (javax.ws.rs.BadRequestException)6 DefaultDataSetTemplate (com.thinkbiganalytics.kylo.catalog.rest.model.DefaultDataSetTemplate)5 Nonnull (javax.annotation.Nonnull)5 POST (javax.ws.rs.POST)5 Test (org.junit.Test)5 Produces (javax.ws.rs.Produces)4 File (java.io.File)3 Consumes (javax.ws.rs.Consumes)3 InternalServerErrorException (javax.ws.rs.InternalServerErrorException)3 NotFoundException (javax.ws.rs.NotFoundException)3 WebApplicationException (javax.ws.rs.WebApplicationException)3 FileAlreadyExistsException (org.apache.hadoop.fs.FileAlreadyExistsException)3 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2