Search in sources :

Example 11 with SparkShellProcess

use of com.thinkbiganalytics.spark.shell.SparkShellProcess in project kylo by Teradata.

The class SparkShellScriptRunner, method runScript.

/**
 * Submits the configured script to the user's Spark Shell process and runs it to completion.
 *
 * @return the transform response, if the script produced one
 * @throws Exception if the Spark Shell process cannot be obtained or the transform request fails
 */
public Optional<TransformResponse> runScript() throws Exception {
    // Reset run-state bookkeeping before submitting a new execution.
    this.isFinished = false;
    this.endTime = null;
    this.startTime = System.currentTimeMillis();

    // Wrap the stored script in a transform request.
    final TransformRequest transformRequest = new TransformRequest();
    transformRequest.setScript(script);

    // Route the request through the Spark Shell process owned by this user.
    final SparkShellProcess shellProcess = sparkShellUserProcessService.getSparkShellProcess(username);
    final TransformResponse transformResponse = restClient.transform(shellProcess, transformRequest);

    if (transformResponse.getStatus() != TransformResponse.Status.PENDING) {
        // Completed synchronously: record completion and return the result directly.
        log.debug("finished runScript {}", transformResponse.getTable());
        finished(transformResponse);
        return Optional.of(transformResponse);
    }

    // Still running on the shell: poll until the transform finishes.
    log.debug("runScript progress {}", transformResponse.getTable());
    return fetchProgress(transformResponse.getTable());
}
Also used : SparkShellProcess(com.thinkbiganalytics.spark.shell.SparkShellProcess) TransformResponse(com.thinkbiganalytics.spark.rest.model.TransformResponse) TransformRequest(com.thinkbiganalytics.spark.rest.model.TransformRequest)

Example 12 with SparkShellProcess

use of com.thinkbiganalytics.spark.shell.SparkShellProcess in project kylo by Teradata.

The class DefaultSparkJobService, method create.

/**
 * Creates a Spark job context for the given request.
 *
 * <p>Resolves the parent job id (if any) to Spark's id, generates the Scala script that wires the
 * requested data sets and high-water marks into {@code KyloCatalog}, selects the Spark Shell
 * process appropriate for the request mode, and registers a polling task for the job.</p>
 *
 * @param request the Spark job request; never {@code null}
 * @return the context tracking the submitted job
 * @throws SparkException if the parent job has expired ({@code job.parentExpired}), the mode is
 *         unrecognized ({@code job.invalid-mode}), or acquiring a process was interrupted
 *         ({@code job.cancelled})
 */
@Nonnull
@Override
public SparkJobContext create(@Nonnull final SparkJobRequest request) {
    // Replace parent id with Spark's id
    if (request.getParent() != null && request.getParent().getId() != null) {
        final DefaultSparkJobContext parent = jobs.getIfPresent(request.getParent().getId());
        if (parent != null) {
            request.getParent().setId(parent.getSparkJobId());
        } else {
            throw new SparkException("job.parentExpired");
        }
    }
    // Generate script. All user-supplied values are escaped with escapeJava so they are safe
    // inside double-quoted Scala string literals.
    final StringBuilder script = new StringBuilder().append("import com.thinkbiganalytics.kylo.catalog.KyloCatalog\n");
    if (request.getResources() != null) {
        final SparkJobResources resources = request.getResources();
        script.append("KyloCatalog.builder\n");
        if (resources.getDataSets() != null) {
            resources.getDataSets().forEach(dataSetReference -> {
                final DataSet dataSet = findDataSet(dataSetReference);
                final DataSetTemplate template = DataSetUtil.mergeTemplates(dataSet);
                script.append(".addDataSet(\"").append(StringEscapeUtils.escapeJava(dataSet.getId())).append("\")");
                if (template.getFiles() != null) {
                    template.getFiles().forEach(file -> script.append(".addFile(\"").append(StringEscapeUtils.escapeJava(file)).append("\")"));
                }
                if (template.getFormat() != null) {
                    // BUG FIX: previously appended the bare char ')' which left the string literal
                    // unterminated in the generated script (.format("csv) instead of .format("csv")).
                    script.append(".format(\"").append(StringEscapeUtils.escapeJava(template.getFormat())).append("\")");
                }
                if (template.getJars() != null && !template.getJars().isEmpty()) {
                    script.append(".addJars(Seq(").append(template.getJars().stream().map(StringEscapeUtils::escapeJava).collect(Collectors.joining("\", \"", "\"", "\""))).append("))");
                }
                if (template.getOptions() != null) {
                    template.getOptions().forEach((name, value) -> script.append(".option(\"").append(StringEscapeUtils.escapeJava(name)).append("\", \"").append(StringEscapeUtils.escapeJava(value)).append("\")"));
                }
                if (template.getPaths() != null) {
                    script.append(".paths(Seq(").append(template.getPaths().stream().map(StringEscapeUtils::escapeJava).collect(Collectors.joining("\", \"", "\"", "\""))).append("))");
                }
                script.append('\n');
            });
        }
        if (resources.getHighWaterMarks() != null) {
            // BUG FIX: previously appended "\"))\n" which emitted an unbalanced extra ')' —
            // .setHighWaterMark("name", "value")) — breaking the generated script.
            resources.getHighWaterMarks().forEach((name, value) -> script.append(".setHighWaterMark(\"").append(StringEscapeUtils.escapeJava(name)).append("\", \"").append(StringEscapeUtils.escapeJava(value)).append("\")\n"));
        }
        script.append(".build\n\n");
    }
    // BUG FIX: the SparkJobResult import previously had no trailing newline, fusing it with the
    // following "val sparkJobResult" declaration into one invalid Scala line.
    script.append(request.getScript()).append("\n\n").append("import com.thinkbiganalytics.spark.rest.model.job.SparkJobResult\n").append("val sparkJobResult = new SparkJobResult()\n").append("sparkJobResult.setHighWaterMarks(KyloCatalog.builder.build.getHighWaterMarks)\n").append("sparkJobResult\n");
    // Find Spark process: BATCH jobs share the system process, INTERACTIVE jobs get a
    // per-user process keyed by the authenticated principal.
    final SparkShellProcess process;
    try {
        if (request.getMode() == SparkJobRequest.Mode.BATCH) {
            process = processManager.getSystemProcess();
        } else if (request.getMode() == SparkJobRequest.Mode.INTERACTIVE) {
            process = processManager.getProcessForUser(SecurityContextHolder.getContext().getAuthentication().getName());
        } else {
            throw new SparkException("job.invalid-mode");
        }
    } catch (final InterruptedException e) {
        // Restore the interrupt flag so callers up the stack can observe the interruption.
        Thread.currentThread().interrupt();
        throw new SparkException("job.cancelled", e);
    }
    // Create task
    final BatchJobSupplier task = new BatchJobSupplier(request, process, restClient);
    task.setPollInterval(pollInterval, TimeUnit.MILLISECONDS);
    // Create context and register it so later requests can reference this job as a parent.
    final DefaultSparkJobContext context = DefaultSparkJobContext.create(task, cache, executor);
    jobs.put(context.getId(), context);
    return context;
}
Also used : SparkShellProcess(com.thinkbiganalytics.spark.shell.SparkShellProcess) SparkException(com.thinkbiganalytics.kylo.spark.SparkException) DataSet(com.thinkbiganalytics.kylo.catalog.rest.model.DataSet) DataSetTemplate(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate) SparkJobResources(com.thinkbiganalytics.kylo.spark.rest.model.job.SparkJobResources) BatchJobSupplier(com.thinkbiganalytics.kylo.spark.job.tasks.BatchJobSupplier) Nonnull(javax.annotation.Nonnull)

Example 13 with SparkShellProcess

use of com.thinkbiganalytics.spark.shell.SparkShellProcess in project kylo by Teradata.

The class TestSparkLivyRestClient, method testGetLivySessions.

/**
 * Integration test for Livy sessions. Assumes: 1) a Livy Server configured for Kerberos is
 * running at 8998, and 2) that server has the library
 * 'kylo-spark-shell-client-v1-0.10.0-SNAPSHOT.jar' installed.
 */
@Test
// ignore, for now, because this is an integration test that requires livy configured on the test system
@Ignore
public void testGetLivySessions() throws JsonProcessingException, InterruptedException {
    // Turn on verbose Kerberos/JGSS tracing to help diagnose authentication failures.
    System.setProperty("sun.security.krb5.debug", "true");
    System.setProperty("sun.security.jgss.debug", "true");

    // Sanity-check that the SessionsGet request serializes to the expected JSON shape.
    final SessionsGet sessionsGet = new SessionsGet.Builder().from(1).size(2).build();
    final String sessionsJson = new ObjectMapper().writeValueAsString(sessionsGet);
    logger.debug("{}", sessionsJson);
    assertThat(sessionsJson).isEqualToIgnoringCase("{\"from\":1,\"size\":2}");

    logger.debug("kerberosSparkProperties={}", kerberosSparkProperties);

    // Acquire a Livy-backed shell process for the 'kylo' user and start it.
    final SparkShellProcess shellProcess = sparkLivyProcessManager.getProcessForUser("kylo");
    // ((SparkLivyProcess) shellProcess).setPort(8999);
    final JerseyRestClient jerseyClient = sparkLivyProcessManager.getClient(shellProcess);
    sparkLivyProcessManager.start("kylo");

    // Poll the first statement (id 0) and verify it completed successfully.
    final Integer statementId = 0;
    final Statement polled = livyRestProvider.pollStatement(jerseyClient, shellProcess, statementId);
    logger.debug("statement={}", polled);
    assertThat(polled.getState()).isEqualTo(StatementState.available);
    assertThat(polled.getOutput().getStatus()).isEqualTo(StatementOutputStatus.ok);
}
Also used : SparkShellProcess(com.thinkbiganalytics.spark.shell.SparkShellProcess) Statement(com.thinkbiganalytics.kylo.spark.model.Statement) SessionsGet(com.thinkbiganalytics.kylo.spark.model.SessionsGet) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JerseyRestClient(com.thinkbiganalytics.rest.JerseyRestClient) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

SparkShellProcess (com.thinkbiganalytics.spark.shell.SparkShellProcess)13 ApiOperation (io.swagger.annotations.ApiOperation)7 ApiResponses (io.swagger.annotations.ApiResponses)7 Consumes (javax.ws.rs.Consumes)7 Path (javax.ws.rs.Path)7 Produces (javax.ws.rs.Produces)7 Nonnull (javax.annotation.Nonnull)6 POST (javax.ws.rs.POST)6 TransformResponse (com.thinkbiganalytics.spark.rest.model.TransformResponse)5 SparkException (com.thinkbiganalytics.kylo.spark.SparkException)3 DataSet (com.thinkbiganalytics.kylo.catalog.rest.model.DataSet)2 TransformRequest (com.thinkbiganalytics.spark.rest.model.TransformRequest)2 SparkShellSaveException (com.thinkbiganalytics.spark.shell.SparkShellSaveException)2 SparkShellTransformException (com.thinkbiganalytics.spark.shell.SparkShellTransformException)2 File (java.io.File)2 IOException (java.io.IOException)2 TimeoutException (java.util.concurrent.TimeoutException)2 BadRequestException (javax.ws.rs.BadRequestException)2 NotFoundException (javax.ws.rs.NotFoundException)2 WebApplicationException (javax.ws.rs.WebApplicationException)2