Use of com.thinkbiganalytics.spark.shell.SparkShellProcess in project kylo by Teradata.
The class SparkShellScriptRunner, method runScript.
/**
 * Runs the script with the Spark Shell until it finishes.
 *
 * @return the transform response
 */
public Optional<TransformResponse> runScript() throws Exception {
    this.startTime = System.currentTimeMillis();
    this.endTime = null;
    this.isFinished = false;

    TransformRequest request = new TransformRequest();
    request.setScript(script);

    final SparkShellProcess process = sparkShellUserProcessService.getSparkShellProcess(username);
    TransformResponse response = restClient.transform(process, request);
    if (response.getStatus() == TransformResponse.Status.PENDING) {
        // still running: poll the Spark Shell for progress
        log.debug("runScript progress {}", response.getTable());
        return fetchProgress(response.getTable());
    } else {
        log.debug("finished runScript {}", response.getTable());
        finished(response);
        return Optional.of(response);
    }
}
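For context, a minimal sketch of how a caller might consume the result; the runner construction and the log variable are hypothetical, only runScript() and TransformResponse come from the snippet above.

Optional<TransformResponse> result = scriptRunner.runScript();  // scriptRunner: a hypothetical SparkShellScriptRunner instance
if (result.isPresent()) {
    // getTable() names the destination table, as logged inside runScript()
    log.info("transform produced table {}", result.get().getTable());
}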
Use of com.thinkbiganalytics.spark.shell.SparkShellProcess in project kylo by Teradata.
The class DefaultSparkJobService, method create.
@Nonnull
@Override
public SparkJobContext create(@Nonnull final SparkJobRequest request) {
    // Replace parent id with Spark's id
    if (request.getParent() != null && request.getParent().getId() != null) {
        final DefaultSparkJobContext parent = jobs.getIfPresent(request.getParent().getId());
        if (parent != null) {
            request.getParent().setId(parent.getSparkJobId());
        } else {
            throw new SparkException("job.parentExpired");
        }
    }

    // Generate script
    final StringBuilder script = new StringBuilder().append("import com.thinkbiganalytics.kylo.catalog.KyloCatalog\n");
    if (request.getResources() != null) {
        final SparkJobResources resources = request.getResources();
        script.append("KyloCatalog.builder\n");
        if (resources.getDataSets() != null) {
            resources.getDataSets().forEach(dataSetReference -> {
                final DataSet dataSet = findDataSet(dataSetReference);
                final DataSetTemplate template = DataSetUtil.mergeTemplates(dataSet);
                script.append(".addDataSet(\"").append(StringEscapeUtils.escapeJava(dataSet.getId())).append("\")");
                if (template.getFiles() != null) {
                    template.getFiles().forEach(file -> script.append(".addFile(\"").append(StringEscapeUtils.escapeJava(file)).append("\")"));
                }
                if (template.getFormat() != null) {
                    script.append(".format(\"").append(StringEscapeUtils.escapeJava(template.getFormat())).append("\")");
                }
                if (template.getJars() != null && !template.getJars().isEmpty()) {
                    script.append(".addJars(Seq(").append(template.getJars().stream().map(StringEscapeUtils::escapeJava).collect(Collectors.joining("\", \"", "\"", "\""))).append("))");
                }
                if (template.getOptions() != null) {
                    template.getOptions().forEach((name, value) -> script.append(".option(\"").append(StringEscapeUtils.escapeJava(name)).append("\", \"").append(StringEscapeUtils.escapeJava(value)).append("\")"));
                }
                if (template.getPaths() != null) {
                    script.append(".paths(Seq(").append(template.getPaths().stream().map(StringEscapeUtils::escapeJava).collect(Collectors.joining("\", \"", "\"", "\""))).append("))");
                }
                script.append('\n');
            });
        }
        if (resources.getHighWaterMarks() != null) {
            resources.getHighWaterMarks().forEach((name, value) -> script.append(".setHighWaterMark(\"").append(StringEscapeUtils.escapeJava(name)).append("\", \"").append(StringEscapeUtils.escapeJava(value)).append("\")\n"));
        }
        script.append(".build\n\n");
    }
    script.append(request.getScript()).append("\n\n")
          .append("import com.thinkbiganalytics.spark.rest.model.job.SparkJobResult\n")
          .append("val sparkJobResult = new SparkJobResult()\n")
          .append("sparkJobResult.setHighWaterMarks(KyloCatalog.builder.build.getHighWaterMarks)\n")
          .append("sparkJobResult\n");
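    // For illustration only: with a single CSV data set and one option, the
    // generated Scala script would look roughly like this (identifiers such
    // as "ds1" are hypothetical):
    //
    //   import com.thinkbiganalytics.kylo.catalog.KyloCatalog
    //   KyloCatalog.builder
    //   .addDataSet("ds1").addFile("/data/a.csv").format("csv").option("header", "true")
    //   .build
    //
    //   <the request's user script goes here>
    //
    //   import com.thinkbiganalytics.spark.rest.model.job.SparkJobResult
    //   val sparkJobResult = new SparkJobResult()
    //   sparkJobResult.setHighWaterMarks(KyloCatalog.builder.build.getHighWaterMarks)
    //   sparkJobResult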
    // Find Spark process
    final SparkShellProcess process;
    try {
        if (request.getMode() == SparkJobRequest.Mode.BATCH) {
            process = processManager.getSystemProcess();
        } else if (request.getMode() == SparkJobRequest.Mode.INTERACTIVE) {
            process = processManager.getProcessForUser(SecurityContextHolder.getContext().getAuthentication().getName());
        } else {
            throw new SparkException("job.invalid-mode");
        }
    } catch (final InterruptedException e) {
        throw new SparkException("job.cancelled", e);
    }

    // Create task
    final BatchJobSupplier task = new BatchJobSupplier(request, process, restClient);
    task.setPollInterval(pollInterval, TimeUnit.MILLISECONDS);

    // Create context
    final DefaultSparkJobContext context = DefaultSparkJobContext.create(task, cache, executor);
    jobs.put(context.getId(), context);
    return context;
}
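The addJars and paths branches above rely on Collectors.joining with delimiter, prefix, and suffix arguments to emit a quoted Scala Seq literal in a single pass. A self-contained sketch of that pattern, with an illustrative class name and jar paths:

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringEscapeUtils;

public class SeqLiteralDemo {
    public static void main(String[] args) {
        List<String> jars = Arrays.asList("udf.jar", "lib/extra.jar");
        // joining("\", \"", "\"", "\"") quotes each element and separates
        // them with a comma, yielding: "udf.jar", "lib/extra.jar"
        String fragment = ".addJars(Seq("
                          + jars.stream().map(StringEscapeUtils::escapeJava).collect(Collectors.joining("\", \"", "\"", "\""))
                          + "))";
        System.out.println(fragment);  // prints .addJars(Seq("udf.jar", "lib/extra.jar"))
    }
}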
Use of com.thinkbiganalytics.spark.shell.SparkShellProcess in project kylo by Teradata.
The class TestSparkLivyRestClient, method testGetLivySessions.
/**
 * This test assumes: 1) you have configured a Livy server for Kerberos and it is running on port 8998, and 2) that server has the library
 * 'kylo-spark-shell-client-v1-0.10.0-SNAPSHOT.jar' installed.
 */
@Test
@Ignore // integration test; requires a Livy server configured on the test system
public void testGetLivySessions() throws JsonProcessingException, InterruptedException {
    System.setProperty("sun.security.krb5.debug", "true");
    System.setProperty("sun.security.jgss.debug", "true");

    SessionsGet sg = new SessionsGet.Builder().from(1).size(2).build();
    String sgJson = new ObjectMapper().writeValueAsString(sg);
    logger.debug("{}", sgJson);
    assertThat(sgJson).isEqualToIgnoringCase("{\"from\":1,\"size\":2}");

    logger.debug("kerberosSparkProperties={}", kerberosSparkProperties);
    SparkShellProcess sparkProcess = sparkLivyProcessManager.getProcessForUser("kylo");
    // ((SparkLivyProcess) sparkProcess).setPort(8999);
    JerseyRestClient client = sparkLivyProcessManager.getClient(sparkProcess);
    sparkLivyProcessManager.start("kylo");

    Integer stmtId = 0;
    Statement statement = livyRestProvider.pollStatement(client, sparkProcess, stmtId);
    logger.debug("statement={}", statement);
    assertThat(statement.getState()).isEqualTo(StatementState.available);
    assertThat(statement.getOutput().getStatus()).isEqualTo(StatementOutputStatus.ok);
}
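The from/size pair serialized above maps onto Livy's session paging parameters. As a sketch, the equivalent REST query could be built like this; the helper is hypothetical and not part of Kylo's or Livy's client API:

// Hypothetical helper: Livy's GET /sessions endpoint accepts "from" and "size"
// query parameters for paging, matching the JSON fields asserted in the test.
static String sessionsQuery(int from, int size) {
    return String.format("/sessions?from=%d&size=%d", from, size);
}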