Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSet in the project kylo by Teradata.
The method create of the class DefaultSparkJobService.
/**
 * Creates a Spark job for the specified request and registers its context in {@code jobs}.
 *
 * <p>If the request names a parent job, the parent's id is replaced in place with that job's Spark job id. A Scala script is then generated which registers
 * the requested data sets and high water marks with {@code KyloCatalog}, runs the script supplied in the request, and ends with a {@code SparkJobResult}
 * carrying the catalog's high water marks. All values embedded in the generated script are escaped with {@code StringEscapeUtils.escapeJava} and wrapped in
 * double quotes.</p>
 *
 * @param request the job request; the id of its parent (if any) is rewritten by this method
 * @return the context of the newly created job
 * @throws SparkException if the parent job is no longer in {@code jobs}, if the mode is neither {@code BATCH} nor {@code INTERACTIVE}, or if the thread is
 *                        interrupted while obtaining the Spark process
 */
@Nonnull
@Override
public SparkJobContext create(@Nonnull final SparkJobRequest request) {
    // Replace parent id with Spark's id
    if (request.getParent() != null && request.getParent().getId() != null) {
        final DefaultSparkJobContext parent = jobs.getIfPresent(request.getParent().getId());
        if (parent == null) {
            throw new SparkException("job.parentExpired");
        }
        request.getParent().setId(parent.getSparkJobId());
    }

    // Generate script
    final StringBuilder script = new StringBuilder().append("import com.thinkbiganalytics.kylo.catalog.KyloCatalog\n");

    if (request.getResources() != null) {
        final SparkJobResources resources = request.getResources();
        script.append("KyloCatalog.builder\n");

        if (resources.getDataSets() != null) {
            resources.getDataSets().forEach(dataSetReference -> {
                final DataSet dataSet = findDataSet(dataSetReference);
                final DataSetTemplate template = DataSetUtil.mergeTemplates(dataSet);

                script.append(".addDataSet(\"").append(StringEscapeUtils.escapeJava(dataSet.getId())).append("\")");
                if (template.getFiles() != null) {
                    template.getFiles().forEach(file -> script.append(".addFile(\"").append(StringEscapeUtils.escapeJava(file)).append("\")"));
                }
                if (template.getFormat() != null) {
                    // The closing quote is required; without it the generated string literal is never terminated
                    script.append(".format(\"").append(StringEscapeUtils.escapeJava(template.getFormat())).append("\")");
                }
                if (template.getJars() != null && !template.getJars().isEmpty()) {
                    script.append(".addJars(Seq(")
                        .append(template.getJars().stream().map(StringEscapeUtils::escapeJava).collect(Collectors.joining("\", \"", "\"", "\"")))
                        .append("))");
                }
                if (template.getOptions() != null) {
                    template.getOptions().forEach((name, value) -> script.append(".option(\"").append(StringEscapeUtils.escapeJava(name)).append("\", \"")
                        .append(StringEscapeUtils.escapeJava(value)).append("\")"));
                }
                // Skip empty lists (as for jars): joining an empty stream would otherwise emit Seq("") - a single empty path
                if (template.getPaths() != null && !template.getPaths().isEmpty()) {
                    script.append(".paths(Seq(")
                        .append(template.getPaths().stream().map(StringEscapeUtils::escapeJava).collect(Collectors.joining("\", \"", "\"", "\"")))
                        .append("))");
                }
                script.append('\n');
            });
        }

        if (resources.getHighWaterMarks() != null) {
            // Exactly one closing parenthesis: the call opens only one
            resources.getHighWaterMarks().forEach((name, value) -> script.append(".setHighWaterMark(\"").append(StringEscapeUtils.escapeJava(name)).append("\", \"")
                .append(StringEscapeUtils.escapeJava(value)).append("\")\n"));
        }

        script.append(".build\n\n");
    }

    // Append the user's script followed by the statements that produce the job result.
    // The import must end with a newline so that it does not run into the following 'val' statement.
    script.append(request.getScript())
        .append("\n\n")
        .append("import com.thinkbiganalytics.spark.rest.model.job.SparkJobResult\n")
        .append("val sparkJobResult = new SparkJobResult()\n")
        .append("sparkJobResult.setHighWaterMarks(KyloCatalog.builder.build.getHighWaterMarks)\n")
        .append("sparkJobResult\n");

    // Find Spark process
    final SparkShellProcess process;
    try {
        if (request.getMode() == SparkJobRequest.Mode.BATCH) {
            process = processManager.getSystemProcess();
        } else if (request.getMode() == SparkJobRequest.Mode.INTERACTIVE) {
            process = processManager.getProcessForUser(SecurityContextHolder.getContext().getAuthentication().getName());
        } else {
            throw new SparkException("job.invalid-mode");
        }
    } catch (final InterruptedException e) {
        // Restore the interrupt flag so that callers further up the stack can still observe the interruption
        Thread.currentThread().interrupt();
        throw new SparkException("job.cancelled", e);
    }

    // Create task
    final BatchJobSupplier task = new BatchJobSupplier(request, process, restClient);
    task.setPollInterval(pollInterval, TimeUnit.MILLISECONDS);

    // Create context
    final DefaultSparkJobContext context = DefaultSparkJobContext.create(task, cache, executor);
    jobs.put(context.getId(), context);
    return context;
}
Aggregations