Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate in project kylo by Teradata.
The class SparkShellProxyController, method addDataSourceInformation.
/**
 * Returns a copy of the given data set with its data source's effective template attached.
 *
 * <p>The data set is re-fetched by id, the catalog templates are merged via
 * {@link DataSetUtil#mergeTemplates}, and the merged template is stored on the
 * fetched data set's data source before copying.</p>
 *
 * @param dataSet the data set whose id is used for the lookup
 * @return a new {@link DataSet} copy carrying the merged template
 */
private DataSet addDataSourceInformation(@Nonnull DataSet dataSet) {
    final DataSet fetched = fetchDataSet(dataSet.getId());
    fetched.getDataSource().setTemplate(DataSetUtil.mergeTemplates(fetched));
    return new DataSet(fetched);
}
Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate in project kylo by Teradata.
The class AbstractKyloCatalogClient, method read.
/**
 * Creates a reader for the registered data set with the specified id.
 *
 * @param id identifier of a previously registered data set template
 * @return a reader pre-configured with that data set's template
 * @throws KyloCatalogException if no data set is registered under {@code id}
 */
@Nonnull
@Override
public KyloCatalogReader<T> read(@Nonnull final String id) {
    // Guard clause: fail fast when the id is unknown (or no data sets were registered at all)
    final DataSetTemplate template = (dataSets == null) ? null : dataSets.get(id);
    if (template == null) {
        throw new KyloCatalogException("Data set does not exist: " + id);
    }
    final DefaultKyloCatalogReader<T> reader = new DefaultKyloCatalogReader<>(this, hadoopConfiguration, resourceLoader);
    reader.dataSet(template);
    return reader;
}
Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate in project kylo by Teradata.
The class DataSetUtil, method getPaths.
/**
 * Gets the paths for the specified data set.
 *
 * <p>The single {@code "path"} option and the {@code paths} list are each resolved
 * independently with the same precedence: the data set itself, then the data source's
 * template, then the connector's template.</p>
 *
 * @param dataSet the data set to inspect
 * @return the combined paths, or empty if none are defined anywhere
 */
@Nonnull
public static Optional<List<String>> getPaths(@Nonnull final DataSet dataSet) {
    final Optional<DataSource> source = Optional.of(dataSet).map(DataSet::getDataSource);
    final DataSetTemplate connectorTemplate = source.map(DataSource::getConnector).map(Connector::getTemplate).orElse(null);
    final DataSetTemplate sourceTemplate = source.map(DataSource::getTemplate).orElse(null);

    List<String> paths = new ArrayList<>();

    // Resolve the single "path" option; first non-null value in precedence order wins.
    String pathOption = null;
    if (dataSet.getOptions() != null) {
        pathOption = dataSet.getOptions().get("path");
    }
    if (pathOption == null && sourceTemplate != null && sourceTemplate.getOptions() != null) {
        pathOption = sourceTemplate.getOptions().get("path");
    }
    if (pathOption == null && connectorTemplate != null && connectorTemplate.getOptions() != null) {
        pathOption = connectorTemplate.getOptions().get("path");
    }
    if (pathOption != null) {
        paths.add(pathOption);
    }

    // Resolve the paths list with the same precedence; a non-null list stops the search.
    if (dataSet.getPaths() != null) {
        paths.addAll(dataSet.getPaths());
    } else if (sourceTemplate != null && sourceTemplate.getPaths() != null) {
        paths.addAll(sourceTemplate.getPaths());
    } else if (connectorTemplate != null && connectorTemplate.getPaths() != null) {
        paths.addAll(connectorTemplate.getPaths());
    } else if (paths.isEmpty()) {
        // Nothing defined anywhere: report absence rather than an empty list.
        paths = null;
    }

    return Optional.ofNullable(paths);
}
Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate in project kylo by Teradata.
The class DefaultSparkJobService, method create.
/**
 * Creates and registers a Spark job context for the given request.
 *
 * <p>Generates a Scala script that configures {@code KyloCatalog} with the requested
 * data sets and high-water marks, appends the user's script, selects the Spark Shell
 * process matching the request mode, and schedules the job for execution.</p>
 *
 * @param request the job request; its parent id (if any) is rewritten in place to Spark's id
 * @return the newly registered job context
 * @throws SparkException if the parent job has expired, the mode is invalid,
 *                        or process acquisition is interrupted
 */
@Nonnull
@Override
public SparkJobContext create(@Nonnull final SparkJobRequest request) {
    // Replace parent id with Spark's id
    if (request.getParent() != null && request.getParent().getId() != null) {
        final DefaultSparkJobContext parent = jobs.getIfPresent(request.getParent().getId());
        if (parent != null) {
            request.getParent().setId(parent.getSparkJobId());
        } else {
            throw new SparkException("job.parentExpired");
        }
    }

    // Generate script
    final StringBuilder script = new StringBuilder().append("import com.thinkbiganalytics.kylo.catalog.KyloCatalog\n");
    if (request.getResources() != null) {
        final SparkJobResources resources = request.getResources();
        script.append("KyloCatalog.builder\n");

        if (resources.getDataSets() != null) {
            resources.getDataSets().forEach(dataSetReference -> {
                final DataSet dataSet = findDataSet(dataSetReference);
                final DataSetTemplate template = DataSetUtil.mergeTemplates(dataSet);
                script.append(".addDataSet(\"").append(StringEscapeUtils.escapeJava(dataSet.getId())).append("\")");
                if (template.getFiles() != null) {
                    template.getFiles().forEach(file -> script.append(".addFile(\"").append(StringEscapeUtils.escapeJava(file)).append("\")"));
                }
                if (template.getFormat() != null) {
                    // BUG FIX: previously appended a bare ')' which left the string literal
                    // unterminated in the generated Scala: .format("csv)
                    script.append(".format(\"").append(StringEscapeUtils.escapeJava(template.getFormat())).append("\")");
                }
                if (template.getJars() != null && !template.getJars().isEmpty()) {
                    script.append(".addJars(Seq(").append(template.getJars().stream().map(StringEscapeUtils::escapeJava).collect(Collectors.joining("\", \"", "\"", "\""))).append("))");
                }
                if (template.getOptions() != null) {
                    template.getOptions().forEach((name, value) -> script.append(".option(\"").append(StringEscapeUtils.escapeJava(name)).append("\", \"").append(StringEscapeUtils.escapeJava(value)).append("\")"));
                }
                if (template.getPaths() != null) {
                    script.append(".paths(Seq(").append(template.getPaths().stream().map(StringEscapeUtils::escapeJava).collect(Collectors.joining("\", \"", "\"", "\""))).append("))");
                }
                script.append('\n');
            });
        }

        if (resources.getHighWaterMarks() != null) {
            // BUG FIX: previously emitted "\"))\n" — one extra closing paren per call,
            // which broke the generated builder chain.
            resources.getHighWaterMarks().forEach((name, value) -> script.append(".setHighWaterMark(\"").append(StringEscapeUtils.escapeJava(name)).append("\", \"").append(StringEscapeUtils.escapeJava(value)).append("\")\n"));
        }

        script.append(".build\n\n");
    }

    // BUG FIX: the import line previously had no trailing newline, emitting
    // "...SparkJobResultval sparkJobResult = ..." in the generated script.
    script.append(request.getScript()).append("\n\n")
        .append("import com.thinkbiganalytics.spark.rest.model.job.SparkJobResult\n")
        .append("val sparkJobResult = new SparkJobResult()\n")
        .append("sparkJobResult.setHighWaterMarks(KyloCatalog.builder.build.getHighWaterMarks)\n")
        .append("sparkJobResult\n");

    // Find Spark process: batch jobs share the system process; interactive jobs
    // get (or reuse) a per-user process.
    final SparkShellProcess process;
    try {
        if (request.getMode() == SparkJobRequest.Mode.BATCH) {
            process = processManager.getSystemProcess();
        } else if (request.getMode() == SparkJobRequest.Mode.INTERACTIVE) {
            process = processManager.getProcessForUser(SecurityContextHolder.getContext().getAuthentication().getName());
        } else {
            throw new SparkException("job.invalid-mode");
        }
    } catch (final InterruptedException e) {
        Thread.currentThread().interrupt();  // preserve the interrupt flag for callers
        throw new SparkException("job.cancelled", e);
    }

    // Create task
    final BatchJobSupplier task = new BatchJobSupplier(request, process, restClient);
    task.setPollInterval(pollInterval, TimeUnit.MILLISECONDS);

    // Create and register context so later requests can reference this job as a parent
    final DefaultSparkJobContext context = DefaultSparkJobContext.create(task, cache, executor);
    jobs.put(context.getId(), context);
    return context;
}
Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate in project kylo by Teradata.
The class KyloCatalogReaderUtil, method toKyloCatalogRequest.
/**
 * Builds a {@link KyloCatalogReadRequest} from a preview request.
 *
 * <p>Starts from the data source's merged template (jars, files, format, options),
 * lets the schema parser's spark options and format override the template's, then
 * layers on any request-level properties. The preview item, when present and flagged,
 * becomes the request's sole path.</p>
 *
 * @param previewRequest the preview request to convert
 * @return the populated read request
 */
public static KyloCatalogReadRequest toKyloCatalogRequest(PreviewDataSetRequest previewRequest) {
    DataSource dataSource = previewRequest.getDataSource();

    // merge template
    DataSetTemplate dataSetTemplate = DataSourceUtil.mergeTemplates(dataSource);

    // get data out of the dataset template
    // NOTE(review): the template's paths are never copied onto the request — only the
    // preview item (below) is added as a path. TODO confirm this is intentional.
    List<String> jars = dataSetTemplate.getJars();
    List<String> files = dataSetTemplate.getFiles();
    String format = dataSetTemplate.getFormat();
    Map<String, String> options = dataSetTemplate.getOptions();
    if (options == null) {
        options = new HashMap<>();
    }

    // parse the SchemaParser if it exists and add options and update the format
    if (previewRequest.getSchemaParser() != null) {
        SchemaParserDescriptor schemaParser = previewRequest.getSchemaParser();
        // Map each parser property to its "spark.option" label value (or "" when absent).
        // NOTE(review): Collectors.toMap throws on duplicate keys — assumes at most one
        // property per spark option name; confirm against parser descriptors.
        Map<String, String> sparkOptions = schemaParser.getProperties().stream().collect(Collectors.toMap(p -> p.getAdditionalProperties().stream().filter(labelValue -> "spark.option".equalsIgnoreCase(labelValue.getLabel())).map(labelValue -> labelValue.getValue()).findFirst().orElse(""), p -> p.getValue()));
        // remove any options that produced an empty key
        sparkOptions.remove("");
        // supplied options by the schema parse take precedence over the template options
        options.putAll(sparkOptions);
        format = schemaParser.getSparkFormat();
    }

    // add in additional preview options
    if (previewRequest.getProperties() != null && !previewRequest.getProperties().isEmpty()) {
        options.putAll(previewRequest.getProperties());
    }

    KyloCatalogReadRequest request = new KyloCatalogReadRequest();
    request.setFiles(files);
    request.setJars(jars);
    request.setFormat(format);
    request.setOptions(options);
    if (previewRequest.getPreviewItem() != null && previewRequest.isAddPreviewItemToPath()) {
        request.addPath(previewRequest.getPreviewItem());
    }

    // Always supply a page spec so downstream code need not null-check it.
    PageSpec pageSpec = previewRequest.getPageSpec();
    if (pageSpec == null) {
        pageSpec = new PageSpec();
    }
    request.setPageSpec(pageSpec);
    return request;
}
Aggregations