
Example 6 with DataSetTemplate

Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate in project kylo by Teradata.

In the class SparkShellProxyController, the method addDataSourceInformation:

private DataSet addDataSourceInformation(@Nonnull DataSet dataSet) {
    DataSet fetchedDataSet = fetchDataSet(dataSet.getId());
    DataSetTemplate template = DataSetUtil.mergeTemplates(fetchedDataSet);
    fetchedDataSet.getDataSource().setTemplate(template);
    return new DataSet(fetchedDataSet);
}
Also used: DataSet(com.thinkbiganalytics.kylo.catalog.rest.model.DataSet) DataSetTemplate(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate) DefaultDataSetTemplate(com.thinkbiganalytics.kylo.catalog.rest.model.DefaultDataSetTemplate)
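
A minimal follow-up sketch of how the returned copy might be consumed (the variable dataSet stands for a DataSet already resolved in the controller; names are illustrative, not from the Kylo sources). After the merge, the copy's data source carries the effective template, so callers can read the format and options directly:

// Illustrative usage inside the same controller
final DataSet enriched = addDataSourceInformation(dataSet);
final DataSetTemplate effective = enriched.getDataSource().getTemplate();
final String format = effective.getFormat();                 // effective format after the merge
final Map<String, String> options = effective.getOptions();  // effective options after the merge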

Example 7 with DataSetTemplate

Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate in project kylo by Teradata.

In the class AbstractKyloCatalogClient, the method read:

@Nonnull
@Override
public KyloCatalogReader<T> read(@Nonnull final String id) {
    final DataSetTemplate dataSet = (dataSets != null) ? dataSets.get(id) : null;
    if (dataSet != null) {
        final DefaultKyloCatalogReader<T> reader = new DefaultKyloCatalogReader<>(this, hadoopConfiguration, resourceLoader);
        reader.dataSet(dataSet);
        return reader;
    } else {
        throw new KyloCatalogException("Data set does not exist: " + id);
    }
}
Also used: KyloCatalogException(com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException) DataSetTemplate(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate) Nonnull(javax.annotation.Nonnull)
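
A hedged usage sketch of the read API (client construction is omitted, "example-dataset" is a made-up id, and load() is assumed to behave like Spark's DataFrameReader.load()):

// Look up a data set that was previously registered on the builder, e.g. via addDataSet("example-dataset")
try {
    final KyloCatalogReader<Dataset<Row>> reader = client.read("example-dataset");
    final Dataset<Row> frame = reader.load();   // assumed Spark-style terminal call
    frame.show();
} catch (final KyloCatalogException e) {
    // thrown when the id is not present in the client's data-set map
}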

Example 8 with DataSetTemplate

Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate in project kylo by Teradata.

In the class DataSetUtil, the method getPaths:

/**
 * Gets the paths for the specified data set.
 */
@Nonnull
public static Optional<List<String>> getPaths(@Nonnull final DataSet dataSet) {
    final DataSetTemplate connectorTemplate = Optional.of(dataSet).map(DataSet::getDataSource).map(DataSource::getConnector).map(Connector::getTemplate).orElse(null);
    final DataSetTemplate dataSetTemplate = Optional.of(dataSet).map(DataSet::getDataSource).map(DataSource::getTemplate).orElse(null);
    List<String> paths = new ArrayList<>();
    // Add "path" option
    if (dataSet.getOptions() != null && dataSet.getOptions().get("path") != null) {
        paths.add(dataSet.getOptions().get("path"));
    } else if (dataSetTemplate != null && dataSetTemplate.getOptions() != null && dataSetTemplate.getOptions().get("path") != null) {
        paths.add(dataSetTemplate.getOptions().get("path"));
    } else if (connectorTemplate != null && connectorTemplate.getOptions() != null && connectorTemplate.getOptions().get("path") != null) {
        paths.add(connectorTemplate.getOptions().get("path"));
    }
    // Add paths list
    if (dataSet.getPaths() != null) {
        paths.addAll(dataSet.getPaths());
    } else if (dataSetTemplate != null && dataSetTemplate.getPaths() != null) {
        paths.addAll(dataSetTemplate.getPaths());
    } else if (connectorTemplate != null && connectorTemplate.getPaths() != null) {
        paths.addAll(connectorTemplate.getPaths());
    } else if (paths.isEmpty()) {
        paths = null;
    }
    return Optional.ofNullable(paths);
}
Also used: DefaultDataSetTemplate(com.thinkbiganalytics.kylo.catalog.rest.model.DefaultDataSetTemplate) DataSetTemplate(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate) DataSet(com.thinkbiganalytics.kylo.catalog.rest.model.DataSet) ArrayList(java.util.ArrayList) DataSource(com.thinkbiganalytics.kylo.catalog.rest.model.DataSource) Nonnull(javax.annotation.Nonnull)
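
A self-contained sketch of the precedence this method implements: a "path" option on the data set itself wins over the same option on the data-source or connector template, and the explicit paths list is appended after it (setter names are assumed from the rest model; values are illustrative):

// Uses java.util.Collections; the data set has no data source, so both templates resolve to null
final DataSet dataSet = new DataSet();
dataSet.setOptions(Collections.singletonMap("path", "hdfs://example/data/a.csv")); // "path" option
dataSet.setPaths(Collections.singletonList("hdfs://example/data/b.csv"));          // explicit paths
final Optional<List<String>> paths = DataSetUtil.getPaths(dataSet);
// paths contains [hdfs://example/data/a.csv, hdfs://example/data/b.csv]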

Example 9 with DataSetTemplate

Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate in project kylo by Teradata.

In the class DefaultSparkJobService, the method create:

@Nonnull
@Override
public SparkJobContext create(@Nonnull final SparkJobRequest request) {
    // Replace parent id with Spark's id
    if (request.getParent() != null && request.getParent().getId() != null) {
        final DefaultSparkJobContext parent = jobs.getIfPresent(request.getParent().getId());
        if (parent != null) {
            request.getParent().setId(parent.getSparkJobId());
        } else {
            throw new SparkException("job.parentExpired");
        }
    }
    // Generate script
    final StringBuilder script = new StringBuilder().append("import com.thinkbiganalytics.kylo.catalog.KyloCatalog\n");
    if (request.getResources() != null) {
        final SparkJobResources resources = request.getResources();
        script.append("KyloCatalog.builder\n");
        if (resources.getDataSets() != null) {
            resources.getDataSets().forEach(dataSetReference -> {
                final DataSet dataSet = findDataSet(dataSetReference);
                final DataSetTemplate template = DataSetUtil.mergeTemplates(dataSet);
                script.append(".addDataSet(\"").append(StringEscapeUtils.escapeJava(dataSet.getId())).append("\")");
                if (template.getFiles() != null) {
                    template.getFiles().forEach(file -> script.append(".addFile(\"").append(StringEscapeUtils.escapeJava(file)).append("\")"));
                }
                if (template.getFormat() != null) {
                    script.append(".format(\"").append(StringEscapeUtils.escapeJava(template.getFormat())).append(')');
                }
                if (template.getJars() != null && !template.getJars().isEmpty()) {
                    script.append(".addJars(Seq(").append(template.getJars().stream().map(StringEscapeUtils::escapeJava).collect(Collectors.joining("\", \"", "\"", "\""))).append("))");
                }
                if (template.getOptions() != null) {
                    template.getOptions().forEach((name, value) -> script.append(".option(\"").append(StringEscapeUtils.escapeJava(name)).append("\", \"").append(StringEscapeUtils.escapeJava(value)).append("\")"));
                }
                if (template.getPaths() != null) {
                    script.append(".paths(Seq(").append(template.getPaths().stream().map(StringEscapeUtils::escapeJava).collect(Collectors.joining("\", \"", "\"", "\""))).append("))");
                }
                script.append('\n');
            });
        }
        if (resources.getHighWaterMarks() != null) {
            resources.getHighWaterMarks().forEach((name, value) -> script.append(".setHighWaterMark(\"").append(StringEscapeUtils.escapeJava(name)).append("\", \"").append(StringEscapeUtils.escapeJava(value)).append("\")\n"));
        }
        script.append(".build\n\n");
    }
    script.append(request.getScript()).append("\n\n").append("import com.thinkbiganalytics.spark.rest.model.job.SparkJobResult\n").append("val sparkJobResult = new SparkJobResult()\n").append("sparkJobResult.setHighWaterMarks(KyloCatalog.builder.build.getHighWaterMarks)\n").append("sparkJobResult\n");
    // Find Spark process
    final SparkShellProcess process;
    try {
        if (request.getMode() == SparkJobRequest.Mode.BATCH) {
            process = processManager.getSystemProcess();
        } else if (request.getMode() == SparkJobRequest.Mode.INTERACTIVE) {
            process = processManager.getProcessForUser(SecurityContextHolder.getContext().getAuthentication().getName());
        } else {
            throw new SparkException("job.invalid-mode");
        }
    } catch (final InterruptedException e) {
        throw new SparkException("job.cancelled", e);
    }
    // Create task
    final BatchJobSupplier task = new BatchJobSupplier(request, process, restClient);
    task.setPollInterval(pollInterval, TimeUnit.MILLISECONDS);
    // Create context
    final DefaultSparkJobContext context = DefaultSparkJobContext.create(task, cache, executor);
    jobs.put(context.getId(), context);
    return context;
}
Also used: SparkShellProcess(com.thinkbiganalytics.spark.shell.SparkShellProcess) SparkException(com.thinkbiganalytics.kylo.spark.SparkException) DataSet(com.thinkbiganalytics.kylo.catalog.rest.model.DataSet) DataSetTemplate(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate) SparkJobResources(com.thinkbiganalytics.kylo.spark.rest.model.job.SparkJobResources) BatchJobSupplier(com.thinkbiganalytics.kylo.spark.job.tasks.BatchJobSupplier) Nonnull(javax.annotation.Nonnull)
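
The trickiest part of the generated script is the quoting: every value is Java-escaped, and list arguments are wrapped as quoted Scala Seq literals. A self-contained sketch of that pattern (the sample paths are illustrative):

final List<String> paths = Arrays.asList("/data/a \"raw\".csv", "/data/b.csv");
final String seqArg = paths.stream()
        .map(StringEscapeUtils::escapeJava)
        .collect(Collectors.joining("\", \"", "\"", "\""));
final String fragment = ".paths(Seq(" + seqArg + "))";
// fragment -> .paths(Seq("/data/a \"raw\".csv", "/data/b.csv"))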

Example 10 with DataSetTemplate

Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate in project kylo by Teradata.

In the class KyloCatalogReaderUtil, the method toKyloCatalogRequest:

public static KyloCatalogReadRequest toKyloCatalogRequest(PreviewDataSetRequest previewRequest) {
    DataSource dataSource = previewRequest.getDataSource();
    Connector connector = dataSource.getConnector();
    // merge template
    DataSetTemplate dataSetTemplate = DataSourceUtil.mergeTemplates(dataSource);
    // get data out of the dataset template
    List<String> jars = dataSetTemplate.getJars();
    List<String> paths = dataSetTemplate.getPaths();
    List<String> files = dataSetTemplate.getFiles();
    String format = dataSetTemplate.getFormat();
    Map<String, String> options = dataSetTemplate.getOptions();
    if (options == null) {
        options = new HashMap<>();
    }
    // if a SchemaParser was supplied, add its options and update the format
    if (previewRequest.getSchemaParser() != null) {
        SchemaParserDescriptor schemaParser = previewRequest.getSchemaParser();
        Map<String, String> sparkOptions = schemaParser.getProperties().stream().collect(Collectors.toMap(p -> p.getAdditionalProperties().stream().filter(labelValue -> "spark.option".equalsIgnoreCase(labelValue.getLabel())).map(labelValue -> labelValue.getValue()).findFirst().orElse(""), p -> p.getValue()));
        // remove any options that produced an empty key
        sparkOptions.remove("");
        // options supplied by the schema parser take precedence over the template options
        options.putAll(sparkOptions);
        format = schemaParser.getSparkFormat();
    }
    // add in additional preview options
    if (previewRequest.getProperties() != null && !previewRequest.getProperties().isEmpty()) {
        options.putAll(previewRequest.getProperties());
    }
    KyloCatalogReadRequest request = new KyloCatalogReadRequest();
    request.setFiles(files);
    request.setJars(jars);
    request.setFormat(format);
    request.setOptions(options);
    if (previewRequest.getPreviewItem() != null && previewRequest.isAddPreviewItemToPath()) {
        request.addPath(previewRequest.getPreviewItem());
    }
    PageSpec pageSpec = previewRequest.getPageSpec();
    if (pageSpec == null) {
        pageSpec = new PageSpec();
    }
    request.setPageSpec(pageSpec);
    return request;
}
Also used: DataSetTemplate (com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate) List (java.util.List) Connector (com.thinkbiganalytics.kylo.catalog.rest.model.Connector) PreviewDataSetRequest (com.thinkbiganalytics.spark.rest.model.PreviewDataSetRequest) DataSourceUtil (com.thinkbiganalytics.kylo.catalog.datasource.DataSourceUtil) DataSource (com.thinkbiganalytics.kylo.catalog.rest.model.DataSource) Map (java.util.Map) SchemaParserDescriptor (com.thinkbiganalytics.discovery.model.SchemaParserDescriptor) KyloCatalogReadRequest (com.thinkbiganalytics.spark.rest.model.KyloCatalogReadRequest) PageSpec (com.thinkbiganalytics.spark.rest.model.PageSpec) HashMap (java.util.HashMap) Collectors (java.util.stream.Collectors)
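
A hedged end-to-end sketch of calling the utility (setter names on PreviewDataSetRequest are assumed from the getters used above, and the preview item path is made up):

final PreviewDataSetRequest preview = new PreviewDataSetRequest();
preview.setDataSource(dataSource);            // a catalog DataSource whose Connector is already populated
preview.setPreviewItem("/data/example.csv");  // hypothetical file selected for preview
preview.setAddPreviewItemToPath(true);
final KyloCatalogReadRequest readRequest = KyloCatalogReaderUtil.toKyloCatalogRequest(preview);
// readRequest now carries the merged format and options, the preview path, and a default PageSpec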

Aggregations

DataSetTemplate (com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate): 10
Nonnull (javax.annotation.Nonnull): 7
DataSet (com.thinkbiganalytics.kylo.catalog.rest.model.DataSet): 4
DataSource (com.thinkbiganalytics.kylo.catalog.rest.model.DataSource): 4
DefaultDataSetTemplate (com.thinkbiganalytics.kylo.catalog.rest.model.DefaultDataSetTemplate): 4
DataSourceUtil (com.thinkbiganalytics.kylo.catalog.datasource.DataSourceUtil): 3
List (java.util.List): 3
KerberosTicketConfiguration (com.thinkbiganalytics.kerberos.KerberosTicketConfiguration): 2
KyloCatalogException (com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException): 2
Connector (com.thinkbiganalytics.kylo.catalog.rest.model.Connector): 2
DBSchemaParser (com.thinkbiganalytics.schema.DBSchemaParser): 2
Nullable (javax.annotation.Nullable): 2
StringUtils (org.apache.commons.lang3.StringUtils): 2
Configuration (org.apache.hadoop.conf.Configuration): 2
Logger (org.slf4j.Logger): 2
LoggerFactory (org.slf4j.LoggerFactory): 2
SingleConnectionDataSource (org.springframework.jdbc.datasource.SingleConnectionDataSource): 2
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 1
CacheBuilder (com.google.common.cache.CacheBuilder): 1
CacheLoader (com.google.common.cache.CacheLoader): 1