
Example 1 with PageSpec

Use of com.thinkbiganalytics.spark.rest.model.PageSpec in project kylo by Teradata.

From the class SparkShellProxyController, method transform:

/**
 * Executes a Spark script that performs transformations using a {@code DataFrame}.
 *
 * @param request the transformation request
 * @return the transformation status
 */
@POST
@Path(TRANSFORM)
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation("Queries a Hive table and applies a series of transformations on the rows.")
@ApiResponses({
    @ApiResponse(code = 200, message = "Returns the status of the transformation.", response = TransformResponse.class),
    @ApiResponse(code = 400, message = "The requested data source does not exist.", response = RestResponseStatus.class),
    @ApiResponse(code = 500, message = "There was a problem processing the data.", response = RestResponseStatus.class)
})
@Nonnull
@SuppressWarnings("squid:S1845")
public Response transform(@ApiParam(value = "The request indicates the transformations to apply to the source table and how the user wishes the results to be displayed. Exactly one parent or" + " source must be specified.", required = true) @Nullable final TransformRequest request) {
    // Validate request
    if (request == null || request.getScript() == null) {
        throw transformError(Response.Status.BAD_REQUEST, "transform.missingScript", null);
    }
    if (request.getParent() != null) {
        if (request.getParent().getScript() == null) {
            throw transformError(Response.Status.BAD_REQUEST, "transform.missingParentScript", null);
        }
        if (request.getParent().getTable() == null) {
            throw transformError(Response.Status.BAD_REQUEST, "transform.missingParentTable", null);
        }
    }
    PageSpec ps = request.getPageSpec();
    if (ps != null) {
        if (Stream.of(ps.getFirstCol(), ps.getNumCols(), ps.getFirstRow(), ps.getNumRows()).allMatch(Objects::isNull)) {
            throw transformError(Response.Status.BAD_REQUEST, "transform.badPageSpec", null);
        }
    }
    // Add data source details
    addDatasourceDetails(request);
    // Add Catalog details
    addCatalogDataSets(request);
    addCatalogDataSources(request);
    // Execute request
    final SparkShellProcess process = getSparkShellProcess();
    return getTransformResponse(() -> restClient.transform(process, request));
}
Also used : SparkShellProcess(com.thinkbiganalytics.spark.shell.SparkShellProcess) Objects(java.util.Objects) PageSpec(com.thinkbiganalytics.spark.rest.model.PageSpec) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST) Consumes(javax.ws.rs.Consumes) Produces(javax.ws.rs.Produces) Nonnull(javax.annotation.Nonnull) ApiOperation(io.swagger.annotations.ApiOperation) ApiResponses(io.swagger.annotations.ApiResponses)
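
The validation above rejects a PageSpec whose four window fields are all null, so a caller must set at least one of them. Below is a minimal client-side sketch of building such a request; it assumes the PageSpec and TransformRequest beans expose setters that mirror the getters used above (setFirstRow, setNumRows, setFirstCol, setNumCols, setScript, setPageSpec), which is not verified against the kylo sources, and it omits the parent/source wiring the endpoint also requires.

import com.thinkbiganalytics.spark.rest.model.PageSpec;
import com.thinkbiganalytics.spark.rest.model.TransformRequest;

public class TransformRequestExample {

    public static TransformRequest buildRequest() {
        // Request a 64-row, 10-column window of the transformed DataFrame.
        // Setter names are assumed to mirror the getters used by SparkShellProxyController.
        final PageSpec pageSpec = new PageSpec();
        pageSpec.setFirstRow(0);
        pageSpec.setNumRows(64);
        pageSpec.setFirstCol(0);
        pageSpec.setNumCols(10);

        final TransformRequest request = new TransformRequest();
        request.setScript("df.filter(\"amount > 100\")"); // hypothetical transformation script
        request.setPageSpec(pageSpec);
        // Note: the endpoint also requires exactly one parent or source, omitted here.
        return request;
    }
}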

Example 2 with PageSpec

Use of com.thinkbiganalytics.spark.rest.model.PageSpec in project kylo by Teradata.

From the class TransformService, method submitTransformJob:

/**
 * Submits the specified task to be executed and returns the result.
 */
@Nonnull
private TransformResponse submitTransformJob(final Supplier<TransformResult> task, @Nonnull final TransformRequest request) throws ScriptException {
    final FieldPolicy[] policies = getPolicies(request);
    final PageSpec pageSpec = request.getPageSpec();
    log.entry(task, policies);
    // Prepare script
    Supplier<TransformResult> result = task;
    if (request.isDoValidate() && policies != null && policies.length > 0 && validator != null) {
        result = Suppliers.compose(new ValidationStage(policies, validator), result);
    }
    if (request.isDoProfile() && profiler != null) {
        result = Suppliers.compose(new ProfileStage(profiler), result);
    }
    return submitTransformJob(result, pageSpec);
}
Also used : ProfileStage(com.thinkbiganalytics.spark.metadata.ProfileStage) TransformResult(com.thinkbiganalytics.spark.model.TransformResult) FieldPolicy(com.thinkbiganalytics.policy.rest.model.FieldPolicy) PageSpec(com.thinkbiganalytics.spark.rest.model.PageSpec) ValidationStage(com.thinkbiganalytics.spark.metadata.ValidationStage) Nonnull(javax.annotation.Nonnull)
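
The method chains optional post-processing by wrapping the base task with Guava's Suppliers.compose, so the task runs first and each composed stage then transforms its result. Here is a self-contained sketch of that composition pattern using plain Integer stand-ins instead of the kylo TransformResult and stage classes:

import com.google.common.base.Function;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;

public class StageCompositionExample {

    public static void main(String[] args) {
        // Base task, analogous to the Supplier<TransformResult> passed to submitTransformJob.
        Supplier<Integer> task = () -> 40;

        // Optional stages, analogous to ValidationStage and ProfileStage.
        Function<Integer, Integer> validationStage = input -> input + 1;
        Function<Integer, Integer> profileStage = input -> input + 1;

        // Each compose wraps the previous supplier; stages apply in the order they are composed.
        Supplier<Integer> result = Suppliers.compose(validationStage, task);
        result = Suppliers.compose(profileStage, result);

        System.out.println(result.get()); // 42: task, then validation, then profile
    }
}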

Example 3 with PageSpec

Use of com.thinkbiganalytics.spark.rest.model.PageSpec in project kylo by Teradata.

From the class KyloCatalogReaderUtil, method toKyloCatalogRequest:

public static KyloCatalogReadRequest toKyloCatalogRequest(PreviewDataSetRequest previewRequest) {
    DataSource dataSource = previewRequest.getDataSource();
    Connector connector = dataSource.getConnector();
    // merge template
    DataSetTemplate dataSetTemplate = DataSourceUtil.mergeTemplates(dataSource);
    // get data out of the dataset template
    List<String> jars = dataSetTemplate.getJars();
    List<String> paths = dataSetTemplate.getPaths();
    List<String> files = dataSetTemplate.getFiles();
    String format = dataSetTemplate.getFormat();
    Map<String, String> options = dataSetTemplate.getOptions();
    if (options == null) {
        options = new HashMap<>();
    }
    // if a SchemaParser is supplied, add its Spark options and update the format
    if (previewRequest.getSchemaParser() != null) {
        SchemaParserDescriptor schemaParser = previewRequest.getSchemaParser();
        Map<String, String> sparkOptions = schemaParser.getProperties().stream()
            .collect(Collectors.toMap(
                p -> p.getAdditionalProperties().stream()
                    .filter(labelValue -> "spark.option".equalsIgnoreCase(labelValue.getLabel()))
                    .map(labelValue -> labelValue.getValue())
                    .findFirst()
                    .orElse(""),
                p -> p.getValue()));
        // remove any options that produced an empty key
        sparkOptions.remove("");
        // options supplied by the schema parser take precedence over the template options
        options.putAll(sparkOptions);
        format = schemaParser.getSparkFormat();
    }
    // add in additional preview options
    if (previewRequest.getProperties() != null && !previewRequest.getProperties().isEmpty()) {
        options.putAll(previewRequest.getProperties());
    }
    KyloCatalogReadRequest request = new KyloCatalogReadRequest();
    request.setFiles(files);
    request.setJars(jars);
    request.setFormat(format);
    request.setOptions(options);
    if (previewRequest.getPreviewItem() != null && previewRequest.isAddPreviewItemToPath()) {
        request.addPath(previewRequest.getPreviewItem());
    }
    PageSpec pageSpec = previewRequest.getPageSpec();
    if (pageSpec == null) {
        pageSpec = new PageSpec();
    }
    request.setPageSpec(pageSpec);
    return request;
}
Also used : DataSetTemplate(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate) List(java.util.List) Connector(com.thinkbiganalytics.kylo.catalog.rest.model.Connector) PreviewDataSetRequest(com.thinkbiganalytics.spark.rest.model.PreviewDataSetRequest) DataSourceUtil(com.thinkbiganalytics.kylo.catalog.datasource.DataSourceUtil) DataSource(com.thinkbiganalytics.kylo.catalog.rest.model.DataSource) Map(java.util.Map) SchemaParserDescriptor(com.thinkbiganalytics.discovery.model.SchemaParserDescriptor) KyloCatalogReadRequest(com.thinkbiganalytics.spark.rest.model.KyloCatalogReadRequest) PageSpec(com.thinkbiganalytics.spark.rest.model.PageSpec) HashMap(java.util.HashMap) Collectors(java.util.stream.Collectors)
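
The schema-parser handling above turns each parser property into a Spark option: the option key is taken from the property's additional property labelled "spark.option", properties without that label collapse onto the empty key, and that empty key is then removed. Below is a self-contained sketch of that Collectors.toMap pattern; the Property and LabelValue classes are simplified stand-ins, not the kylo model classes:

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class SparkOptionMappingExample {

    // Simplified stand-ins for the schema-parser property model.
    static class LabelValue {
        final String label;
        final String value;
        LabelValue(String label, String value) { this.label = label; this.value = value; }
    }

    static class Property {
        final String value;
        final List<LabelValue> additionalProperties;
        Property(String value, List<LabelValue> additionalProperties) {
            this.value = value;
            this.additionalProperties = additionalProperties;
        }
    }

    public static void main(String[] args) {
        List<Property> properties = Arrays.asList(
            new Property(",", Arrays.asList(new LabelValue("spark.option", "sep"))),
            new Property("true", Arrays.asList(new LabelValue("spark.option", "header"))),
            new Property("ignored", Arrays.asList(new LabelValue("other", "x"))));

        // Key: the value of the additional property labelled "spark.option" (or "" if absent).
        // Value: the property's own value.
        Map<String, String> sparkOptions = properties.stream().collect(Collectors.toMap(
            p -> p.additionalProperties.stream()
                .filter(lv -> "spark.option".equalsIgnoreCase(lv.label))
                .map(lv -> lv.value)
                .findFirst()
                .orElse(""),
            p -> p.value));

        // Properties without a spark.option label all map to the empty key; drop them.
        sparkOptions.remove("");

        System.out.println(sparkOptions); // e.g. {header=true, sep=,} (iteration order not guaranteed)
    }
}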

Aggregations

PageSpec (com.thinkbiganalytics.spark.rest.model.PageSpec) 3
Nonnull (javax.annotation.Nonnull) 2
SchemaParserDescriptor (com.thinkbiganalytics.discovery.model.SchemaParserDescriptor) 1
DataSourceUtil (com.thinkbiganalytics.kylo.catalog.datasource.DataSourceUtil) 1
Connector (com.thinkbiganalytics.kylo.catalog.rest.model.Connector) 1
DataSetTemplate (com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate) 1
DataSource (com.thinkbiganalytics.kylo.catalog.rest.model.DataSource) 1
FieldPolicy (com.thinkbiganalytics.policy.rest.model.FieldPolicy) 1
ProfileStage (com.thinkbiganalytics.spark.metadata.ProfileStage) 1
ValidationStage (com.thinkbiganalytics.spark.metadata.ValidationStage) 1
TransformResult (com.thinkbiganalytics.spark.model.TransformResult) 1
KyloCatalogReadRequest (com.thinkbiganalytics.spark.rest.model.KyloCatalogReadRequest) 1
PreviewDataSetRequest (com.thinkbiganalytics.spark.rest.model.PreviewDataSetRequest) 1
SparkShellProcess (com.thinkbiganalytics.spark.shell.SparkShellProcess) 1
ApiOperation (io.swagger.annotations.ApiOperation) 1
ApiResponses (io.swagger.annotations.ApiResponses) 1
HashMap (java.util.HashMap) 1
List (java.util.List) 1
Map (java.util.Map) 1
Objects (java.util.Objects) 1