Search in sources :

Example 1 with SparkShellProcess

Use of SparkShellProcess in project kylo by Teradata.

In the class SparkFileSchemaParserService, method doParse.

/**
 * Delegates to the Spark Shell service to load the file into a temporary table, then converts the results into a schema.
 */
public Schema doParse(InputStream inputStream, SparkFileType fileType, TableSchemaType tableSchemaType) throws IOException {
    File tempFile = toFile(inputStream);
    try {
        SparkShellProcess shellProcess = shellProcessManager.getSystemProcess();
        TransformResponse response = restClient.transform(shellProcess, createTransformRequest(tempFile, fileType));
        while (response.getStatus() != TransformResponse.Status.SUCCESS) {
            if (response.getStatus() == TransformResponse.Status.ERROR) {
                throw new IOException("Failed to process data [" + response.getMessage() + "]");
            } else {
                Uninterruptibles.sleepUninterruptibly(100L, TimeUnit.MILLISECONDS);
            final Optional<TransformResponse> optionalResponse = restClient.getTransformResult(shellProcess, response.getTable());
            if (optionalResponse.isPresent()) {
                response = optionalResponse.get();
        return toSchema(response.getResults(), fileType, tableSchemaType);
    } catch (Exception e) {
        log.error("Error parsing file {}: {}", fileType, e.getMessage());
        throw new IOException("Unexpected exception. Verify file is the proper format", e);
    } finally {
        if (!tempFile.delete()) {
            log.error("The temp file was not deleted successfully: " + tempFile.getName());
Also used : SparkShellProcess( TransformResponse( IOException( File( IOException(

Example 2 with SparkShellProcess

Use of SparkShellProcess in project kylo by Teradata.

In the class SparkShellProxyController, method query.

/**
 * Executes a SQL query.
 *
 * @param request the query request
 * @return the query status
 */
@ApiOperation("Queries a data source table.")
@ApiResponses({ @ApiResponse(code = 200, message = "Returns the status of the query.", response = TransformResponse.class), @ApiResponse(code = 400, message = "The requested data source does not exist.", response = RestResponseStatus.class), @ApiResponse(code = 500, message = "There was a problem processing the data.", response = RestResponseStatus.class) })
public Response query(@ApiParam(value = "The request indicates the query to execute. Exactly one source must be specified.", required = true) @Nullable final TransformRequest request) {
    // Validate request
    if (request == null || request.getScript() == null) {
        throw transformError(Response.Status.BAD_REQUEST, "query.missingScript", null);
    // Add data source details
    // Add Catalog details
    // Execute request
    final SparkShellProcess process = getSparkShellProcess();
    return getTransformResponse(() -> restClient.query(process, request));
Also used : SparkShellProcess( Path( POST( Consumes( Produces( Nonnull(javax.annotation.Nonnull) ApiOperation(io.swagger.annotations.ApiOperation) ApiResponses(io.swagger.annotations.ApiResponses)

Example 3 with SparkShellProcess

Use of SparkShellProcess in project kylo by Teradata.

In the class SparkShellProxyController, method saveTransform.

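/**
 * Saves the results of a Spark script.
 *
 * @param transformId the value of the {@code transform} path parameter
 * @param request the save request
 * @return the status of the save
 */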
@ApiOperation("Saves the results of a transformation.")
@ApiResponses({ @ApiResponse(code = 200, message = "Returns the status of the save.", response = SaveResponse.class), @ApiResponse(code = 404, message = "The transformation does not exist.", response = SaveResponse.class) })
public Response saveTransform(@Nonnull @PathParam("transform") final String transformId, @ApiParam(value = "The request indicates the destination for saving the transformation. The format is required.", required = true) @Nullable final SaveRequest request) {
    // Validate request
    if (request == null || (request.getJdbc() == null && request.getCatalogDatasource() == null && request.getFormat() == null)) {
        throw transformError(Response.Status.BAD_REQUEST, SparkShellProxyResources.SAVE_MISSING_FORMAT, null);
    // Add data source details
    // Execute request
    final SparkShellProcess process = getSparkShellProcess();
    return getSaveResponse(() -> Optional.of(restClient.saveTransform(process, transformId, request)));
Also used : SparkShellProcess( Path( POST( Consumes( Produces( Nonnull(javax.annotation.Nonnull) ApiOperation(io.swagger.annotations.ApiOperation) ApiResponses(io.swagger.annotations.ApiResponses)

Example 4 with SparkShellProcess

Use of SparkShellProcess in project kylo by Teradata.

In the class SparkShellProxyController, method transform.

/**
 * Executes a Spark script that performs transformations using a {@code DataFrame}.
 *
 * @param request the transformation request
 * @return the transformation status
 */
@ApiOperation("Queries a Hive table and applies a series of transformations on the rows.")
@ApiResponses({ @ApiResponse(code = 200, message = "Returns the status of the transformation.", response = TransformResponse.class), @ApiResponse(code = 400, message = "The requested data source does not exist.", response = RestResponseStatus.class), @ApiResponse(code = 500, message = "There was a problem processing the data.", response = RestResponseStatus.class) })
public Response transform(@ApiParam(value = "The request indicates the transformations to apply to the source table and how the user wishes the results to be displayed. Exactly one parent or" + " source must be specified.", required = true) @Nullable final TransformRequest request) {
    // Validate request
    if (request == null || request.getScript() == null) {
        throw transformError(Response.Status.BAD_REQUEST, "transform.missingScript", null);
    if (request.getParent() != null) {
        if (request.getParent().getScript() == null) {
            throw transformError(Response.Status.BAD_REQUEST, "transform.missingParentScript", null);
        if (request.getParent().getTable() == null) {
            throw transformError(Response.Status.BAD_REQUEST, "transform.missingParentTable", null);
    PageSpec ps = request.getPageSpec();
    if (ps != null) {
        if (Stream.of(ps.getFirstCol(), ps.getNumCols(), ps.getFirstRow(), ps.getNumRows()).allMatch(Objects::isNull)) {
            throw transformError(Response.Status.BAD_REQUEST, "transform.badPageSpec", null);
    // Add data source details
    // Add Catalog details
    // Execute request
    final SparkShellProcess process = getSparkShellProcess();
    return getTransformResponse(() -> restClient.transform(process, request));
Also used : SparkShellProcess( Objects(java.util.Objects) PageSpec( Path( POST( Consumes( Produces( Nonnull(javax.annotation.Nonnull) ApiOperation(io.swagger.annotations.ApiOperation) ApiResponses(io.swagger.annotations.ApiResponses)

Example 5 with SparkShellProcess

Use of SparkShellProcess in project kylo by Teradata.

In the class SparkShellProxyController, method saveQuery.

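/**
 * Saves the results of a Spark script.
 *
 * @param queryId the value of the {@code query} path parameter
 * @param request the save request
 * @return the status of the save
 */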
@ApiOperation("Saves the results of a transformation.")
@ApiResponses({ @ApiResponse(code = 200, message = "Returns the status of the save.", response = SaveResponse.class), @ApiResponse(code = 404, message = "The transformation does not exist.", response = SaveResponse.class) })
public Response saveQuery(@Nonnull @PathParam("query") final String queryId, @ApiParam(value = "The request indicates the destination for saving the transformation. The format is required.", required = true) @Nullable final SaveRequest request) {
    // Validate request
    if (request == null || (request.getJdbc() == null && request.getCatalogDatasource() == null && request.getFormat() == null)) {
        throw transformError(Response.Status.BAD_REQUEST, SparkShellProxyResources.SAVE_MISSING_FORMAT, null);
    // Add data source details
    // Add Catalog details
    // Execute request
    final SparkShellProcess process = getSparkShellProcess();
    return getSaveResponse(() -> Optional.of(restClient.saveQuery(process, queryId, request)));
Also used : SparkShellProcess( Path( POST( Consumes( Produces( Nonnull(javax.annotation.Nonnull) ApiOperation(io.swagger.annotations.ApiOperation) ApiResponses(io.swagger.annotations.ApiResponses)


SparkShellProcess ( ApiOperation (io.swagger.annotations.ApiOperation)7 ApiResponses (io.swagger.annotations.ApiResponses)7 Consumes ( Path ( Produces ( Nonnull (javax.annotation.Nonnull)6 POST ( TransformResponse ( SparkException (com.thinkbiganalytics.kylo.spark.SparkException)3 DataSet ( TransformRequest ( SparkShellSaveException ( SparkShellTransformException ( File ( IOException ( TimeoutException (java.util.concurrent.TimeoutException)2 BadRequestException ( NotFoundException ( WebApplicationException (