use of com.thinkbiganalytics.spark.shell.SparkShellProcess in project kylo by Teradata.
the class SparkShellProxyController method getServerStatus.
@GET
@Path(SERVER_STATUS)
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation("Fetches the status of a transformation.")
@ApiResponses({ @ApiResponse(code = 200, message = "Returns the status of the spark server and session for current user if applicable.", response = ServerStatusResponse.class), @ApiResponse(code = 500, message = "There was a problem checking the spark server.", response = RestResponseStatus.class) })
@Nonnull
public Response getServerStatus() {
    try {
        final SparkShellProcess process = getSparkShellProcess();
        final ServerStatusResponse serverStatusResponse = restClient.serverStatus(process);
        return Response.ok(serverStatusResponse).build();
    } catch (Exception e) {
        throw new WebApplicationException("Unhandled exception attempting to get server status", e);
    }
}
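For reference, a minimal standalone client for this endpoint might look like the sketch below. The base URL and the "server-status" path are assumptions standing in for the controller's mount point and the SERVER_STATUS constant; only the standard JAX-RS 2.0 client API is used.

import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;

public class ServerStatusClientSketch {

    public static void main(String[] args) {
        // Assumed base URL and path; adjust both to the target Kylo deployment.
        final Client client = ClientBuilder.newClient();
        try {
            final Response response = client.target("http://localhost:8400/proxy/v1/spark/shell")
                .path("server-status")
                .request(MediaType.APPLICATION_JSON)
                .get();
            System.out.println("HTTP " + response.getStatus());
            System.out.println(response.readEntity(String.class));
        } finally {
            client.close();
        }
    }
}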
use of com.thinkbiganalytics.spark.shell.SparkShellProcess in project kylo by Teradata.
the class SparkShellProxyController method query.
/**
* Executes a SQL query.
*
* @param request the query request
* @return the query status
*/
@POST
@Path("/query")
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation("Queries a data source table.")
@ApiResponses({
    @ApiResponse(code = 200, message = "Returns the status of the query.", response = TransformResponse.class),
    @ApiResponse(code = 400, message = "The requested data source does not exist.", response = RestResponseStatus.class),
    @ApiResponse(code = 500, message = "There was a problem processing the data.", response = RestResponseStatus.class)
})
@Nonnull
public Response query(@ApiParam(value = "The request indicates the query to execute. Exactly one source must be specified.", required = true) @Nullable final TransformRequest request) {
    // Validate request
    if (request == null || request.getScript() == null) {
        throw transformError(Response.Status.BAD_REQUEST, "query.missingScript", null);
    }

    // Add data source details
    addDatasourceDetails(request);

    // Add Catalog details
    addCatalogDataSets(request);

    // Execute request
    final SparkShellProcess process = getSparkShellProcess();
    return getTransformResponse(() -> restClient.query(process, request));
}
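A hedged sketch of invoking this endpoint from a client: the payload carries only the "script" field that the validation above requires, and the base URL, path, and SQL are placeholders, not values confirmed by the source.

import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.Entity;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;

public class QueryClientSketch {

    public static void main(String[] args) {
        // Minimal payload: the controller rejects requests without a script.
        // Table name and base URL are hypothetical.
        final String json = "{\"script\":\"SELECT * FROM example_table LIMIT 10\"}";
        final Response response = ClientBuilder.newClient()
            .target("http://localhost:8400/proxy/v1/spark/shell")
            .path("query")
            .request(MediaType.APPLICATION_JSON)
            .post(Entity.entity(json, MediaType.APPLICATION_JSON));
        System.out.println("HTTP " + response.getStatus());
        System.out.println(response.readEntity(String.class));
    }
}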
use of com.thinkbiganalytics.spark.shell.SparkShellProcess in project kylo by Teradata.
the class SparkFileSchemaParserService method doParse.
/**
 * Delegates to the Spark Shell service to load the file into a temporary table and derive its schema.
 */
public Schema doParse(InputStream inputStream, SparkFileType fileType, TableSchemaType tableSchemaType, SparkCommandBuilder commandBuilder) throws IOException {
    File tempFile = toFile(inputStream);
    try {
        SparkShellProcess shellProcess = shellProcessManager.getSystemProcess();
        TransformResponse response = restClient.transform(shellProcess, createTransformRequest(tempFile, fileType, commandBuilder));
        // Poll until the transform reports SUCCESS, failing fast on ERROR
        while (response.getStatus() != TransformResponse.Status.SUCCESS) {
            if (response.getStatus() == TransformResponse.Status.ERROR) {
                throw new IOException("Failed to process data [" + response.getMessage() + "]");
            } else {
                Uninterruptibles.sleepUninterruptibly(100L, TimeUnit.MILLISECONDS);
            }
            final Optional<TransformResponse> optionalResponse = restClient.getTransformResult(shellProcess, response.getTable());
            if (optionalResponse.isPresent()) {
                response = optionalResponse.get();
            }
        }
        return toSchema(response.getResults(), fileType, tableSchemaType);
    } catch (Exception e) {
        log.error("Error parsing file {}: {}", fileType, e.getMessage());
        throw new IOException("Unexpected exception. Verify the file is in the proper format.", e);
    } finally {
        if (!tempFile.delete()) {
            log.error("The temp file was not deleted successfully: " + tempFile.getName());
        }
    }
}
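The loop above polls every 100 ms until the transform succeeds, but it never times out. The standalone sketch below restates the same pattern with an explicit deadline; the supplier and predicates are hypothetical stand-ins for the Kylo types, and plain TimeUnit.sleep replaces Guava's Uninterruptibles.

import java.io.IOException;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
import java.util.function.Supplier;

public final class PollingSketch {

    /**
     * Polls until isDone accepts the result, failing fast when isError accepts
     * it and giving up once the deadline passes. The timeout is an addition;
     * the loop in doParse() above waits indefinitely.
     */
    public static <T> T pollUntilDone(Supplier<T> fetch, Predicate<T> isDone, Predicate<T> isError, long timeoutMillis) throws IOException {
        final long deadline = System.currentTimeMillis() + timeoutMillis;
        while (true) {
            final T result = fetch.get();
            if (isError.test(result)) {
                throw new IOException("Job reported an error");
            }
            if (isDone.test(result)) {
                return result;
            }
            if (System.currentTimeMillis() > deadline) {
                throw new IOException("Timed out waiting for job to finish");
            }
            try {
                TimeUnit.MILLISECONDS.sleep(100L);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new IOException("Interrupted while polling", e);
            }
        }
    }
}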
use of com.thinkbiganalytics.spark.shell.SparkShellProcess in project kylo by Teradata.
the class SparkShellProxyController method fileMetadata.
@POST
@Path(FILE_METADATA)
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation("returns filemetadata based upon the list of file paths in the dataset.")
@ApiResponses({ @ApiResponse(code = 200, message = "Returns the status of the file-metadata job.", response = TransformResponse.class), @ApiResponse(code = 400, message = "The requested data source does not exist.", response = RestResponseStatus.class), @ApiResponse(code = 500, message = "There was a problem processing the data.", response = RestResponseStatus.class) })
public Response fileMetadata(com.thinkbiganalytics.kylo.catalog.rest.model.DataSet dataSet) {
    TransformRequest request = new TransformRequest();
    DataSet decrypted = catalogModelTransform.decryptOptions(dataSet);
    request.setScript(FileMetadataScalaScriptGenerator.getScript(
        DataSetUtil.getPaths(decrypted).orElseGet(Collections::emptyList),
        DataSetUtil.mergeTemplates(decrypted).getOptions()));
    final SparkShellProcess process = getSparkShellProcess();
    return getModifiedTransformResponse(
        () -> Optional.of(restClient.transform(process, request)),
        new FileMetadataTransformResponseModifier(fileMetadataTrackerService));
}
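Stripped of Kylo's types, getModifiedTransformResponse pairs a supplier of the raw response with a modifier that rewrites it before it is returned. The sketch below shows only that shape, with hypothetical names and a trivial modifier; it is not Kylo's actual implementation.

import java.util.Optional;
import java.util.function.Function;
import java.util.function.Supplier;

public final class ModifiedResponseSketch {

    /**
     * A supplier produces the raw response (possibly absent); a modifier
     * rewrites it before the caller sees it.
     */
    public static <T, R> Optional<R> getModifiedResponse(Supplier<Optional<T>> supplier, Function<T, R> modifier) {
        return supplier.get().map(modifier);
    }

    public static void main(String[] args) {
        Optional<String> modified = getModifiedResponse(() -> Optional.of("raw-response"), raw -> raw + " [modified]");
        modified.ifPresent(System.out::println);
    }
}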
use of com.thinkbiganalytics.spark.shell.SparkShellProcess in project kylo by Teradata.
the class SparkShellProxyController method preview.
@POST
@Path("/preview")
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation("Returns the dataset preview")
@ApiResponses({
    @ApiResponse(code = 200, message = "Returns the status of the preview job.", response = PreviewDataSetTransformResponse.class),
    @ApiResponse(code = 400, message = "The requested data source does not exist.", response = RestResponseStatus.class),
    @ApiResponse(code = 500, message = "There was a problem processing the data.", response = RestResponseStatus.class)
})
public Response preview(PreviewDataSetRequest previewRequest) {
    DataSource catalogDataSource = fetchCatalogDataSource(previewRequest.getDataSource().getId());
    previewRequest.setDataSource(catalogDataSource);
    if (previewRequest.isFilePreview() && previewRequest.getSchemaParser() == null) {
        // Default to a text preview when no schema parser is set
        previewRequest.setSchemaParser(getTextSchemaParserDescriptor());
    }
    KyloCatalogReadRequest request = KyloCatalogReaderUtil.toKyloCatalogRequest(previewRequest);
    final SparkShellProcess process = getSparkShellProcess();
    return getTransformResponse(() -> {
        PreviewDataSetTransformResponse response = null;
        boolean fallbackToTextParser = previewRequest.isFallbackToTextOnError();
        try {
            TransformResponse transformResponse = restClient.kyloCatalogTransform(process, request);
            response = new PreviewDataSetTransformResponse(transformResponse, previewRequest.getSchemaParser());
        } catch (Exception e) {
            // If enabled, retry the preview once as plain text; otherwise rethrow
            if (fallbackToTextParser && previewRequest.getSchemaParser() != null && !"text".equalsIgnoreCase(previewRequest.getSchemaParser().getSparkFormat())) {
                previewRequest.setSchemaParser(getTextSchemaParserDescriptor());
                KyloCatalogReadRequest request2 = KyloCatalogReaderUtil.toKyloCatalogRequest(previewRequest);
                TransformResponse transformResponse = restClient.kyloCatalogTransform(process, request2);
                response = new PreviewDataSetTransformResponse(transformResponse, previewRequest.getSchemaParser());
            } else {
                throw e;
            }
        }
        return response;
    });
}
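The catch block implements a one-shot fallback: if the typed parser fails and fallback is enabled, the preview is retried once as plain text; otherwise the original exception propagates. Stripped of the Kylo types, the control flow reduces to the sketch below (all names hypothetical):

import java.util.concurrent.Callable;

public final class FallbackSketch {

    /**
     * Tries the primary parser first; on failure, retries once with the text
     * fallback when enabled, otherwise rethrows the original exception.
     */
    public static <T> T withTextFallback(Callable<T> primary, Callable<T> textFallback, boolean fallbackEnabled) throws Exception {
        try {
            return primary.call();
        } catch (Exception e) {
            if (fallbackEnabled) {
                return textFallback.call();
            }
            throw e;
        }
    }

    public static void main(String[] args) throws Exception {
        String result = withTextFallback(
            () -> { throw new IllegalStateException("typed parse failed"); },
            () -> "parsed as plain text",
            true);
        System.out.println(result);
    }
}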