Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSource in project kylo by Teradata.
Class SparkShellProxyController, method preview.
@POST
@Path("/preview")
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation("Returns the dataset preview")
@ApiResponses({ @ApiResponse(code = 200, message = "Returns the status of the file-metadata job.", response = PreviewDataSetTransformResponse.class), @ApiResponse(code = 400, message = "The requested data source does not exist.", response = RestResponseStatus.class), @ApiResponse(code = 500, message = "There was a problem processing the data.", response = RestResponseStatus.class) })
public Response preview(PreviewDataSetRequest previewRequest) {
    DataSource catalogDataSource = fetchCatalogDataSource(previewRequest.getDataSource().getId());
    previewRequest.setDataSource(catalogDataSource);
    if (previewRequest.isFilePreview() && previewRequest.getSchemaParser() == null) {
        // set it to a text preview
        previewRequest.setSchemaParser(getTextSchemaParserDescriptor());
    }
    KyloCatalogReadRequest request = KyloCatalogReaderUtil.toKyloCatalogRequest(previewRequest);
    final SparkShellProcess process = getSparkShellProcess();
    return getTransformResponse(() -> {
        PreviewDataSetTransformResponse response = null;
        boolean fallbackToTextParser = previewRequest.isFallbackToTextOnError();
        try {
            TransformResponse transformResponse = restClient.kyloCatalogTransform(process, request);
            response = new PreviewDataSetTransformResponse(transformResponse, previewRequest.getSchemaParser());
        } catch (Exception e) {
            // if requested, retry the preview as plain text before giving up
            if (fallbackToTextParser && previewRequest.getSchemaParser() != null && !"text".equalsIgnoreCase(previewRequest.getSchemaParser().getSparkFormat())) {
                previewRequest.setSchemaParser(getTextSchemaParserDescriptor());
                KyloCatalogReadRequest request2 = KyloCatalogReaderUtil.toKyloCatalogRequest(previewRequest);
                TransformResponse transformResponse = restClient.kyloCatalogTransform(process, request2);
                response = new PreviewDataSetTransformResponse(transformResponse, previewRequest.getSchemaParser());
            } else {
                throw e;
            }
        }
        return response;
    });
}
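For orientation, here is a minimal client-side sketch of how this endpoint might be invoked with a JAX-RS client. The base URL, data source id, and preview path are hypothetical, and the setters are assumed to mirror the getters used in the controller above; this is not a verbatim Kylo client.

import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.Entity;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;

import com.thinkbiganalytics.kylo.catalog.rest.model.DataSource;
// plus an import for PreviewDataSetRequest from the Kylo rest-model packages

public class PreviewClientSketch {

    public static void main(String[] args) {
        // Reference an existing catalog data source by id (hypothetical id).
        DataSource dataSource = new DataSource();
        dataSource.setId("my-catalog-datasource-id");

        PreviewDataSetRequest previewRequest = new PreviewDataSetRequest();
        previewRequest.setDataSource(dataSource);
        previewRequest.setPreviewItem("/data/userdata1.csv");   // hypothetical file to preview
        previewRequest.setAddPreviewItemToPath(true);
        previewRequest.setFallbackToTextOnError(true);          // let the controller retry as plain text

        // Hypothetical base URL; only the "/preview" sub-path is shown in the controller above.
        Client client = ClientBuilder.newClient();
        Response response = client
            .target("http://localhost:8400/proxy/v1/spark/shell/preview")
            .request(MediaType.APPLICATION_JSON)
            .post(Entity.json(previewRequest));
        System.out.println("HTTP " + response.getStatus());
    }
}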
Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSource in project kylo by Teradata.
Class KyloCatalogReaderUtil, method toKyloCatalogRequest.
public static KyloCatalogReadRequest toKyloCatalogRequest(PreviewDataSetRequest previewRequest) {
    DataSource dataSource = previewRequest.getDataSource();
    Connector connector = dataSource.getConnector();
    // merge template
    DataSetTemplate dataSetTemplate = DataSourceUtil.mergeTemplates(dataSource);
    // get data out of the dataset template
    List<String> jars = dataSetTemplate.getJars();
    List<String> paths = dataSetTemplate.getPaths();
    List<String> files = dataSetTemplate.getFiles();
    String format = dataSetTemplate.getFormat();
    Map<String, String> options = dataSetTemplate.getOptions();
    if (options == null) {
        options = new HashMap<>();
    }
    // if a SchemaParser was supplied, add its options and use its format
    if (previewRequest.getSchemaParser() != null) {
        SchemaParserDescriptor schemaParser = previewRequest.getSchemaParser();
        Map<String, String> sparkOptions = schemaParser.getProperties().stream()
            .collect(Collectors.toMap(p -> p.getAdditionalProperties().stream()
                                          .filter(labelValue -> "spark.option".equalsIgnoreCase(labelValue.getLabel()))
                                          .map(labelValue -> labelValue.getValue())
                                          .findFirst()
                                          .orElse(""),
                                      p -> p.getValue()));
        // remove any options that produced an empty key (properties without a "spark.option" label)
        sparkOptions.remove("");
        // options supplied by the schema parser take precedence over the template options
        options.putAll(sparkOptions);
        format = schemaParser.getSparkFormat();
    }
    // add in additional preview options
    if (previewRequest.getProperties() != null && !previewRequest.getProperties().isEmpty()) {
        options.putAll(previewRequest.getProperties());
    }
    KyloCatalogReadRequest request = new KyloCatalogReadRequest();
    request.setFiles(files);
    request.setJars(jars);
    request.setFormat(format);
    request.setOptions(options);
    if (previewRequest.getPreviewItem() != null && previewRequest.isAddPreviewItemToPath()) {
        request.addPath(previewRequest.getPreviewItem());
    }
    PageSpec pageSpec = previewRequest.getPageSpec();
    if (pageSpec == null) {
        pageSpec = new PageSpec();
    }
    request.setPageSpec(pageSpec);
    return request;
}
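The Collectors.toMap call above is dense, so the following self-contained sketch illustrates the same "spark.option" mapping using simplified stand-in classes. These are not Kylo's rest-model types, just placeholders for illustration: properties tagged with a "spark.option" label become Spark read options, and untagged properties fall through to the empty key and are dropped.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class SparkOptionMappingSketch {

    // Stand-in for a label/value pair attached to a parser property.
    static class LabelValue {
        final String label, value;
        LabelValue(String label, String value) { this.label = label; this.value = value; }
    }

    // Stand-in for a schema parser property with its additional label/value metadata.
    static class ParserProperty {
        final String value;
        final List<LabelValue> additionalProperties;
        ParserProperty(String value, List<LabelValue> additionalProperties) {
            this.value = value;
            this.additionalProperties = additionalProperties;
        }
    }

    public static void main(String[] args) {
        List<ParserProperty> properties = Arrays.asList(
            // Tagged with "spark.option" -> becomes the Spark read option header=true.
            new ParserProperty("true", Collections.singletonList(new LabelValue("spark.option", "header"))),
            // No "spark.option" label -> maps to the empty key and is removed below.
            new ParserProperty("ignored", Collections.emptyList()));

        Map<String, String> sparkOptions = properties.stream().collect(Collectors.toMap(
            p -> p.additionalProperties.stream()
                .filter(lv -> "spark.option".equalsIgnoreCase(lv.label))
                .map(lv -> lv.value)
                .findFirst()
                .orElse(""),
            p -> p.value));
        sparkOptions.remove("");   // discard properties that had no "spark.option" label

        System.out.println(sparkOptions);   // prints {header=true}
    }
}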
Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSource in project kylo by Teradata.
Class DataSourceIT, method testListFilesS3.
/**
* Verify listing files from the Amazon S3 connector.
*/
@Test
public void testListFilesS3() {
    Assume.assumeNotNull(awsAccessKeyId, awsSecretAccessKey);
    // Create an S3 data source
    final Connector connector = new Connector();
    connector.setId("amazon-s3");
    final DefaultDataSetTemplate template = new DefaultDataSetTemplate();
    template.setOptions(new HashMap<>());
    template.getOptions().put("spark.hadoop.fs.s3a.access.key", awsAccessKeyId);
    template.getOptions().put("spark.hadoop.fs.s3a.secret.key", awsSecretAccessKey);
    final DataSource request = new DataSource();
    request.setConnector(connector);
    request.setTemplate(template);
    request.setTitle("test list files s3");
    final DataSource dataSource = given(DataSourceController.BASE)
        .when().body(request).post()
        .then().statusCode(200)
        .extract().as(DataSource.class);
    // Test listing buckets
    final List<DataSetFile> buckets = given(DataSourceController.BASE)
        .when().pathParam("id", dataSource.getId()).queryParam("path", "s3a:/").get("{id}/files")
        .then().statusCode(200)
        .extract().as(DataSetFileList.class);
    Assert.assertThat(buckets, CoreMatchers.hasItem(new CustomMatcher<DataSetFile>("DataSetFile name=thinkbig.greg directory=true") {

        @Override
        public boolean matches(final Object item) {
            return (item instanceof DataSetFile)
                   && Objects.equals("thinkbig.greg", ((DataSetFile) item).getName())
                   && Objects.equals("s3a://thinkbig.greg/", ((DataSetFile) item).getPath())
                   && ((DataSetFile) item).isDirectory();
        }
    }));
    // Test listing files
    final List<DataSetFile> files = given(DataSourceController.BASE)
        .when().pathParam("id", dataSource.getId()).queryParam("path", "s3a://thinkbig.greg/").get("{id}/files")
        .then().statusCode(200)
        .extract().as(DataSetFileList.class);
    Assert.assertThat(files, CoreMatchers.hasItem(new CustomMatcher<DataSetFile>("DataSetFile name=userdata1.csv directory=false") {

        @Override
        public boolean matches(Object item) {
            return (item instanceof DataSetFile)
                   && Objects.equals("userdata1.csv", ((DataSetFile) item).getName())
                   && Objects.equals("s3a://thinkbig.greg/userdata1.csv", ((DataSetFile) item).getPath())
                   && !((DataSetFile) item).isDirectory();
        }
    }));
}
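The two template options in this test follow Spark's spark.hadoop.* naming convention: properties with that prefix are copied into the underlying Hadoop Configuration, so they effectively supply S3A credentials for the listing calls. The sketch below shows the rough Hadoop-level equivalent; it is an illustration only, with placeholder credentials, and is not part of the Kylo test.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class S3aListingSketch {

    public static void main(String[] args) throws Exception {
        // What the spark.hadoop.fs.s3a.* options above amount to once they reach
        // the Hadoop Configuration. Credential values are placeholders.
        Configuration conf = new Configuration();
        conf.set("fs.s3a.access.key", "<awsAccessKeyId>");
        conf.set("fs.s3a.secret.key", "<awsSecretAccessKey>");

        // List the same bucket the integration test walks.
        FileSystem fs = FileSystem.get(URI.create("s3a://thinkbig.greg/"), conf);
        for (FileStatus status : fs.listStatus(new Path("/"))) {
            System.out.println(status.getPath() + (status.isDirectory() ? "/" : ""));
        }
    }
}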
Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSource in project kylo by Teradata.
Class DataSourceIT, method testListFilesAzureNative.
/**
* Verify listing files from the Azure Storage connector.
*/
@Test
public void testListFilesAzureNative() {
    Assume.assumeNotNull(azureAccountKey);
    // Create an Azure data source
    final Connector connector = new Connector();
    connector.setId("azure-storage");
    final DefaultDataSetTemplate template = new DefaultDataSetTemplate();
    template.setOptions(Collections.singletonMap("spark.hadoop.fs.azure.account.key.kylogreg1.blob.core.windows.net", azureAccountKey));
    final DataSource request = new DataSource();
    request.setConnector(connector);
    request.setTemplate(template);
    request.setTitle("test list files wasb");
    final DataSource dataSource = given(DataSourceController.BASE)
        .when().body(request).post()
        .then().statusCode(200)
        .extract().as(DataSource.class);
    // Test listing containers
    final List<DataSetFile> containers = given(DataSourceController.BASE)
        .when().pathParam("id", dataSource.getId()).queryParam("path", "wasb://kylogreg1.blob.core.windows.net/").get("{id}/files")
        .then().statusCode(200)
        .extract().as(DataSetFileList.class);
    Assert.assertThat(containers, CoreMatchers.hasItem(new CustomMatcher<DataSetFile>("DataSetFile name=blob123 directory=true") {

        @Override
        public boolean matches(final Object item) {
            return (item instanceof DataSetFile)
                   && Objects.equals("blob123", ((DataSetFile) item).getName())
                   && Objects.equals("wasb://blob123@kylogreg1.blob.core.windows.net/", ((DataSetFile) item).getPath())
                   && ((DataSetFile) item).isDirectory();
        }
    }));
    // Test listing files
    final List<DataSetFile> files = given(DataSourceController.BASE)
        .when().pathParam("id", dataSource.getId()).queryParam("path", "wasb://blob123@kylogreg1.blob.core.windows.net/").get("{id}/files")
        .then().statusCode(200)
        .extract().as(DataSetFileList.class);
    Assert.assertThat(files, CoreMatchers.hasItem(new CustomMatcher<DataSetFile>("DataSetFile name=books1.json directory=false") {

        @Override
        public boolean matches(final Object item) {
            return (item instanceof DataSetFile)
                   && Objects.equals("books1.json", ((DataSetFile) item).getName())
                   && Objects.equals("wasb://blob123@kylogreg1.blob.core.windows.net/books1.json", ((DataSetFile) item).getPath())
                   && !((DataSetFile) item).isDirectory();
        }
    }));
}
Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSource in project kylo by Teradata.
Class IntegrationTestBase, method deleteExistingDatasources.
protected void deleteExistingDatasources() {
    LOG.info("Deleting existing Datasources");
    Datasource[] datasources = getDatasources();
    for (Datasource datasource : datasources) {
        deleteDatasource(datasource.getId());
    }
    datasources = getDatasources();
    Assert.assertTrue(datasources.length == 0);
    DataSource[] jdbcDataSources = getJdbcDataSources();
    if (jdbcDataSources != null) {
        for (DataSource dataSource : jdbcDataSources) {
            deleteDataSource(dataSource.getId());
        }
    }
    jdbcDataSources = getJdbcDataSources();
    Assert.assertTrue(jdbcDataSources.length == 0);
}