use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.
the class DataSetController method getUploads.
/**
 * Lists the files that have been uploaded for a data set.
 *
 * <p>The data set is resolved with {@code findDataSet(dataSetId, true)} and its uploads are then
 * requested from the file manager. Any exception raised while listing is logged and reported to
 * the client as an internal server error with a localized message.</p>
 *
 * @param dataSetId the id of the data set, taken from the {@code id} path parameter
 * @return an OK response whose entity is the list of uploaded files
 * @throws InternalServerErrorException if the uploads cannot be listed; the original failure is attached as the cause
 */
@GET
@Path("{id}/uploads")
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation("Lists uploaded files for a data set.")
@ApiResponses({ @ApiResponse(code = 200, message = "List of uploaded files", response = DataSetFile.class, responseContainer = "List"), @ApiResponse(code = 404, message = "Data set does not exist", response = RestResponseStatus.class), @ApiResponse(code = 500, message = "Failed to list uploaded files", response = RestResponseStatus.class) })
public Response getUploads(@PathParam("id") @UUID final String dataSetId) {
    log.entry(dataSetId);

    // NOTE(review): presumably this is where a missing data set turns into the documented 404 - confirm in findDataSet.
    final DataSet dataSet = findDataSet(dataSetId, true);

    final List<DataSetFile> files;
    try {
        log.debug("Listing uploaded files for dataset {}", dataSetId);
        files = fileManager.listUploads(dataSet);
    } catch (final Exception e) {
        log.error("Unable to retrieve dataset uploads: {}", e, e);
        // Chain the original failure so that it is not lost once the REST exception propagates.
        throw new InternalServerErrorException(getMessage("catalog.dataset.getUploads.error"), e);
    }

    return log.exit(Response.ok(files).build());
}
use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.
the class DefaultCatalogFileManager method createUpload.
/**
 * Writes the contents of a stream to a new upload file for a data set.
 *
 * <p>The target path comes from {@code getUploadPath(dataSet, fileName)}. After the stream has been
 * copied, the configured owner/group and permission (when set) are applied to the new file and the
 * path is listed to obtain the resulting {@link DataSetFile}.</p>
 *
 * @param dataSet  the data set that the file is uploaded for
 * @param fileName the name of the file to create
 * @param in       the content to write to the file
 * @return the description of the newly created file
 * @throws IOException if listing the path after the upload does not yield exactly one file, or if an
 *                     I/O error propagates from the file system operations
 */
@Nonnull
@Override
public DataSetFile createUpload(@Nonnull final DataSet dataSet, @Nonnull final String fileName, @Nonnull final InputStream in) throws IOException {
    final Path uploadPath = getUploadPath(dataSet, fileName);

    final List<DataSetFile> uploaded = isolatedFunction(dataSet, uploadPath, fileSystem -> {
        // Copy the stream into a newly created file.
        // NOTE(review): the 'false' argument is presumably the overwrite flag - confirm against the file system API.
        log.debug("Creating file [{}] for dataset {}", fileName, dataSet.getId());
        try (final FSDataOutputStream output = fileSystem.create(uploadPath, false)) {
            IOUtils.copyLarge(in, output);
        }

        // Apply ownership when at least one of user or group is configured.
        final boolean ownerConfigured = !(username == null && groupname == null);
        if (ownerConfigured) {
            log.debug("Changing owner of [{}] to {}:{}", uploadPath, username, groupname);
            fileSystem.setOwner(uploadPath, username, groupname);
        }

        // Apply the permission when one is configured.
        if (permission != null) {
            log.debug("Setting permissions of [{}] to {}", uploadPath, permission);
            fileSystem.setPermission(uploadPath, permission);
        }

        return listFiles(fileSystem, uploadPath);
    });

    // Listing the uploaded path must produce exactly the one file that was just written.
    if (uploaded.size() != 1) {
        log.error("Failed to upload file for dataset {} at path: {}. Expected 1 file but found {} files.", dataSet.getId(), uploadPath, uploaded.size());
        throw new IOException("Uploaded file not found");
    }
    return uploaded.get(0);
}
use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.
the class CatalogFileManagerTest method listUploads.
/**
 * Verify that listing uploads returns every file stored in the data set folder.
 */
@Test
public void listUploads() throws IOException {
    // Fixture: file names and the content written to each one (kept in matching order)
    final String[] names = {"file1.txt", "file2.txt", "file3.txt"};
    final String[] contents = {"data1", "data2", "data3"};

    // Create the data set folder and populate it
    final DataSet dataSet = createDataSet();
    final File dataSetFolder = datasetsFolder.newFolder(dataSet.getId());
    for (int i = 0; i < names.length; ++i) {
        Files.write(contents[i], new File(dataSetFolder, names[i]), StandardCharsets.UTF_8);
    }

    // List the uploads
    final CatalogFileManager fileManager = new MockCatalogFileManager();
    final List<DataSetFile> files = fileManager.listUploads(dataSet);

    // Each file must be present with its name, path and content; nothing else may be listed
    for (int i = 0; i < names.length; ++i) {
        final String expectedPath = new Path(dataSetFolder.toPath().resolve(names[i]).toUri()).toString();
        Assert.assertThat(files, CoreMatchers.hasItem(equalTo(names[i], expectedPath, false, 5, contents[i])));
    }
    Assert.assertEquals(3, files.size());
}
use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.
the class DataSourceIT method testListFilesS3.
/**
 * Verify that buckets and files can be listed through the Amazon S3 connector.
 *
 * <p>The test is skipped when the AWS access key id or secret access key is not available.</p>
 */
@Test
public void testListFilesS3() {
    Assume.assumeNotNull(awsAccessKeyId, awsSecretAccessKey);

    // Describe an S3 data source that carries the credentials as template options
    final Connector connector = new Connector();
    connector.setId("amazon-s3");

    final DefaultDataSetTemplate template = new DefaultDataSetTemplate();
    template.setOptions(new HashMap<>());
    template.getOptions().put("spark.hadoop.fs.s3a.access.key", awsAccessKeyId);
    template.getOptions().put("spark.hadoop.fs.s3a.secret.key", awsSecretAccessKey);

    final DataSource request = new DataSource();
    request.setConnector(connector);
    request.setTemplate(template);
    request.setTitle("test list files s3");

    // Register the data source
    final DataSource dataSource = given(DataSourceController.BASE)
        .when().body(request).post()
        .then().statusCode(200)
        .extract().as(DataSource.class);
    final String dataSourceId = dataSource.getId();

    // Listing the root must include the expected bucket as a directory
    final CustomMatcher<DataSetFile> bucketMatcher = new CustomMatcher<DataSetFile>("DataSetFile name=thinkbig.greg directory=true") {
        @Override
        public boolean matches(final Object item) {
            if (!(item instanceof DataSetFile)) {
                return false;
            }
            final DataSetFile file = (DataSetFile) item;
            return Objects.equals("thinkbig.greg", file.getName())
                   && Objects.equals("s3a://thinkbig.greg/", file.getPath())
                   && file.isDirectory();
        }
    };
    final List<DataSetFile> buckets = given(DataSourceController.BASE)
        .when().pathParam("id", dataSourceId).queryParam("path", "s3a:/").get("{id}/files")
        .then().statusCode(200)
        .extract().as(DataSetFileList.class);
    Assert.assertThat(buckets, CoreMatchers.hasItem(bucketMatcher));

    // Listing the bucket must include the expected file as a non-directory
    final CustomMatcher<DataSetFile> fileMatcher = new CustomMatcher<DataSetFile>("DataSetFile name=userdata1.csv directory=false") {
        @Override
        public boolean matches(final Object item) {
            if (!(item instanceof DataSetFile)) {
                return false;
            }
            final DataSetFile file = (DataSetFile) item;
            return Objects.equals("userdata1.csv", file.getName())
                   && Objects.equals("s3a://thinkbig.greg/userdata1.csv", file.getPath())
                   && !file.isDirectory();
        }
    };
    final List<DataSetFile> files = given(DataSourceController.BASE)
        .when().pathParam("id", dataSourceId).queryParam("path", "s3a://thinkbig.greg/").get("{id}/files")
        .then().statusCode(200)
        .extract().as(DataSetFileList.class);
    Assert.assertThat(files, CoreMatchers.hasItem(fileMatcher));
}
use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.
the class DataSourceIT method testListFilesAzureNative.
/**
 * Verify that containers and files can be listed through the Azure Storage connector.
 *
 * <p>The test is skipped when the Azure account key is not available.</p>
 */
@Test
public void testListFilesAzureNative() {
    Assume.assumeNotNull(azureAccountKey);

    // Create an Azure data source
    final Connector connector = new Connector();
    connector.setId("azure-storage");

    final DefaultDataSetTemplate template = new DefaultDataSetTemplate();
    template.setOptions(Collections.singletonMap("spark.hadoop.fs.azure.account.key.kylogreg1.blob.core.windows.net", azureAccountKey));

    final DataSource request = new DataSource();
    request.setConnector(connector);
    request.setTemplate(template);
    request.setTitle("test list files wasb");

    final DataSource dataSource = given(DataSourceController.BASE)
        .when().body(request).post()
        .then().statusCode(200)
        .extract().as(DataSource.class);

    // Test listing containers: the expected container must be reported as a directory
    final List<DataSetFile> containers = given(DataSourceController.BASE)
        .when().pathParam("id", dataSource.getId()).queryParam("path", "wasb://kylogreg1.blob.core.windows.net/").get("{id}/files")
        .then().statusCode(200)
        .extract().as(DataSetFileList.class);
    Assert.assertThat(containers, CoreMatchers.hasItem(new CustomMatcher<DataSetFile>("DataSetFile name=blob123 directory=true") {
        @Override
        public boolean matches(final Object item) {
            return (item instanceof DataSetFile)
                   && Objects.equals("blob123", ((DataSetFile) item).getName())
                   && Objects.equals("wasb://blob123@kylogreg1.blob.core.windows.net/", ((DataSetFile) item).getPath())
                   && ((DataSetFile) item).isDirectory();
        }
    }));

    // Test listing files: the expected file must be reported as a non-directory
    final List<DataSetFile> files = given(DataSourceController.BASE)
        .when().pathParam("id", dataSource.getId()).queryParam("path", "wasb://blob123@kylogreg1.blob.core.windows.net/").get("{id}/files")
        .then().statusCode(200)
        .extract().as(DataSetFileList.class);
    // The description states directory=false so that a failure message agrees with what matches() checks.
    Assert.assertThat(files, CoreMatchers.hasItem(new CustomMatcher<DataSetFile>("DataSetFile name=books1.json directory=false") {
        @Override
        public boolean matches(final Object item) {
            return (item instanceof DataSetFile)
                   && Objects.equals("books1.json", ((DataSetFile) item).getName())
                   && Objects.equals("wasb://blob123@kylogreg1.blob.core.windows.net/books1.json", ((DataSetFile) item).getPath())
                   && !((DataSetFile) item).isDirectory();
        }
    }));
}
Aggregations